summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--flags.asm49
-rw-r--r--main.asm53
2 files changed, 58 insertions, 44 deletions
diff --git a/flags.asm b/flags.asm
index 8df7c5d..0b85267 100644
--- a/flags.asm
+++ b/flags.asm
@@ -58,22 +58,26 @@ F_CLEAR MACRO
;;
;; Byte for which parity is calculated must be in \1. (d1
;; destroyed)
+;; XXX that's expensive. After making this a subroutine, to speed up parity computation, maybe you could use a 256-byte lookup table accessed by d(pc,ix.w).
+;; And if you have a spare address register, since xxx.l addressing mode is expensive speed-wise and size-wise (4 bytes + relocation),
+;; you should use lea d(pc) to preload the address of flag_valid into an address register,
+;; and then use (an) and d(an) to write to flag_valid and flag_byte.
F_PAR MACRO ; \1 = byte to test (only read); clobbers d1; writes flag_valid and flag_byte
 move.b \1,d1 ; 4 2 d1.b = \1 (bits 8-15 of d1 keep their old contents)
- lsr #4,d1 ; 6 2
+ lsr.w #4,d1 ; 14 2 old bits 8-11 shift into bits 4-7 here, but nothing from them ever reaches bit 0
 eor.b \1,d1 ; 4 2
- lsr #2,d1 ; 6 2
+ lsr.w #2,d1 ; 10 2
 eor.b \1,d1 ; 4 2
- lsr #1,d1 ; 6 2
+ lsr.w #1,d1 ; 8 2
 eor.b \1,d1 ; 4 2
 andi.b #$01,d1 ; 8 4 keep bit 0 only
;; odd parity is now in d1 (bit 0). NOTE(review): every eor re-reads \1 instead of folding d1 into itself, so bit 0 works out to \1 bits 7^3^1^0 rather than the XOR of all eight bits (e.g. \1=%00000100 gives 0) -- please verify.
- ori.b #%00000100,flag_valid ; 20 6
- andi.b #%11111011,flag_byte ; 20 6
+ ori.b #%00000100,flag_valid ; 20 8 mark bit 2 as valid
+ andi.b #%11111011,flag_byte ; 20 8 clear bit 2 before merging the new value
 rol.b #2,d1 ; 6 2 move the result from bit 0 to bit 2 (NOTE(review): 10 cycles by the 6+2n rule used for the lsr lines?)
- or.b d1,flag_byte ; 8 4
- ENDM ; 86 cycles (!)
- ; 36 bytes (make this a subroutine)
+ or.b d1,flag_byte ; 16 4 merge bit 2 into flag_byte
+ ENDM ; 118 cycles (!) -- sum of the per-line figures as listed above
+ ; 40 bytes, same basis (make this a subroutine)
;; Use this when an instruction uses the P/V bit as Overflow.
@@ -86,37 +90,40 @@ F_OVFL MACRO
F_ADD_SAVE MACRO ; \1 = source operand, \2 = destination operand (both bytes)
 move.b \1,f_tmp_src_b ; stash the source operand
 move.b \2,f_tmp_dst_b ; stash the destination operand
- movei.b #$01,f_tmp_byte
+ move.b #$01,f_tmp_byte ; f_tmp_byte = 1 (meaning of the value is defined outside this view)
 F_SET #% ; NOTE(review): the binary literal after '%' is missing, so this operand looks incomplete -- confirm the intended mask
 ENDM
;; Normalize and return carry bit in d1 bit 0 (the host Z flag is set by the final andi, i.e. Z=1 when the carry bit is 0)
;; Destroys d1. May update flag_byte and flag_valid.
f_norm_c:
- move.b flag_valid,d1
+ move.b flag_valid(pc),d1 ; d1 = validity mask
+;; XXX you could use lsr #1 (same number of cycles, smaller) + bcc.s or bcs.s here.
 andi.b #%00000001,d1 ; is bit 0 marked valid?
- bne FNC_ok ; Bit is valid
- move.b f_host_ccr,d1
+ bne.s FNC_ok ; Bit is valid
+ move.b f_host_ccr(pc),d1 ; not valid: take bit 0 from the byte stored at f_host_ccr
 andi.b #%00000001,d1
+;; XXX see above comment for using lea and then d(an) if you have a spare register.
 or.b d1,flag_byte ; NOTE(review): merges without clearing bit 0 first, so a stale 1 in flag_byte would survive -- confirm the bit is always 0 while marked invalid
 ori.b #%00000001,flag_valid ; bit 0 is valid from now on
FNC_ok:
- move.b flag_byte,d1
+ move.b flag_byte(pc),d1 ; d1 = current flag byte
 andi.b #%00000001,d1 ; isolate bit 0; this andi is what sets the returned Z flag
 rts
;; Normalize and return zero bit in d1 bit 6 (the host Z flag is set by the final andi, i.e. Z=1 when the zero bit is 0)
;; Destroys d1. May update flag_byte and flag_valid.
f_norm_z:
- move.b flag_valid,d1
+ move.b flag_valid(pc),d1 ; d1 = validity mask
 andi.b #%01000000,d1 ; is bit 6 marked valid?
- bne FNZ_ok ; Bit is valid
- move.b f_host_ccr,d1
+ bne.s FNZ_ok ; Bit is valid
+ move.b f_host_ccr(pc),d1 ; not valid: rebuild the bit from the saved host flags
- andi.b #%01000000,d1
+ andi.b #%00000100,d1 ; the 68000 Z flag is CCR bit 2 (bit 6 of a CCR image is always 0); assumes f_host_ccr is a raw CCR image, as the 5-bit mask in flags_normalize suggests
+ lsl.b #4,d1 ; move it from bit 2 up to bit 6, where flag_byte keeps the zero bit
+;; XXX see above comment for using lea and then d(an) if you have a spare register.
 or.b d1,flag_byte ; NOTE(review): merges without clearing bit 6 first -- confirm the bit is always 0 while marked invalid
 ori.b #%01000000,flag_valid ; bit 6 is valid from now on
FNZ_ok:
- move.b flag_byte,d1
+ move.b flag_byte(pc),d1 ; d1 = current flag byte
 andi.b #%01000000,d1 ; isolate bit 6; this andi is what sets the returned Z flag
 rts
@@ -125,11 +132,11 @@ FNZ_ok:
;; Preconditions:
;; Flags to change are noted in d0 by a 1 bit (the code below does not read d0 yet; see the XXX before rts)
flags_normalize:
- move.b f_host_ccr,d1
- andi.b #%00011111,d1 ; Maybe TI uses the reserved bits for
+ move.b f_host_ccr(pc),d1 ; d1.b = byte stored at f_host_ccr
+;; XXX .w because you don't want garbage in bits 8-15 when using d(pc,ix.w) or d(an,ix.w) ea mode.
+ andi.w #%00011111,d1 ; Maybe TI uses the reserved bits for
 ; something ...
- movea lut_ccr(pc),a1
- move.b 0(a1,d1),d1
+ move.b lut_ccr(pc,d1.w),d1 ; d1 = lut_ccr[d1], index 0..31; this addressing mode only has an 8-bit displacement, so lut_ccr must sit close to this instruction -- verify
;; XXX do this
 rts
diff --git a/main.asm b/main.asm
index 66959db..e6febbb 100644
--- a/main.asm
+++ b/main.asm
@@ -62,8 +62,8 @@ PUTB MACRO ; 14 cycles, 4 bytes
FETCHW MACRO ; \1 = index register into a6, \2 = destination for the 16-bit result (a data register is expected)
;; XXX call deref
- move.b 1(a6,\1.w),\2 ; 14/4
- ror.w #8,\2 ; 4/2
+ move.b 1(a6,\1.w),-(sp); 18/4 byte at index+1 lands in the high byte of a fresh stack word (sp moves by 2 on byte pushes)
+ move.w (sp)+,\2 ; 8/2 pop it: bits 15-8 of \2 = that byte, bits 7-0 = leftover stack contents
 move.b 0(a6,\1.w),\2 ; 14/4 byte at index+0 overwrites bits 7-0
 ENDM
@@ -128,6 +128,7 @@ FETCHWI MACRO ; 36 cycles, 12 bytes
;; XXX use deref
 addq.w #2,d2 ; 4/2 step d2 past the two bytes about to be read
 move.b -1(a6,d2.w),\1 ; 14/4 second byte -> bits 7-0 of \1
+;; XXX the -(sp)/(sp)+ trick used in FETCHW would be cheaper than the rotate below.
- rol.w #8,d2 ; 4/2
+ rol.w #8,\1 ; 22/2 move that byte up to bits 15-8 of \1; d2 must stay intact because the next load indexes with it
 move.b -2(a6,d2.w),\1 ; 14/4 first byte -> bits 7-0 of \1
 ENDM
@@ -143,13 +144,13 @@ _align SET _align+$20
ENDM
;; When you want to use the high reg of a pair, use this first
-LOHI MACRO ; 6 cycles, 2 bytes
- ror #8,\1
+LOHI MACRO ; 22 cycles, 2 bytes
+ ror.w #8,\1 ; rotating a word by 8 swaps its two bytes: the high byte of \1 becomes byte-addressable
 ENDM
;; Then do your shit and finish with this
-HILO MACRO ; 6 cycles, 2 bytes
- rol #8,\1
+HILO MACRO ; 22 cycles, 2 bytes
+ rol.w #8,\1 ; swap the two bytes of \1 back, undoing LOHI
 ENDM
;; calc84maniac suggests putting emu_fetch into this in order
@@ -163,6 +164,7 @@ DONE MACRO ; 8 cycles, 2 bytes
;; Do a SUB \2,\1
F_SUB_B MACRO ;14 bytes?
+;; XXX use lea and then d(an) if you have a spare register.
move.b \1,f_tmp_src_b ; preserve operands for flagging
move.b \2,f_tmp_dst_b
move.b #1,flag_n
@@ -207,6 +209,7 @@ F_DEC_W MACRO
_main:
+;; XXX in the current state of the code, you could just make _main and emu_setup point to the same address.
 bsr emu_setup ; run emu_setup
 rts ; nothing else happens yet: return to whoever called _main
@@ -214,7 +217,7 @@ _main:
emu_setup:
- movea emu_plain_op,a5
+ lea emu_plain_op(pc),a5 ; a5 = ADDRESS of emu_plain_op (emu_fetch jumps relative to a5); movea would load a word stored there instead. Assumes emu_plain_op is within 16-bit pc-relative reach.
- movea emu_fetch(pc),a2
+ lea emu_fetch(pc),a2 ; a2 = address of emu_fetch
;; XXX finish
 rts
@@ -222,26 +225,30 @@ emu_setup:
;; Take a virtual address in d1 and dereference it. Returns the
;; host address in a0. Destroys a0, d0.
+;; XXX I added a masking of the upper bits of the Z80 address (d1) before translating them to host address.
+;; Please double-check, but AFAICT, it's the right thing to do.
+;; d1 bits 15-14 select one of the base registers a1..a4; bits 13-0 are the offset added to that base.
+;; Every table entry below is adda.l + rts = 4 bytes, so the selector is scaled by 4 and the jump is taken relative to deref_tbl.
deref:
 move.w d1,d0
+ andi.w #$3FFF,d0 ; d0 = offset inside the 16K region
+ movea.w d0,a0 ; a0 = offset (at most $3FFF, so the sign extension of movea.w is harmless)
+ move.w d1,d0
 andi.w #$C000,d0 ; d0 = region selector in bits 15-14
- rol.w #5,d0
+ rol.w #4,d0 ; bits 15-14 -> bits 3-2: d0 = selector * 4 = byte offset of the table entry
- jmp 0(pc,d0)
+ jmp deref_tbl(pc,d0.w) ; a label lets the assembler compute the displacement to the table itself
+deref_tbl:
;; 00
- movea a1,a0
- bra deref_go
+ adda.l a1,a0 ; a0 = a1 + offset
+ rts
;; 01
- movea a2,a0
- bra deref_go
+ adda.l a2,a0 ; a0 = a2 + offset
+ rts
;; 02
- movea a3,a0
- bra deref_go
+ adda.l a3,a0 ; a0 = a3 + offset
+ rts
;; 03
- movea a4,a0
-deref_go:
- adda d1,a0
+ adda.l a4,a0 ; a0 = a4 + offset
 rts
+
;; =========================================================================
;; instruction instruction instruction ================================
;; _ _ _ _ ================================
@@ -257,12 +264,12 @@ emu_fetch:
;; Move this into DONE, saving 8 more cycles but using extra
;; space.
;;
- ;; See if I can get rid of the eor
- eor.w d0,d0 ; 4 cycles
- move.b (a4)+,d0 ; 8 cycles
- rol.w #5,d0 ; 4 cycles adjust to actual alignment
- jmp 0(a5,d0) ;14 cycles
- ;; overhead: 30 cycles
+ ;; Likely impossible to get rid of the clr
+ clr.w d0 ; 4 cycles clr takes a single operand; zero bits 15-8 so only the fetched byte feeds the index
+ move.b (a4)+,d0 ; 8 cycles fetch the next byte through a4 and advance a4
+ rol.w #5,d0 ; 16 cycles adjust to actual alignment (d0 = byte * 32)
+ jmp 0(a5,d0.w) ; 14 cycles dispatch to the 32-byte slot at a5 + d0
+ ;; overhead: 42 cycles
;;; ========================================================================
;;; ========================================================================