From aff789af91b14462ae9e6df79720b1f71e4947ca Mon Sep 17 00:00:00 2001
From: Duncan Smith
Date: Sat, 12 Jun 2010 08:32:53 -0700
Subject: Patch received in email from Lionel Debroux

---
 flags.asm | 49 ++++++++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 21 deletions(-)

(limited to 'flags.asm')

diff --git a/flags.asm b/flags.asm
index 8df7c5d..0b85267 100644
--- a/flags.asm
+++ b/flags.asm
@@ -58,22 +58,26 @@ F_CLEAR	MACRO
 	;;
 	;; Byte for which parity is calculated must be in \1. (d1
 	;; destroyed)
+;; XXX This macro is expensive. Once it is a subroutine, parity could be computed faster with a 256-byte lookup table indexed via d(pc,ix.w).
+;; Absolute-long (xxx.l) addressing is costly in both speed and size (4 extra bytes plus a relocation), so if an address register is spare,
+;; preload the address of flag_valid into it with lea d(pc),an
+;; and then write flag_valid and flag_byte through (an) and d(an). NOTE(review): each eor.b below re-reads \1 rather than the pre-shift d1, so bit 0 appears to end up as b7^b3^b1^b0 instead of the parity of all eight bits -- verify.
 F_PAR	MACRO
 	move.b	\1,d1			;  4  2
-	lsr	#4,d1			;  6  2
+	lsr.w	#4,d1			; 14  2
 	eor.b	\1,d1			;  4  2
-	lsr	#2,d1			;  6  2
+	lsr.w	#2,d1			; 10  2
 	eor.b	\1,d1			;  4  2
-	lsr	#1,d1			;  6  2
+	lsr.w	#1,d1			;  8  2
 	eor.b	\1,d1			;  4  2
 	andi.b	#$01,d1			;  8  4
 	;; odd parity is now in d1
-	ori.b	#%00000100,flag_valid	; 20  6
-	andi.b	#%11111011,flag_byte	; 20  6
+	ori.b	#%00000100,flag_valid	; 20  8
+	andi.b	#%11111011,flag_byte	; 20  8
 	rol.b	#2,d1			;  6  2
-	or.b	d1,flag_byte		;  8  4
-	ENDM				; 86 cycles (!)
-					;    36 bytes (make this a subroutine)
+	or.b	d1,flag_byte		; 16  4
+	ENDM				;118 cycles (!) (sum of the per-line figures above)
+					;    40 bytes (make this a subroutine)
 
 
 	;; Use this when an instruction uses the P/V bit as Overflow.
@@ -86,37 +90,40 @@ F_OVFL	MACRO
 F_ADD_SAVE	MACRO
 	move.b	\1,f_tmp_src_b
 	move.b	\2,f_tmp_dst_b
-	movei.b	#$01,f_tmp_byte
+	move.b	#$01,f_tmp_byte
 	F_SET	#%
 	ENDM
 
 	;; Normalize and return carry bit (is loaded into Z bit)
 	;; Destroys d1
 f_norm_c:
-	move.b	flag_valid,d1
+	move.b	flag_valid(pc),d1
+;; XXX lsr #1 on d1 followed by bcc.s or bcs.s could replace the andi/bne pair here (same number of cycles, smaller code).
 	andi.b	#%00000001,d1
-	bne	FNC_ok		; Bit is valid
-	move.b	f_host_ccr,d1
+	bne.s	FNC_ok		; Bit is valid
+	move.b	f_host_ccr(pc),d1
 	andi.b	#%00000001,d1
+;; XXX As suggested in the comment on F_PAR: with a spare address register, lea flag_valid(pc),an and then (an)/d(an) would avoid the absolute-address writes below.
 	or.b	d1,flag_byte
 	ori.b	#%00000001,flag_valid
 FNC_ok:
-	move.b	flag_byte,d1
+	move.b	flag_byte(pc),d1
 	andi.b	#%00000001,d1
 	rts
 
 	;; Normalize and return zero bit (loaded into Z bit)
 	;; Destroys d1
 f_norm_z:
-	move.b	flag_valid,d1
+	move.b	flag_valid(pc),d1
 	andi.b	#%01000000,d1
-	bne	FNZ_ok		; Bit is valid
-	move.b	f_host_ccr,d1
+	bne.s	FNZ_ok		; Bit is valid
+	move.b	f_host_ccr(pc),d1
 	andi.b	#%01000000,d1
+;; XXX As suggested in the comment on F_PAR: with a spare address register, lea flag_valid(pc),an and then (an)/d(an) would avoid the absolute-address writes below.
 	or.b	d1,flag_byte
 	ori.b	#%01000000,flag_valid
 FNZ_ok:
-	move.b	flag_byte,d1
+	move.b	flag_byte(pc),d1
 	andi.b	#%01000000,d1
 	rts
 
@@ -125,11 +132,11 @@ FNZ_ok:
 	;; Preconditions:
 	;;   Flags to change are noted in d0 by a 1 bit
 flags_normalize:
-	move.b	f_host_ccr,d1
-	andi.b	#%00011111,d1	; Maybe TI uses the reserved bits for
+	move.b	f_host_ccr(pc),d1
+;; XXX andi.w rather than andi.b: bits 8-15 of d1 must not hold garbage, because the d(pc,ix.w) and d(an,ix.w) addressing modes use the whole index word.
+	andi.w	#%00011111,d1	; Maybe TI uses the reserved bits for
 				; something ...
-	movea	lut_ccr(pc),a1
-	move.b	0(a1,d1),d1
+	move.b	lut_ccr(pc,d1.w),d1
 	;; XXX do this
 	rts
 
-- 
cgit v1.2.3