;$Header: /home/dashley/cvsrep/e3ft_gpl01/e3ft_gpl01/dtaipubs/cron/2007/cpu08div16by16a/div16e16x16.s,v 1.4 2007/05/26 17:48:46 dashley Exp $:
;------------------------------------------------------------------------------
xdef c_udiv, c_umod
xref.b c_reg
.dcall "9,0,c_umod"
.dcall "7,0,c_udiv"
;
;Function Entry Conditions:
; NH in C_REG (a zero-page, i.e. direct addressing variable)
; NL in A
; DH in H
; DL in X
;
;Function Exit Conditions
; QH in C_REG
; QL in A
; RH in H
; RL in X
; Stack pointer (naturally) restored.
; No restrictions on condition codes (can be arbitrarily changed).
; No restrictions on other CPU registers (can be arbitrarily changed).
;
c_udiv:
psha ; save NL
pshh ; DH on the stack
tst 1,sp ; test zero
bne full ; full division
;
;Short divisor division.
;Divisor <= 255 (and may also be zero).
;
;Note to MJB: your staff did a better job on this than
;me. I had 48 clocks, this path seems to be 45.
;
pulh ; clean stack
lda c_reg ; NH in A
clrh ; 1st divide 8 X 8
div ; divide (H:A)/(X) -> A with remainder H
sta c_reg ; QH in place
pula ; complete dividend with NL
div ; divide
pshh ; move RL
pulx ; in X
clrh ; RH is zero
rts ; and return
;
;Long Divisor Division
;
;Mathematical Assertions
; Divisor >= 256 (and is definitely not zero).
; Numerator can be any value.
; Quotient <= 255.
; Remainder may be two bytes.
;
;Entry Conditions
; Stack frame already grown by 2 (and could use 1,SP and 2,SP
; for intermediate results).
; NH in C_REG (a zero-page, i.e. direct addressing variable)
; NL in A and 2,SP
; DH in H and 1,SP
; DL in X
;
full:
pshx ; save DL
psha ; start with
lda c_reg ; dividend
psha
;
;Stack condition
; 1,SP NH
; 2,SP NL
; 3,SP DL
; 4,SP DH
; 5,SP NL
;Due to redundancy, can probably re-use 5,SP for
;something else.
;
lda 4,sp ; DH
;
;Shift the numerator and denominator right until the denominator
;just fits in one byte. At least one shift will be required,
;because the denominator is known to be >= 256, hence the
;do-while() rather than while() loop.
;
sbcl:
lsr 1,sp
ror 2,sp
lsra
rorx
tsta
bne sbcl
;
;DTA note: the loop above looks excellent. No real fat. The only
;optimizations that seems possible are (a) to use c_reg to shift one byte
;of the numerator in so that a 4-cycle rather than a 5-cycle instruction
;is used, or (b) to use X rather than SP so that have 2 4-cycle rather than
;2 5-cycle instructions.
;
;Do the initial digit estimation division. Knuth calls out the possibility of
;an overflow, and that an overflow should be replaced by a quotient of 255.
;Not sure if an overflow is possible. 0 <= N <= 65535, and 256 <= D <= 65535.
;Assuming max N and min D after one shift gives 32767/128 = 255, still safe.
;Need to be more rigorous about the math to be sure that overflow not possible
;on the division. Note also that Knuth's algorithm calls for left-shifting to
;normalize and for the creation of an extra digit (in our case, extra byte).
;Because we have bounds on the divisor going in, right-shifting gives the same
;effect for us. There may be something about the setup that changes Knuth's
;assumptions that overflow is possible. Will need to revisit this and prove
;it rigorously.
;
txa
pulh
pulx
div
;
;Let:
; N be the numerator in to the function.
; D be the denominator in to the function.
; Q be the actual quotient.
; R be the actual remainder.
; Z(0) be the first trial quotient we calculated.
; Z(-1) be the trial quotient reduced by one.
; Z(-2) be the trial quotient reduced by two.
; Z() be the trial quotient we are working with at some
; point in time.
;
;The original MJB approach was to reconstruct Z * D, then
;subtract D up to twice to get Z(-1) * D and Z(-2) * D. That seemed to
;bloat the code. Let's try instead (to save FLASH) to loop using
;multiplication until N - (Z() * D) < D, or equivalently
;that Z() * D > N - D. We've gotta use the first expression, however,
;because the second would require either an exception case or 2's
;complement arithmetic and a third byte if N < D.
;
;
; +-------------------------------------+
; | Entry Condition: have Z(0) |
; | calculated. |
; +-------------------------------------+
; |
; |
; \|/
; +-------------------------------------+/
; | Calculate Z() * D |------+
; +-------------------------------------+\ |
; | |
; | |
; \|/ |
; Y +-------------------------------------+ |
; +-----| Z() * D > 65535 ? | |
; | +-------------------------------------+ |
; | | N |
; | | |
; | \|/ |
; | +-------------------------------------+ |
; | | Calculate N - Z() * D | |
; | +-------------------------------------+ |
; | | |
; | | |
; | \|/ |
; | +-------------------------------------+ |
; | | N - Z() * D < D ? |------------+
; | +-------------------------------------+ | |
; | | | |
; | | | |
; | \|/ | |
; | \+-------------------------------------+ | |
; +-----| Z() -- |------+ |
; /+-------------------------------------+ |
; |
; +--------------------------------+
; |
; \|/
; Done
;
;Stack Frame Setup
; Keeping track of the stack frame is a lot of work before everything is
; refined, so will just use the instructions and pretend the stack frame
; has been enlarged via AIS. (Note that I've swapped DH and DL below--they
; seemed to be out of order from before.)
;
; Imaginary Stack Setup
; 1,SP NH
; 2,SP NL
; 3,SP DH
; 4,SP DL
; 5,SP Z(0)
; 6,SP Z() * D, High Byte
; 7,SP Z() * D, Low Byte
; 8,SP N - Z() * D, High Byte
; 9,SP N - Z() * D, Low Byte
;
;
sta 5,sp ;Save Z(0).
;
nextz:
;-------------------
;Calculate Z() * D
;-------------------
ldx 4,sp
mul ;Lower product in X:A.
sta 7,sp
stx 8,sp
lda 5,sp
ldx 3,sp
mul ;Upper product in X:A.
tstx
bne p2big ;If the MSB of the upper product (byte 3) isn't zero,
;this automatically implies that Z() is too big. It may
;be possible to shave out this test ... requires more
;analysis.
add 6,sp
bcs p2big ;If there is a carry out of adding the MSB in, this is
;also an automatic cue that Z() is too big. It may
;be possible to shave out this test ... requires more
;analysis.
sta 6,sp ;Put result back. (6,sp):(7,sp) now contains product,
;and it definitely fits in 16 bits.
;
;-----------------------
;Calculate N - Z() * D
;-----------------------
lda 2,sp
sub 7,sp
sta 9,sp
lda 1,sp
sub 6,sp
sta 8,sp
bcs p2big ;If N-Z()*D is negative, Z() is too big.
;-------------------------------
;Compare N - Z() * D against D
;-------------------------------
;Branches below are imaginary -- too lazy to look up the
;instructions right now.
;-------------------------------
lda 3,sp
cmp 8,sp
bcc zwrong ;Need to look up right branches.
lda 4,sp
cmp 9,sp
bcc zwrong ;Need to look up right branches.
bra zright
;-------------------------------
;Z() --
;-------------------------------
p2big:
zwrong:
dec 5,sp
lda 5,sp
bra nextz
;At this point, the remainder is definitely is (8,sp):(9,sp), and
;the quotient is in 5,sp. Set up and return.
zright:
lda 8,sp
psha
pulh ;MSB of remainder in H.
ldx 9,sp ;LSB of remainder in X.
clr c_reg ;MSB of quotient zero.
lda 5,sp ;LSB of quotient in A.
ais #8 ;Adjust stack. Imaginary offset.
rts ;And return
;
end
;------------------------------------------------------------------------------
;$Log: div16e16x16.s,v $
;Revision 1.4 2007/05/26 17:48:46 dashley
;Preliminary alternate solution in place.
;
;Revision 1.3 2007/05/26 16:13:10 dashley
;Safety checkin after substantial edits.
;
;Revision 1.2 2007/05/25 23:34:31 dashley
;Proposed Knuth solution integrated into version-control system.