/[dtapublic]/pubs/papers/20070526_cpu08_divu16_by_u16/div16e16x16.s
ViewVC logotype

Annotation of /pubs/papers/20070526_cpu08_divu16_by_u16/div16e16x16.s

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10 - (hide annotations) (download)
Fri Oct 7 03:23:35 2016 UTC (7 years, 7 months ago) by dashley
File size: 11041 byte(s)
Commit of u16 = u16 / u16 analysis for the CPU08.
1 dashley 10 ;$Header: /home/dashley/cvsrep/e3ft_gpl01/e3ft_gpl01/dtaipubs/cron/2007/cpu08div16by16a/div16e16x16.s,v 1.4 2007/05/26 17:48:46 dashley Exp $:
2     ;------------------------------------------------------------------------------
3     xdef c_udiv, c_umod
4     xref.b c_reg
5     .dcall "9,0,c_umod"
6     .dcall "7,0,c_udiv"
7     ;
8     ;Function Entry Conditions:
9     ; NH in C_REG (a zero-page, i.e. direct addressing variable)
10     ; NL in A
11     ; DH in H
12     ; DL in X
13     ;
14     ;Function Exit Conditions
15     ; QH in C_REG
16     ; QL in A
17     ; RH in H
18     ; RL in X
19     ; Stack pointer (naturally) restored.
20     ; No restrictions on condition codes (can be arbitrarily changed).
21     ; No restrictions on other CPU registers (can be arbitrarily changed).
22     ;
23     c_udiv:
24     psha ; save NL
25     pshh ; DH on the stack
26     tst 1,sp ; test zero
27     bne full ; full division
28     ;
29     ;Short divisor division.
30     ;Divisor <= 255 (and may also be zero).
31     ;
32     ;Note to MJB: your staff did a better job on this than
33     ;me. I had 48 clocks, this path seems to be 45.
34     ;
35     pulh ; clean stack
36     lda c_reg ; NH in A
37     clrh ; 1st divide 8 X 8
38     div ; divide (H:A)/(X) -> A with remainder H
39     sta c_reg ; QH in place
40     pula ; complete dividend with NL
41     div ; divide
42     pshh ; move RL
43     pulx ; in X
44     clrh ; RH is zero
45     rts ; and return
46     ;
47     ;Long Divisor Division
48     ;
49     ;Mathematical Assertions
50     ; Divisor >= 256 (and is definitely not zero).
51     ; Numerator can be any value.
52     ; Quotient <= 255.
53     ; Remainder may be two bytes.
54     ;
55     ;Entry Conditions
56     ; Stack frame already grown by 2 (and could use 1,SP and 2,SP
57     ; for intermediate results).
58     ; NH in C_REG (a zero-page, i.e. direct addressing variable)
59     ; NL in A and 2,SP
60     ; DH in H and 1,SP
61     ; DL in X
62     ;
63     full:
64     pshx ; save DL
65     psha ; start with
66     lda c_reg ; dividend
67     psha
68     ;
69     ;Stack condition
70     ; 1,SP NH
71     ; 2,SP NL
72     ; 3,SP DL
73     ; 4,SP DH
74     ; 5,SP NL
75     ;Due to redundancy, can probably re-use 5,SP for
76     ;something else.
77     ;
78     lda 4,sp ; DH
79     ;
80     ;Shift the numerator and denominator right until the denominator
81     ;just fits in one byte. At least one shift will be required,
82     ;because the denominator is known to be >= 256, hence the
83     ;do-while() rather than while() loop.
84     ;
85     sbcl:
86     lsr 1,sp
87     ror 2,sp
88     lsra
89     rorx
90     tsta
91     bne sbcl
92     ;
93     ;DTA note: the loop above looks excellent. No real fat. The only
94     ;optimizations that seems possible are (a) to use c_reg to shift one byte
95     ;of the numerator in so that a 4-cycle rather than a 5-cycle instruction
96     ;is used, or (b) to use X rather than SP so that have 2 4-cycle rather than
97     ;2 5-cycle instructions.
98     ;
99     ;Do the initial digit estimation division. Knuth calls out the possibility of
100     ;an overflow, and that an overflow should be replaced by a quotient of 255.
101     ;Not sure if an overflow is possible. 0 <= N <= 65535, and 256 <= D <= 65535.
102     ;Assuming max N and min D after one shift gives 32767/128 = 255, still safe.
103     ;Need to be more rigorous about the math to be sure that overflow not possible
104     ;on the division. Note also that Knuth's algorithm calls for left-shifting to
105     ;normalize and for the creation of an extra digit (in our case, extra byte).
106     ;Because we have bounds on the divisor going in, right-shifting gives the same
107     ;effect for us. There may be something about the setup that changes Knuth's
108     ;assumptions that overflow is possible. Will need to revisit this and prove
109     ;it rigorously.
110     ;
111     txa
112     pulh
113     pulx
114     div
115     ;
116     ;Let:
117     ; N be the numerator in to the function.
118     ; D be the denominator in to the function.
119     ; Q be the actual quotient.
120     ; R be the actual remainder.
121     ; Z(0) be the first trial quotient we calculated.
122     ; Z(-1) be the trial quotient reduced by one.
123     ; Z(-2) be the trial quotient reduced by two.
124     ; Z() be the trial quotient we are working with at some
125     ; point in time.
126     ;
127     ;The original MJB approach was to reconstruct Z * D, then
128     ;subtract D up to twice to get Z(-1) * D and Z(-2) * D. That seemed to
129     ;bloat the code. Let's try instead (to save FLASH) to loop using
130     ;multiplication until N - (Z() * D) < D, or equivalently
131     ;that Z() * D > N - D. We've gotta use the first expression, however,
132     ;because the second would require either an exception case or 2's
133     ;complement arithmetic and a third byte if N < D.
134     ;
135     ;
136     ; +-------------------------------------+
137     ; | Entry Condition: have Z(0) |
138     ; | calculated. |
139     ; +-------------------------------------+
140     ; |
141     ; |
142     ; \|/
143     ; +-------------------------------------+/
144     ; | Calculate Z() * D |------+
145     ; +-------------------------------------+\ |
146     ; | |
147     ; | |
148     ; \|/ |
149     ; Y +-------------------------------------+ |
150     ; +-----| Z() * D > 65535 ? | |
151     ; | +-------------------------------------+ |
152     ; | | N |
153     ; | | |
154     ; | \|/ |
155     ; | +-------------------------------------+ |
156     ; | | Calculate N - Z() * D | |
157     ; | +-------------------------------------+ |
158     ; | | |
159     ; | | |
160     ; | \|/ |
161     ; | +-------------------------------------+ |
162     ; | | N - Z() * D < D ? |------------+
163     ; | +-------------------------------------+ | |
164     ; | | | |
165     ; | | | |
166     ; | \|/ | |
167     ; | \+-------------------------------------+ | |
168     ; +-----| Z() -- |------+ |
169     ; /+-------------------------------------+ |
170     ; |
171     ; +--------------------------------+
172     ; |
173     ; \|/
174     ; Done
175     ;
176     ;Stack Frame Setup
177     ; Keeping track of the stack frame is a lot of work before everything is
178     ; refined, so will just use the instructions and pretend the stack frame
179     ; has been enlarged via AIS. (Note that I've swapped DH and DL below--they
180     ; seemed to be out of order from before.)
181     ;
182     ; Imaginary Stack Setup
183     ; 1,SP NH
184     ; 2,SP NL
185     ; 3,SP DH
186     ; 4,SP DL
187     ; 5,SP Z(0)
188     ; 6,SP Z() * D, High Byte
189     ; 7,SP Z() * D, Low Byte
190     ; 8,SP N - Z() * D, High Byte
191     ; 9,SP N - Z() * D, Low Byte
192     ;
193     ;
194     sta 5,sp ;Save Z(0).
195     ;
196     nextz:
197     ;-------------------
198     ;Calculate Z() * D
199     ;-------------------
200     ldx 4,sp
201     mul ;Lower product in X:A.
202     sta 7,sp
203     stx 8,sp
204     lda 5,sp
205     ldx 3,sp
206     mul ;Upper product in X:A.
207     tstx
208     bne p2big ;If the MSB of the upper product (byte 3) isn't zero,
209     ;this automatically implies that Z() is too big. It may
210     ;be possible to shave out this test ... requires more
211     ;analysis.
212     add 6,sp
213     bcs p2big ;If there is a carry out of adding the MSB in, this is
214     ;also an automatic cue that Z() is too big. It may
215     ;be possible to shave out this test ... requires more
216     ;analysis.
217     sta 6,sp ;Put result back. (6,sp):(7,sp) now contains product,
218     ;and it definitely fits in 16 bits.
219     ;
220     ;-----------------------
221     ;Calculate N - Z() * D
222     ;-----------------------
223     lda 2,sp
224     sub 7,sp
225     sta 9,sp
226     lda 1,sp
227     sub 6,sp
228     sta 8,sp
229     bcs p2big ;If N-Z()*D is negative, Z() is too big.
230    
231     ;-------------------------------
232     ;Compare N - Z() * D against D
233     ;-------------------------------
234     ;Branches below are imaginary -- too lazy to look up the
235     ;instructions right now.
236     ;-------------------------------
237     lda 3,sp
238     cmp 8,sp
239     bcc zwrong ;Need to look up right branches.
240     lda 4,sp
241     cmp 9,sp
242     bcc zwrong ;Need to look up right branches.
243     bra zright
244    
245     ;-------------------------------
246     ;Z() --
247     ;-------------------------------
248     p2big:
249     zwrong:
250     dec 5,sp
251     lda 5,sp
252     bra nextz
253    
254     ;At this point, the remainder is definitely is (8,sp):(9,sp), and
255     ;the quotient is in 5,sp. Set up and return.
256     zright:
257     lda 8,sp
258     psha
259     pulh ;MSB of remainder in H.
260     ldx 9,sp ;LSB of remainder in X.
261    
262     clr c_reg ;MSB of quotient zero.
263     lda 5,sp ;LSB of quotient in A.
264    
265     ais #8 ;Adjust stack. Imaginary offset.
266    
267     rts ;And return
268     ;
269     end
270     ;------------------------------------------------------------------------------
271     ;$Log: div16e16x16.s,v $
272     ;Revision 1.4 2007/05/26 17:48:46 dashley
273     ;Preliminary alternate solution in place.
274     ;
275     ;Revision 1.3 2007/05/26 16:13:10 dashley
276     ;Safety checkin after substantial edits.
277     ;
278     ;Revision 1.2 2007/05/25 23:34:31 dashley
279     ;Proposed Knuth solution integrated into version-control system.

dashley@gmail.com
ViewVC Help
Powered by ViewVC 1.1.25