;-------------------------------------------------------------------------------
;$Header: /home/dashley/cvsrep/uculib01/uculib01/src/stm8/cosmic/modx0/btu32rotleftninplacerxn/src/btu32rotleftninplacerxn.s,v 1.6 2010/05/14 18:18:50 dashley Exp $
;-------------------------------------------------------------------------------
;Copyright (c)2010 David T. Ashley
;
;Permission is hereby granted, free of charge, to any person obtaining a copy
;of this software source code and associated documentation files (the
;"Software"), to deal in the Software without restriction, including without
;limitation the rights to use, copy, modify, merge, publish, distribute,
;sublicense, and/or sell copies of the Software, and to permit persons to whom
;the Software is furnished to do so, subject to the following conditions:
;
;The above copyright notice and this permission notice shall be included in
;all copies or substantial portions of the Software.
;
;THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
;OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
;THE SOFTWARE.
;-------------------------------------------------------------------------------
;Memory-model guard.
;
;Nothing in this module uses static storage, so the routine itself is usable
;under either the mods0 or the modsl0 memory model.  It is, however, written
;for call/ret (not callf/retf), and that choice determines the instructions
;used, the stack offsets, and the numerical values in the .dcall directive.
;Assembly is therefore refused unless UCU_BD_MMBP is supplied on the command
;line and is equal to 1.
#ifndef UCU_BD_MMBP
#error "Program memory model define not provided on command line (UCU_BD_MMBP)."
#endif
#if (UCU_BD_MMBP != 1)
#error "Attempt to assemble module for wrong program memory model (UCU_BD_MMBP != 1)."
#endif
;
;Export the entry point and place the code in the .text section.
        xdef    _UcuBtU32RotLeftNInPlaceRxn
        switch  .text
;
;Meaning of the two integers in .dcall, per discussion with Cosmic:
;   first  : stack space used by the call instruction plus any automatic
;            storage used by the function.
;   second : number of bytes stacked by the caller.
;
;Still open (to be investigated further with Cosmic): the case of one
;assembly-language function calling another has not been discussed.  Some
;assembly-language functions have been seen with a first integer larger than
;the stack they actually use, so there may be a special convention when C
;calls .S which then calls .S, as the tools may not detect the .S-to-.S call.
;
;For this function: 2 bytes pushed by the call + 6 bytes stacked locally = 8,
;and 1 byte (N) stacked by the caller.
        .dcall  "8,1,_UcuBtU32RotLeftNInPlaceRxn"
;
_UcuBtU32RotLeftNInPlaceRxn:
;Rolls (rotates) the 32-bit value at *TGT left by N bit positions, in place.
;
;Inputs:
;   X      : TGT, the address of the UCU_UINT32 to roll.  The byte at TGT is
;            handled as the most significant byte and the byte at TGT+3 as
;            the least significant byte.
;   (3,SP) : N, the number of bits to roll; one byte stacked by the caller.
;            Only N mod 32 is used.
;Outputs:
;   *TGT   : the rolled value.
;Side effects:
;   A, X, Y and the condition flags are overwritten.  The caller's stacked N
;   doubles as the working counter and is zero when the routine returns.
;   Only the 6 bytes stacked here are released; the caller's byte is left
;   on the stack.
;
;NOTE(review): presumably the C-side prototype is
;   void UcuBtU32RotLeftNInPlaceRxn(UCU_UINT32 *tgt, <8-bit unsigned> n);
;confirm against the project header.
;
;Method: the count is consumed in three stages -- 16 bits by exchanging the
;two words, 8 bits by shuffling bytes between registers, and the remaining
;0..7 bits one position at a time.
;----------
;Stack frame offsets.  These are valid only after the two prologue
;instructions below have executed.
RL_B0:          equ     1       ;Working copy of *TGT, MSB.
RL_B1:          equ     2       ;Working copy of *TGT, intermediate byte.
RL_B2:          equ     3       ;Working copy of *TGT, intermediate byte.
RL_B3:          equ     4       ;Working copy of *TGT, LSB.
RL_TGT:         equ     5       ;Saved TGT pointer (MSB at 5, LSB at 6).
                                ;Return address occupies 7 (MSB) and 8 (LSB).
RL_N:           equ     9       ;N, as stacked by the caller.
;
RL_WORK_BYTES:  equ     4       ;Size of the working copy of *TGT.
RL_LOCAL_BYTES: equ     6       ;Working copy plus saved TGT pointer.
RL_N_MASK:      equ     31      ;AND mask that forms N mod 32.
;----------
;Prologue.  Save TGT (it is needed again for the write-back), then reserve
;room for the working copy of the item to be rolled.
        pushw   x
        subw    sp,#RL_WORK_BYTES
;----------
;Bring *TGT into the stack frame, one byte at a time through A.
        ld      a,(x)
        ld      (RL_B0,sp),a
        ld      a,(1,x)
        ld      (RL_B1,sp),a
        ld      a,(2,x)
        ld      (RL_B2,sp),a
        ld      a,(3,x)
        ld      (RL_B3,sp),a
;----------
;Replace N by N mod 32.  Rolling a 32-bit quantity by 32 places leaves it
;unchanged, so nothing above the low five bits of N matters.
        ld      a,(RL_N,sp)
        and     a,#RL_N_MASK
        ld      (RL_N,sp),a
;----------
;Hold the value in X:Y (X = upper word, Y = lower word).  Working in
;registers is probably faster than working in memory.
        ldw     x,(RL_B0,sp)
        ldw     y,(RL_B2,sp)
;----------
;Stage 1: if N >= 16, exchanging the words accounts for 16 of the counts.
        ld      a,(RL_N,sp)
        cp      a,#16
        jrult   rl_stage_8
        sub     a,#16                   ;These 16 counts are handled here.
        ld      (RL_N,sp),a
        exgw    x,y                     ;Effectively a roll by 16.
;----------
rl_stage_8:
;Stage 2: if N >= 8, a byte shuffle accounts for 8 of the counts.
        ld      a,(RL_N,sp)
        cp      a,#8
        jrult   rl_stage_bits
        sub     a,#8                    ;These 8 counts are handled here.
        ld      (RL_N,sp),a
;
;With B3..B0 naming the value's bytes on entry to this stage (B3 = MSB),
;the registers evolve as follows:
;
;                                  XH  XL  YH  YL  A
;                                 --------------------
;                                  B3  B2  B1  B0  -
        ld      a,xh            ;  B3  B2  B1  B0  B3
        swapw   x               ;  B2  B3  B1  B0  B3
        swapw   y               ;  B2  B3  B0  B1  B3
        exg     a,yl            ;  B2  B3  B0  B3  B1
        exg     a,xl            ;  B2  B1  B0  B3  B3
;
;Execution continues into the last stage, which has at most 7 counts left.
;----------
rl_stage_bits:
;Stage 3: single-bit rolls.  Nothing to do if the count is already zero.
        tnz     (RL_N,sp)
        jreq    rl_write_back
rl_one_bit:
;Prime the carry with the MSB of X so that it arrives in the LSB of Y.
        ld      a,xh
        rlc     a
;Roll X:Y left one position through the carry.
        rlcw    y
        rlcw    x
        dec     (RL_N,sp)
        jrne    rl_one_bit
;----------
rl_write_back:
;Put X:Y back into the working copy in the stack frame.
        ldw     (RL_B0,sp),x
        ldw     (RL_B2,sp),y
;
;Copy the working copy out to the caller's *TGT.  LDW is not fully
;orthogonal, so moving the bytes through A is the easier route.
        ldw     x,(RL_TGT,sp)           ;Recover the TGT pointer.
        ld      a,(RL_B0,sp)
        ld      (x),a
        ld      a,(RL_B1,sp)
        ld      (1,x),a
        ld      a,(RL_B2,sp)
        ld      (2,x),a
        ld      a,(RL_B3,sp)
        ld      (3,x),a
;
;Epilogue.  Release the working copy and the saved TGT pointer, then return.
        addw    sp,#RL_LOCAL_BYTES
        ret
;
        end
;
;-------------------------------------------------------------------------------
;End of $Id: btu32rotleftninplacerxn.s,v 1.6 2010/05/14 18:18:50 dashley Exp $
;-------------------------------------------------------------------------------
;$Log: btu32rotleftninplacerxn.s,v $
;Revision 1.6 2010/05/14 18:18:50 dashley
;Minor comment enhancements.
;
;Revision 1.5 2010/05/14 17:14:25 dashley
;Defect corrected.
;
;Revision 1.4 2010/05/13 18:33:56 dashley
;Edits.
;
;Revision 1.3 2010/05/13 16:42:36 dashley
;Edits.
;
;Revision 1.2 2010/05/13 16:14:09 dashley
;Edits.
;
;Revision 1.1 2010/05/13 13:59:23 dashley
;Initial checkin.
;-------------------------------------------------------------------------------
|