2bc3ccc0ed * This m4 code has been taken from The SPARC Architecture Manual Version 8.
3bc3ccc0ed */
5bc3ccc0ed * Division/Remainder
6bc3ccc0ed *
7bc3ccc0ed * Input is:
8bc3ccc0ed *   dividend -- the thing being divided
9bc3ccc0ed *   divisor -- how many ways to divide it
10bc3ccc0ed * Important parameters:
11bc3ccc0ed *   N -- how many bits per iteration we try to get
12bc3ccc0ed *        as our current guess:
13bc3ccc0ed *   WORDSIZE -- how many bits altogether we're talking about:
14bc3ccc0ed *               obviously:
15bc3ccc0ed * A derived constant:
16bc3ccc0ed *   TOPBITS -- how many bits are in the top "decade" of a number:
17bc3ccc0ed *
18bc3ccc0ed * Important variables are:
19bc3ccc0ed *   Q -- the partial quotient under development -- initially 0
20bc3ccc0ed *   R -- the remainder so far -- initially == the dividend
21bc3ccc0ed *   ITER -- number of iterations of the main division loop which will
22bc3ccc0ed *           be required. Equal to CEIL( lg2(quotient)/4 )
23bc3ccc0ed *           Note that this is log_base_(2��4) of the quotient.
24bc3ccc0ed *   V -- the current comparand -- initially divisor*2��(ITER*4-1)
25bc3ccc0ed * Cost:
26bc3ccc0ed *   current estimate for non-large dividend is
27bc3ccc0ed *        CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
28bc3ccc0ed *   a large dividend is one greater than 2��(31-4 ) and takes a
29bc3ccc0ed *   different path, as the upper bits of the quotient must be developed
30bc3ccc0ed *   one bit at a time.
31bc3ccc0ed *   This uses the m4 and cpp macro preprocessors.
32bc3ccc0ed */
34bc3ccc0ed * This is the recursive definition of how we develop quotient digits.
35bc3ccc0ed * It takes three important parameters:
36bc3ccc0ed *   $1 -- the current depth, 1<=$1<=4
37bc3ccc0ed *   $2 -- the current accumulation of quotient bits
38bc3ccc0ed *   4 -- max depth
39bc3ccc0ed * We add a new bit to $2 and either recurse or insert the bits in the quotient.
40bc3ccc0ed * Dynamic input:
41bc3ccc0ed *   %o3 -- current remainder
42bc3ccc0ed *   %o2 -- current quotient
43bc3ccc0ed *   %o5 -- current comparand
44bc3ccc0ed *   cc -- set on current value of %o3
45bc3ccc0ed * Dynamic output:
46bc3ccc0ed *   %o3', %o2', %o5', cc'
47bc3ccc0ed */
48bc3ccc0ed#include "../assembly.h"
5080005c5marius	.align 32
52bc3ccc0ed	b	divide
53bc3ccc0ed	mov	0,%g3			! result always nonnegative
5580005c5marius	.align 32
57bc3ccc0ed	orcc	%o1,%o0,%g0	! are either %o0 or %o1 negative
58bc3ccc0ed	bge	divide			! if not, skip this junk
59bc3ccc0ed	xor	%o1,%o0,%g3	! record sign of result in sign of %g3
60bc3ccc0ed	tst	%o1
61bc3ccc0ed	bge	2f
62bc3ccc0ed	tst	%o0
63bc3ccc0ed	! %o1 < 0
64bc3ccc0ed	bge	divide
65bc3ccc0ed	neg	%o1
66bc3ccc0ed	2:
67bc3ccc0ed	! %o0 < 0
68bc3ccc0ed	neg	%o0
69bc3ccc0ed	! FALL THROUGH
71bc3ccc0ed	! Compute size of quotient, scale comparand.
72bc3ccc0ed	orcc	%o1,%g0,%o5		! movcc %o1,%o5
73bc3ccc0ed	te	2			! if %o1 = 0
74bc3ccc0ed	mov	%o0,%o3
75bc3ccc0ed	mov	0,%o2
76bc3ccc0ed	sethi	%hi(1<<(32-4 -1)),%g1
77bc3ccc0ed	cmp	%o3,%g1
78bc3ccc0ed	blu	not_really_big
79bc3ccc0ed	mov	0,%o4
80bc3ccc0ed	!
81bc3ccc0ed	! Here, the %o0 is >= 2��(31-4) or so. We must be careful here,
82bc3ccc0ed	! as our usual 4-at-a-shot divide step will cause overflow and havoc.
83bc3ccc0ed	! The total number of bits in the result here is 4*%o4+%g2, where
84bc3ccc0ed	! %g2 <= 4.
85bc3ccc0ed	! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
86bc3ccc0ed! the top decade: so don't even bother to compare to %o3.
88bc3ccc0ed	cmp	%o5,%g1
89bc3ccc0ed	bgeu	3f
90bc3ccc0ed	mov	1,%g2
91bc3ccc0ed	sll	%o5,4,%o5
92bc3ccc0ed	b	1b
93bc3ccc0ed	inc	%o4
94bc3ccc0ed! Now compute %g2
95bc3ccc0ed2:	addcc	%o5,%o5,%o5
96bc3ccc0ed	bcc	not_too_big
97bc3ccc0ed	add	%g2,1,%g2
98bc3ccc0ed		! We're here if the %o1 overflowed when Shifting.
99bc3ccc0ed		! This means that %o3 has the high-order bit set.
100bc3ccc0ed		! Restore %o5 and subtract from %o3.
101bc3ccc0ed		sll	%g1,4 ,%g1	! high order bit
102bc3ccc0ed		srl	%o5,1,%o5		! rest of %o5
103bc3ccc0ed		add	%o5,%g1,%o5
104bc3ccc0ed		b	do_single_div
105bc3ccc0ed		dec	%g2
107bc3ccc0ed3:	cmp	%o5,%o3
108bc3ccc0ed	blu	2b
109bc3ccc0ed	nop
110bc3ccc0ed	be	do_single_div
111bc3ccc0ed	nop
112bc3ccc0ed! %o5 > %o3: went too far: back up 1 step
113bc3ccc0ed!     srl %o5,1,%o5
114bc3ccc0ed!      dec %g2
115bc3ccc0ed! do single-bit divide steps
117bc3ccc0ed! We have to be careful here. We know that %o3 >= %o5, so we can do the
118bc3ccc0ed! first divide step without thinking. BUT, the others are conditional,
119bc3ccc0ed! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
120bc3ccc0ed! order bit set in the first step, just falling into the regular
121bc3ccc0ed! division loop will mess up the first time around.
122bc3ccc0ed! So we unroll slightly...
124bc3ccc0ed	deccc	%g2
125bc3ccc0ed	bl	end_regular_divide
126bc3ccc0ed	nop
127bc3ccc0ed	sub	%o3,%o5,%o3
128bc3ccc0ed	mov	1,%o2
12980005c5marius	b,a	end_single_divloop
13080005c5marius	! EMPTY
132bc3ccc0ed	sll	%o2,1,%o2
133bc3ccc0ed	bl	1f
134bc3ccc0ed	srl	%o5,1,%o5
135bc3ccc0ed	! %o3 >= 0
136bc3ccc0ed		sub	%o3,%o5,%o3
137bc3ccc0ed		b	2f
138bc3ccc0ed		inc	%o2
139bc3ccc0ed	1:	! %o3 < 0
140bc3ccc0ed		add	%o3,%o5,%o3
141bc3ccc0ed		dec	%o2
142bc3ccc0ed	2:
143bc3ccc0ed	end_single_divloop:
144bc3ccc0ed		deccc	%g2
145bc3ccc0ed		bge	single_divloop
146bc3ccc0ed		tst	%o3
14780005c5marius		b,a	end_regular_divide
14880005c5marius		! EMPTY
151bc3ccc0ed	sll	%o5,4,%o5
152bc3ccc0ed	cmp	%o5,%o3
153bc3ccc0ed	bleu	1b
154bc3ccc0ed	inccc	%o4
155bc3ccc0ed	be	got_result
156bc3ccc0ed	dec	%o4
158bc3ccc0ed	! Do the main division iteration
159bc3ccc0ed	tst	%o3
160bc3ccc0ed	! Fall through into divide loop
162bc3ccc0ed	sll	%o2,4,%o2
163bc3ccc0ed		!depth 1, accumulated bits 0
164bc3ccc0ed	bl	L.1.16
165bc3ccc0ed	srl	%o5,1,%o5
166bc3ccc0ed	! remainder is nonnegative
167bc3ccc0ed	subcc	%o3,%o5,%o3
168bc3ccc0ed	 	!depth 2, accumulated bits 1
169bc3ccc0ed	bl	L.2.17
170bc3ccc0ed	srl	%o5,1,%o5
171bc3ccc0ed	! remainder is nonnegative
172bc3ccc0ed	subcc	%o3,%o5,%o3
173bc3ccc0ed	 	!depth 3, accumulated bits 3
174bc3ccc0ed	bl	L.3.19
175bc3ccc0ed	srl	%o5,1,%o5
176bc3ccc0ed	! remainder is nonnegative
177bc3ccc0ed	subcc	%o3,%o5,%o3
178bc3ccc0ed	 	!depth 4, accumulated bits 7
179bc3ccc0ed	bl	L.4.23
180bc3ccc0ed	srl	%o5,1,%o5
181bc3ccc0ed	! remainder is nonnegative
182bc3ccc0ed	subcc	%o3,%o5,%o3
183bc3ccc0ed		b	9f
184bc3ccc0ed		add	%o2, (7*2+1), %o2
186bc3ccc0ed	! remainder is negative
187bc3ccc0ed	addcc	%o3,%o5,%o3
188bc3ccc0ed		b	9f
189bc3ccc0ed		add	%o2, (7*2-1), %o2
191bc3ccc0ed	! remainder is negative
192bc3ccc0ed	addcc	%o3,%o5,%o3
193bc3ccc0ed	 	!depth 4, accumulated bits 5
194bc3ccc0ed	bl	L.4.21
195bc3ccc0ed	srl	%o5,1,%o5
196bc3ccc0ed	! remainder is nonnegative
197bc3ccc0ed	subcc	%o3,%o5,%o3
198bc3ccc0ed		b	9f
199bc3ccc0ed		add	%o2, (5*2+1), %o2
201bc3ccc0ed	! remainder is negative
202bc3ccc0ed	addcc	%o3,%o5,%o3
203bc3ccc0ed		b	9f
204bc3ccc0ed		add	%o2, (5*2-1), %o2
206bc3ccc0ed	! remainder is negative
207bc3ccc0ed	addcc	%o3,%o5,%o3
208bc3ccc0ed	 	!depth 3, accumulated bits 1
209bc3ccc0ed	bl	L.3.17
210bc3ccc0ed	srl	%o5,1,%o5
211bc3ccc0ed	! remainder is nonnegative
212bc3ccc0ed	subcc	%o3,%o5,%o3
213bc3ccc0ed	 	!depth 4, accumulated bits 3
214bc3ccc0ed	bl	L.4.19
215bc3ccc0ed	srl	%o5,1,%o5
216bc3ccc0ed	! remainder is nonnegative
217bc3ccc0ed	subcc	%o3,%o5,%o3
218bc3ccc0ed		b	9f
219bc3ccc0ed		add	%o2, (3*2+1), %o2
221bc3ccc0ed	! remainder is negative
222bc3ccc0ed	addcc	%o3,%o5,%o3
223bc3ccc0ed		b	9f
224bc3ccc0ed		add	%o2, (3*2-1), %o2
226bc3ccc0ed	! remainder is negative
227bc3ccc0ed	addcc	%o3,%o5,%o3
228bc3ccc0ed	 	!depth 4, accumulated bits 1
229bc3ccc0ed	bl	L.4.17
230bc3ccc0ed	srl	%o5,1,%o5
231bc3ccc0ed	! remainder is nonnegative
232bc3ccc0ed	subcc	%o3,%o5,%o3
233bc3ccc0ed		b	9f
234bc3ccc0ed		add	%o2, (1*2+1), %o2
236bc3ccc0ed	! remainder is negative
237bc3ccc0ed	addcc	%o3,%o5,%o3
238bc3ccc0ed		b	9f
239bc3ccc0ed		add	%o2, (1*2-1), %o2
241bc3ccc0ed	! remainder is negative
242bc3ccc0ed	addcc	%o3,%o5,%o3
243bc3ccc0ed	 	!depth 2, accumulated bits -1
244bc3ccc0ed	bl	L.2.15
245bc3ccc0ed	srl	%o5,1,%o5
246bc3ccc0ed	! remainder is nonnegative
247bc3ccc0ed	subcc	%o3,%o5,%o3
248bc3ccc0ed	 	!depth 3, accumulated bits -1
249bc3ccc0ed	bl	L.3.15
250bc3ccc0ed	srl	%o5,1,%o5
251bc3ccc0ed	! remainder is nonnegative
252bc3ccc0ed	subcc	%o3,%o5,%o3
253bc3ccc0ed	 	!depth 4, accumulated bits -1
254bc3ccc0ed	bl	L.4.15
255bc3ccc0ed	srl	%o5,1,%o5
256bc3ccc0ed	! remainder is nonnegative
257bc3ccc0ed	subcc	%o3,%o5,%o3
258bc3ccc0ed		b	9f
259bc3ccc0ed		add	%o2, (-1*2+1), %o2
261bc3ccc0ed	! remainder is negative
262bc3ccc0ed	addcc	%o3,%o5,%o3
263bc3ccc0ed		b	9f
264bc3ccc0ed		add	%o2, (-1*2-1), %o2
266bc3ccc0ed	! remainder is negative
267bc3ccc0ed	addcc	%o3,%o5,%o3
268bc3ccc0ed	 	!depth 4, accumulated bits -3
269bc3ccc0ed	bl	L.4.13
270bc3ccc0ed	srl	%o5,1,%o5
271bc3ccc0ed	! remainder is nonnegative
272bc3ccc0ed	subcc	%o3,%o5,%o3
273bc3ccc0ed		b	9f
274bc3ccc0ed		add	%o2, (-3*2+1), %o2
276bc3ccc0ed	! remainder is negative
277bc3ccc0ed	addcc	%o3,%o5,%o3
278bc3ccc0ed		b	9f
279bc3ccc0ed		add	%o2, (-3*2-1), %o2
281bc3ccc0ed	! remainder is negative
282bc3ccc0ed	addcc	%o3,%o5,%o3
283bc3ccc0ed	 	!depth 3, accumulated bits -3
284bc3ccc0ed	bl	L.3.13
285bc3ccc0ed	srl	%o5,1,%o5
286bc3ccc0ed	! remainder is nonnegative
287bc3ccc0ed	subcc	%o3,%o5,%o3
288bc3ccc0ed	 	!depth 4, accumulated bits -5
289bc3ccc0ed	bl	L.4.11
290bc3ccc0ed	srl	%o5,1,%o5
291bc3ccc0ed	! remainder is nonnegative
292bc3ccc0ed	subcc	%o3,%o5,%o3
293bc3ccc0ed		b	9f
294bc3ccc0ed		add	%o2, (-5*2+1), %o2
296bc3ccc0ed	! remainder is negative
297bc3ccc0ed	addcc	%o3,%o5,%o3
298bc3ccc0ed		b	9f
299bc3ccc0ed		add	%o2, (-5*2-1), %o2
301bc3ccc0ed	! remainder is negative
302bc3ccc0ed	addcc	%o3,%o5,%o3
303bc3ccc0ed	 	!depth 4, accumulated bits -7
304bc3ccc0ed	bl	L.4.9
305bc3ccc0ed	srl	%o5,1,%o5
306bc3ccc0ed	! remainder is nonnegative
307bc3ccc0ed	subcc	%o3,%o5,%o3
308bc3ccc0ed		b	9f
309bc3ccc0ed		add	%o2, (-7*2+1), %o2
311bc3ccc0ed	! remainder is negative
312bc3ccc0ed	addcc	%o3,%o5,%o3
313bc3ccc0ed		b	9f
314bc3ccc0ed		add	%o2, (-7*2-1), %o2
315bc3ccc0ed	9:
317bc3ccc0ed	deccc	%o4
318bc3ccc0ed	bge	divloop
319bc3ccc0ed	tst	%o3
32080005c5marius	bl,a	got_result
32180005c5marius	! non-restoring fixup if remainder < 0, otherwise annulled
322bc3ccc0ed	dec	%o2
324bc3ccc0ed	tst	%g3
32580005c5marius	bl,a	1f
32680005c5marius	! negate for answer < 0, otherwise annulled
32780005c5marius	neg	%o2,%o2			! %o2 <- -%o2
329bc3ccc0ed	retl				! leaf-routine return
330bc3ccc0ed	mov	%o2,%o0			! quotient <- %o2