xref: /illumos-gate/usr/src/uts/sparc/fpu/div.c (revision 7c478bd9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 1988 by Sun Microsystems, Inc.
24  */
25 
26 #ident	"%Z%%M%	%I%	%E% SMI"	/* SunOS-4.1 1.9 88/11/30 */
27 
28 #include <sys/fpu/fpu_simulator.h>
29 #include <sys/fpu/globals.h>
30 
31 void
_fp_div(pfpsd,px,py,pz)32 _fp_div(pfpsd, px, py, pz)
33 	fp_simd_type	*pfpsd;
34 	unpacked	*px, *py, *pz;
35 {
36 	unsigned	r[4], *y, q, c;
37 	int		n;
38 
39 	*pz = *px;
40 
41 	if ((py->fpclass >= fp_quiet) || (px->fpclass >= fp_quiet)) {
42 		if (py->fpclass >= px->fpclass) *pz = *py;
43 		return;
44 	}
45 
46 	pz->sign = px->sign ^ py->sign;
47 	switch (px->fpclass) {
48 	case fp_quiet:
49 	case fp_signaling:
50 		return;
51 	case fp_zero:
52 	case fp_infinity:
53 		if (px->fpclass == py->fpclass) {	/* 0/0 or inf/inf */
54 			fpu_error_nan(pfpsd, pz);
55 			pz->fpclass = fp_quiet;
56 		}
57 		return;
58 	case fp_normal:
59 		switch (py->fpclass) {
60 		case fp_zero:	/* number/0 */
61 			fpu_set_exception(pfpsd, fp_division);
62 			pz->fpclass = fp_infinity;
63 			return;
64 		case fp_infinity:	/* number/inf */
65 			pz->fpclass = fp_zero;
66 			return;
67 		}
68 	}
69 
70 	/* Now x and y are both normal or subnormal. */
71 
72 	r[0] = px->significand[0];
73 	r[1] = px->significand[1];
74 	r[2] = px->significand[2];
75 	r[3] = px->significand[3];
76 	y = py->significand;
77 
78 	if (fpu_cmpli(r, y, 4) >= 0)
79 		pz->exponent = px->exponent - py->exponent;
80 	else
81 		pz->exponent = px->exponent - py->exponent - 1;
82 
83 	q = 0;
84 	while (q < 0x10000) {	/* generate quo[0] */
85 		q <<= 1;
86 		if (fpu_cmpli(r, y, 4) >= 0) {
87 			q += 1; 	/* if r>y do r-=y and q+=1 */
88 			c = 0;
89 			c = fpu_sub3wc(&r[3], r[3], y[3], c);
90 			c = fpu_sub3wc(&r[2], r[2], y[2], c);
91 			c = fpu_sub3wc(&r[1], r[1], y[1], c);
92 			c = fpu_sub3wc(&r[0], r[0], y[0], c);
93 		}
94 		r[0] = (r[0]<<1)|((r[1]&0x80000000)>>31);	/* r << 1 */
95 		r[1] = (r[1]<<1)|((r[2]&0x80000000)>>31);
96 		r[2] = (r[2]<<1)|((r[3]&0x80000000)>>31);
97 		r[3] = (r[3]<<1);
98 	}
99 	pz->significand[0] = q;
100 	q = 0;			/* generate quo[1] */
101 	n = 32;
102 	while (n--) {
103 		q <<= 1;
104 		if (fpu_cmpli(r, y, 4) >= 0) {
105 			q += 1; 	/* if r>y do r-=y and q+=1 */
106 			c = 0;
107 			c = fpu_sub3wc(&r[3], r[3], y[3], c);
108 			c = fpu_sub3wc(&r[2], r[2], y[2], c);
109 			c = fpu_sub3wc(&r[1], r[1], y[1], c);
110 			c = fpu_sub3wc(&r[0], r[0], y[0], c);
111 		}
112 		r[0] = (r[0]<<1)|((r[1]&0x80000000)>>31);	/* r << 1 */
113 		r[1] = (r[1]<<1)|((r[2]&0x80000000)>>31);
114 		r[2] = (r[2]<<1)|((r[3]&0x80000000)>>31);
115 		r[3] = (r[3]<<1);
116 	}
117 	pz->significand[1] = q;
118 	q = 0;			/* generate quo[2] */
119 	n = 32;
120 	while (n--) {
121 		q <<= 1;
122 		if (fpu_cmpli(r, y, 4) >= 0) {
123 			q += 1; 	/* if r>y do r-=y and q+=1 */
124 			c = 0;
125 			c = fpu_sub3wc(&r[3], r[3], y[3], c);
126 			c = fpu_sub3wc(&r[2], r[2], y[2], c);
127 			c = fpu_sub3wc(&r[1], r[1], y[1], c);
128 			c = fpu_sub3wc(&r[0], r[0], y[0], c);
129 		}
130 		r[0] = (r[0]<<1)|((r[1]&0x80000000)>>31);	/* r << 1 */
131 		r[1] = (r[1]<<1)|((r[2]&0x80000000)>>31);
132 		r[2] = (r[2]<<1)|((r[3]&0x80000000)>>31);
133 		r[3] = (r[3]<<1);
134 	}
135 	pz->significand[2] = q;
136 	q = 0;			/* generate quo[3] */
137 	n = 32;
138 	while (n--) {
139 		q <<= 1;
140 		if (fpu_cmpli(r, y, 4) >= 0) {
141 			q += 1; 	/* if r>y do r-=y and q+=1 */
142 			c  = 0;
143 			c = fpu_sub3wc(&r[3], r[3], y[3], c);
144 			c = fpu_sub3wc(&r[2], r[2], y[2], c);
145 			c = fpu_sub3wc(&r[1], r[1], y[1], c);
146 			c = fpu_sub3wc(&r[0], r[0], y[0], c);
147 		}
148 		r[0] = (r[0]<<1)|((r[1]&0x80000000)>>31);	/* r << 1 */
149 		r[1] = (r[1]<<1)|((r[2]&0x80000000)>>31);
150 		r[2] = (r[2]<<1)|((r[3]&0x80000000)>>31);
151 		r[3] = (r[3]<<1);
152 	}
153 	pz->significand[3] = q;
154 	if ((r[0]|r[1]|r[2]|r[3]) == 0) pz->sticky = pz->rounded = 0;
155 	else {
156 		pz->sticky = 1;		/* half way case won't occur */
157 		if (fpu_cmpli(r, y, 4) >= 0) pz->rounded = 1;
158 	}
159 }
160 
161 void
_fp_sqrt(pfpsd,px,pz)162 _fp_sqrt(pfpsd, px, pz)
163 	fp_simd_type	*pfpsd;
164 	unpacked	*px, *pz;
165 {				/* *pz gets sqrt(*px) */
166 
167 	unsigned *x, r, c, q, t[4], s[4];
168 	*pz = *px;
169 	switch (px->fpclass) {
170 	case fp_quiet:
171 	case fp_signaling:
172 	case fp_zero:
173 		return;
174 	case fp_infinity:
175 		if (px->sign == 1) {	/* sqrt(-inf) */
176 			fpu_error_nan(pfpsd, pz);
177 			pz->fpclass = fp_quiet;
178 		}
179 		return;
180 	case fp_normal:
181 		if (px->sign == 1) {	/* sqrt(-norm) */
182 			fpu_error_nan(pfpsd, pz);
183 			pz->fpclass = fp_quiet;
184 			return;
185 		}
186 	}
187 
188 	/* Now x is normal. */
189 	x = px->significand;
190 	if (px->exponent & 1) {
191 				/*
192 				 * sqrt(1.f * 2**odd) = sqrt (2.+2f)
193 				 * 2**(odd-1)/2
194 				 */
195 		pz->exponent = (px->exponent - 1) / 2;
196 		x[0] = (x[0]<<1)|((x[1]&0x80000000)>>31);	/* x<<1 */
197 		x[1] = (x[1]<<1)|((x[2]&0x80000000)>>31);
198 		x[2] = (x[2]<<1)|((x[3]&0x80000000)>>31);
199 		x[3] = (x[3]<<1);
200 	} else {
201 				/*
202 				 * sqrt(1.f * 2**even) = sqrt (1.f)
203 				 * 2**(even)/2
204 				 */
205 		pz->exponent = px->exponent / 2;
206 	}
207 	s[0] = s[1] = s[2] = s[3] = t[0] = t[1] = t[2] = t[3] = 0;
208 	q = 0;
209 	r = 0x00010000;
210 	while (r != 0) {				/* compute sqrt[0] */
211 		t[0] = s[0] + r;
212 		if (t[0] <= x[0]) {
213 			s[0] = t[0] + r;
214 			x[0] -= t[0];
215 			q += r;
216 		}
217 		x[0] = (x[0]<<1)|((x[1]&0x80000000)>>31);	/* x<<1 */
218 		x[1] = (x[1]<<1)|((x[2]&0x80000000)>>31);
219 		x[2] = (x[2]<<1)|((x[3]&0x80000000)>>31);
220 		x[3] = (x[3]<<1);
221 		r >>= 1;
222 	}
223 	pz->significand[0] = q;
224 	q = 0;
225 	r = (unsigned)0x80000000;
226 	while (r != 0) {				/* compute sqrt[1] */
227 		t[1] = s[1] + r;			/* no carry */
228 		t[0] = s[0];
229 		if (fpu_cmpli(t, x, 2) <= 0) {
230 			c = 0;
231 			c = fpu_add3wc(&s[1], t[1], r, c);
232 			c = fpu_add3wc(&s[0], t[0], 0, c);
233 			c = 0;
234 			c = fpu_sub3wc(&x[1], x[1], t[1], c);
235 			c = fpu_sub3wc(&x[0], x[0], t[0], c);
236 			q += r;
237 		}
238 		x[0] = (x[0]<<1)|((x[1]&0x80000000)>>31);	/* x<<1 */
239 		x[1] = (x[1]<<1)|((x[2]&0x80000000)>>31);
240 		x[2] = (x[2]<<1)|((x[3]&0x80000000)>>31);
241 		x[3] = (x[3]<<1);
242 		r >>= 1;
243 	}
244 	pz->significand[1] = q;
245 	q = 0;
246 	r = (unsigned)0x80000000;
247 	while (r != 0) {				/* compute sqrt[2] */
248 		t[2] = s[2] + r;			/* no carry */
249 		t[1] = s[1];
250 		t[0] = s[0];
251 		if (fpu_cmpli(t, x, 3) <= 0) {
252 			c = 0;
253 			c = fpu_add3wc(&s[2], t[2], r, c);
254 			c = fpu_add3wc(&s[1], t[1], 0, c);
255 			c = fpu_add3wc(&s[0], t[0], 0, c);
256 			c = 0;
257 			c = fpu_sub3wc(&x[2], x[2], t[2], c);
258 			c = fpu_sub3wc(&x[1], x[1], t[1], c);
259 			c = fpu_sub3wc(&x[0], x[0], t[0], c);
260 			q += r;
261 		}
262 		x[0] = (x[0]<<1)|((x[1]&0x80000000)>>31);	/* x<<1 */
263 		x[1] = (x[1]<<1)|((x[2]&0x80000000)>>31);
264 		x[2] = (x[2]<<1)|((x[3]&0x80000000)>>31);
265 		x[3] = (x[3]<<1);
266 		r >>= 1;
267 	}
268 	pz->significand[2] = q;
269 	q = 0;
270 	r = (unsigned)0x80000000;
271 	while (r != 0) {				/* compute sqrt[3] */
272 		t[3] = s[3] + r;				/* no carry */
273 		t[2] = s[2];
274 		t[1] = s[1];
275 		t[0] = s[0];
276 		if (fpu_cmpli(t, x, 4) <= 0) {
277 			c = 0;
278 			c = fpu_add3wc(&s[3], t[3], r, c);
279 			c = fpu_add3wc(&s[2], t[2], 0, c);
280 			c = fpu_add3wc(&s[1], t[1], 0, c);
281 			c = fpu_add3wc(&s[0], t[0], 0, c);
282 			c = 0;
283 			c = fpu_sub3wc(&x[3], x[3], t[3], c);
284 			c = fpu_sub3wc(&x[2], x[2], t[2], c);
285 			c = fpu_sub3wc(&x[1], x[1], t[1], c);
286 			c = fpu_sub3wc(&x[0], x[0], t[0], c);
287 			q += r;
288 		}
289 		x[0] = (x[0]<<1)|((x[1]&0x80000000)>>31);	/* x<<1 */
290 		x[1] = (x[1]<<1)|((x[2]&0x80000000)>>31);
291 		x[2] = (x[2]<<1)|((x[3]&0x80000000)>>31);
292 		x[3] = (x[3]<<1);
293 		r >>= 1;
294 	}
295 	pz->significand[3] = q;
296 	if ((x[0]|x[1]|x[2]|x[3]) == 0) {
297 		pz->sticky = pz->rounded = 0;
298 	} else {
299 		pz->sticky = 1;
300 		if (fpu_cmpli(s, x, 4) < 0)
301 			pz->rounded = 1;
302 		else
303 			pz->rounded = 0;
304 	}
305 }
306