xref: /illumos-gate/usr/src/lib/libc/sparc/fp/_Q_mul.c (revision 7c478bd9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "quad.h"
30 
31 static const double C[] = {
32 	0.0,
33 	0.5,
34 	1.0,
35 	2.0,
36 	68719476736.0,
37 	1048576.0,
38 	16.0,
39 	1.52587890625000000000e-05,
40 	5.96046447753906250000e-08,
41 	3.72529029846191406250e-09,
42 	8.67361737988403547206e-19,
43 	2.16840434497100886801e-19,
44 	1.32348898008484427979e-23,
45 	9.62964972193617926528e-35,
46 	4.70197740328915003187e-38
47 };
48 
49 #define	zero	C[0]
50 #define	half	C[1]
51 #define	one	C[2]
52 #define	two	C[3]
53 #define	two36	C[4]
54 #define	two20	C[5]
55 #define	two4	C[6]
56 #define	twom16	C[7]
57 #define	twom24	C[8]
58 #define	twom28	C[9]
59 #define	twom60	C[10]
60 #define	twom62	C[11]
61 #define	twom76	C[12]
62 #define	twom113	C[13]
63 #define	twom124	C[14]
64 
65 static const unsigned fsr_rn = 0xc0000000u;
66 
67 #ifdef __sparcv9
68 
69 /*
70  * _Qp_mul(pz, x, y) sets *pz = *x * *y.
71  */
72 void
73 _Qp_mul(union longdouble *pz, const union longdouble *x,
74 	const union longdouble *y)
75 
76 #else
77 
78 /*
79  * _Q_mul(x, y) returns *x * *y.
80  */
81 union longdouble
82 _Q_mul(const union longdouble *x, const union longdouble *y)
83 
84 #endif	/* __sparcv9 */
85 
86 {
87 	union longdouble	z;
88 	union xdouble		u;
89 	double			xx[5], yy[5], c, d, zz[9];
90 	unsigned int		xm, ym, fsr, lx, ly, wx[3], wy[3];
91 	unsigned int		msw, frac2, frac3, frac4, rm;
92 	int			ibit, ex, ey, ez, sign;
93 
94 	xm = x->l.msw & 0x7fffffff;
95 	ym = y->l.msw & 0x7fffffff;
96 	sign = (x->l.msw ^ y->l.msw) & ~0x7fffffff;
97 
98 	__quad_getfsrp(&fsr);
99 
100 	/* handle nan and inf cases */
101 	if (xm >= 0x7fff0000 || ym >= 0x7fff0000) {
102 		/* handle nan cases according to V9 app. B */
103 		if (QUAD_ISNAN(*y)) {
104 			if (!(y->l.msw & 0x8000)) {
105 				/* snan, signal invalid */
106 				if (fsr & FSR_NVM) {
107 					__quad_fmulq(x, y, &Z);
108 				} else {
109 					Z = *y;
110 					Z.l.msw |= 0x8000;
111 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
112 					    FSR_NVC;
113 					__quad_setfsrp(&fsr);
114 				}
115 				QUAD_RETURN(Z);
116 			} else if (QUAD_ISNAN(*x) && !(x->l.msw & 0x8000)) {
117 				/* snan, signal invalid */
118 				if (fsr & FSR_NVM) {
119 					__quad_fmulq(x, y, &Z);
120 				} else {
121 					Z = *x;
122 					Z.l.msw |= 0x8000;
123 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
124 					    FSR_NVC;
125 					__quad_setfsrp(&fsr);
126 				}
127 				QUAD_RETURN(Z);
128 			}
129 			Z = *y;
130 			QUAD_RETURN(Z);
131 		}
132 		if (QUAD_ISNAN(*x)) {
133 			if (!(x->l.msw & 0x8000)) {
134 				/* snan, signal invalid */
135 				if (fsr & FSR_NVM) {
136 					__quad_fmulq(x, y, &Z);
137 				} else {
138 					Z = *x;
139 					Z.l.msw |= 0x8000;
140 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
141 					    FSR_NVC;
142 					__quad_setfsrp(&fsr);
143 				}
144 				QUAD_RETURN(Z);
145 			}
146 			Z = *x;
147 			QUAD_RETURN(Z);
148 		}
149 		if (xm == 0x7fff0000) {
150 			/* x is inf */
151 			if (QUAD_ISZERO(*y)) {
152 				/* zero * inf, signal invalid */
153 				if (fsr & FSR_NVM) {
154 					__quad_fmulq(x, y, &Z);
155 				} else {
156 					Z.l.msw = 0x7fffffff;
157 					Z.l.frac2 = Z.l.frac3 =
158 					    Z.l.frac4 = 0xffffffff;
159 					fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
160 					    FSR_NVC;
161 					__quad_setfsrp(&fsr);
162 				}
163 				QUAD_RETURN(Z);
164 			}
165 			/* inf * finite, return inf */
166 			Z.l.msw = sign | 0x7fff0000;
167 			Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
168 			QUAD_RETURN(Z);
169 		}
170 		/* y is inf */
171 		if (QUAD_ISZERO(*x)) {
172 			/* zero * inf, signal invalid */
173 			if (fsr & FSR_NVM) {
174 				__quad_fmulq(x, y, &Z);
175 			} else {
176 				Z.l.msw = 0x7fffffff;
177 				Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff;
178 				fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC;
179 				__quad_setfsrp(&fsr);
180 			}
181 			QUAD_RETURN(Z);
182 		}
183 		/* inf * finite, return inf */
184 		Z.l.msw = sign | 0x7fff0000;
185 		Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
186 		QUAD_RETURN(Z);
187 	}
188 
189 	/* handle zero cases */
190 	if (xm == 0 || ym == 0) {
191 		if (QUAD_ISZERO(*x) || QUAD_ISZERO(*y)) {
192 			Z.l.msw = sign;
193 			Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
194 			QUAD_RETURN(Z);
195 		}
196 	}
197 
198 	/* now x and y are finite, nonzero */
199 	__quad_setfsrp(&fsr_rn);
200 
201 	/* get their normalized significands and exponents */
202 	ex = (int)(xm >> 16);
203 	lx = xm & 0xffff;
204 	if (ex) {
205 		lx |= 0x10000;
206 		wx[0] = x->l.frac2;
207 		wx[1] = x->l.frac3;
208 		wx[2] = x->l.frac4;
209 	} else {
210 		if (lx | (x->l.frac2 & 0xfffe0000)) {
211 			wx[0] = x->l.frac2;
212 			wx[1] = x->l.frac3;
213 			wx[2] = x->l.frac4;
214 			ex = 1;
215 		} else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) {
216 			lx = x->l.frac2;
217 			wx[0] = x->l.frac3;
218 			wx[1] = x->l.frac4;
219 			wx[2] = 0;
220 			ex = -31;
221 		} else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) {
222 			lx = x->l.frac3;
223 			wx[0] = x->l.frac4;
224 			wx[1] = wx[2] = 0;
225 			ex = -63;
226 		} else {
227 			lx = x->l.frac4;
228 			wx[0] = wx[1] = wx[2] = 0;
229 			ex = -95;
230 		}
231 		while ((lx & 0x10000) == 0) {
232 			lx = (lx << 1) | (wx[0] >> 31);
233 			wx[0] = (wx[0] << 1) | (wx[1] >> 31);
234 			wx[1] = (wx[1] << 1) | (wx[2] >> 31);
235 			wx[2] <<= 1;
236 			ex--;
237 		}
238 	}
239 	ez = ex - 0x3fff;
240 
241 	ey = (int)(ym >> 16);
242 	ly = ym & 0xffff;
243 	if (ey) {
244 		ly |= 0x10000;
245 		wy[0] = y->l.frac2;
246 		wy[1] = y->l.frac3;
247 		wy[2] = y->l.frac4;
248 	} else {
249 		if (ly | (y->l.frac2 & 0xfffe0000)) {
250 			wy[0] = y->l.frac2;
251 			wy[1] = y->l.frac3;
252 			wy[2] = y->l.frac4;
253 			ey = 1;
254 		} else if (y->l.frac2 | (y->l.frac3 & 0xfffe0000)) {
255 			ly = y->l.frac2;
256 			wy[0] = y->l.frac3;
257 			wy[1] = y->l.frac4;
258 			wy[2] = 0;
259 			ey = -31;
260 		} else if (y->l.frac3 | (y->l.frac4 & 0xfffe0000)) {
261 			ly = y->l.frac3;
262 			wy[0] = y->l.frac4;
263 			wy[1] = wy[2] = 0;
264 			ey = -63;
265 		} else {
266 			ly = y->l.frac4;
267 			wy[0] = wy[1] = wy[2] = 0;
268 			ey = -95;
269 		}
270 		while ((ly & 0x10000) == 0) {
271 			ly = (ly << 1) | (wy[0] >> 31);
272 			wy[0] = (wy[0] << 1) | (wy[1] >> 31);
273 			wy[1] = (wy[1] << 1) | (wy[2] >> 31);
274 			wy[2] <<= 1;
275 			ey--;
276 		}
277 	}
278 	ez += ey;
279 
280 	/* extract the significand into five doubles */
281 	c = twom16;
282 	xx[0] = (double)((int)lx) * c;
283 	yy[0] = (double)((int)ly) * c;
284 
285 	c *= twom24;
286 	xx[1] = (double)((int)(wx[0] >> 8)) * c;
287 	yy[1] = (double)((int)(wy[0] >> 8)) * c;
288 
289 	c *= twom24;
290 	xx[2] = (double)((int)(((wx[0] << 16) | (wx[1] >> 16)) &
291 	    0xffffff)) * c;
292 	yy[2] = (double)((int)(((wy[0] << 16) | (wy[1] >> 16)) &
293 	    0xffffff)) * c;
294 
295 	c *= twom24;
296 	xx[3] = (double)((int)(((wx[1] << 8) | (wx[2] >> 24)) &
297 	    0xffffff)) * c;
298 	yy[3] = (double)((int)(((wy[1] << 8) | (wy[2] >> 24)) &
299 	    0xffffff)) * c;
300 
301 	c *= twom24;
302 	xx[4] = (double)((int)(wx[2] & 0xffffff)) * c;
303 	yy[4] = (double)((int)(wy[2] & 0xffffff)) * c;
304 
305 	/* form the "digits" of the product */
306 	zz[0] = xx[0] * yy[0];
307 	zz[1] = xx[0] * yy[1] + xx[1] * yy[0];
308 	zz[2] = xx[0] * yy[2] + xx[1] * yy[1] + xx[2] * yy[0];
309 	zz[3] = xx[0] * yy[3] + xx[1] * yy[2] + xx[2] * yy[1] +
310 	    xx[3] * yy[0];
311 	zz[4] = xx[0] * yy[4] + xx[1] * yy[3] + xx[2] * yy[2] +
312 	    xx[3] * yy[1] + xx[4] * yy[0];
313 	zz[5] = xx[1] * yy[4] + xx[2] * yy[3] + xx[3] * yy[2] +
314 	    xx[4] * yy[1];
315 	zz[6] = xx[2] * yy[4] + xx[3] * yy[3] + xx[4] * yy[2];
316 	zz[7] = xx[3] * yy[4] + xx[4] * yy[3];
317 	zz[8] = xx[4] * yy[4];
318 
319 	/* collect the first few terms */
320 	c = (zz[1] + two20) - two20;
321 	zz[0] += c;
322 	zz[1] = zz[2] + (zz[1] - c);
323 	c = (zz[3] + twom28) - twom28;
324 	zz[1] += c;
325 	zz[2] = zz[4] + (zz[3] - c);
326 
327 	/* propagate carries into the third term */
328 	d = zz[6] + (zz[7] + zz[8]);
329 	zz[2] += zz[5] + d;
330 
331 	/* if the third term might lie on a rounding boundary, perturb it */
332 	/* as need be */
333 	if (zz[2] == (twom62 + zz[2]) - twom62)
334 	{
335 		c = (zz[5] + twom76) - twom76;
336 		if ((zz[5] - c) + d != zero)
337 			zz[2] += twom124;
338 	}
339 
340 	/* propagate carries to the leading term */
341 	c = zz[1] + zz[2];
342 	zz[2] = zz[2] + (zz[1] - c);
343 	zz[1] = c;
344 	c = zz[0] + zz[1];
345 	zz[1] = zz[1] + (zz[0] - c);
346 	zz[0] = c;
347 
348 	/* check for carry out */
349 	if (c >= two) {
350 		/* postnormalize */
351 		zz[0] *= half;
352 		zz[1] *= half;
353 		zz[2] *= half;
354 		ez++;
355 	}
356 
357 	/* if exponent > 0 strip off integer bit, else denormalize */
358 	if (ez > 0) {
359 		ibit = 1;
360 		zz[0] -= one;
361 	} else {
362 		ibit = 0;
363 		if (ez > -128)
364 			u.l.hi = (unsigned)(0x3fe + ez) << 20;
365 		else
366 			u.l.hi = 0x37e00000;
367 		u.l.lo = 0;
368 		zz[0] *= u.d;
369 		zz[1] *= u.d;
370 		zz[2] *= u.d;
371 		ez = 0;
372 	}
373 
374 	/* the first 48 bits of fraction come from zz[0] */
375 	u.d = d = two36 + zz[0];
376 	msw = u.l.lo;
377 	zz[0] -= (d - two36);
378 
379 	u.d = d = two4 + zz[0];
380 	frac2 = u.l.lo;
381 	zz[0] -= (d - two4);
382 
383 	/* the next 32 come from zz[0] and zz[1] */
384 	u.d = d = twom28 + (zz[0] + zz[1]);
385 	frac3 = u.l.lo;
386 	zz[0] -= (d - twom28);
387 
388 	/* condense the remaining fraction; errors here won't matter */
389 	c = zz[0] + zz[1];
390 	zz[1] = ((zz[0] - c) + zz[1]) + zz[2];
391 	zz[0] = c;
392 
393 	/* get the last word of fraction */
394 	u.d = d = twom60 + (zz[0] + zz[1]);
395 	frac4 = u.l.lo;
396 	zz[0] -= (d - twom60);
397 
398 	/* keep track of what's left for rounding; note that the error */
399 	/* in computing c will be non-negative due to rounding mode */
400 	c = zz[0] + zz[1];
401 
402 	/* get the rounding mode, fudging directed rounding modes */
403 	/* as though the result were positive */
404 	rm = fsr >> 30;
405 	if (sign)
406 		rm ^= (rm >> 1);
407 
408 	/* round and raise exceptions */
409 	fsr &= ~FSR_CEXC;
410 	if (c != zero) {
411 		fsr |= FSR_NXC;
412 
413 		/* decide whether to round the fraction up */
414 		if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 ||
415 		    (c == twom113 && ((frac4 & 1) || (c - zz[0] != zz[1])))))) {
416 			/* round up and renormalize if necessary */
417 			if (++frac4 == 0)
418 				if (++frac3 == 0)
419 					if (++frac2 == 0)
420 						if (++msw == 0x10000) {
421 							msw = 0;
422 							ez++;
423 						}
424 		}
425 	}
426 
427 	/* check for under/overflow */
428 	if (ez >= 0x7fff) {
429 		if (rm == FSR_RN || rm == FSR_RP) {
430 			z.l.msw = sign | 0x7fff0000;
431 			z.l.frac2 = z.l.frac3 = z.l.frac4 = 0;
432 		} else {
433 			z.l.msw = sign | 0x7ffeffff;
434 			z.l.frac2 = z.l.frac3 = z.l.frac4 = 0xffffffff;
435 		}
436 		fsr |= FSR_OFC | FSR_NXC;
437 	} else {
438 		z.l.msw = sign | (ez << 16) | msw;
439 		z.l.frac2 = frac2;
440 		z.l.frac3 = frac3;
441 		z.l.frac4 = frac4;
442 
443 		/* !ibit => exact result was tiny before rounding, */
444 		/* t nonzero => result delivered is inexact */
445 		if (!ibit) {
446 			if (c != zero)
447 				fsr |= FSR_UFC | FSR_NXC;
448 			else if (fsr & FSR_UFM)
449 				fsr |= FSR_UFC;
450 		}
451 	}
452 
453 	if ((fsr & FSR_CEXC) & (fsr >> 23)) {
454 		__quad_setfsrp(&fsr);
455 		__quad_fmulq(x, y, &Z);
456 	} else {
457 		Z = z;
458 		fsr |= (fsr & 0x1f) << 5;
459 		__quad_setfsrp(&fsr);
460 	}
461 	QUAD_RETURN(Z);
462 }
463