xref: /illumos-gate/usr/src/lib/libm/common/m9x/fma.c (revision 25c28e83)
1*25c28e83SPiotr Jasiukajtis /*
2*25c28e83SPiotr Jasiukajtis  * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis  *
4*25c28e83SPiotr Jasiukajtis  * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis  * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis  * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis  *
8*25c28e83SPiotr Jasiukajtis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis  * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis  * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis  * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis  *
13*25c28e83SPiotr Jasiukajtis  * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis  * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis  * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis  * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis  *
19*25c28e83SPiotr Jasiukajtis  * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis  */
21*25c28e83SPiotr Jasiukajtis 
22*25c28e83SPiotr Jasiukajtis /*
23*25c28e83SPiotr Jasiukajtis  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24*25c28e83SPiotr Jasiukajtis  */
25*25c28e83SPiotr Jasiukajtis /*
26*25c28e83SPiotr Jasiukajtis  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27*25c28e83SPiotr Jasiukajtis  * Use is subject to license terms.
28*25c28e83SPiotr Jasiukajtis  */
29*25c28e83SPiotr Jasiukajtis 
30*25c28e83SPiotr Jasiukajtis #if defined(ELFOBJ)
31*25c28e83SPiotr Jasiukajtis #pragma weak fma = __fma
32*25c28e83SPiotr Jasiukajtis #endif
33*25c28e83SPiotr Jasiukajtis 
34*25c28e83SPiotr Jasiukajtis #include "libm.h"
35*25c28e83SPiotr Jasiukajtis #include "fma.h"
36*25c28e83SPiotr Jasiukajtis #include "fenv_inlines.h"
37*25c28e83SPiotr Jasiukajtis 
38*25c28e83SPiotr Jasiukajtis #if defined(__sparc)
39*25c28e83SPiotr Jasiukajtis 
40*25c28e83SPiotr Jasiukajtis static const union {
41*25c28e83SPiotr Jasiukajtis 	unsigned i[2];
42*25c28e83SPiotr Jasiukajtis 	double d;
43*25c28e83SPiotr Jasiukajtis } C[] = {
44*25c28e83SPiotr Jasiukajtis 	{ 0x3fe00000u, 0 },
45*25c28e83SPiotr Jasiukajtis 	{ 0x40000000u, 0 },
46*25c28e83SPiotr Jasiukajtis 	{ 0x43300000u, 0 },
47*25c28e83SPiotr Jasiukajtis 	{ 0x41a00000u, 0 },
48*25c28e83SPiotr Jasiukajtis 	{ 0x3e500000u, 0 },
49*25c28e83SPiotr Jasiukajtis 	{ 0x3df00000u, 0 },
50*25c28e83SPiotr Jasiukajtis 	{ 0x3bf00000u, 0 },
51*25c28e83SPiotr Jasiukajtis 	{ 0x7fe00000u, 0 },
52*25c28e83SPiotr Jasiukajtis 	{ 0x00100000u, 0 },
53*25c28e83SPiotr Jasiukajtis 	{ 0x00100001u, 0 }
54*25c28e83SPiotr Jasiukajtis };
55*25c28e83SPiotr Jasiukajtis 
56*25c28e83SPiotr Jasiukajtis #define	half	C[0].d
57*25c28e83SPiotr Jasiukajtis #define	two	C[1].d
58*25c28e83SPiotr Jasiukajtis #define	two52	C[2].d
59*25c28e83SPiotr Jasiukajtis #define	two27	C[3].d
60*25c28e83SPiotr Jasiukajtis #define	twom26	C[4].d
61*25c28e83SPiotr Jasiukajtis #define	twom32	C[5].d
62*25c28e83SPiotr Jasiukajtis #define	twom64	C[6].d
63*25c28e83SPiotr Jasiukajtis #define	huge	C[7].d
64*25c28e83SPiotr Jasiukajtis #define	tiny	C[8].d
65*25c28e83SPiotr Jasiukajtis #define	tiny2	C[9].d
66*25c28e83SPiotr Jasiukajtis 
67*25c28e83SPiotr Jasiukajtis static const unsigned int fsr_rm = 0xc0000000u;
68*25c28e83SPiotr Jasiukajtis 
69*25c28e83SPiotr Jasiukajtis /*
70*25c28e83SPiotr Jasiukajtis  * fma for SPARC: 64-bit double precision, big-endian
71*25c28e83SPiotr Jasiukajtis  */
72*25c28e83SPiotr Jasiukajtis double
73*25c28e83SPiotr Jasiukajtis __fma(double x, double y, double z) {
74*25c28e83SPiotr Jasiukajtis 	union {
75*25c28e83SPiotr Jasiukajtis 		unsigned i[2];
76*25c28e83SPiotr Jasiukajtis 		double d;
77*25c28e83SPiotr Jasiukajtis 	} xx, yy, zz;
78*25c28e83SPiotr Jasiukajtis 	double xhi, yhi, xlo, ylo, t;
79*25c28e83SPiotr Jasiukajtis 	unsigned int xy0, xy1, xy2, xy3, z0, z1, z2, z3, fsr, rm, sticky;
80*25c28e83SPiotr Jasiukajtis 	int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
81*25c28e83SPiotr Jasiukajtis 	volatile double	dummy;
82*25c28e83SPiotr Jasiukajtis 
83*25c28e83SPiotr Jasiukajtis 	/* extract the high order words of the arguments */
84*25c28e83SPiotr Jasiukajtis 	xx.d = x;
85*25c28e83SPiotr Jasiukajtis 	yy.d = y;
86*25c28e83SPiotr Jasiukajtis 	zz.d = z;
87*25c28e83SPiotr Jasiukajtis 	hx = xx.i[0] & ~0x80000000;
88*25c28e83SPiotr Jasiukajtis 	hy = yy.i[0] & ~0x80000000;
89*25c28e83SPiotr Jasiukajtis 	hz = zz.i[0] & ~0x80000000;
90*25c28e83SPiotr Jasiukajtis 
91*25c28e83SPiotr Jasiukajtis 	/* dispense with inf, nan, and zero cases */
92*25c28e83SPiotr Jasiukajtis 	if (hx >= 0x7ff00000 || hy >= 0x7ff00000 || (hx | xx.i[1]) == 0 ||
93*25c28e83SPiotr Jasiukajtis 		(hy | yy.i[1]) == 0)	/* x or y is inf, nan, or zero */
94*25c28e83SPiotr Jasiukajtis 		return (x * y + z);
95*25c28e83SPiotr Jasiukajtis 
96*25c28e83SPiotr Jasiukajtis 	if (hz >= 0x7ff00000)	/* z is inf or nan */
97*25c28e83SPiotr Jasiukajtis 		return (x + z);	/* avoid spurious under/overflow in x * y */
98*25c28e83SPiotr Jasiukajtis 
99*25c28e83SPiotr Jasiukajtis 	if ((hz | zz.i[1]) == 0)	/* z is zero */
100*25c28e83SPiotr Jasiukajtis 		/*
101*25c28e83SPiotr Jasiukajtis 		 * x * y isn't zero but could underflow to zero,
102*25c28e83SPiotr Jasiukajtis 		 * so don't add z, lest we perturb the sign
103*25c28e83SPiotr Jasiukajtis 		 */
104*25c28e83SPiotr Jasiukajtis 		return (x * y);
105*25c28e83SPiotr Jasiukajtis 
106*25c28e83SPiotr Jasiukajtis 	/*
107*25c28e83SPiotr Jasiukajtis 	 * now x, y, and z are all finite and nonzero; save the fsr and
108*25c28e83SPiotr Jasiukajtis 	 * set round-to-negative-infinity mode (and clear nonstandard
109*25c28e83SPiotr Jasiukajtis 	 * mode before we try to scale subnormal operands)
110*25c28e83SPiotr Jasiukajtis 	 */
111*25c28e83SPiotr Jasiukajtis 	__fenv_getfsr32(&fsr);
112*25c28e83SPiotr Jasiukajtis 	__fenv_setfsr32(&fsr_rm);
113*25c28e83SPiotr Jasiukajtis 
114*25c28e83SPiotr Jasiukajtis 	/* extract signs and exponents, and normalize subnormals */
115*25c28e83SPiotr Jasiukajtis 	sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
116*25c28e83SPiotr Jasiukajtis 	sz = zz.i[0] & 0x80000000;
117*25c28e83SPiotr Jasiukajtis 	ex = hx >> 20;
118*25c28e83SPiotr Jasiukajtis 	if (!ex) {
119*25c28e83SPiotr Jasiukajtis 		xx.d = x * two52;
120*25c28e83SPiotr Jasiukajtis 		ex = ((xx.i[0] & ~0x80000000) >> 20) - 52;
121*25c28e83SPiotr Jasiukajtis 	}
122*25c28e83SPiotr Jasiukajtis 	ey = hy >> 20;
123*25c28e83SPiotr Jasiukajtis 	if (!ey) {
124*25c28e83SPiotr Jasiukajtis 		yy.d = y * two52;
125*25c28e83SPiotr Jasiukajtis 		ey = ((yy.i[0] & ~0x80000000) >> 20) - 52;
126*25c28e83SPiotr Jasiukajtis 	}
127*25c28e83SPiotr Jasiukajtis 	ez = hz >> 20;
128*25c28e83SPiotr Jasiukajtis 	if (!ez) {
129*25c28e83SPiotr Jasiukajtis 		zz.d = z * two52;
130*25c28e83SPiotr Jasiukajtis 		ez = ((zz.i[0] & ~0x80000000) >> 20) - 52;
131*25c28e83SPiotr Jasiukajtis 	}
132*25c28e83SPiotr Jasiukajtis 
133*25c28e83SPiotr Jasiukajtis 	/* multiply x*y to 106 bits */
134*25c28e83SPiotr Jasiukajtis 	exy = ex + ey - 0x3ff;
135*25c28e83SPiotr Jasiukajtis 	xx.i[0] = (xx.i[0] & 0xfffff) | 0x3ff00000;
136*25c28e83SPiotr Jasiukajtis 	yy.i[0] = (yy.i[0] & 0xfffff) | 0x3ff00000;
137*25c28e83SPiotr Jasiukajtis 	x = xx.d;
138*25c28e83SPiotr Jasiukajtis 	y = yy.d;
139*25c28e83SPiotr Jasiukajtis 	xhi = ((x + twom26) + two27) - two27;
140*25c28e83SPiotr Jasiukajtis 	yhi = ((y + twom26) + two27) - two27;
141*25c28e83SPiotr Jasiukajtis 	xlo = x - xhi;
142*25c28e83SPiotr Jasiukajtis 	ylo = y - yhi;
143*25c28e83SPiotr Jasiukajtis 	x *= y;
144*25c28e83SPiotr Jasiukajtis 	y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
145*25c28e83SPiotr Jasiukajtis 	if (x >= two) {
146*25c28e83SPiotr Jasiukajtis 		x *= half;
147*25c28e83SPiotr Jasiukajtis 		y *= half;
148*25c28e83SPiotr Jasiukajtis 		exy++;
149*25c28e83SPiotr Jasiukajtis 	}
150*25c28e83SPiotr Jasiukajtis 
151*25c28e83SPiotr Jasiukajtis 	/* extract the significands */
152*25c28e83SPiotr Jasiukajtis 	xx.d = x;
153*25c28e83SPiotr Jasiukajtis 	xy0 = (xx.i[0] & 0xfffff) | 0x100000;
154*25c28e83SPiotr Jasiukajtis 	xy1 = xx.i[1];
155*25c28e83SPiotr Jasiukajtis 	yy.d = t = y + twom32;
156*25c28e83SPiotr Jasiukajtis 	xy2 = yy.i[1];
157*25c28e83SPiotr Jasiukajtis 	yy.d = (y - (t - twom32)) + twom64;
158*25c28e83SPiotr Jasiukajtis 	xy3 = yy.i[1];
159*25c28e83SPiotr Jasiukajtis 	z0 = (zz.i[0] & 0xfffff) | 0x100000;
160*25c28e83SPiotr Jasiukajtis 	z1 = zz.i[1];
161*25c28e83SPiotr Jasiukajtis 	z2 = z3 = 0;
162*25c28e83SPiotr Jasiukajtis 
163*25c28e83SPiotr Jasiukajtis 	/*
164*25c28e83SPiotr Jasiukajtis 	 * now x*y is represented by sxy, exy, and xy[0-3], and z is
165*25c28e83SPiotr Jasiukajtis 	 * represented likewise; swap if need be so |xy| <= |z|
166*25c28e83SPiotr Jasiukajtis 	 */
167*25c28e83SPiotr Jasiukajtis 	if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
168*25c28e83SPiotr Jasiukajtis 		(xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
169*25c28e83SPiotr Jasiukajtis 		e = sxy; sxy = sz; sz = e;
170*25c28e83SPiotr Jasiukajtis 		e = exy; exy = ez; ez = e;
171*25c28e83SPiotr Jasiukajtis 		e = xy0; xy0 = z0; z0 = e;
172*25c28e83SPiotr Jasiukajtis 		e = xy1; xy1 = z1; z1 = e;
173*25c28e83SPiotr Jasiukajtis 		z2 = xy2; xy2 = 0;
174*25c28e83SPiotr Jasiukajtis 		z3 = xy3; xy3 = 0;
175*25c28e83SPiotr Jasiukajtis 	}
176*25c28e83SPiotr Jasiukajtis 
177*25c28e83SPiotr Jasiukajtis 	/* shift the significand of xy keeping a sticky bit */
178*25c28e83SPiotr Jasiukajtis 	e = ez - exy;
179*25c28e83SPiotr Jasiukajtis 	if (e > 116) {
180*25c28e83SPiotr Jasiukajtis 		xy0 = xy1 = xy2 = 0;
181*25c28e83SPiotr Jasiukajtis 		xy3 = 1;
182*25c28e83SPiotr Jasiukajtis 	} else if (e >= 96) {
183*25c28e83SPiotr Jasiukajtis 		sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (127 - e));
184*25c28e83SPiotr Jasiukajtis 		xy3 = xy0 >> (e - 96);
185*25c28e83SPiotr Jasiukajtis 		if (sticky)
186*25c28e83SPiotr Jasiukajtis 			xy3 |= 1;
187*25c28e83SPiotr Jasiukajtis 		xy0 = xy1 = xy2 = 0;
188*25c28e83SPiotr Jasiukajtis 	} else if (e >= 64) {
189*25c28e83SPiotr Jasiukajtis 		sticky = xy3 | xy2 | ((xy1 << 1) << (95 - e));
190*25c28e83SPiotr Jasiukajtis 		xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
191*25c28e83SPiotr Jasiukajtis 		if (sticky)
192*25c28e83SPiotr Jasiukajtis 			xy3 |= 1;
193*25c28e83SPiotr Jasiukajtis 		xy2 = xy0 >> (e - 64);
194*25c28e83SPiotr Jasiukajtis 		xy0 = xy1 = 0;
195*25c28e83SPiotr Jasiukajtis 	} else if (e >= 32) {
196*25c28e83SPiotr Jasiukajtis 		sticky = xy3 | ((xy2 << 1) << (63 - e));
197*25c28e83SPiotr Jasiukajtis 		xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
198*25c28e83SPiotr Jasiukajtis 		if (sticky)
199*25c28e83SPiotr Jasiukajtis 			xy3 |= 1;
200*25c28e83SPiotr Jasiukajtis 		xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
201*25c28e83SPiotr Jasiukajtis 		xy1 = xy0 >> (e - 32);
202*25c28e83SPiotr Jasiukajtis 		xy0 = 0;
203*25c28e83SPiotr Jasiukajtis 	} else if (e) {
204*25c28e83SPiotr Jasiukajtis 		sticky = (xy3 << 1) << (31 - e);
205*25c28e83SPiotr Jasiukajtis 		xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
206*25c28e83SPiotr Jasiukajtis 		if (sticky)
207*25c28e83SPiotr Jasiukajtis 			xy3 |= 1;
208*25c28e83SPiotr Jasiukajtis 		xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
209*25c28e83SPiotr Jasiukajtis 		xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
210*25c28e83SPiotr Jasiukajtis 		xy0 >>= e;
211*25c28e83SPiotr Jasiukajtis 	}
212*25c28e83SPiotr Jasiukajtis 
213*25c28e83SPiotr Jasiukajtis 	/* if this is a magnitude subtract, negate the significand of xy */
214*25c28e83SPiotr Jasiukajtis 	if (sxy ^ sz) {
215*25c28e83SPiotr Jasiukajtis 		xy0 = ~xy0;
216*25c28e83SPiotr Jasiukajtis 		xy1 = ~xy1;
217*25c28e83SPiotr Jasiukajtis 		xy2 = ~xy2;
218*25c28e83SPiotr Jasiukajtis 		xy3 = -xy3;
219*25c28e83SPiotr Jasiukajtis 		if (xy3 == 0)
220*25c28e83SPiotr Jasiukajtis 			if (++xy2 == 0)
221*25c28e83SPiotr Jasiukajtis 				if (++xy1 == 0)
222*25c28e83SPiotr Jasiukajtis 					xy0++;
223*25c28e83SPiotr Jasiukajtis 	}
224*25c28e83SPiotr Jasiukajtis 
225*25c28e83SPiotr Jasiukajtis 	/* add, propagating carries */
226*25c28e83SPiotr Jasiukajtis 	z3 += xy3;
227*25c28e83SPiotr Jasiukajtis 	e = (z3 < xy3);
228*25c28e83SPiotr Jasiukajtis 	z2 += xy2;
229*25c28e83SPiotr Jasiukajtis 	if (e) {
230*25c28e83SPiotr Jasiukajtis 		z2++;
231*25c28e83SPiotr Jasiukajtis 		e = (z2 <= xy2);
232*25c28e83SPiotr Jasiukajtis 	} else
233*25c28e83SPiotr Jasiukajtis 		e = (z2 < xy2);
234*25c28e83SPiotr Jasiukajtis 	z1 += xy1;
235*25c28e83SPiotr Jasiukajtis 	if (e) {
236*25c28e83SPiotr Jasiukajtis 		z1++;
237*25c28e83SPiotr Jasiukajtis 		e = (z1 <= xy1);
238*25c28e83SPiotr Jasiukajtis 	} else
239*25c28e83SPiotr Jasiukajtis 		e = (z1 < xy1);
240*25c28e83SPiotr Jasiukajtis 	z0 += xy0;
241*25c28e83SPiotr Jasiukajtis 	if (e)
242*25c28e83SPiotr Jasiukajtis 		z0++;
243*25c28e83SPiotr Jasiukajtis 
244*25c28e83SPiotr Jasiukajtis 	/* postnormalize and collect rounding information into z2 */
245*25c28e83SPiotr Jasiukajtis 	if (ez < 1) {
246*25c28e83SPiotr Jasiukajtis 		/* result is tiny; shift right until exponent is within range */
247*25c28e83SPiotr Jasiukajtis 		e = 1 - ez;
248*25c28e83SPiotr Jasiukajtis 		if (e > 56) {
249*25c28e83SPiotr Jasiukajtis 			z2 = 1;	/* result can't be exactly zero */
250*25c28e83SPiotr Jasiukajtis 			z0 = z1 = 0;
251*25c28e83SPiotr Jasiukajtis 		} else if (e >= 32) {
252*25c28e83SPiotr Jasiukajtis 			sticky = z3 | z2 | ((z1 << 1) << (63 - e));
253*25c28e83SPiotr Jasiukajtis 			z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
254*25c28e83SPiotr Jasiukajtis 			if (sticky)
255*25c28e83SPiotr Jasiukajtis 				z2 |= 1;
256*25c28e83SPiotr Jasiukajtis 			z1 = z0 >> (e - 32);
257*25c28e83SPiotr Jasiukajtis 			z0 = 0;
258*25c28e83SPiotr Jasiukajtis 		} else {
259*25c28e83SPiotr Jasiukajtis 			sticky = z3 | (z2 << 1) << (31 - e);
260*25c28e83SPiotr Jasiukajtis 			z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
261*25c28e83SPiotr Jasiukajtis 			if (sticky)
262*25c28e83SPiotr Jasiukajtis 				z2 |= 1;
263*25c28e83SPiotr Jasiukajtis 			z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
264*25c28e83SPiotr Jasiukajtis 			z0 >>= e;
265*25c28e83SPiotr Jasiukajtis 		}
266*25c28e83SPiotr Jasiukajtis 		ez = 1;
267*25c28e83SPiotr Jasiukajtis 	} else if (z0 >= 0x200000) {
268*25c28e83SPiotr Jasiukajtis 		/* carry out; shift right by one */
269*25c28e83SPiotr Jasiukajtis 		sticky = (z2 & 1) | z3;
270*25c28e83SPiotr Jasiukajtis 		z2 = (z2 >> 1) | (z1 << 31);
271*25c28e83SPiotr Jasiukajtis 		if (sticky)
272*25c28e83SPiotr Jasiukajtis 			z2 |= 1;
273*25c28e83SPiotr Jasiukajtis 		z1 = (z1 >> 1) | (z0 << 31);
274*25c28e83SPiotr Jasiukajtis 		z0 >>= 1;
275*25c28e83SPiotr Jasiukajtis 		ez++;
276*25c28e83SPiotr Jasiukajtis 	} else {
277*25c28e83SPiotr Jasiukajtis 		if (z0 < 0x100000 && (z0 | z1 | z2 | z3) != 0) {
278*25c28e83SPiotr Jasiukajtis 			/*
279*25c28e83SPiotr Jasiukajtis 			 * borrow/cancellation; shift left as much as
280*25c28e83SPiotr Jasiukajtis 			 * exponent allows
281*25c28e83SPiotr Jasiukajtis 			 */
282*25c28e83SPiotr Jasiukajtis 			while (!(z0 | (z1 & 0xffe00000)) && ez >= 33) {
283*25c28e83SPiotr Jasiukajtis 				z0 = z1;
284*25c28e83SPiotr Jasiukajtis 				z1 = z2;
285*25c28e83SPiotr Jasiukajtis 				z2 = z3;
286*25c28e83SPiotr Jasiukajtis 				z3 = 0;
287*25c28e83SPiotr Jasiukajtis 				ez -= 32;
288*25c28e83SPiotr Jasiukajtis 			}
289*25c28e83SPiotr Jasiukajtis 			while (z0 < 0x100000 && ez > 1) {
290*25c28e83SPiotr Jasiukajtis 				z0 = (z0 << 1) | (z1 >> 31);
291*25c28e83SPiotr Jasiukajtis 				z1 = (z1 << 1) | (z2 >> 31);
292*25c28e83SPiotr Jasiukajtis 				z2 = (z2 << 1) | (z3 >> 31);
293*25c28e83SPiotr Jasiukajtis 				z3 <<= 1;
294*25c28e83SPiotr Jasiukajtis 				ez--;
295*25c28e83SPiotr Jasiukajtis 			}
296*25c28e83SPiotr Jasiukajtis 		}
297*25c28e83SPiotr Jasiukajtis 		if (z3)
298*25c28e83SPiotr Jasiukajtis 			z2 |= 1;
299*25c28e83SPiotr Jasiukajtis 	}
300*25c28e83SPiotr Jasiukajtis 
301*25c28e83SPiotr Jasiukajtis 	/* get the rounding mode and clear current exceptions */
302*25c28e83SPiotr Jasiukajtis 	rm = fsr >> 30;
303*25c28e83SPiotr Jasiukajtis 	fsr &= ~FSR_CEXC;
304*25c28e83SPiotr Jasiukajtis 
305*25c28e83SPiotr Jasiukajtis 	/* strip off the integer bit, if there is one */
306*25c28e83SPiotr Jasiukajtis 	ibit = z0 & 0x100000;
307*25c28e83SPiotr Jasiukajtis 	if (ibit)
308*25c28e83SPiotr Jasiukajtis 		z0 -= 0x100000;
309*25c28e83SPiotr Jasiukajtis 	else {
310*25c28e83SPiotr Jasiukajtis 		ez = 0;
311*25c28e83SPiotr Jasiukajtis 		if (!(z0 | z1 | z2)) { /* exact zero */
312*25c28e83SPiotr Jasiukajtis 			zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
313*25c28e83SPiotr Jasiukajtis 			zz.i[1] = 0;
314*25c28e83SPiotr Jasiukajtis 			__fenv_setfsr32(&fsr);
315*25c28e83SPiotr Jasiukajtis 			return (zz.d);
316*25c28e83SPiotr Jasiukajtis 		}
317*25c28e83SPiotr Jasiukajtis 	}
318*25c28e83SPiotr Jasiukajtis 
319*25c28e83SPiotr Jasiukajtis 	/*
320*25c28e83SPiotr Jasiukajtis 	 * flip the sense of directed roundings if the result is negative;
321*25c28e83SPiotr Jasiukajtis 	 * the logic below applies to a positive result
322*25c28e83SPiotr Jasiukajtis 	 */
323*25c28e83SPiotr Jasiukajtis 	if (sz)
324*25c28e83SPiotr Jasiukajtis 		rm ^= rm >> 1;
325*25c28e83SPiotr Jasiukajtis 
326*25c28e83SPiotr Jasiukajtis 	/* round and raise exceptions */
327*25c28e83SPiotr Jasiukajtis 	if (z2) {
328*25c28e83SPiotr Jasiukajtis 		fsr |= FSR_NXC;
329*25c28e83SPiotr Jasiukajtis 
330*25c28e83SPiotr Jasiukajtis 		/* decide whether to round the fraction up */
331*25c28e83SPiotr Jasiukajtis 		if (rm == FSR_RP || (rm == FSR_RN && (z2 > 0x80000000u ||
332*25c28e83SPiotr Jasiukajtis 			(z2 == 0x80000000u && (z1 & 1))))) {
333*25c28e83SPiotr Jasiukajtis 			/* round up and renormalize if necessary */
334*25c28e83SPiotr Jasiukajtis 			if (++z1 == 0) {
335*25c28e83SPiotr Jasiukajtis 				if (++z0 == 0x100000) {
336*25c28e83SPiotr Jasiukajtis 					z0 = 0;
337*25c28e83SPiotr Jasiukajtis 					ez++;
338*25c28e83SPiotr Jasiukajtis 				}
339*25c28e83SPiotr Jasiukajtis 			}
340*25c28e83SPiotr Jasiukajtis 		}
341*25c28e83SPiotr Jasiukajtis 	}
342*25c28e83SPiotr Jasiukajtis 
343*25c28e83SPiotr Jasiukajtis 	/* check for under/overflow */
344*25c28e83SPiotr Jasiukajtis 	if (ez >= 0x7ff) {
345*25c28e83SPiotr Jasiukajtis 		if (rm == FSR_RN || rm == FSR_RP) {
346*25c28e83SPiotr Jasiukajtis 			zz.i[0] = sz | 0x7ff00000;
347*25c28e83SPiotr Jasiukajtis 			zz.i[1] = 0;
348*25c28e83SPiotr Jasiukajtis 		} else {
349*25c28e83SPiotr Jasiukajtis 			zz.i[0] = sz | 0x7fefffff;
350*25c28e83SPiotr Jasiukajtis 			zz.i[1] = 0xffffffff;
351*25c28e83SPiotr Jasiukajtis 		}
352*25c28e83SPiotr Jasiukajtis 		fsr |= FSR_OFC | FSR_NXC;
353*25c28e83SPiotr Jasiukajtis 	} else {
354*25c28e83SPiotr Jasiukajtis 		zz.i[0] = sz | (ez << 20) | z0;
355*25c28e83SPiotr Jasiukajtis 		zz.i[1] = z1;
356*25c28e83SPiotr Jasiukajtis 
357*25c28e83SPiotr Jasiukajtis 		/*
358*25c28e83SPiotr Jasiukajtis 		 * !ibit => exact result was tiny before rounding,
359*25c28e83SPiotr Jasiukajtis 		 * z2 nonzero => result delivered is inexact
360*25c28e83SPiotr Jasiukajtis 		 */
361*25c28e83SPiotr Jasiukajtis 		if (!ibit) {
362*25c28e83SPiotr Jasiukajtis 			if (z2)
363*25c28e83SPiotr Jasiukajtis 				fsr |= FSR_UFC | FSR_NXC;
364*25c28e83SPiotr Jasiukajtis 			else if (fsr & FSR_UFM)
365*25c28e83SPiotr Jasiukajtis 				fsr |= FSR_UFC;
366*25c28e83SPiotr Jasiukajtis 		}
367*25c28e83SPiotr Jasiukajtis 	}
368*25c28e83SPiotr Jasiukajtis 
369*25c28e83SPiotr Jasiukajtis 	/* restore the fsr and emulate exceptions as needed */
370*25c28e83SPiotr Jasiukajtis 	if ((fsr & FSR_CEXC) & (fsr >> 23)) {
371*25c28e83SPiotr Jasiukajtis 		__fenv_setfsr32(&fsr);
372*25c28e83SPiotr Jasiukajtis 		if (fsr & FSR_OFC) {
373*25c28e83SPiotr Jasiukajtis 			dummy = huge;
374*25c28e83SPiotr Jasiukajtis 			dummy *= huge;
375*25c28e83SPiotr Jasiukajtis 		} else if (fsr & FSR_UFC) {
376*25c28e83SPiotr Jasiukajtis 			dummy = tiny;
377*25c28e83SPiotr Jasiukajtis 			if (fsr & FSR_NXC)
378*25c28e83SPiotr Jasiukajtis 				dummy *= tiny;
379*25c28e83SPiotr Jasiukajtis 			else
380*25c28e83SPiotr Jasiukajtis 				dummy -= tiny2;
381*25c28e83SPiotr Jasiukajtis 		} else {
382*25c28e83SPiotr Jasiukajtis 			dummy = huge;
383*25c28e83SPiotr Jasiukajtis 			dummy += tiny;
384*25c28e83SPiotr Jasiukajtis 		}
385*25c28e83SPiotr Jasiukajtis 	} else {
386*25c28e83SPiotr Jasiukajtis 		fsr |= (fsr & 0x1f) << 5;
387*25c28e83SPiotr Jasiukajtis 		__fenv_setfsr32(&fsr);
388*25c28e83SPiotr Jasiukajtis 	}
389*25c28e83SPiotr Jasiukajtis 	return (zz.d);
390*25c28e83SPiotr Jasiukajtis }
391*25c28e83SPiotr Jasiukajtis 
392*25c28e83SPiotr Jasiukajtis #elif defined(__x86)
393*25c28e83SPiotr Jasiukajtis 
394*25c28e83SPiotr Jasiukajtis #if defined(__amd64)
395*25c28e83SPiotr Jasiukajtis #define	NI	4
396*25c28e83SPiotr Jasiukajtis #else
397*25c28e83SPiotr Jasiukajtis #define	NI	3
398*25c28e83SPiotr Jasiukajtis #endif
399*25c28e83SPiotr Jasiukajtis 
400*25c28e83SPiotr Jasiukajtis /*
401*25c28e83SPiotr Jasiukajtis  *  fma for x86: 64-bit double precision, little-endian
402*25c28e83SPiotr Jasiukajtis  */
403*25c28e83SPiotr Jasiukajtis double
404*25c28e83SPiotr Jasiukajtis __fma(double x, double y, double z) {
405*25c28e83SPiotr Jasiukajtis 	union {
406*25c28e83SPiotr Jasiukajtis 		unsigned i[NI];
407*25c28e83SPiotr Jasiukajtis 		long double e;
408*25c28e83SPiotr Jasiukajtis 	} xx, yy, zz;
409*25c28e83SPiotr Jasiukajtis 	long double xe, ye, xhi, xlo, yhi, ylo;
410*25c28e83SPiotr Jasiukajtis 	int ex, ey, ez;
411*25c28e83SPiotr Jasiukajtis 	unsigned cwsw, oldcwsw, rm;
412*25c28e83SPiotr Jasiukajtis 
413*25c28e83SPiotr Jasiukajtis 	/* convert the operands to double extended */
414*25c28e83SPiotr Jasiukajtis 	xx.e = (long double) x;
415*25c28e83SPiotr Jasiukajtis 	yy.e = (long double) y;
416*25c28e83SPiotr Jasiukajtis 	zz.e = (long double) z;
417*25c28e83SPiotr Jasiukajtis 
418*25c28e83SPiotr Jasiukajtis 	/* extract the exponents of the arguments */
419*25c28e83SPiotr Jasiukajtis 	ex = xx.i[2] & 0x7fff;
420*25c28e83SPiotr Jasiukajtis 	ey = yy.i[2] & 0x7fff;
421*25c28e83SPiotr Jasiukajtis 	ez = zz.i[2] & 0x7fff;
422*25c28e83SPiotr Jasiukajtis 
423*25c28e83SPiotr Jasiukajtis 	/* dispense with inf, nan, and zero cases */
424*25c28e83SPiotr Jasiukajtis 	if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0)
425*25c28e83SPiotr Jasiukajtis 		/* x or y is inf, nan, or zero */
426*25c28e83SPiotr Jasiukajtis 		return ((double) (xx.e * yy.e + zz.e));
427*25c28e83SPiotr Jasiukajtis 
428*25c28e83SPiotr Jasiukajtis 	if (ez >= 0x7fff) /* z is inf or nan */
429*25c28e83SPiotr Jasiukajtis 		return ((double) (xx.e + zz.e));
430*25c28e83SPiotr Jasiukajtis 					/* avoid spurious inexact in x * y */
431*25c28e83SPiotr Jasiukajtis 
432*25c28e83SPiotr Jasiukajtis 	/*
433*25c28e83SPiotr Jasiukajtis 	 * save the control and status words, mask all exceptions, and
434*25c28e83SPiotr Jasiukajtis 	 * set rounding to 64-bit precision and to-nearest
435*25c28e83SPiotr Jasiukajtis 	 */
436*25c28e83SPiotr Jasiukajtis 	__fenv_getcwsw(&oldcwsw);
437*25c28e83SPiotr Jasiukajtis 	cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000;
438*25c28e83SPiotr Jasiukajtis 	__fenv_setcwsw(&cwsw);
439*25c28e83SPiotr Jasiukajtis 
440*25c28e83SPiotr Jasiukajtis 	/* multiply x*y to 106 bits */
441*25c28e83SPiotr Jasiukajtis 	xe = xx.e;
442*25c28e83SPiotr Jasiukajtis 	xx.i[0] = 0;
443*25c28e83SPiotr Jasiukajtis 	xhi = xx.e; /* hi 32 bits */
444*25c28e83SPiotr Jasiukajtis 	xlo = xe - xhi; /* lo 21 bits */
445*25c28e83SPiotr Jasiukajtis 	ye = yy.e;
446*25c28e83SPiotr Jasiukajtis 	yy.i[0] = 0;
447*25c28e83SPiotr Jasiukajtis 	yhi = yy.e;
448*25c28e83SPiotr Jasiukajtis 	ylo = ye - yhi;
449*25c28e83SPiotr Jasiukajtis 	xe = xe * ye;
450*25c28e83SPiotr Jasiukajtis 	ye = ((xhi * yhi - xe) + xhi * ylo + xlo * yhi) + xlo * ylo;
451*25c28e83SPiotr Jasiukajtis 
452*25c28e83SPiotr Jasiukajtis 	/* distill the sum of xe, ye, and z */
453*25c28e83SPiotr Jasiukajtis 	xhi = ye + zz.e;
454*25c28e83SPiotr Jasiukajtis 	yhi = xhi - ye;
455*25c28e83SPiotr Jasiukajtis 	xlo = (zz.e - yhi) + (ye - (xhi - yhi));
456*25c28e83SPiotr Jasiukajtis 						/* now (xhi,xlo) = ye + z */
457*25c28e83SPiotr Jasiukajtis 
458*25c28e83SPiotr Jasiukajtis 	yhi = xe + xhi;
459*25c28e83SPiotr Jasiukajtis 	ye = yhi - xe;
460*25c28e83SPiotr Jasiukajtis 	ylo = (xhi - ye) + (xe - (yhi - ye));	/* now (yhi,ylo) = xe + xhi */
461*25c28e83SPiotr Jasiukajtis 
462*25c28e83SPiotr Jasiukajtis 	xhi = xlo + ylo;
463*25c28e83SPiotr Jasiukajtis 	xe = xhi - xlo;
464*25c28e83SPiotr Jasiukajtis 	xlo = (ylo - xe) + (xlo - (xhi - xe));	/* now (xhi,xlo) = xlo + ylo */
465*25c28e83SPiotr Jasiukajtis 
466*25c28e83SPiotr Jasiukajtis 	yy.e = yhi + xhi;
467*25c28e83SPiotr Jasiukajtis 	ylo = (yhi - yy.e) + xhi;		/* now (yy.e,ylo) = xhi + yhi */
468*25c28e83SPiotr Jasiukajtis 
469*25c28e83SPiotr Jasiukajtis 	if (yy.i[1] != 0) {	/* yy.e is nonzero */
470*25c28e83SPiotr Jasiukajtis 		/* perturb yy.e if its least significant 10 bits are zero */
471*25c28e83SPiotr Jasiukajtis 		if (!(yy.i[0] & 0x3ff)) {
472*25c28e83SPiotr Jasiukajtis 			xx.e = ylo + xlo;
473*25c28e83SPiotr Jasiukajtis 			if (xx.i[1] != 0) {
474*25c28e83SPiotr Jasiukajtis 				xx.i[2] = (xx.i[2] & 0x8000) |
475*25c28e83SPiotr Jasiukajtis 					((yy.i[2] & 0x7fff) - 63);
476*25c28e83SPiotr Jasiukajtis 				xx.i[1] = 0x80000000;
477*25c28e83SPiotr Jasiukajtis 				xx.i[0] = 0;
478*25c28e83SPiotr Jasiukajtis 				yy.e += xx.e;
479*25c28e83SPiotr Jasiukajtis 			}
480*25c28e83SPiotr Jasiukajtis 		}
481*25c28e83SPiotr Jasiukajtis 	} else {
482*25c28e83SPiotr Jasiukajtis 		/* set sign of zero result according to rounding direction */
483*25c28e83SPiotr Jasiukajtis 		rm = oldcwsw & 0x0c000000;
484*25c28e83SPiotr Jasiukajtis 		yy.i[2] = ((rm == FCW_RM)? 0x8000 : 0);
485*25c28e83SPiotr Jasiukajtis 	}
486*25c28e83SPiotr Jasiukajtis 
487*25c28e83SPiotr Jasiukajtis 	/*
488*25c28e83SPiotr Jasiukajtis 	 * restore the control and status words and convert the result
489*25c28e83SPiotr Jasiukajtis 	 * to double
490*25c28e83SPiotr Jasiukajtis 	 */
491*25c28e83SPiotr Jasiukajtis 	__fenv_setcwsw(&oldcwsw);
492*25c28e83SPiotr Jasiukajtis 	return ((double) yy.e);
493*25c28e83SPiotr Jasiukajtis }
494*25c28e83SPiotr Jasiukajtis 
495*25c28e83SPiotr Jasiukajtis #else
496*25c28e83SPiotr Jasiukajtis #error Unknown architecture
497*25c28e83SPiotr Jasiukajtis #endif
498