1*25c28e83SPiotr Jasiukajtis /* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis 22*25c28e83SPiotr Jasiukajtis /* 23*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24*25c28e83SPiotr Jasiukajtis */ 25*25c28e83SPiotr Jasiukajtis /* 26*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 28*25c28e83SPiotr Jasiukajtis */ 29*25c28e83SPiotr Jasiukajtis 30*25c28e83SPiotr Jasiukajtis #if defined(ELFOBJ) 31*25c28e83SPiotr Jasiukajtis #pragma weak fma = __fma 32*25c28e83SPiotr Jasiukajtis #endif 33*25c28e83SPiotr Jasiukajtis 34*25c28e83SPiotr Jasiukajtis #include "libm.h" 35*25c28e83SPiotr Jasiukajtis #include "fma.h" 36*25c28e83SPiotr Jasiukajtis #include "fenv_inlines.h" 37*25c28e83SPiotr Jasiukajtis 38*25c28e83SPiotr Jasiukajtis #if defined(__sparc) 39*25c28e83SPiotr Jasiukajtis 40*25c28e83SPiotr Jasiukajtis static const union { 41*25c28e83SPiotr Jasiukajtis unsigned i[2]; 42*25c28e83SPiotr Jasiukajtis double d; 43*25c28e83SPiotr Jasiukajtis } C[] = { 44*25c28e83SPiotr Jasiukajtis { 0x3fe00000u, 0 }, 45*25c28e83SPiotr Jasiukajtis { 0x40000000u, 0 }, 46*25c28e83SPiotr Jasiukajtis { 0x43300000u, 0 }, 47*25c28e83SPiotr Jasiukajtis { 0x41a00000u, 0 }, 48*25c28e83SPiotr Jasiukajtis { 0x3e500000u, 0 }, 49*25c28e83SPiotr Jasiukajtis { 0x3df00000u, 0 }, 50*25c28e83SPiotr Jasiukajtis { 0x3bf00000u, 0 }, 51*25c28e83SPiotr Jasiukajtis { 0x7fe00000u, 0 }, 52*25c28e83SPiotr Jasiukajtis { 0x00100000u, 0 }, 53*25c28e83SPiotr Jasiukajtis { 0x00100001u, 0 } 54*25c28e83SPiotr Jasiukajtis }; 55*25c28e83SPiotr Jasiukajtis 56*25c28e83SPiotr Jasiukajtis #define half C[0].d 57*25c28e83SPiotr Jasiukajtis #define two C[1].d 58*25c28e83SPiotr Jasiukajtis #define two52 C[2].d 59*25c28e83SPiotr Jasiukajtis #define two27 C[3].d 60*25c28e83SPiotr Jasiukajtis #define twom26 C[4].d 61*25c28e83SPiotr Jasiukajtis #define twom32 C[5].d 62*25c28e83SPiotr Jasiukajtis #define twom64 C[6].d 63*25c28e83SPiotr Jasiukajtis #define huge C[7].d 64*25c28e83SPiotr Jasiukajtis #define tiny C[8].d 65*25c28e83SPiotr Jasiukajtis #define tiny2 C[9].d 66*25c28e83SPiotr Jasiukajtis 67*25c28e83SPiotr Jasiukajtis static const unsigned int fsr_rm = 0xc0000000u; 68*25c28e83SPiotr Jasiukajtis 69*25c28e83SPiotr Jasiukajtis /* 70*25c28e83SPiotr Jasiukajtis * fma for SPARC: 64-bit double precision, big-endian 71*25c28e83SPiotr Jasiukajtis */ 72*25c28e83SPiotr Jasiukajtis double 73*25c28e83SPiotr Jasiukajtis __fma(double x, double y, double z) { 74*25c28e83SPiotr Jasiukajtis union { 75*25c28e83SPiotr Jasiukajtis unsigned i[2]; 76*25c28e83SPiotr Jasiukajtis double d; 77*25c28e83SPiotr Jasiukajtis } xx, yy, zz; 78*25c28e83SPiotr Jasiukajtis double xhi, yhi, xlo, ylo, t; 79*25c28e83SPiotr Jasiukajtis unsigned int xy0, xy1, xy2, xy3, z0, z1, z2, z3, fsr, rm, sticky; 80*25c28e83SPiotr Jasiukajtis int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit; 81*25c28e83SPiotr Jasiukajtis volatile double dummy; 82*25c28e83SPiotr Jasiukajtis 83*25c28e83SPiotr Jasiukajtis /* extract the high order words of the arguments */ 84*25c28e83SPiotr Jasiukajtis xx.d = x; 85*25c28e83SPiotr Jasiukajtis yy.d = y; 86*25c28e83SPiotr Jasiukajtis zz.d = z; 87*25c28e83SPiotr Jasiukajtis hx = xx.i[0] & ~0x80000000; 88*25c28e83SPiotr Jasiukajtis hy = yy.i[0] & ~0x80000000; 89*25c28e83SPiotr Jasiukajtis hz = zz.i[0] & ~0x80000000; 90*25c28e83SPiotr Jasiukajtis 91*25c28e83SPiotr Jasiukajtis /* dispense with inf, nan, and zero cases */ 92*25c28e83SPiotr Jasiukajtis if (hx >= 0x7ff00000 || hy >= 0x7ff00000 || (hx | xx.i[1]) == 0 || 93*25c28e83SPiotr Jasiukajtis (hy | yy.i[1]) == 0) /* x or y is inf, nan, or zero */ 94*25c28e83SPiotr Jasiukajtis return (x * y + z); 95*25c28e83SPiotr Jasiukajtis 96*25c28e83SPiotr Jasiukajtis if (hz >= 0x7ff00000) /* z is inf or nan */ 97*25c28e83SPiotr Jasiukajtis return (x + z); /* avoid spurious under/overflow in x * y */ 98*25c28e83SPiotr Jasiukajtis 99*25c28e83SPiotr Jasiukajtis if ((hz | zz.i[1]) == 0) /* z is zero */ 100*25c28e83SPiotr Jasiukajtis /* 101*25c28e83SPiotr Jasiukajtis * x * y isn't zero but could underflow to zero, 102*25c28e83SPiotr Jasiukajtis * so don't add z, lest we perturb the sign 103*25c28e83SPiotr Jasiukajtis */ 104*25c28e83SPiotr Jasiukajtis return (x * y); 105*25c28e83SPiotr Jasiukajtis 106*25c28e83SPiotr Jasiukajtis /* 107*25c28e83SPiotr Jasiukajtis * now x, y, and z are all finite and nonzero; save the fsr and 108*25c28e83SPiotr Jasiukajtis * set round-to-negative-infinity mode (and clear nonstandard 109*25c28e83SPiotr Jasiukajtis * mode before we try to scale subnormal operands) 110*25c28e83SPiotr Jasiukajtis */ 111*25c28e83SPiotr Jasiukajtis __fenv_getfsr32(&fsr); 112*25c28e83SPiotr Jasiukajtis __fenv_setfsr32(&fsr_rm); 113*25c28e83SPiotr Jasiukajtis 114*25c28e83SPiotr Jasiukajtis /* extract signs and exponents, and normalize subnormals */ 115*25c28e83SPiotr Jasiukajtis sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000; 116*25c28e83SPiotr Jasiukajtis sz = zz.i[0] & 0x80000000; 117*25c28e83SPiotr Jasiukajtis ex = hx >> 20; 118*25c28e83SPiotr Jasiukajtis if (!ex) { 119*25c28e83SPiotr Jasiukajtis xx.d = x * two52; 120*25c28e83SPiotr Jasiukajtis ex = ((xx.i[0] & ~0x80000000) >> 20) - 52; 121*25c28e83SPiotr Jasiukajtis } 122*25c28e83SPiotr Jasiukajtis ey = hy >> 20; 123*25c28e83SPiotr Jasiukajtis if (!ey) { 124*25c28e83SPiotr Jasiukajtis yy.d = y * two52; 125*25c28e83SPiotr Jasiukajtis ey = ((yy.i[0] & ~0x80000000) >> 20) - 52; 126*25c28e83SPiotr Jasiukajtis } 127*25c28e83SPiotr Jasiukajtis ez = hz >> 20; 128*25c28e83SPiotr Jasiukajtis if (!ez) { 129*25c28e83SPiotr Jasiukajtis zz.d = z * two52; 130*25c28e83SPiotr Jasiukajtis ez = ((zz.i[0] & ~0x80000000) >> 20) - 52; 131*25c28e83SPiotr Jasiukajtis } 132*25c28e83SPiotr Jasiukajtis 133*25c28e83SPiotr Jasiukajtis /* multiply x*y to 106 bits */ 134*25c28e83SPiotr Jasiukajtis exy = ex + ey - 0x3ff; 135*25c28e83SPiotr Jasiukajtis xx.i[0] = (xx.i[0] & 0xfffff) | 0x3ff00000; 136*25c28e83SPiotr Jasiukajtis yy.i[0] = (yy.i[0] & 0xfffff) | 0x3ff00000; 137*25c28e83SPiotr Jasiukajtis x = xx.d; 138*25c28e83SPiotr Jasiukajtis y = yy.d; 139*25c28e83SPiotr Jasiukajtis xhi = ((x + twom26) + two27) - two27; 140*25c28e83SPiotr Jasiukajtis yhi = ((y + twom26) + two27) - two27; 141*25c28e83SPiotr Jasiukajtis xlo = x - xhi; 142*25c28e83SPiotr Jasiukajtis ylo = y - yhi; 143*25c28e83SPiotr Jasiukajtis x *= y; 144*25c28e83SPiotr Jasiukajtis y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo; 145*25c28e83SPiotr Jasiukajtis if (x >= two) { 146*25c28e83SPiotr Jasiukajtis x *= half; 147*25c28e83SPiotr Jasiukajtis y *= half; 148*25c28e83SPiotr Jasiukajtis exy++; 149*25c28e83SPiotr Jasiukajtis } 150*25c28e83SPiotr Jasiukajtis 151*25c28e83SPiotr Jasiukajtis /* extract the significands */ 152*25c28e83SPiotr Jasiukajtis xx.d = x; 153*25c28e83SPiotr Jasiukajtis xy0 = (xx.i[0] & 0xfffff) | 0x100000; 154*25c28e83SPiotr Jasiukajtis xy1 = xx.i[1]; 155*25c28e83SPiotr Jasiukajtis yy.d = t = y + twom32; 156*25c28e83SPiotr Jasiukajtis xy2 = yy.i[1]; 157*25c28e83SPiotr Jasiukajtis yy.d = (y - (t - twom32)) + twom64; 158*25c28e83SPiotr Jasiukajtis xy3 = yy.i[1]; 159*25c28e83SPiotr Jasiukajtis z0 = (zz.i[0] & 0xfffff) | 0x100000; 160*25c28e83SPiotr Jasiukajtis z1 = zz.i[1]; 161*25c28e83SPiotr Jasiukajtis z2 = z3 = 0; 162*25c28e83SPiotr Jasiukajtis 163*25c28e83SPiotr Jasiukajtis /* 164*25c28e83SPiotr Jasiukajtis * now x*y is represented by sxy, exy, and xy[0-3], and z is 165*25c28e83SPiotr Jasiukajtis * represented likewise; swap if need be so |xy| <= |z| 166*25c28e83SPiotr Jasiukajtis */ 167*25c28e83SPiotr Jasiukajtis if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && 168*25c28e83SPiotr Jasiukajtis (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) { 169*25c28e83SPiotr Jasiukajtis e = sxy; sxy = sz; sz = e; 170*25c28e83SPiotr Jasiukajtis e = exy; exy = ez; ez = e; 171*25c28e83SPiotr Jasiukajtis e = xy0; xy0 = z0; z0 = e; 172*25c28e83SPiotr Jasiukajtis e = xy1; xy1 = z1; z1 = e; 173*25c28e83SPiotr Jasiukajtis z2 = xy2; xy2 = 0; 174*25c28e83SPiotr Jasiukajtis z3 = xy3; xy3 = 0; 175*25c28e83SPiotr Jasiukajtis } 176*25c28e83SPiotr Jasiukajtis 177*25c28e83SPiotr Jasiukajtis /* shift the significand of xy keeping a sticky bit */ 178*25c28e83SPiotr Jasiukajtis e = ez - exy; 179*25c28e83SPiotr Jasiukajtis if (e > 116) { 180*25c28e83SPiotr Jasiukajtis xy0 = xy1 = xy2 = 0; 181*25c28e83SPiotr Jasiukajtis xy3 = 1; 182*25c28e83SPiotr Jasiukajtis } else if (e >= 96) { 183*25c28e83SPiotr Jasiukajtis sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (127 - e)); 184*25c28e83SPiotr Jasiukajtis xy3 = xy0 >> (e - 96); 185*25c28e83SPiotr Jasiukajtis if (sticky) 186*25c28e83SPiotr Jasiukajtis xy3 |= 1; 187*25c28e83SPiotr Jasiukajtis xy0 = xy1 = xy2 = 0; 188*25c28e83SPiotr Jasiukajtis } else if (e >= 64) { 189*25c28e83SPiotr Jasiukajtis sticky = xy3 | xy2 | ((xy1 << 1) << (95 - e)); 190*25c28e83SPiotr Jasiukajtis xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e)); 191*25c28e83SPiotr Jasiukajtis if (sticky) 192*25c28e83SPiotr Jasiukajtis xy3 |= 1; 193*25c28e83SPiotr Jasiukajtis xy2 = xy0 >> (e - 64); 194*25c28e83SPiotr Jasiukajtis xy0 = xy1 = 0; 195*25c28e83SPiotr Jasiukajtis } else if (e >= 32) { 196*25c28e83SPiotr Jasiukajtis sticky = xy3 | ((xy2 << 1) << (63 - e)); 197*25c28e83SPiotr Jasiukajtis xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e)); 198*25c28e83SPiotr Jasiukajtis if (sticky) 199*25c28e83SPiotr Jasiukajtis xy3 |= 1; 200*25c28e83SPiotr Jasiukajtis xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e)); 201*25c28e83SPiotr Jasiukajtis xy1 = xy0 >> (e - 32); 202*25c28e83SPiotr Jasiukajtis xy0 = 0; 203*25c28e83SPiotr Jasiukajtis } else if (e) { 204*25c28e83SPiotr Jasiukajtis sticky = (xy3 << 1) << (31 - e); 205*25c28e83SPiotr Jasiukajtis xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e)); 206*25c28e83SPiotr Jasiukajtis if (sticky) 207*25c28e83SPiotr Jasiukajtis xy3 |= 1; 208*25c28e83SPiotr Jasiukajtis xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e)); 209*25c28e83SPiotr Jasiukajtis xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e)); 210*25c28e83SPiotr Jasiukajtis xy0 >>= e; 211*25c28e83SPiotr Jasiukajtis } 212*25c28e83SPiotr Jasiukajtis 213*25c28e83SPiotr Jasiukajtis /* if this is a magnitude subtract, negate the significand of xy */ 214*25c28e83SPiotr Jasiukajtis if (sxy ^ sz) { 215*25c28e83SPiotr Jasiukajtis xy0 = ~xy0; 216*25c28e83SPiotr Jasiukajtis xy1 = ~xy1; 217*25c28e83SPiotr Jasiukajtis xy2 = ~xy2; 218*25c28e83SPiotr Jasiukajtis xy3 = -xy3; 219*25c28e83SPiotr Jasiukajtis if (xy3 == 0) 220*25c28e83SPiotr Jasiukajtis if (++xy2 == 0) 221*25c28e83SPiotr Jasiukajtis if (++xy1 == 0) 222*25c28e83SPiotr Jasiukajtis xy0++; 223*25c28e83SPiotr Jasiukajtis } 224*25c28e83SPiotr Jasiukajtis 225*25c28e83SPiotr Jasiukajtis /* add, propagating carries */ 226*25c28e83SPiotr Jasiukajtis z3 += xy3; 227*25c28e83SPiotr Jasiukajtis e = (z3 < xy3); 228*25c28e83SPiotr Jasiukajtis z2 += xy2; 229*25c28e83SPiotr Jasiukajtis if (e) { 230*25c28e83SPiotr Jasiukajtis z2++; 231*25c28e83SPiotr Jasiukajtis e = (z2 <= xy2); 232*25c28e83SPiotr Jasiukajtis } else 233*25c28e83SPiotr Jasiukajtis e = (z2 < xy2); 234*25c28e83SPiotr Jasiukajtis z1 += xy1; 235*25c28e83SPiotr Jasiukajtis if (e) { 236*25c28e83SPiotr Jasiukajtis z1++; 237*25c28e83SPiotr Jasiukajtis e = (z1 <= xy1); 238*25c28e83SPiotr Jasiukajtis } else 239*25c28e83SPiotr Jasiukajtis e = (z1 < xy1); 240*25c28e83SPiotr Jasiukajtis z0 += xy0; 241*25c28e83SPiotr Jasiukajtis if (e) 242*25c28e83SPiotr Jasiukajtis z0++; 243*25c28e83SPiotr Jasiukajtis 244*25c28e83SPiotr Jasiukajtis /* postnormalize and collect rounding information into z2 */ 245*25c28e83SPiotr Jasiukajtis if (ez < 1) { 246*25c28e83SPiotr Jasiukajtis /* result is tiny; shift right until exponent is within range */ 247*25c28e83SPiotr Jasiukajtis e = 1 - ez; 248*25c28e83SPiotr Jasiukajtis if (e > 56) { 249*25c28e83SPiotr Jasiukajtis z2 = 1; /* result can't be exactly zero */ 250*25c28e83SPiotr Jasiukajtis z0 = z1 = 0; 251*25c28e83SPiotr Jasiukajtis } else if (e >= 32) { 252*25c28e83SPiotr Jasiukajtis sticky = z3 | z2 | ((z1 << 1) << (63 - e)); 253*25c28e83SPiotr Jasiukajtis z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e)); 254*25c28e83SPiotr Jasiukajtis if (sticky) 255*25c28e83SPiotr Jasiukajtis z2 |= 1; 256*25c28e83SPiotr Jasiukajtis z1 = z0 >> (e - 32); 257*25c28e83SPiotr Jasiukajtis z0 = 0; 258*25c28e83SPiotr Jasiukajtis } else { 259*25c28e83SPiotr Jasiukajtis sticky = z3 | (z2 << 1) << (31 - e); 260*25c28e83SPiotr Jasiukajtis z2 = (z2 >> e) | ((z1 << 1) << (31 - e)); 261*25c28e83SPiotr Jasiukajtis if (sticky) 262*25c28e83SPiotr Jasiukajtis z2 |= 1; 263*25c28e83SPiotr Jasiukajtis z1 = (z1 >> e) | ((z0 << 1) << (31 - e)); 264*25c28e83SPiotr Jasiukajtis z0 >>= e; 265*25c28e83SPiotr Jasiukajtis } 266*25c28e83SPiotr Jasiukajtis ez = 1; 267*25c28e83SPiotr Jasiukajtis } else if (z0 >= 0x200000) { 268*25c28e83SPiotr Jasiukajtis /* carry out; shift right by one */ 269*25c28e83SPiotr Jasiukajtis sticky = (z2 & 1) | z3; 270*25c28e83SPiotr Jasiukajtis z2 = (z2 >> 1) | (z1 << 31); 271*25c28e83SPiotr Jasiukajtis if (sticky) 272*25c28e83SPiotr Jasiukajtis z2 |= 1; 273*25c28e83SPiotr Jasiukajtis z1 = (z1 >> 1) | (z0 << 31); 274*25c28e83SPiotr Jasiukajtis z0 >>= 1; 275*25c28e83SPiotr Jasiukajtis ez++; 276*25c28e83SPiotr Jasiukajtis } else { 277*25c28e83SPiotr Jasiukajtis if (z0 < 0x100000 && (z0 | z1 | z2 | z3) != 0) { 278*25c28e83SPiotr Jasiukajtis /* 279*25c28e83SPiotr Jasiukajtis * borrow/cancellation; shift left as much as 280*25c28e83SPiotr Jasiukajtis * exponent allows 281*25c28e83SPiotr Jasiukajtis */ 282*25c28e83SPiotr Jasiukajtis while (!(z0 | (z1 & 0xffe00000)) && ez >= 33) { 283*25c28e83SPiotr Jasiukajtis z0 = z1; 284*25c28e83SPiotr Jasiukajtis z1 = z2; 285*25c28e83SPiotr Jasiukajtis z2 = z3; 286*25c28e83SPiotr Jasiukajtis z3 = 0; 287*25c28e83SPiotr Jasiukajtis ez -= 32; 288*25c28e83SPiotr Jasiukajtis } 289*25c28e83SPiotr Jasiukajtis while (z0 < 0x100000 && ez > 1) { 290*25c28e83SPiotr Jasiukajtis z0 = (z0 << 1) | (z1 >> 31); 291*25c28e83SPiotr Jasiukajtis z1 = (z1 << 1) | (z2 >> 31); 292*25c28e83SPiotr Jasiukajtis z2 = (z2 << 1) | (z3 >> 31); 293*25c28e83SPiotr Jasiukajtis z3 <<= 1; 294*25c28e83SPiotr Jasiukajtis ez--; 295*25c28e83SPiotr Jasiukajtis } 296*25c28e83SPiotr Jasiukajtis } 297*25c28e83SPiotr Jasiukajtis if (z3) 298*25c28e83SPiotr Jasiukajtis z2 |= 1; 299*25c28e83SPiotr Jasiukajtis } 300*25c28e83SPiotr Jasiukajtis 301*25c28e83SPiotr Jasiukajtis /* get the rounding mode and clear current exceptions */ 302*25c28e83SPiotr Jasiukajtis rm = fsr >> 30; 303*25c28e83SPiotr Jasiukajtis fsr &= ~FSR_CEXC; 304*25c28e83SPiotr Jasiukajtis 305*25c28e83SPiotr Jasiukajtis /* strip off the integer bit, if there is one */ 306*25c28e83SPiotr Jasiukajtis ibit = z0 & 0x100000; 307*25c28e83SPiotr Jasiukajtis if (ibit) 308*25c28e83SPiotr Jasiukajtis z0 -= 0x100000; 309*25c28e83SPiotr Jasiukajtis else { 310*25c28e83SPiotr Jasiukajtis ez = 0; 311*25c28e83SPiotr Jasiukajtis if (!(z0 | z1 | z2)) { /* exact zero */ 312*25c28e83SPiotr Jasiukajtis zz.i[0] = rm == FSR_RM ? 0x80000000 : 0; 313*25c28e83SPiotr Jasiukajtis zz.i[1] = 0; 314*25c28e83SPiotr Jasiukajtis __fenv_setfsr32(&fsr); 315*25c28e83SPiotr Jasiukajtis return (zz.d); 316*25c28e83SPiotr Jasiukajtis } 317*25c28e83SPiotr Jasiukajtis } 318*25c28e83SPiotr Jasiukajtis 319*25c28e83SPiotr Jasiukajtis /* 320*25c28e83SPiotr Jasiukajtis * flip the sense of directed roundings if the result is negative; 321*25c28e83SPiotr Jasiukajtis * the logic below applies to a positive result 322*25c28e83SPiotr Jasiukajtis */ 323*25c28e83SPiotr Jasiukajtis if (sz) 324*25c28e83SPiotr Jasiukajtis rm ^= rm >> 1; 325*25c28e83SPiotr Jasiukajtis 326*25c28e83SPiotr Jasiukajtis /* round and raise exceptions */ 327*25c28e83SPiotr Jasiukajtis if (z2) { 328*25c28e83SPiotr Jasiukajtis fsr |= FSR_NXC; 329*25c28e83SPiotr Jasiukajtis 330*25c28e83SPiotr Jasiukajtis /* decide whether to round the fraction up */ 331*25c28e83SPiotr Jasiukajtis if (rm == FSR_RP || (rm == FSR_RN && (z2 > 0x80000000u || 332*25c28e83SPiotr Jasiukajtis (z2 == 0x80000000u && (z1 & 1))))) { 333*25c28e83SPiotr Jasiukajtis /* round up and renormalize if necessary */ 334*25c28e83SPiotr Jasiukajtis if (++z1 == 0) { 335*25c28e83SPiotr Jasiukajtis if (++z0 == 0x100000) { 336*25c28e83SPiotr Jasiukajtis z0 = 0; 337*25c28e83SPiotr Jasiukajtis ez++; 338*25c28e83SPiotr Jasiukajtis } 339*25c28e83SPiotr Jasiukajtis } 340*25c28e83SPiotr Jasiukajtis } 341*25c28e83SPiotr Jasiukajtis } 342*25c28e83SPiotr Jasiukajtis 343*25c28e83SPiotr Jasiukajtis /* check for under/overflow */ 344*25c28e83SPiotr Jasiukajtis if (ez >= 0x7ff) { 345*25c28e83SPiotr Jasiukajtis if (rm == FSR_RN || rm == FSR_RP) { 346*25c28e83SPiotr Jasiukajtis zz.i[0] = sz | 0x7ff00000; 347*25c28e83SPiotr Jasiukajtis zz.i[1] = 0; 348*25c28e83SPiotr Jasiukajtis } else { 349*25c28e83SPiotr Jasiukajtis zz.i[0] = sz | 0x7fefffff; 350*25c28e83SPiotr Jasiukajtis zz.i[1] = 0xffffffff; 351*25c28e83SPiotr Jasiukajtis } 352*25c28e83SPiotr Jasiukajtis fsr |= FSR_OFC | FSR_NXC; 353*25c28e83SPiotr Jasiukajtis } else { 354*25c28e83SPiotr Jasiukajtis zz.i[0] = sz | (ez << 20) | z0; 355*25c28e83SPiotr Jasiukajtis zz.i[1] = z1; 356*25c28e83SPiotr Jasiukajtis 357*25c28e83SPiotr Jasiukajtis /* 358*25c28e83SPiotr Jasiukajtis * !ibit => exact result was tiny before rounding, 359*25c28e83SPiotr Jasiukajtis * z2 nonzero => result delivered is inexact 360*25c28e83SPiotr Jasiukajtis */ 361*25c28e83SPiotr Jasiukajtis if (!ibit) { 362*25c28e83SPiotr Jasiukajtis if (z2) 363*25c28e83SPiotr Jasiukajtis fsr |= FSR_UFC | FSR_NXC; 364*25c28e83SPiotr Jasiukajtis else if (fsr & FSR_UFM) 365*25c28e83SPiotr Jasiukajtis fsr |= FSR_UFC; 366*25c28e83SPiotr Jasiukajtis } 367*25c28e83SPiotr Jasiukajtis } 368*25c28e83SPiotr Jasiukajtis 369*25c28e83SPiotr Jasiukajtis /* restore the fsr and emulate exceptions as needed */ 370*25c28e83SPiotr Jasiukajtis if ((fsr & FSR_CEXC) & (fsr >> 23)) { 371*25c28e83SPiotr Jasiukajtis __fenv_setfsr32(&fsr); 372*25c28e83SPiotr Jasiukajtis if (fsr & FSR_OFC) { 373*25c28e83SPiotr Jasiukajtis dummy = huge; 374*25c28e83SPiotr Jasiukajtis dummy *= huge; 375*25c28e83SPiotr Jasiukajtis } else if (fsr & FSR_UFC) { 376*25c28e83SPiotr Jasiukajtis dummy = tiny; 377*25c28e83SPiotr Jasiukajtis if (fsr & FSR_NXC) 378*25c28e83SPiotr Jasiukajtis dummy *= tiny; 379*25c28e83SPiotr Jasiukajtis else 380*25c28e83SPiotr Jasiukajtis dummy -= tiny2; 381*25c28e83SPiotr Jasiukajtis } else { 382*25c28e83SPiotr Jasiukajtis dummy = huge; 383*25c28e83SPiotr Jasiukajtis dummy += tiny; 384*25c28e83SPiotr Jasiukajtis } 385*25c28e83SPiotr Jasiukajtis } else { 386*25c28e83SPiotr Jasiukajtis fsr |= (fsr & 0x1f) << 5; 387*25c28e83SPiotr Jasiukajtis __fenv_setfsr32(&fsr); 388*25c28e83SPiotr Jasiukajtis } 389*25c28e83SPiotr Jasiukajtis return (zz.d); 390*25c28e83SPiotr Jasiukajtis } 391*25c28e83SPiotr Jasiukajtis 392*25c28e83SPiotr Jasiukajtis #elif defined(__x86) 393*25c28e83SPiotr Jasiukajtis 394*25c28e83SPiotr Jasiukajtis #if defined(__amd64) 395*25c28e83SPiotr Jasiukajtis #define NI 4 396*25c28e83SPiotr Jasiukajtis #else 397*25c28e83SPiotr Jasiukajtis #define NI 3 398*25c28e83SPiotr Jasiukajtis #endif 399*25c28e83SPiotr Jasiukajtis 400*25c28e83SPiotr Jasiukajtis /* 401*25c28e83SPiotr Jasiukajtis * fma for x86: 64-bit double precision, little-endian 402*25c28e83SPiotr Jasiukajtis */ 403*25c28e83SPiotr Jasiukajtis double 404*25c28e83SPiotr Jasiukajtis __fma(double x, double y, double z) { 405*25c28e83SPiotr Jasiukajtis union { 406*25c28e83SPiotr Jasiukajtis unsigned i[NI]; 407*25c28e83SPiotr Jasiukajtis long double e; 408*25c28e83SPiotr Jasiukajtis } xx, yy, zz; 409*25c28e83SPiotr Jasiukajtis long double xe, ye, xhi, xlo, yhi, ylo; 410*25c28e83SPiotr Jasiukajtis int ex, ey, ez; 411*25c28e83SPiotr Jasiukajtis unsigned cwsw, oldcwsw, rm; 412*25c28e83SPiotr Jasiukajtis 413*25c28e83SPiotr Jasiukajtis /* convert the operands to double extended */ 414*25c28e83SPiotr Jasiukajtis xx.e = (long double) x; 415*25c28e83SPiotr Jasiukajtis yy.e = (long double) y; 416*25c28e83SPiotr Jasiukajtis zz.e = (long double) z; 417*25c28e83SPiotr Jasiukajtis 418*25c28e83SPiotr Jasiukajtis /* extract the exponents of the arguments */ 419*25c28e83SPiotr Jasiukajtis ex = xx.i[2] & 0x7fff; 420*25c28e83SPiotr Jasiukajtis ey = yy.i[2] & 0x7fff; 421*25c28e83SPiotr Jasiukajtis ez = zz.i[2] & 0x7fff; 422*25c28e83SPiotr Jasiukajtis 423*25c28e83SPiotr Jasiukajtis /* dispense with inf, nan, and zero cases */ 424*25c28e83SPiotr Jasiukajtis if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0) 425*25c28e83SPiotr Jasiukajtis /* x or y is inf, nan, or zero */ 426*25c28e83SPiotr Jasiukajtis return ((double) (xx.e * yy.e + zz.e)); 427*25c28e83SPiotr Jasiukajtis 428*25c28e83SPiotr Jasiukajtis if (ez >= 0x7fff) /* z is inf or nan */ 429*25c28e83SPiotr Jasiukajtis return ((double) (xx.e + zz.e)); 430*25c28e83SPiotr Jasiukajtis /* avoid spurious inexact in x * y */ 431*25c28e83SPiotr Jasiukajtis 432*25c28e83SPiotr Jasiukajtis /* 433*25c28e83SPiotr Jasiukajtis * save the control and status words, mask all exceptions, and 434*25c28e83SPiotr Jasiukajtis * set rounding to 64-bit precision and to-nearest 435*25c28e83SPiotr Jasiukajtis */ 436*25c28e83SPiotr Jasiukajtis __fenv_getcwsw(&oldcwsw); 437*25c28e83SPiotr Jasiukajtis cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000; 438*25c28e83SPiotr Jasiukajtis __fenv_setcwsw(&cwsw); 439*25c28e83SPiotr Jasiukajtis 440*25c28e83SPiotr Jasiukajtis /* multiply x*y to 106 bits */ 441*25c28e83SPiotr Jasiukajtis xe = xx.e; 442*25c28e83SPiotr Jasiukajtis xx.i[0] = 0; 443*25c28e83SPiotr Jasiukajtis xhi = xx.e; /* hi 32 bits */ 444*25c28e83SPiotr Jasiukajtis xlo = xe - xhi; /* lo 21 bits */ 445*25c28e83SPiotr Jasiukajtis ye = yy.e; 446*25c28e83SPiotr Jasiukajtis yy.i[0] = 0; 447*25c28e83SPiotr Jasiukajtis yhi = yy.e; 448*25c28e83SPiotr Jasiukajtis ylo = ye - yhi; 449*25c28e83SPiotr Jasiukajtis xe = xe * ye; 450*25c28e83SPiotr Jasiukajtis ye = ((xhi * yhi - xe) + xhi * ylo + xlo * yhi) + xlo * ylo; 451*25c28e83SPiotr Jasiukajtis 452*25c28e83SPiotr Jasiukajtis /* distill the sum of xe, ye, and z */ 453*25c28e83SPiotr Jasiukajtis xhi = ye + zz.e; 454*25c28e83SPiotr Jasiukajtis yhi = xhi - ye; 455*25c28e83SPiotr Jasiukajtis xlo = (zz.e - yhi) + (ye - (xhi - yhi)); 456*25c28e83SPiotr Jasiukajtis /* now (xhi,xlo) = ye + z */ 457*25c28e83SPiotr Jasiukajtis 458*25c28e83SPiotr Jasiukajtis yhi = xe + xhi; 459*25c28e83SPiotr Jasiukajtis ye = yhi - xe; 460*25c28e83SPiotr Jasiukajtis ylo = (xhi - ye) + (xe - (yhi - ye)); /* now (yhi,ylo) = xe + xhi */ 461*25c28e83SPiotr Jasiukajtis 462*25c28e83SPiotr Jasiukajtis xhi = xlo + ylo; 463*25c28e83SPiotr Jasiukajtis xe = xhi - xlo; 464*25c28e83SPiotr Jasiukajtis xlo = (ylo - xe) + (xlo - (xhi - xe)); /* now (xhi,xlo) = xlo + ylo */ 465*25c28e83SPiotr Jasiukajtis 466*25c28e83SPiotr Jasiukajtis yy.e = yhi + xhi; 467*25c28e83SPiotr Jasiukajtis ylo = (yhi - yy.e) + xhi; /* now (yy.e,ylo) = xhi + yhi */ 468*25c28e83SPiotr Jasiukajtis 469*25c28e83SPiotr Jasiukajtis if (yy.i[1] != 0) { /* yy.e is nonzero */ 470*25c28e83SPiotr Jasiukajtis /* perturb yy.e if its least significant 10 bits are zero */ 471*25c28e83SPiotr Jasiukajtis if (!(yy.i[0] & 0x3ff)) { 472*25c28e83SPiotr Jasiukajtis xx.e = ylo + xlo; 473*25c28e83SPiotr Jasiukajtis if (xx.i[1] != 0) { 474*25c28e83SPiotr Jasiukajtis xx.i[2] = (xx.i[2] & 0x8000) | 475*25c28e83SPiotr Jasiukajtis ((yy.i[2] & 0x7fff) - 63); 476*25c28e83SPiotr Jasiukajtis xx.i[1] = 0x80000000; 477*25c28e83SPiotr Jasiukajtis xx.i[0] = 0; 478*25c28e83SPiotr Jasiukajtis yy.e += xx.e; 479*25c28e83SPiotr Jasiukajtis } 480*25c28e83SPiotr Jasiukajtis } 481*25c28e83SPiotr Jasiukajtis } else { 482*25c28e83SPiotr Jasiukajtis /* set sign of zero result according to rounding direction */ 483*25c28e83SPiotr Jasiukajtis rm = oldcwsw & 0x0c000000; 484*25c28e83SPiotr Jasiukajtis yy.i[2] = ((rm == FCW_RM)? 0x8000 : 0); 485*25c28e83SPiotr Jasiukajtis } 486*25c28e83SPiotr Jasiukajtis 487*25c28e83SPiotr Jasiukajtis /* 488*25c28e83SPiotr Jasiukajtis * restore the control and status words and convert the result 489*25c28e83SPiotr Jasiukajtis * to double 490*25c28e83SPiotr Jasiukajtis */ 491*25c28e83SPiotr Jasiukajtis __fenv_setcwsw(&oldcwsw); 492*25c28e83SPiotr Jasiukajtis return ((double) yy.e); 493*25c28e83SPiotr Jasiukajtis } 494*25c28e83SPiotr Jasiukajtis 495*25c28e83SPiotr Jasiukajtis #else 496*25c28e83SPiotr Jasiukajtis #error Unknown architecture 497*25c28e83SPiotr Jasiukajtis #endif 498