/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include "libm_inlines.h" #ifdef _LITTLE_ENDIAN #define HI(x) *(1+(int*)x) #define LO(x) *(unsigned*)x #else #define HI(x) *(int*)x #define LO(x) *(1+(unsigned*)x) #endif #ifdef __RESTRICT #define restrict _Restrict #else #define restrict #endif /* double hypot(double x, double y) * * Method : * 1. Special cases: * x or y is +Inf or -Inf => +Inf * x or y is NaN => QNaN * 2. Computes hypot(x,y): * hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm) * Where: * m = max(|x|,|y|) * xnm = x * (1/m) * ynm = y * (1/m) * * Compute xnm * xnm + ynm * ynm by simulating * muti-precision arithmetic. * * Accuracy: * Maximum error observed: less than 0.872 ulp after 16.777.216.000 * results. */ extern double sqrt(double); extern double fabs(double); static const unsigned long long LCONST[] = { 0x41b0000000000000ULL, /* D2ON28 = 2 ** 28 */ 0x0010000000000000ULL, /* D2ONM1022 = 2 ** -1022 */ 0x7fd0000000000000ULL /* D2ONP1022 = 2 ** 1022 */ }; static void __vhypot_n(int n, double * restrict px, int stridex, double * restrict py, int stridey, double * restrict pz, int stridez); #pragma no_inline(__vhypot_n) #define RETURN(ret) \ { \ *pz = (ret); \ py += stridey; \ pz += stridez; \ if (n_n == 0) \ { \ hx0 = HI(px); \ hy0 = HI(py); \ spx = px; spy = py; spz = pz; \ continue; \ } \ n--; \ break; \ } void __vhypot(int n, double * restrict px, int stridex, double * restrict py, int stridey, double * restrict pz, int stridez) { int hx0, hx1, hy0, j0, diff; double x_hi, x_lo, y_hi, y_lo; double scl = 0; double x, y, res; double *spx, *spy, *spz; int n_n; double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */ double D2ONM1022 = ((double*)LCONST)[1]; /* 2 **-1022 */ double D2ONP1022 = ((double*)LCONST)[2]; /* 2 ** 1022 */ while (n > 1) { n_n = 0; spx = px; spy = py; spz = pz; hx0 = HI(px); hy0 = HI(py); for (; n > 1 ; n--) { px += stridex; hx0 &= 0x7fffffff; hy0 &= 0x7fffffff; if (hx0 >= 0x7fe00000) /* |X| >= 2**1023 or Inf or NaN */ { diff = hy0 - hx0; j0 = diff >> 31; j0 = hy0 - (diff & j0); j0 &= 0x7ff00000; x = *(px - stridex); y = *py; x = fabs(x); y = fabs(y); if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ { int lx = LO((px - stridex)); int ly = LO(py); if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; else res = x + y; RETURN (res) } else { j0 = diff >> 31; if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ { x *= D2ONM1022; y *= D2ONM1022; x_hi = (x + D2ON28) - D2ON28; x_lo = x - x_hi; y_hi = (y + D2ON28) - D2ON28; y_lo = y - y_hi; res = (x_hi * x_hi + y_hi * y_hi); res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); res = sqrt (res); res = D2ONP1022 * res; RETURN (res) } else RETURN (x + y) } } if (hy0 >= 0x7fe00000) /* |Y| >= 2**1023 or Inf or NaN */ { diff = hy0 - hx0; j0 = diff >> 31; j0 = hy0 - (diff & j0); j0 &= 0x7ff00000; x = *(px - stridex); y = *py; x = fabs(x); y = fabs(y); if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ { int lx = LO((px - stridex)); int ly = LO(py); if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; else res = x + y; RETURN (res) } else { j0 = diff >> 31; if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ { x *= D2ONM1022; y *= D2ONM1022; x_hi = (x + D2ON28) - D2ON28; x_lo = x - x_hi; y_hi = (y + D2ON28) - D2ON28; y_lo = y - y_hi; res = (x_hi * x_hi + y_hi * y_hi); res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); res = sqrt (res); res = D2ONP1022 * res; RETURN (res) } else RETURN (x + y) } } hx1 = HI(px); if (hx0 < 0x00100000 && hy0 < 0x00100000) /* X and Y are subnormal */ { x = *(px - stridex); y = *py; x *= D2ONP1022; y *= D2ONP1022; x_hi = (x + D2ON28) - D2ON28; x_lo = x - x_hi; y_hi = (y + D2ON28) - D2ON28; y_lo = y - y_hi; res = (x_hi * x_hi + y_hi * y_hi); res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); res = sqrt(res); res = D2ONM1022 * res; RETURN (res) } hx0 = hx1; py += stridey; pz += stridez; n_n++; hy0 = HI(py); } if (n_n > 0) __vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez); } if (n > 0) { x = *px; y = *py; hx0 = HI(px); hy0 = HI(py); hx0 &= 0x7fffffff; hy0 &= 0x7fffffff; diff = hy0 - hx0; j0 = diff >> 31; j0 = hy0 - (diff & j0); j0 &= 0x7ff00000; if (j0 >= 0x7fe00000) /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */ { x = fabs(x); y = fabs(y); if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ { int lx = LO(px); int ly = LO(py); if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; else res = x + y; *pz = res; return; } else { j0 = diff >> 31; if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ { x *= D2ONM1022; y *= D2ONM1022; x_hi = (x + D2ON28) - D2ON28; x_lo = x - x_hi; y_hi = (y + D2ON28) - D2ON28; y_lo = y - y_hi; res = (x_hi * x_hi + y_hi * y_hi); res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); res = sqrt (res); res = D2ONP1022 * res; *pz = res; return; } else { *pz = x + y; return; } } } if (j0 < 0x00100000) /* X and Y are subnormal */ { x *= D2ONP1022; y *= D2ONP1022; x_hi = (x + D2ON28) - D2ON28; x_lo = x - x_hi; y_hi = (y + D2ON28) - D2ON28; y_lo = y - y_hi; res = (x_hi * x_hi + y_hi * y_hi); res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); res = sqrt(res); res = D2ONM1022 * res; *pz = res; return; } HI(&scl) = (0x7fe00000 - j0); x *= scl; y *= scl; x_hi = (x + D2ON28) - D2ON28; y_hi = (y + D2ON28) - D2ON28; x_lo = x - x_hi; y_lo = y - y_hi; res = (x_hi * x_hi + y_hi * y_hi); res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); res = sqrt(res); HI(&scl) = j0; res = scl * res; *pz = res; } } static void __vhypot_n(int n, double * restrict px, int stridex, double * restrict py, int stridey, double * restrict pz, int stridez) { int hx0, hy0, j0, diff0; double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0; double x0, y0, res0; double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */ for(; n > 0 ; n--) { x0 = *px; y0 = *py; hx0 = HI(px); hy0 = HI(py); hx0 &= 0x7fffffff; hy0 &= 0x7fffffff; diff0 = hy0 - hx0; j0 = diff0 >> 31; j0 = hy0 - (diff0 & j0); j0 &= 0x7ff00000; px += stridex; py += stridey; HI(&scl0) = (0x7fe00000 - j0); x0 *= scl0; y0 *= scl0; x_hi0 = (x0 + D2ON28) - D2ON28; y_hi0 = (y0 + D2ON28) - D2ON28; x_lo0 = x0 - x_hi0; y_lo0 = y0 - y_hi0; res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0); res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); res0 = sqrt(res0); HI(&scl0) = j0; res0 = scl0 * res0; *pz = res0; pz += stridez; } }