1*25c28e83SPiotr Jasiukajtis /*
2*25c28e83SPiotr Jasiukajtis  * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis  *
4*25c28e83SPiotr Jasiukajtis  * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis  * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis  * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis  *
8*25c28e83SPiotr Jasiukajtis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis  * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis  * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis  * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis  *
13*25c28e83SPiotr Jasiukajtis  * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis  * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis  * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis  * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis  *
19*25c28e83SPiotr Jasiukajtis  * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis  */
21*25c28e83SPiotr Jasiukajtis 
22*25c28e83SPiotr Jasiukajtis /*
23*25c28e83SPiotr Jasiukajtis  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24*25c28e83SPiotr Jasiukajtis  */
25*25c28e83SPiotr Jasiukajtis /*
26*25c28e83SPiotr Jasiukajtis  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27*25c28e83SPiotr Jasiukajtis  * Use is subject to license terms.
28*25c28e83SPiotr Jasiukajtis  */
29*25c28e83SPiotr Jasiukajtis 
30*25c28e83SPiotr Jasiukajtis #include <sys/isa_defs.h>
31*25c28e83SPiotr Jasiukajtis #include "libm_inlines.h"
32*25c28e83SPiotr Jasiukajtis 
33*25c28e83SPiotr Jasiukajtis #ifdef _LITTLE_ENDIAN
34*25c28e83SPiotr Jasiukajtis #define HI(x)	*(1+(int*)x)
35*25c28e83SPiotr Jasiukajtis #define LO(x)	*(unsigned*)x
36*25c28e83SPiotr Jasiukajtis #else
37*25c28e83SPiotr Jasiukajtis #define HI(x)	*(int*)x
38*25c28e83SPiotr Jasiukajtis #define LO(x)	*(1+(unsigned*)x)
39*25c28e83SPiotr Jasiukajtis #endif
40*25c28e83SPiotr Jasiukajtis 
41*25c28e83SPiotr Jasiukajtis #ifdef __RESTRICT
42*25c28e83SPiotr Jasiukajtis #define restrict _Restrict
43*25c28e83SPiotr Jasiukajtis #else
44*25c28e83SPiotr Jasiukajtis #define restrict
45*25c28e83SPiotr Jasiukajtis #endif
46*25c28e83SPiotr Jasiukajtis 
47*25c28e83SPiotr Jasiukajtis /* double hypot(double x, double y)
48*25c28e83SPiotr Jasiukajtis  *
49*25c28e83SPiotr Jasiukajtis  * Method :
50*25c28e83SPiotr Jasiukajtis  *	1. Special cases:
51*25c28e83SPiotr Jasiukajtis  *		x or y is +Inf or -Inf				=> +Inf
52*25c28e83SPiotr Jasiukajtis  *		x or y is NaN					=> QNaN
53*25c28e83SPiotr Jasiukajtis  *	2. Computes hypot(x,y):
54*25c28e83SPiotr Jasiukajtis  *		hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm)
55*25c28e83SPiotr Jasiukajtis  *	Where:
56*25c28e83SPiotr Jasiukajtis  *		m = max(|x|,|y|)
57*25c28e83SPiotr Jasiukajtis  *		xnm = x * (1/m)
58*25c28e83SPiotr Jasiukajtis  *		ynm = y * (1/m)
59*25c28e83SPiotr Jasiukajtis  *
60*25c28e83SPiotr Jasiukajtis  *	Compute xnm * xnm + ynm * ynm by simulating
61*25c28e83SPiotr Jasiukajtis  *	muti-precision arithmetic.
62*25c28e83SPiotr Jasiukajtis  *
63*25c28e83SPiotr Jasiukajtis  * Accuracy:
64*25c28e83SPiotr Jasiukajtis  *	Maximum error observed: less than 0.872 ulp after 16.777.216.000
65*25c28e83SPiotr Jasiukajtis  *	results.
66*25c28e83SPiotr Jasiukajtis  */
67*25c28e83SPiotr Jasiukajtis 
68*25c28e83SPiotr Jasiukajtis extern double sqrt(double);
69*25c28e83SPiotr Jasiukajtis extern double fabs(double);
70*25c28e83SPiotr Jasiukajtis 
71*25c28e83SPiotr Jasiukajtis static const unsigned long long LCONST[] = {
72*25c28e83SPiotr Jasiukajtis 0x41b0000000000000ULL,	/* D2ON28 = 2 ** 28		*/
73*25c28e83SPiotr Jasiukajtis 0x0010000000000000ULL,	/* D2ONM1022 = 2 ** -1022	*/
74*25c28e83SPiotr Jasiukajtis 0x7fd0000000000000ULL	/* D2ONP1022 = 2 **  1022	*/
75*25c28e83SPiotr Jasiukajtis };
76*25c28e83SPiotr Jasiukajtis 
77*25c28e83SPiotr Jasiukajtis static void
78*25c28e83SPiotr Jasiukajtis __vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
79*25c28e83SPiotr Jasiukajtis 	int stridey, double * restrict pz, int stridez);
80*25c28e83SPiotr Jasiukajtis 
81*25c28e83SPiotr Jasiukajtis #pragma no_inline(__vhypot_n)
82*25c28e83SPiotr Jasiukajtis 
83*25c28e83SPiotr Jasiukajtis #define RETURN(ret)						\
84*25c28e83SPiotr Jasiukajtis {								\
85*25c28e83SPiotr Jasiukajtis 	*pz = (ret);						\
86*25c28e83SPiotr Jasiukajtis 	py += stridey;						\
87*25c28e83SPiotr Jasiukajtis 	pz += stridez;						\
88*25c28e83SPiotr Jasiukajtis 	if (n_n == 0)						\
89*25c28e83SPiotr Jasiukajtis 	{							\
90*25c28e83SPiotr Jasiukajtis 		hx0 = HI(px);					\
91*25c28e83SPiotr Jasiukajtis 		hy0 = HI(py);					\
92*25c28e83SPiotr Jasiukajtis 		spx = px; spy = py; spz = pz;			\
93*25c28e83SPiotr Jasiukajtis 		continue;					\
94*25c28e83SPiotr Jasiukajtis 	}							\
95*25c28e83SPiotr Jasiukajtis 	n--;							\
96*25c28e83SPiotr Jasiukajtis 	break;							\
97*25c28e83SPiotr Jasiukajtis }
98*25c28e83SPiotr Jasiukajtis 
99*25c28e83SPiotr Jasiukajtis void
__vhypot(int n,double * restrict px,int stridex,double * restrict py,int stridey,double * restrict pz,int stridez)100*25c28e83SPiotr Jasiukajtis __vhypot(int n, double * restrict px, int stridex, double * restrict py,
101*25c28e83SPiotr Jasiukajtis 	int stridey, double * restrict pz, int stridez)
102*25c28e83SPiotr Jasiukajtis {
103*25c28e83SPiotr Jasiukajtis 	int		hx0, hx1, hy0, j0, diff;
104*25c28e83SPiotr Jasiukajtis 	double		x_hi, x_lo, y_hi, y_lo;
105*25c28e83SPiotr Jasiukajtis 	double		scl = 0;
106*25c28e83SPiotr Jasiukajtis 	double		x, y, res;
107*25c28e83SPiotr Jasiukajtis 	double		*spx, *spy, *spz;
108*25c28e83SPiotr Jasiukajtis 	int		n_n;
109*25c28e83SPiotr Jasiukajtis 	double		D2ON28 = ((double*)LCONST)[0];		/* 2 ** 28	*/
110*25c28e83SPiotr Jasiukajtis 	double		D2ONM1022 = ((double*)LCONST)[1];	/* 2 **-1022	*/
111*25c28e83SPiotr Jasiukajtis 	double		D2ONP1022 = ((double*)LCONST)[2];	/* 2 ** 1022	*/
112*25c28e83SPiotr Jasiukajtis 
113*25c28e83SPiotr Jasiukajtis 	while (n > 1)
114*25c28e83SPiotr Jasiukajtis 	{
115*25c28e83SPiotr Jasiukajtis 		n_n = 0;
116*25c28e83SPiotr Jasiukajtis 		spx = px;
117*25c28e83SPiotr Jasiukajtis 		spy = py;
118*25c28e83SPiotr Jasiukajtis 		spz = pz;
119*25c28e83SPiotr Jasiukajtis 		hx0 = HI(px);
120*25c28e83SPiotr Jasiukajtis 		hy0 = HI(py);
121*25c28e83SPiotr Jasiukajtis 		for (; n > 1 ; n--)
122*25c28e83SPiotr Jasiukajtis 		{
123*25c28e83SPiotr Jasiukajtis 			px += stridex;
124*25c28e83SPiotr Jasiukajtis 			hx0 &= 0x7fffffff;
125*25c28e83SPiotr Jasiukajtis 			hy0 &= 0x7fffffff;
126*25c28e83SPiotr Jasiukajtis 
127*25c28e83SPiotr Jasiukajtis 			if (hx0 >= 0x7fe00000)	/* |X| >= 2**1023 or Inf or NaN */
128*25c28e83SPiotr Jasiukajtis 			{
129*25c28e83SPiotr Jasiukajtis 				diff = hy0 - hx0;
130*25c28e83SPiotr Jasiukajtis 				j0 = diff >> 31;
131*25c28e83SPiotr Jasiukajtis 				j0 = hy0 - (diff & j0);
132*25c28e83SPiotr Jasiukajtis 				j0 &= 0x7ff00000;
133*25c28e83SPiotr Jasiukajtis 				x = *(px - stridex);
134*25c28e83SPiotr Jasiukajtis 				y = *py;
135*25c28e83SPiotr Jasiukajtis 				x = fabs(x);
136*25c28e83SPiotr Jasiukajtis 				y = fabs(y);
137*25c28e83SPiotr Jasiukajtis 				if (j0 >= 0x7ff00000)	/* |X| or |Y| = Inf or NaN */
138*25c28e83SPiotr Jasiukajtis 				{
139*25c28e83SPiotr Jasiukajtis 					int lx = LO((px - stridex));
140*25c28e83SPiotr Jasiukajtis 					int ly = LO(py);
141*25c28e83SPiotr Jasiukajtis 					if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
142*25c28e83SPiotr Jasiukajtis 					else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
143*25c28e83SPiotr Jasiukajtis 					else res = x + y;
144*25c28e83SPiotr Jasiukajtis 					RETURN (res)
145*25c28e83SPiotr Jasiukajtis 				}
146*25c28e83SPiotr Jasiukajtis 				else
147*25c28e83SPiotr Jasiukajtis 				{
148*25c28e83SPiotr Jasiukajtis 					j0 = diff >> 31;
149*25c28e83SPiotr Jasiukajtis 					if (((diff ^ j0) - j0) < 0x03600000)	/* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
150*25c28e83SPiotr Jasiukajtis 					{
151*25c28e83SPiotr Jasiukajtis 						x *= D2ONM1022;
152*25c28e83SPiotr Jasiukajtis 						y *= D2ONM1022;
153*25c28e83SPiotr Jasiukajtis 
154*25c28e83SPiotr Jasiukajtis 						x_hi = (x + D2ON28) - D2ON28;
155*25c28e83SPiotr Jasiukajtis 						x_lo = x - x_hi;
156*25c28e83SPiotr Jasiukajtis 						y_hi = (y + D2ON28) - D2ON28;
157*25c28e83SPiotr Jasiukajtis 						y_lo = y - y_hi;
158*25c28e83SPiotr Jasiukajtis 						res = (x_hi * x_hi + y_hi * y_hi);
159*25c28e83SPiotr Jasiukajtis 						res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
160*25c28e83SPiotr Jasiukajtis 
161*25c28e83SPiotr Jasiukajtis 						res = sqrt (res);
162*25c28e83SPiotr Jasiukajtis 
163*25c28e83SPiotr Jasiukajtis 						res = D2ONP1022 * res;
164*25c28e83SPiotr Jasiukajtis 						RETURN (res)
165*25c28e83SPiotr Jasiukajtis 					}
166*25c28e83SPiotr Jasiukajtis 					else RETURN (x + y)
167*25c28e83SPiotr Jasiukajtis 				}
168*25c28e83SPiotr Jasiukajtis 			}
169*25c28e83SPiotr Jasiukajtis 			if (hy0 >= 0x7fe00000)	/* |Y| >= 2**1023 or Inf or NaN */
170*25c28e83SPiotr Jasiukajtis 			{
171*25c28e83SPiotr Jasiukajtis 				diff = hy0 - hx0;
172*25c28e83SPiotr Jasiukajtis 				j0 = diff >> 31;
173*25c28e83SPiotr Jasiukajtis 				j0 = hy0 - (diff & j0);
174*25c28e83SPiotr Jasiukajtis 				j0 &= 0x7ff00000;
175*25c28e83SPiotr Jasiukajtis 				x = *(px - stridex);
176*25c28e83SPiotr Jasiukajtis 				y = *py;
177*25c28e83SPiotr Jasiukajtis 				x = fabs(x);
178*25c28e83SPiotr Jasiukajtis 				y = fabs(y);
179*25c28e83SPiotr Jasiukajtis 				if (j0 >= 0x7ff00000)	/* |X| or |Y| = Inf or NaN */
180*25c28e83SPiotr Jasiukajtis 				{
181*25c28e83SPiotr Jasiukajtis 					int lx = LO((px - stridex));
182*25c28e83SPiotr Jasiukajtis 					int ly = LO(py);
183*25c28e83SPiotr Jasiukajtis 					if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
184*25c28e83SPiotr Jasiukajtis 					else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
185*25c28e83SPiotr Jasiukajtis 					else res = x + y;
186*25c28e83SPiotr Jasiukajtis 					RETURN (res)
187*25c28e83SPiotr Jasiukajtis 				}
188*25c28e83SPiotr Jasiukajtis 				else
189*25c28e83SPiotr Jasiukajtis 				{
190*25c28e83SPiotr Jasiukajtis 					j0 = diff >> 31;
191*25c28e83SPiotr Jasiukajtis 					if (((diff ^ j0) - j0) < 0x03600000)	/* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
192*25c28e83SPiotr Jasiukajtis 					{
193*25c28e83SPiotr Jasiukajtis 						x *= D2ONM1022;
194*25c28e83SPiotr Jasiukajtis 						y *= D2ONM1022;
195*25c28e83SPiotr Jasiukajtis 
196*25c28e83SPiotr Jasiukajtis 						x_hi = (x + D2ON28) - D2ON28;
197*25c28e83SPiotr Jasiukajtis 						x_lo = x - x_hi;
198*25c28e83SPiotr Jasiukajtis 						y_hi = (y + D2ON28) - D2ON28;
199*25c28e83SPiotr Jasiukajtis 						y_lo = y - y_hi;
200*25c28e83SPiotr Jasiukajtis 						res = (x_hi * x_hi + y_hi * y_hi);
201*25c28e83SPiotr Jasiukajtis 						res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
202*25c28e83SPiotr Jasiukajtis 
203*25c28e83SPiotr Jasiukajtis 						res = sqrt (res);
204*25c28e83SPiotr Jasiukajtis 
205*25c28e83SPiotr Jasiukajtis 						res = D2ONP1022 * res;
206*25c28e83SPiotr Jasiukajtis 						RETURN (res)
207*25c28e83SPiotr Jasiukajtis 					}
208*25c28e83SPiotr Jasiukajtis 					else RETURN (x + y)
209*25c28e83SPiotr Jasiukajtis 				}
210*25c28e83SPiotr Jasiukajtis 			}
211*25c28e83SPiotr Jasiukajtis 
212*25c28e83SPiotr Jasiukajtis 			hx1 = HI(px);
213*25c28e83SPiotr Jasiukajtis 
214*25c28e83SPiotr Jasiukajtis 			if (hx0 < 0x00100000 && hy0 < 0x00100000)	/* X and Y are subnormal */
215*25c28e83SPiotr Jasiukajtis 			{
216*25c28e83SPiotr Jasiukajtis 				x = *(px - stridex);
217*25c28e83SPiotr Jasiukajtis 				y = *py;
218*25c28e83SPiotr Jasiukajtis 
219*25c28e83SPiotr Jasiukajtis 				x *= D2ONP1022;
220*25c28e83SPiotr Jasiukajtis 				y *= D2ONP1022;
221*25c28e83SPiotr Jasiukajtis 
222*25c28e83SPiotr Jasiukajtis 				x_hi = (x + D2ON28) - D2ON28;
223*25c28e83SPiotr Jasiukajtis 				x_lo = x - x_hi;
224*25c28e83SPiotr Jasiukajtis 				y_hi = (y + D2ON28) - D2ON28;
225*25c28e83SPiotr Jasiukajtis 				y_lo = y - y_hi;
226*25c28e83SPiotr Jasiukajtis 				res = (x_hi * x_hi + y_hi * y_hi);
227*25c28e83SPiotr Jasiukajtis 				res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
228*25c28e83SPiotr Jasiukajtis 
229*25c28e83SPiotr Jasiukajtis 				res = sqrt(res);
230*25c28e83SPiotr Jasiukajtis 
231*25c28e83SPiotr Jasiukajtis 				res = D2ONM1022 * res;
232*25c28e83SPiotr Jasiukajtis 				RETURN (res)
233*25c28e83SPiotr Jasiukajtis 			}
234*25c28e83SPiotr Jasiukajtis 
235*25c28e83SPiotr Jasiukajtis 			hx0 = hx1;
236*25c28e83SPiotr Jasiukajtis 			py += stridey;
237*25c28e83SPiotr Jasiukajtis 			pz += stridez;
238*25c28e83SPiotr Jasiukajtis 			n_n++;
239*25c28e83SPiotr Jasiukajtis 			hy0 = HI(py);
240*25c28e83SPiotr Jasiukajtis 		}
241*25c28e83SPiotr Jasiukajtis 		if (n_n > 0)
242*25c28e83SPiotr Jasiukajtis 			__vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez);
243*25c28e83SPiotr Jasiukajtis 	}
244*25c28e83SPiotr Jasiukajtis 
245*25c28e83SPiotr Jasiukajtis 	if (n > 0)
246*25c28e83SPiotr Jasiukajtis 	{
247*25c28e83SPiotr Jasiukajtis 		x = *px;
248*25c28e83SPiotr Jasiukajtis 		y = *py;
249*25c28e83SPiotr Jasiukajtis 		hx0 = HI(px);
250*25c28e83SPiotr Jasiukajtis 		hy0 = HI(py);
251*25c28e83SPiotr Jasiukajtis 
252*25c28e83SPiotr Jasiukajtis 		hx0 &= 0x7fffffff;
253*25c28e83SPiotr Jasiukajtis 		hy0 &= 0x7fffffff;
254*25c28e83SPiotr Jasiukajtis 
255*25c28e83SPiotr Jasiukajtis 		diff = hy0 - hx0;
256*25c28e83SPiotr Jasiukajtis 		j0 = diff >> 31;
257*25c28e83SPiotr Jasiukajtis 		j0 = hy0 - (diff & j0);
258*25c28e83SPiotr Jasiukajtis 		j0 &= 0x7ff00000;
259*25c28e83SPiotr Jasiukajtis 
260*25c28e83SPiotr Jasiukajtis 		if (j0 >= 0x7fe00000)	/* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */
261*25c28e83SPiotr Jasiukajtis 		{
262*25c28e83SPiotr Jasiukajtis 			x = fabs(x);
263*25c28e83SPiotr Jasiukajtis 			y = fabs(y);
264*25c28e83SPiotr Jasiukajtis 			if (j0 >= 0x7ff00000)	/* |X| or |Y| = Inf or NaN */
265*25c28e83SPiotr Jasiukajtis 			{
266*25c28e83SPiotr Jasiukajtis 				int lx = LO(px);
267*25c28e83SPiotr Jasiukajtis 				int ly = LO(py);
268*25c28e83SPiotr Jasiukajtis 				if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
269*25c28e83SPiotr Jasiukajtis 				else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
270*25c28e83SPiotr Jasiukajtis 				else res = x + y;
271*25c28e83SPiotr Jasiukajtis 				*pz = res;
272*25c28e83SPiotr Jasiukajtis 				return;
273*25c28e83SPiotr Jasiukajtis 			}
274*25c28e83SPiotr Jasiukajtis 			else
275*25c28e83SPiotr Jasiukajtis 			{
276*25c28e83SPiotr Jasiukajtis 				j0 = diff >> 31;
277*25c28e83SPiotr Jasiukajtis 				if (((diff ^ j0) - j0) < 0x03600000)	/* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
278*25c28e83SPiotr Jasiukajtis 				{
279*25c28e83SPiotr Jasiukajtis 					x *= D2ONM1022;
280*25c28e83SPiotr Jasiukajtis 					y *= D2ONM1022;
281*25c28e83SPiotr Jasiukajtis 
282*25c28e83SPiotr Jasiukajtis 					x_hi = (x + D2ON28) - D2ON28;
283*25c28e83SPiotr Jasiukajtis 					x_lo = x - x_hi;
284*25c28e83SPiotr Jasiukajtis 					y_hi = (y + D2ON28) - D2ON28;
285*25c28e83SPiotr Jasiukajtis 					y_lo = y - y_hi;
286*25c28e83SPiotr Jasiukajtis 					res = (x_hi * x_hi + y_hi * y_hi);
287*25c28e83SPiotr Jasiukajtis 					res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
288*25c28e83SPiotr Jasiukajtis 
289*25c28e83SPiotr Jasiukajtis 					res = sqrt (res);
290*25c28e83SPiotr Jasiukajtis 
291*25c28e83SPiotr Jasiukajtis 					res = D2ONP1022 * res;
292*25c28e83SPiotr Jasiukajtis 					*pz = res;
293*25c28e83SPiotr Jasiukajtis 					return;
294*25c28e83SPiotr Jasiukajtis 				}
295*25c28e83SPiotr Jasiukajtis 				else
296*25c28e83SPiotr Jasiukajtis 				{
297*25c28e83SPiotr Jasiukajtis 					*pz = x + y;
298*25c28e83SPiotr Jasiukajtis 					return;
299*25c28e83SPiotr Jasiukajtis 				}
300*25c28e83SPiotr Jasiukajtis 			}
301*25c28e83SPiotr Jasiukajtis 		}
302*25c28e83SPiotr Jasiukajtis 
303*25c28e83SPiotr Jasiukajtis 		if (j0 < 0x00100000)	/* X and Y are subnormal */
304*25c28e83SPiotr Jasiukajtis 		{
305*25c28e83SPiotr Jasiukajtis 			x *= D2ONP1022;
306*25c28e83SPiotr Jasiukajtis 			y *= D2ONP1022;
307*25c28e83SPiotr Jasiukajtis 
308*25c28e83SPiotr Jasiukajtis 			x_hi = (x + D2ON28) - D2ON28;
309*25c28e83SPiotr Jasiukajtis 			x_lo = x - x_hi;
310*25c28e83SPiotr Jasiukajtis 			y_hi = (y + D2ON28) - D2ON28;
311*25c28e83SPiotr Jasiukajtis 			y_lo = y - y_hi;
312*25c28e83SPiotr Jasiukajtis 			res = (x_hi * x_hi + y_hi * y_hi);
313*25c28e83SPiotr Jasiukajtis 			res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
314*25c28e83SPiotr Jasiukajtis 
315*25c28e83SPiotr Jasiukajtis 			res = sqrt(res);
316*25c28e83SPiotr Jasiukajtis 
317*25c28e83SPiotr Jasiukajtis 			res = D2ONM1022 * res;
318*25c28e83SPiotr Jasiukajtis 			*pz = res;
319*25c28e83SPiotr Jasiukajtis 			return;
320*25c28e83SPiotr Jasiukajtis 		}
321*25c28e83SPiotr Jasiukajtis 
322*25c28e83SPiotr Jasiukajtis 		HI(&scl) = (0x7fe00000 - j0);
323*25c28e83SPiotr Jasiukajtis 
324*25c28e83SPiotr Jasiukajtis 		x *= scl;
325*25c28e83SPiotr Jasiukajtis 		y *= scl;
326*25c28e83SPiotr Jasiukajtis 
327*25c28e83SPiotr Jasiukajtis 		x_hi = (x + D2ON28) - D2ON28;
328*25c28e83SPiotr Jasiukajtis 		y_hi = (y + D2ON28) - D2ON28;
329*25c28e83SPiotr Jasiukajtis 		x_lo = x - x_hi;
330*25c28e83SPiotr Jasiukajtis 		y_lo = y - y_hi;
331*25c28e83SPiotr Jasiukajtis 
332*25c28e83SPiotr Jasiukajtis 		res = (x_hi * x_hi + y_hi * y_hi);
333*25c28e83SPiotr Jasiukajtis 		res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
334*25c28e83SPiotr Jasiukajtis 
335*25c28e83SPiotr Jasiukajtis 		res = sqrt(res);
336*25c28e83SPiotr Jasiukajtis 
337*25c28e83SPiotr Jasiukajtis 		HI(&scl) = j0;
338*25c28e83SPiotr Jasiukajtis 
339*25c28e83SPiotr Jasiukajtis 		res = scl * res;
340*25c28e83SPiotr Jasiukajtis 		*pz = res;
341*25c28e83SPiotr Jasiukajtis 	}
342*25c28e83SPiotr Jasiukajtis }
343*25c28e83SPiotr Jasiukajtis 
344*25c28e83SPiotr Jasiukajtis static void
__vhypot_n(int n,double * restrict px,int stridex,double * restrict py,int stridey,double * restrict pz,int stridez)345*25c28e83SPiotr Jasiukajtis __vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
346*25c28e83SPiotr Jasiukajtis 	int stridey, double * restrict pz, int stridez)
347*25c28e83SPiotr Jasiukajtis {
348*25c28e83SPiotr Jasiukajtis 	int		hx0, hy0, j0, diff0;
349*25c28e83SPiotr Jasiukajtis 	double		x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
350*25c28e83SPiotr Jasiukajtis 	double		x0, y0, res0;
351*25c28e83SPiotr Jasiukajtis 	double		D2ON28 = ((double*)LCONST)[0];		/* 2 ** 28	*/
352*25c28e83SPiotr Jasiukajtis 
353*25c28e83SPiotr Jasiukajtis 	for(; n > 0 ; n--)
354*25c28e83SPiotr Jasiukajtis 	{
355*25c28e83SPiotr Jasiukajtis 		x0 = *px;
356*25c28e83SPiotr Jasiukajtis 		y0 = *py;
357*25c28e83SPiotr Jasiukajtis 		hx0 = HI(px);
358*25c28e83SPiotr Jasiukajtis 		hy0 = HI(py);
359*25c28e83SPiotr Jasiukajtis 
360*25c28e83SPiotr Jasiukajtis 		hx0 &= 0x7fffffff;
361*25c28e83SPiotr Jasiukajtis 		hy0 &= 0x7fffffff;
362*25c28e83SPiotr Jasiukajtis 
363*25c28e83SPiotr Jasiukajtis 		diff0 = hy0 - hx0;
364*25c28e83SPiotr Jasiukajtis 		j0 = diff0 >> 31;
365*25c28e83SPiotr Jasiukajtis 		j0 = hy0 - (diff0 & j0);
366*25c28e83SPiotr Jasiukajtis 		j0 &= 0x7ff00000;
367*25c28e83SPiotr Jasiukajtis 
368*25c28e83SPiotr Jasiukajtis 		px += stridex;
369*25c28e83SPiotr Jasiukajtis 		py += stridey;
370*25c28e83SPiotr Jasiukajtis 
371*25c28e83SPiotr Jasiukajtis 		HI(&scl0) = (0x7fe00000 - j0);
372*25c28e83SPiotr Jasiukajtis 
373*25c28e83SPiotr Jasiukajtis 		x0 *= scl0;
374*25c28e83SPiotr Jasiukajtis 		y0 *= scl0;
375*25c28e83SPiotr Jasiukajtis 
376*25c28e83SPiotr Jasiukajtis 		x_hi0 = (x0 + D2ON28) - D2ON28;
377*25c28e83SPiotr Jasiukajtis 		y_hi0 = (y0 + D2ON28) - D2ON28;
378*25c28e83SPiotr Jasiukajtis 		x_lo0 = x0 - x_hi0;
379*25c28e83SPiotr Jasiukajtis 		y_lo0 = y0 - y_hi0;
380*25c28e83SPiotr Jasiukajtis 
381*25c28e83SPiotr Jasiukajtis 		res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
382*25c28e83SPiotr Jasiukajtis 		res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
383*25c28e83SPiotr Jasiukajtis 
384*25c28e83SPiotr Jasiukajtis 		res0 = sqrt(res0);
385*25c28e83SPiotr Jasiukajtis 
386*25c28e83SPiotr Jasiukajtis 		HI(&scl0) = j0;
387*25c28e83SPiotr Jasiukajtis 
388*25c28e83SPiotr Jasiukajtis 		res0 = scl0 * res0;
389*25c28e83SPiotr Jasiukajtis 		*pz = res0;
390*25c28e83SPiotr Jasiukajtis 
391*25c28e83SPiotr Jasiukajtis 		pz += stridez;
392*25c28e83SPiotr Jasiukajtis 	}
393*25c28e83SPiotr Jasiukajtis }
394