1*25c28e83SPiotr Jasiukajtis /*
2*25c28e83SPiotr Jasiukajtis  * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis  *
4*25c28e83SPiotr Jasiukajtis  * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis  * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis  * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis  *
8*25c28e83SPiotr Jasiukajtis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis  * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis  * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis  * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis  *
13*25c28e83SPiotr Jasiukajtis  * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis  * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis  * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis  * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis  *
19*25c28e83SPiotr Jasiukajtis  * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis  */
21*25c28e83SPiotr Jasiukajtis 
22*25c28e83SPiotr Jasiukajtis /*
23*25c28e83SPiotr Jasiukajtis  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24*25c28e83SPiotr Jasiukajtis  */
25*25c28e83SPiotr Jasiukajtis /*
26*25c28e83SPiotr Jasiukajtis  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27*25c28e83SPiotr Jasiukajtis  * Use is subject to license terms.
28*25c28e83SPiotr Jasiukajtis  */
29*25c28e83SPiotr Jasiukajtis 
30*25c28e83SPiotr Jasiukajtis #include <sys/isa_defs.h>
31*25c28e83SPiotr Jasiukajtis #include "libm_synonyms.h"
32*25c28e83SPiotr Jasiukajtis #include "libm_inlines.h"
33*25c28e83SPiotr Jasiukajtis 
34*25c28e83SPiotr Jasiukajtis #ifdef _LITTLE_ENDIAN
35*25c28e83SPiotr Jasiukajtis #define HI(x)	*(1+(int*)x)
36*25c28e83SPiotr Jasiukajtis #define LO(x)	*(unsigned*)x
37*25c28e83SPiotr Jasiukajtis #else
38*25c28e83SPiotr Jasiukajtis #define HI(x)	*(int*)x
39*25c28e83SPiotr Jasiukajtis #define LO(x)	*(1+(unsigned*)x)
40*25c28e83SPiotr Jasiukajtis #endif
41*25c28e83SPiotr Jasiukajtis 
42*25c28e83SPiotr Jasiukajtis #ifdef __RESTRICT
43*25c28e83SPiotr Jasiukajtis #define restrict _Restrict
44*25c28e83SPiotr Jasiukajtis #else
45*25c28e83SPiotr Jasiukajtis #define restrict
46*25c28e83SPiotr Jasiukajtis #endif
47*25c28e83SPiotr Jasiukajtis 
48*25c28e83SPiotr Jasiukajtis /* double rhypot(double x, double y)
49*25c28e83SPiotr Jasiukajtis  *
50*25c28e83SPiotr Jasiukajtis  * Method :
51*25c28e83SPiotr Jasiukajtis  *	1. Special cases:
52*25c28e83SPiotr Jasiukajtis  *		x or  y = Inf					=> 0
53*25c28e83SPiotr Jasiukajtis  *		x or  y = NaN					=> QNaN
54*25c28e83SPiotr Jasiukajtis  *		x and y = 0					=> Inf + divide-by-zero
55*25c28e83SPiotr Jasiukajtis  *	2. Computes rhypot(x,y):
56*25c28e83SPiotr Jasiukajtis  *		rhypot(x,y) = m * sqrt(1/(xnm * xnm + ynm * ynm))
57*25c28e83SPiotr Jasiukajtis  *	Where:
58*25c28e83SPiotr Jasiukajtis  *		m = 1/max(|x|,|y|)
59*25c28e83SPiotr Jasiukajtis  *		xnm = x * m
60*25c28e83SPiotr Jasiukajtis  *		ynm = y * m
61*25c28e83SPiotr Jasiukajtis  *
62*25c28e83SPiotr Jasiukajtis  *	Compute 1/(xnm * xnm + ynm * ynm) by simulating
63*25c28e83SPiotr Jasiukajtis  *	muti-precision arithmetic.
64*25c28e83SPiotr Jasiukajtis  *
65*25c28e83SPiotr Jasiukajtis  * Accuracy:
66*25c28e83SPiotr Jasiukajtis  *	Maximum error observed: less than 0.869 ulp after 1.000.000.000
67*25c28e83SPiotr Jasiukajtis  *	results.
68*25c28e83SPiotr Jasiukajtis  */
69*25c28e83SPiotr Jasiukajtis 
70*25c28e83SPiotr Jasiukajtis #define sqrt __sqrt
71*25c28e83SPiotr Jasiukajtis 
72*25c28e83SPiotr Jasiukajtis extern double sqrt(double);
73*25c28e83SPiotr Jasiukajtis 
74*25c28e83SPiotr Jasiukajtis extern double fabs(double);
75*25c28e83SPiotr Jasiukajtis 
76*25c28e83SPiotr Jasiukajtis static const int __vlibm_TBL_rhypot[] = {
77*25c28e83SPiotr Jasiukajtis /* i = [0,127]
78*25c28e83SPiotr Jasiukajtis  * TBL[i] = 0x3ff00000 + *(int*)&(1.0 / *(double*)&(0x3ff0000000000000ULL + (i << 45))); */
79*25c28e83SPiotr Jasiukajtis  0x7fe00000,  0x7fdfc07f, 0x7fdf81f8,  0x7fdf4465,
80*25c28e83SPiotr Jasiukajtis  0x7fdf07c1,  0x7fdecc07, 0x7fde9131,  0x7fde573a,
81*25c28e83SPiotr Jasiukajtis  0x7fde1e1e,  0x7fdde5d6, 0x7fddae60,  0x7fdd77b6,
82*25c28e83SPiotr Jasiukajtis  0x7fdd41d4,  0x7fdd0cb5, 0x7fdcd856,  0x7fdca4b3,
83*25c28e83SPiotr Jasiukajtis  0x7fdc71c7,  0x7fdc3f8f, 0x7fdc0e07,  0x7fdbdd2b,
84*25c28e83SPiotr Jasiukajtis  0x7fdbacf9,  0x7fdb7d6c, 0x7fdb4e81,  0x7fdb2036,
85*25c28e83SPiotr Jasiukajtis  0x7fdaf286,  0x7fdac570, 0x7fda98ef,  0x7fda6d01,
86*25c28e83SPiotr Jasiukajtis  0x7fda41a4,  0x7fda16d3, 0x7fd9ec8e,  0x7fd9c2d1,
87*25c28e83SPiotr Jasiukajtis  0x7fd99999,  0x7fd970e4, 0x7fd948b0,  0x7fd920fb,
88*25c28e83SPiotr Jasiukajtis  0x7fd8f9c1,  0x7fd8d301, 0x7fd8acb9,  0x7fd886e5,
89*25c28e83SPiotr Jasiukajtis  0x7fd86186,  0x7fd83c97, 0x7fd81818,  0x7fd7f405,
90*25c28e83SPiotr Jasiukajtis  0x7fd7d05f,  0x7fd7ad22, 0x7fd78a4c,  0x7fd767dc,
91*25c28e83SPiotr Jasiukajtis  0x7fd745d1,  0x7fd72428, 0x7fd702e0,  0x7fd6e1f7,
92*25c28e83SPiotr Jasiukajtis  0x7fd6c16c,  0x7fd6a13c, 0x7fd68168,  0x7fd661ec,
93*25c28e83SPiotr Jasiukajtis  0x7fd642c8,  0x7fd623fa, 0x7fd60581,  0x7fd5e75b,
94*25c28e83SPiotr Jasiukajtis  0x7fd5c988,  0x7fd5ac05, 0x7fd58ed2,  0x7fd571ed,
95*25c28e83SPiotr Jasiukajtis  0x7fd55555,  0x7fd53909, 0x7fd51d07,  0x7fd50150,
96*25c28e83SPiotr Jasiukajtis  0x7fd4e5e0,  0x7fd4cab8, 0x7fd4afd6,  0x7fd49539,
97*25c28e83SPiotr Jasiukajtis  0x7fd47ae1,  0x7fd460cb, 0x7fd446f8,  0x7fd42d66,
98*25c28e83SPiotr Jasiukajtis  0x7fd41414,  0x7fd3fb01, 0x7fd3e22c,  0x7fd3c995,
99*25c28e83SPiotr Jasiukajtis  0x7fd3b13b,  0x7fd3991c, 0x7fd38138,  0x7fd3698d,
100*25c28e83SPiotr Jasiukajtis  0x7fd3521c,  0x7fd33ae4, 0x7fd323e3,  0x7fd30d19,
101*25c28e83SPiotr Jasiukajtis  0x7fd2f684,  0x7fd2e025, 0x7fd2c9fb,  0x7fd2b404,
102*25c28e83SPiotr Jasiukajtis  0x7fd29e41,  0x7fd288b0, 0x7fd27350,  0x7fd25e22,
103*25c28e83SPiotr Jasiukajtis  0x7fd24924,  0x7fd23456, 0x7fd21fb7,  0x7fd20b47,
104*25c28e83SPiotr Jasiukajtis  0x7fd1f704,  0x7fd1e2ef, 0x7fd1cf06,  0x7fd1bb4a,
105*25c28e83SPiotr Jasiukajtis  0x7fd1a7b9,  0x7fd19453, 0x7fd18118,  0x7fd16e06,
106*25c28e83SPiotr Jasiukajtis  0x7fd15b1e,  0x7fd1485f, 0x7fd135c8,  0x7fd12358,
107*25c28e83SPiotr Jasiukajtis  0x7fd11111,  0x7fd0fef0, 0x7fd0ecf5,  0x7fd0db20,
108*25c28e83SPiotr Jasiukajtis  0x7fd0c971,  0x7fd0b7e6, 0x7fd0a681,  0x7fd0953f,
109*25c28e83SPiotr Jasiukajtis  0x7fd08421,  0x7fd07326, 0x7fd0624d,  0x7fd05197,
110*25c28e83SPiotr Jasiukajtis  0x7fd04104,  0x7fd03091, 0x7fd02040,  0x7fd01010,
111*25c28e83SPiotr Jasiukajtis };
112*25c28e83SPiotr Jasiukajtis 
113*25c28e83SPiotr Jasiukajtis static const unsigned long long LCONST[] = {
114*25c28e83SPiotr Jasiukajtis 0x3ff0000000000000ULL,	/* DONE = 1.0		*/
115*25c28e83SPiotr Jasiukajtis 0x4000000000000000ULL,	/* DTWO = 2.0		*/
116*25c28e83SPiotr Jasiukajtis 0x4230000000000000ULL,	/* D2ON36 = 2**36	*/
117*25c28e83SPiotr Jasiukajtis 0x7fd0000000000000ULL,	/* D2ON1022 = 2**1022	*/
118*25c28e83SPiotr Jasiukajtis 0x3cb0000000000000ULL,	/* D2ONM52 = 2**-52	*/
119*25c28e83SPiotr Jasiukajtis };
120*25c28e83SPiotr Jasiukajtis 
121*25c28e83SPiotr Jasiukajtis #define RET_SC(I)										\
122*25c28e83SPiotr Jasiukajtis 	px += stridex;										\
123*25c28e83SPiotr Jasiukajtis 	py += stridey;										\
124*25c28e83SPiotr Jasiukajtis 	pz += stridez;										\
125*25c28e83SPiotr Jasiukajtis 	if (--n <= 0)										\
126*25c28e83SPiotr Jasiukajtis 		break;										\
127*25c28e83SPiotr Jasiukajtis 	goto start##I;
128*25c28e83SPiotr Jasiukajtis 
129*25c28e83SPiotr Jasiukajtis #define RETURN(I, ret)										\
130*25c28e83SPiotr Jasiukajtis {												\
131*25c28e83SPiotr Jasiukajtis 	pz[0] = (ret);										\
132*25c28e83SPiotr Jasiukajtis 	RET_SC(I)										\
133*25c28e83SPiotr Jasiukajtis }
134*25c28e83SPiotr Jasiukajtis 
135*25c28e83SPiotr Jasiukajtis #define PREP(I)											\
136*25c28e83SPiotr Jasiukajtis hx##I = HI(px);										\
137*25c28e83SPiotr Jasiukajtis hy##I = HI(py);										\
138*25c28e83SPiotr Jasiukajtis hx##I &= 0x7fffffff;										\
139*25c28e83SPiotr Jasiukajtis hy##I &= 0x7fffffff;										\
140*25c28e83SPiotr Jasiukajtis pz##I = pz;											\
141*25c28e83SPiotr Jasiukajtis if (hx##I >= 0x7ff00000 || hy##I >= 0x7ff00000)	/* |X| or |Y| = Inf,NaN */		\
142*25c28e83SPiotr Jasiukajtis {												\
143*25c28e83SPiotr Jasiukajtis 	lx = LO(px);									\
144*25c28e83SPiotr Jasiukajtis 	ly = LO(py);									\
145*25c28e83SPiotr Jasiukajtis 	x = *px;										\
146*25c28e83SPiotr Jasiukajtis 	y = *py;										\
147*25c28e83SPiotr Jasiukajtis 	if (hx##I == 0x7ff00000 && lx == 0) res0 = 0.0;		/* |X| = Inf */		\
148*25c28e83SPiotr Jasiukajtis 	else if (hy##I == 0x7ff00000 && ly == 0) res0 = 0.0;	/* |Y| = Inf */		\
149*25c28e83SPiotr Jasiukajtis 	else res0 = fabs(x) + fabs(y);								\
150*25c28e83SPiotr Jasiukajtis 												\
151*25c28e83SPiotr Jasiukajtis 	RETURN (I, res0)									\
152*25c28e83SPiotr Jasiukajtis }												\
153*25c28e83SPiotr Jasiukajtis x##I = *px;											\
154*25c28e83SPiotr Jasiukajtis y##I = *py;											\
155*25c28e83SPiotr Jasiukajtis diff0 = hy##I - hx##I;										\
156*25c28e83SPiotr Jasiukajtis j0 = diff0 >> 31;										\
157*25c28e83SPiotr Jasiukajtis if (hx##I < 0x00100000 && hy##I < 0x00100000)	/* |X| and |Y| = subnormal or zero */		\
158*25c28e83SPiotr Jasiukajtis {												\
159*25c28e83SPiotr Jasiukajtis 	lx = LO(px);									\
160*25c28e83SPiotr Jasiukajtis 	ly = LO(py);									\
161*25c28e83SPiotr Jasiukajtis 	x = x##I;										\
162*25c28e83SPiotr Jasiukajtis 	y = y##I;										\
163*25c28e83SPiotr Jasiukajtis 												\
164*25c28e83SPiotr Jasiukajtis 	if ((hx##I | hy##I | lx | ly) == 0)	/* |X| and |Y| = 0 */				\
165*25c28e83SPiotr Jasiukajtis 		RETURN (I, DONE / 0.0)							\
166*25c28e83SPiotr Jasiukajtis 												\
167*25c28e83SPiotr Jasiukajtis 	x = fabs(x);										\
168*25c28e83SPiotr Jasiukajtis 	y = fabs(y);										\
169*25c28e83SPiotr Jasiukajtis 												\
170*25c28e83SPiotr Jasiukajtis 	x = *(long long*)&x;									\
171*25c28e83SPiotr Jasiukajtis 	y = *(long long*)&y;									\
172*25c28e83SPiotr Jasiukajtis 												\
173*25c28e83SPiotr Jasiukajtis 	x *= D2ONM52;										\
174*25c28e83SPiotr Jasiukajtis 	y *= D2ONM52;										\
175*25c28e83SPiotr Jasiukajtis 												\
176*25c28e83SPiotr Jasiukajtis 	x_hi0 = (x + D2ON36) - D2ON36;							\
177*25c28e83SPiotr Jasiukajtis 	y_hi0 = (y + D2ON36) - D2ON36;							\
178*25c28e83SPiotr Jasiukajtis 	x_lo0 = x - x_hi0;									\
179*25c28e83SPiotr Jasiukajtis 	y_lo0 = y - y_hi0;									\
180*25c28e83SPiotr Jasiukajtis 	res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0);						\
181*25c28e83SPiotr Jasiukajtis 	res0_lo = ((x + x_hi0) * x_lo0 + (y + y_hi0) * y_lo0);					\
182*25c28e83SPiotr Jasiukajtis 												\
183*25c28e83SPiotr Jasiukajtis 	dres0 = res0_hi + res0_lo;								\
184*25c28e83SPiotr Jasiukajtis 												\
185*25c28e83SPiotr Jasiukajtis 	iarr0 = HI(&dres0);								\
186*25c28e83SPiotr Jasiukajtis 	iexp0 = iarr0 & 0xfff00000;								\
187*25c28e83SPiotr Jasiukajtis 												\
188*25c28e83SPiotr Jasiukajtis 	iarr0 = (iarr0 >> 11) & 0x1fc;								\
189*25c28e83SPiotr Jasiukajtis 	itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0];					\
190*25c28e83SPiotr Jasiukajtis 	itbl0 -= iexp0;										\
191*25c28e83SPiotr Jasiukajtis 	HI(&dd0) = itbl0;						\
192*25c28e83SPiotr Jasiukajtis 	LO(&dd0) = 0;								\
193*25c28e83SPiotr Jasiukajtis 												\
194*25c28e83SPiotr Jasiukajtis 	dd0 = dd0 * (DTWO - dd0 * dres0);							\
195*25c28e83SPiotr Jasiukajtis 	dd0 = dd0 * (DTWO - dd0 * dres0);							\
196*25c28e83SPiotr Jasiukajtis 	dres0 = dd0 * (DTWO - dd0 * dres0);							\
197*25c28e83SPiotr Jasiukajtis 												\
198*25c28e83SPiotr Jasiukajtis 	HI(&res0) = HI(&dres0) & 0xffffff00;					\
199*25c28e83SPiotr Jasiukajtis 	LO(&res0) = 0;								\
200*25c28e83SPiotr Jasiukajtis 	res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0;				\
201*25c28e83SPiotr Jasiukajtis 	res0 = sqrt (res0);									\
202*25c28e83SPiotr Jasiukajtis 												\
203*25c28e83SPiotr Jasiukajtis 	res0 = D2ON1022 * res0;									\
204*25c28e83SPiotr Jasiukajtis 	RETURN (I, res0)									\
205*25c28e83SPiotr Jasiukajtis }												\
206*25c28e83SPiotr Jasiukajtis j0 = hy##I - (diff0 & j0);									\
207*25c28e83SPiotr Jasiukajtis j0 &= 0x7ff00000;										\
208*25c28e83SPiotr Jasiukajtis HI(&scl##I) = 0x7ff00000 - j0;
209*25c28e83SPiotr Jasiukajtis 
210*25c28e83SPiotr Jasiukajtis void
211*25c28e83SPiotr Jasiukajtis __vrhypot(int n, double * restrict px, int stridex, double * restrict py,
212*25c28e83SPiotr Jasiukajtis 	int stridey, double * restrict pz, int stridez)
213*25c28e83SPiotr Jasiukajtis {
214*25c28e83SPiotr Jasiukajtis 	int		i = 0;
215*25c28e83SPiotr Jasiukajtis 	double		x, y;
216*25c28e83SPiotr Jasiukajtis 	double		x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
217*25c28e83SPiotr Jasiukajtis 	double		x0, y0, res0, dd0;
218*25c28e83SPiotr Jasiukajtis 	double		res0_hi,res0_lo, dres0;
219*25c28e83SPiotr Jasiukajtis 	double		x_hi1, x_lo1, y_hi1, y_lo1, scl1 = 0;
220*25c28e83SPiotr Jasiukajtis 	double		x1 = 0.0L, y1 = 0.0L, res1, dd1;
221*25c28e83SPiotr Jasiukajtis 	double		res1_hi,res1_lo, dres1;
222*25c28e83SPiotr Jasiukajtis 	double		x_hi2, x_lo2, y_hi2, y_lo2, scl2 = 0;
223*25c28e83SPiotr Jasiukajtis 	double		x2, y2, res2, dd2;
224*25c28e83SPiotr Jasiukajtis 	double		res2_hi,res2_lo, dres2;
225*25c28e83SPiotr Jasiukajtis 
226*25c28e83SPiotr Jasiukajtis 	int		hx0, hy0, j0, diff0;
227*25c28e83SPiotr Jasiukajtis 	int		iarr0, iexp0, itbl0;
228*25c28e83SPiotr Jasiukajtis 	int		hx1, hy1;
229*25c28e83SPiotr Jasiukajtis 	int		iarr1, iexp1, itbl1;
230*25c28e83SPiotr Jasiukajtis 	int		hx2, hy2;
231*25c28e83SPiotr Jasiukajtis 	int		iarr2, iexp2, itbl2;
232*25c28e83SPiotr Jasiukajtis 
233*25c28e83SPiotr Jasiukajtis 	int		lx, ly;
234*25c28e83SPiotr Jasiukajtis 
235*25c28e83SPiotr Jasiukajtis 	double		DONE = ((double*)LCONST)[0];
236*25c28e83SPiotr Jasiukajtis 	double		DTWO = ((double*)LCONST)[1];
237*25c28e83SPiotr Jasiukajtis 	double		D2ON36 = ((double*)LCONST)[2];
238*25c28e83SPiotr Jasiukajtis 	double		D2ON1022 = ((double*)LCONST)[3];
239*25c28e83SPiotr Jasiukajtis 	double		D2ONM52 = ((double*)LCONST)[4];
240*25c28e83SPiotr Jasiukajtis 
241*25c28e83SPiotr Jasiukajtis 	double		*pz0, *pz1 = 0, *pz2;
242*25c28e83SPiotr Jasiukajtis 
243*25c28e83SPiotr Jasiukajtis 	do
244*25c28e83SPiotr Jasiukajtis 	{
245*25c28e83SPiotr Jasiukajtis start0:
246*25c28e83SPiotr Jasiukajtis 		PREP(0)
247*25c28e83SPiotr Jasiukajtis 		px += stridex;
248*25c28e83SPiotr Jasiukajtis 		py += stridey;
249*25c28e83SPiotr Jasiukajtis 		pz += stridez;
250*25c28e83SPiotr Jasiukajtis 		i = 1;
251*25c28e83SPiotr Jasiukajtis 		if (--n <= 0)
252*25c28e83SPiotr Jasiukajtis 			break;
253*25c28e83SPiotr Jasiukajtis 
254*25c28e83SPiotr Jasiukajtis start1:
255*25c28e83SPiotr Jasiukajtis 		PREP(1)
256*25c28e83SPiotr Jasiukajtis 		px += stridex;
257*25c28e83SPiotr Jasiukajtis 		py += stridey;
258*25c28e83SPiotr Jasiukajtis 		pz += stridez;
259*25c28e83SPiotr Jasiukajtis 		i = 2;
260*25c28e83SPiotr Jasiukajtis 		if (--n <= 0)
261*25c28e83SPiotr Jasiukajtis 			break;
262*25c28e83SPiotr Jasiukajtis 
263*25c28e83SPiotr Jasiukajtis start2:
264*25c28e83SPiotr Jasiukajtis 		PREP(2)
265*25c28e83SPiotr Jasiukajtis 
266*25c28e83SPiotr Jasiukajtis 		x0 *= scl0;
267*25c28e83SPiotr Jasiukajtis 		y0 *= scl0;
268*25c28e83SPiotr Jasiukajtis 		x1 *= scl1;
269*25c28e83SPiotr Jasiukajtis 		y1 *= scl1;
270*25c28e83SPiotr Jasiukajtis 		x2 *= scl2;
271*25c28e83SPiotr Jasiukajtis 		y2 *= scl2;
272*25c28e83SPiotr Jasiukajtis 
273*25c28e83SPiotr Jasiukajtis 		x_hi0 = (x0 + D2ON36) - D2ON36;
274*25c28e83SPiotr Jasiukajtis 		y_hi0 = (y0 + D2ON36) - D2ON36;
275*25c28e83SPiotr Jasiukajtis 		x_hi1 = (x1 + D2ON36) - D2ON36;
276*25c28e83SPiotr Jasiukajtis 		y_hi1 = (y1 + D2ON36) - D2ON36;
277*25c28e83SPiotr Jasiukajtis 		x_hi2 = (x2 + D2ON36) - D2ON36;
278*25c28e83SPiotr Jasiukajtis 		y_hi2 = (y2 + D2ON36) - D2ON36;
279*25c28e83SPiotr Jasiukajtis 		x_lo0 = x0 - x_hi0;
280*25c28e83SPiotr Jasiukajtis 		y_lo0 = y0 - y_hi0;
281*25c28e83SPiotr Jasiukajtis 		x_lo1 = x1 - x_hi1;
282*25c28e83SPiotr Jasiukajtis 		y_lo1 = y1 - y_hi1;
283*25c28e83SPiotr Jasiukajtis 		x_lo2 = x2 - x_hi2;
284*25c28e83SPiotr Jasiukajtis 		y_lo2 = y2 - y_hi2;
285*25c28e83SPiotr Jasiukajtis 		res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
286*25c28e83SPiotr Jasiukajtis 		res1_hi = (x_hi1 * x_hi1 + y_hi1 * y_hi1);
287*25c28e83SPiotr Jasiukajtis 		res2_hi = (x_hi2 * x_hi2 + y_hi2 * y_hi2);
288*25c28e83SPiotr Jasiukajtis 		res0_lo = ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
289*25c28e83SPiotr Jasiukajtis 		res1_lo = ((x1 + x_hi1) * x_lo1 + (y1 + y_hi1) * y_lo1);
290*25c28e83SPiotr Jasiukajtis 		res2_lo = ((x2 + x_hi2) * x_lo2 + (y2 + y_hi2) * y_lo2);
291*25c28e83SPiotr Jasiukajtis 
292*25c28e83SPiotr Jasiukajtis 		dres0 = res0_hi + res0_lo;
293*25c28e83SPiotr Jasiukajtis 		dres1 = res1_hi + res1_lo;
294*25c28e83SPiotr Jasiukajtis 		dres2 = res2_hi + res2_lo;
295*25c28e83SPiotr Jasiukajtis 
296*25c28e83SPiotr Jasiukajtis 		iarr0 = HI(&dres0);
297*25c28e83SPiotr Jasiukajtis 		iarr1 = HI(&dres1);
298*25c28e83SPiotr Jasiukajtis 		iarr2 = HI(&dres2);
299*25c28e83SPiotr Jasiukajtis 		iexp0 = iarr0 & 0xfff00000;
300*25c28e83SPiotr Jasiukajtis 		iexp1 = iarr1 & 0xfff00000;
301*25c28e83SPiotr Jasiukajtis 		iexp2 = iarr2 & 0xfff00000;
302*25c28e83SPiotr Jasiukajtis 
303*25c28e83SPiotr Jasiukajtis 		iarr0 = (iarr0 >> 11) & 0x1fc;
304*25c28e83SPiotr Jasiukajtis 		iarr1 = (iarr1 >> 11) & 0x1fc;
305*25c28e83SPiotr Jasiukajtis 		iarr2 = (iarr2 >> 11) & 0x1fc;
306*25c28e83SPiotr Jasiukajtis 		itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0];
307*25c28e83SPiotr Jasiukajtis 		itbl1 = ((int*)((char*)__vlibm_TBL_rhypot + iarr1))[0];
308*25c28e83SPiotr Jasiukajtis 		itbl2 = ((int*)((char*)__vlibm_TBL_rhypot + iarr2))[0];
309*25c28e83SPiotr Jasiukajtis 		itbl0 -= iexp0;
310*25c28e83SPiotr Jasiukajtis 		itbl1 -= iexp1;
311*25c28e83SPiotr Jasiukajtis 		itbl2 -= iexp2;
312*25c28e83SPiotr Jasiukajtis 		HI(&dd0) = itbl0;
313*25c28e83SPiotr Jasiukajtis 		HI(&dd1) = itbl1;
314*25c28e83SPiotr Jasiukajtis 		HI(&dd2) = itbl2;
315*25c28e83SPiotr Jasiukajtis 		LO(&dd0) = 0;
316*25c28e83SPiotr Jasiukajtis 		LO(&dd1) = 0;
317*25c28e83SPiotr Jasiukajtis 		LO(&dd2) = 0;
318*25c28e83SPiotr Jasiukajtis 
319*25c28e83SPiotr Jasiukajtis 		dd0 = dd0 * (DTWO - dd0 * dres0);
320*25c28e83SPiotr Jasiukajtis 		dd1 = dd1 * (DTWO - dd1 * dres1);
321*25c28e83SPiotr Jasiukajtis 		dd2 = dd2 * (DTWO - dd2 * dres2);
322*25c28e83SPiotr Jasiukajtis 		dd0 = dd0 * (DTWO - dd0 * dres0);
323*25c28e83SPiotr Jasiukajtis 		dd1 = dd1 * (DTWO - dd1 * dres1);
324*25c28e83SPiotr Jasiukajtis 		dd2 = dd2 * (DTWO - dd2 * dres2);
325*25c28e83SPiotr Jasiukajtis 		dres0 = dd0 * (DTWO - dd0 * dres0);
326*25c28e83SPiotr Jasiukajtis 		dres1 = dd1 * (DTWO - dd1 * dres1);
327*25c28e83SPiotr Jasiukajtis 		dres2 = dd2 * (DTWO - dd2 * dres2);
328*25c28e83SPiotr Jasiukajtis 
329*25c28e83SPiotr Jasiukajtis 		HI(&res0) = HI(&dres0) & 0xffffff00;
330*25c28e83SPiotr Jasiukajtis 		HI(&res1) = HI(&dres1) & 0xffffff00;
331*25c28e83SPiotr Jasiukajtis 		HI(&res2) = HI(&dres2) & 0xffffff00;
332*25c28e83SPiotr Jasiukajtis 		LO(&res0) = 0;
333*25c28e83SPiotr Jasiukajtis 		LO(&res1) = 0;
334*25c28e83SPiotr Jasiukajtis 		LO(&res2) = 0;
335*25c28e83SPiotr Jasiukajtis 		res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0;
336*25c28e83SPiotr Jasiukajtis 		res1 += (DONE - res1_hi * res1 - res1_lo * res1) * dres1;
337*25c28e83SPiotr Jasiukajtis 		res2 += (DONE - res2_hi * res2 - res2_lo * res2) * dres2;
338*25c28e83SPiotr Jasiukajtis 		res0 = sqrt (res0);
339*25c28e83SPiotr Jasiukajtis 		res1 = sqrt (res1);
340*25c28e83SPiotr Jasiukajtis 		res2 = sqrt (res2);
341*25c28e83SPiotr Jasiukajtis 
342*25c28e83SPiotr Jasiukajtis 		res0 = scl0 * res0;
343*25c28e83SPiotr Jasiukajtis 		res1 = scl1 * res1;
344*25c28e83SPiotr Jasiukajtis 		res2 = scl2 * res2;
345*25c28e83SPiotr Jasiukajtis 
346*25c28e83SPiotr Jasiukajtis 		*pz0 = res0;
347*25c28e83SPiotr Jasiukajtis 		*pz1 = res1;
348*25c28e83SPiotr Jasiukajtis 		*pz2 = res2;
349*25c28e83SPiotr Jasiukajtis 
350*25c28e83SPiotr Jasiukajtis 		px += stridex;
351*25c28e83SPiotr Jasiukajtis 		py += stridey;
352*25c28e83SPiotr Jasiukajtis 		pz += stridez;
353*25c28e83SPiotr Jasiukajtis 		i = 0;
354*25c28e83SPiotr Jasiukajtis 
355*25c28e83SPiotr Jasiukajtis 	} while (--n > 0);
356*25c28e83SPiotr Jasiukajtis 
357*25c28e83SPiotr Jasiukajtis 	if (i > 0)
358*25c28e83SPiotr Jasiukajtis 	{
359*25c28e83SPiotr Jasiukajtis 		x0 *= scl0;
360*25c28e83SPiotr Jasiukajtis 		y0 *= scl0;
361*25c28e83SPiotr Jasiukajtis 
362*25c28e83SPiotr Jasiukajtis 		x_hi0 = (x0 + D2ON36) - D2ON36;
363*25c28e83SPiotr Jasiukajtis 		y_hi0 = (y0 + D2ON36) - D2ON36;
364*25c28e83SPiotr Jasiukajtis 		x_lo0 = x0 - x_hi0;
365*25c28e83SPiotr Jasiukajtis 		y_lo0 = y0 - y_hi0;
366*25c28e83SPiotr Jasiukajtis 		res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
367*25c28e83SPiotr Jasiukajtis 		res0_lo = ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
368*25c28e83SPiotr Jasiukajtis 
369*25c28e83SPiotr Jasiukajtis 		dres0 = res0_hi + res0_lo;
370*25c28e83SPiotr Jasiukajtis 
371*25c28e83SPiotr Jasiukajtis 		iarr0 = HI(&dres0);
372*25c28e83SPiotr Jasiukajtis 		iexp0 = iarr0 & 0xfff00000;
373*25c28e83SPiotr Jasiukajtis 
374*25c28e83SPiotr Jasiukajtis 		iarr0 = (iarr0 >> 11) & 0x1fc;
375*25c28e83SPiotr Jasiukajtis 		itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0];
376*25c28e83SPiotr Jasiukajtis 		itbl0 -= iexp0;
377*25c28e83SPiotr Jasiukajtis 		HI(&dd0) = itbl0;
378*25c28e83SPiotr Jasiukajtis 		LO(&dd0) = 0;
379*25c28e83SPiotr Jasiukajtis 
380*25c28e83SPiotr Jasiukajtis 		dd0 = dd0 * (DTWO - dd0 * dres0);
381*25c28e83SPiotr Jasiukajtis 		dd0 = dd0 * (DTWO - dd0 * dres0);
382*25c28e83SPiotr Jasiukajtis 		dres0 = dd0 * (DTWO - dd0 * dres0);
383*25c28e83SPiotr Jasiukajtis 
384*25c28e83SPiotr Jasiukajtis 		HI(&res0) = HI(&dres0) & 0xffffff00;
385*25c28e83SPiotr Jasiukajtis 		LO(&res0) = 0;
386*25c28e83SPiotr Jasiukajtis 		res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0;
387*25c28e83SPiotr Jasiukajtis 		res0 = sqrt (res0);
388*25c28e83SPiotr Jasiukajtis 
389*25c28e83SPiotr Jasiukajtis 		res0 = scl0 * res0;
390*25c28e83SPiotr Jasiukajtis 
391*25c28e83SPiotr Jasiukajtis 		*pz0 = res0;
392*25c28e83SPiotr Jasiukajtis 
393*25c28e83SPiotr Jasiukajtis 		if (i > 1)
394*25c28e83SPiotr Jasiukajtis 		{
395*25c28e83SPiotr Jasiukajtis 			x1 *= scl1;
396*25c28e83SPiotr Jasiukajtis 			y1 *= scl1;
397*25c28e83SPiotr Jasiukajtis 
398*25c28e83SPiotr Jasiukajtis 			x_hi1 = (x1 + D2ON36) - D2ON36;
399*25c28e83SPiotr Jasiukajtis 			y_hi1 = (y1 + D2ON36) - D2ON36;
400*25c28e83SPiotr Jasiukajtis 			x_lo1 = x1 - x_hi1;
401*25c28e83SPiotr Jasiukajtis 			y_lo1 = y1 - y_hi1;
402*25c28e83SPiotr Jasiukajtis 			res1_hi = (x_hi1 * x_hi1 + y_hi1 * y_hi1);
403*25c28e83SPiotr Jasiukajtis 			res1_lo = ((x1 + x_hi1) * x_lo1 + (y1 + y_hi1) * y_lo1);
404*25c28e83SPiotr Jasiukajtis 
405*25c28e83SPiotr Jasiukajtis 			dres1 = res1_hi + res1_lo;
406*25c28e83SPiotr Jasiukajtis 
407*25c28e83SPiotr Jasiukajtis 			iarr1 = HI(&dres1);
408*25c28e83SPiotr Jasiukajtis 			iexp1 = iarr1 & 0xfff00000;
409*25c28e83SPiotr Jasiukajtis 
410*25c28e83SPiotr Jasiukajtis 			iarr1 = (iarr1 >> 11) & 0x1fc;
411*25c28e83SPiotr Jasiukajtis 			itbl1 = ((int*)((char*)__vlibm_TBL_rhypot + iarr1))[0];
412*25c28e83SPiotr Jasiukajtis 			itbl1 -= iexp1;
413*25c28e83SPiotr Jasiukajtis 			HI(&dd1) = itbl1;
414*25c28e83SPiotr Jasiukajtis 			LO(&dd1) = 0;
415*25c28e83SPiotr Jasiukajtis 
416*25c28e83SPiotr Jasiukajtis 			dd1 = dd1 * (DTWO - dd1 * dres1);
417*25c28e83SPiotr Jasiukajtis 			dd1 = dd1 * (DTWO - dd1 * dres1);
418*25c28e83SPiotr Jasiukajtis 			dres1 = dd1 * (DTWO - dd1 * dres1);
419*25c28e83SPiotr Jasiukajtis 
420*25c28e83SPiotr Jasiukajtis 			HI(&res1) = HI(&dres1) & 0xffffff00;
421*25c28e83SPiotr Jasiukajtis 			LO(&res1) = 0;
422*25c28e83SPiotr Jasiukajtis 			res1 += (DONE - res1_hi * res1 - res1_lo * res1) * dres1;
423*25c28e83SPiotr Jasiukajtis 			res1 = sqrt (res1);
424*25c28e83SPiotr Jasiukajtis 
425*25c28e83SPiotr Jasiukajtis 			res1 = scl1 * res1;
426*25c28e83SPiotr Jasiukajtis 
427*25c28e83SPiotr Jasiukajtis 			*pz1 = res1;
428*25c28e83SPiotr Jasiukajtis 		}
429*25c28e83SPiotr Jasiukajtis 	}
430*25c28e83SPiotr Jasiukajtis }
431*25c28e83SPiotr Jasiukajtis 
432