1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24 */
25/*
26 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27 * Use is subject to license terms.
28 */
29
30#include <sys/isa_defs.h>
31#include "libm_inlines.h"
32
33#ifdef _LITTLE_ENDIAN
34#define HI(x)	*(1+(int*)x)
35#define LO(x)	*(unsigned*)x
36#else
37#define HI(x)	*(int*)x
38#define LO(x)	*(1+(unsigned*)x)
39#endif
40
41#ifdef __RESTRICT
42#define restrict _Restrict
43#else
44#define restrict
45#endif
46
47/* double hypot(double x, double y)
48 *
49 * Method :
50 *	1. Special cases:
51 *		x or y is +Inf or -Inf				=> +Inf
52 *		x or y is NaN					=> QNaN
53 *	2. Computes hypot(x,y):
54 *		hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm)
55 *	Where:
56 *		m = max(|x|,|y|)
57 *		xnm = x * (1/m)
58 *		ynm = y * (1/m)
59 *
60 *	Compute xnm * xnm + ynm * ynm by simulating
61 *	muti-precision arithmetic.
62 *
63 * Accuracy:
64 *	Maximum error observed: less than 0.872 ulp after 16.777.216.000
65 *	results.
66 */
67
68extern double sqrt(double);
69extern double fabs(double);
70
71static const unsigned long long LCONST[] = {
720x41b0000000000000ULL,	/* D2ON28 = 2 ** 28		*/
730x0010000000000000ULL,	/* D2ONM1022 = 2 ** -1022	*/
740x7fd0000000000000ULL	/* D2ONP1022 = 2 **  1022	*/
75};
76
77static void
78__vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
79	int stridey, double * restrict pz, int stridez);
80
81#pragma no_inline(__vhypot_n)
82
83#define RETURN(ret)						\
84{								\
85	*pz = (ret);						\
86	py += stridey;						\
87	pz += stridez;						\
88	if (n_n == 0)						\
89	{							\
90		hx0 = HI(px);					\
91		hy0 = HI(py);					\
92		spx = px; spy = py; spz = pz;			\
93		continue;					\
94	}							\
95	n--;							\
96	break;							\
97}
98
99void
100__vhypot(int n, double * restrict px, int stridex, double * restrict py,
101	int stridey, double * restrict pz, int stridez)
102{
103	int		hx0, hx1, hy0, j0, diff;
104	double		x_hi, x_lo, y_hi, y_lo;
105	double		scl = 0;
106	double		x, y, res;
107	double		*spx, *spy, *spz;
108	int		n_n;
109	double		D2ON28 = ((double*)LCONST)[0];		/* 2 ** 28	*/
110	double		D2ONM1022 = ((double*)LCONST)[1];	/* 2 **-1022	*/
111	double		D2ONP1022 = ((double*)LCONST)[2];	/* 2 ** 1022	*/
112
113	while (n > 1)
114	{
115		n_n = 0;
116		spx = px;
117		spy = py;
118		spz = pz;
119		hx0 = HI(px);
120		hy0 = HI(py);
121		for (; n > 1 ; n--)
122		{
123			px += stridex;
124			hx0 &= 0x7fffffff;
125			hy0 &= 0x7fffffff;
126
127			if (hx0 >= 0x7fe00000)	/* |X| >= 2**1023 or Inf or NaN */
128			{
129				diff = hy0 - hx0;
130				j0 = diff >> 31;
131				j0 = hy0 - (diff & j0);
132				j0 &= 0x7ff00000;
133				x = *(px - stridex);
134				y = *py;
135				x = fabs(x);
136				y = fabs(y);
137				if (j0 >= 0x7ff00000)	/* |X| or |Y| = Inf or NaN */
138				{
139					int lx = LO((px - stridex));
140					int ly = LO(py);
141					if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
142					else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
143					else res = x + y;
144					RETURN (res)
145				}
146				else
147				{
148					j0 = diff >> 31;
149					if (((diff ^ j0) - j0) < 0x03600000)	/* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
150					{
151						x *= D2ONM1022;
152						y *= D2ONM1022;
153
154						x_hi = (x + D2ON28) - D2ON28;
155						x_lo = x - x_hi;
156						y_hi = (y + D2ON28) - D2ON28;
157						y_lo = y - y_hi;
158						res = (x_hi * x_hi + y_hi * y_hi);
159						res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
160
161						res = sqrt (res);
162
163						res = D2ONP1022 * res;
164						RETURN (res)
165					}
166					else RETURN (x + y)
167				}
168			}
169			if (hy0 >= 0x7fe00000)	/* |Y| >= 2**1023 or Inf or NaN */
170			{
171				diff = hy0 - hx0;
172				j0 = diff >> 31;
173				j0 = hy0 - (diff & j0);
174				j0 &= 0x7ff00000;
175				x = *(px - stridex);
176				y = *py;
177				x = fabs(x);
178				y = fabs(y);
179				if (j0 >= 0x7ff00000)	/* |X| or |Y| = Inf or NaN */
180				{
181					int lx = LO((px - stridex));
182					int ly = LO(py);
183					if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
184					else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
185					else res = x + y;
186					RETURN (res)
187				}
188				else
189				{
190					j0 = diff >> 31;
191					if (((diff ^ j0) - j0) < 0x03600000)	/* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
192					{
193						x *= D2ONM1022;
194						y *= D2ONM1022;
195
196						x_hi = (x + D2ON28) - D2ON28;
197						x_lo = x - x_hi;
198						y_hi = (y + D2ON28) - D2ON28;
199						y_lo = y - y_hi;
200						res = (x_hi * x_hi + y_hi * y_hi);
201						res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
202
203						res = sqrt (res);
204
205						res = D2ONP1022 * res;
206						RETURN (res)
207					}
208					else RETURN (x + y)
209				}
210			}
211
212			hx1 = HI(px);
213
214			if (hx0 < 0x00100000 && hy0 < 0x00100000)	/* X and Y are subnormal */
215			{
216				x = *(px - stridex);
217				y = *py;
218
219				x *= D2ONP1022;
220				y *= D2ONP1022;
221
222				x_hi = (x + D2ON28) - D2ON28;
223				x_lo = x - x_hi;
224				y_hi = (y + D2ON28) - D2ON28;
225				y_lo = y - y_hi;
226				res = (x_hi * x_hi + y_hi * y_hi);
227				res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
228
229				res = sqrt(res);
230
231				res = D2ONM1022 * res;
232				RETURN (res)
233			}
234
235			hx0 = hx1;
236			py += stridey;
237			pz += stridez;
238			n_n++;
239			hy0 = HI(py);
240		}
241		if (n_n > 0)
242			__vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez);
243	}
244
245	if (n > 0)
246	{
247		x = *px;
248		y = *py;
249		hx0 = HI(px);
250		hy0 = HI(py);
251
252		hx0 &= 0x7fffffff;
253		hy0 &= 0x7fffffff;
254
255		diff = hy0 - hx0;
256		j0 = diff >> 31;
257		j0 = hy0 - (diff & j0);
258		j0 &= 0x7ff00000;
259
260		if (j0 >= 0x7fe00000)	/* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */
261		{
262			x = fabs(x);
263			y = fabs(y);
264			if (j0 >= 0x7ff00000)	/* |X| or |Y| = Inf or NaN */
265			{
266				int lx = LO(px);
267				int ly = LO(py);
268				if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
269				else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
270				else res = x + y;
271				*pz = res;
272				return;
273			}
274			else
275			{
276				j0 = diff >> 31;
277				if (((diff ^ j0) - j0) < 0x03600000)	/* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
278				{
279					x *= D2ONM1022;
280					y *= D2ONM1022;
281
282					x_hi = (x + D2ON28) - D2ON28;
283					x_lo = x - x_hi;
284					y_hi = (y + D2ON28) - D2ON28;
285					y_lo = y - y_hi;
286					res = (x_hi * x_hi + y_hi * y_hi);
287					res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
288
289					res = sqrt (res);
290
291					res = D2ONP1022 * res;
292					*pz = res;
293					return;
294				}
295				else
296				{
297					*pz = x + y;
298					return;
299				}
300			}
301		}
302
303		if (j0 < 0x00100000)	/* X and Y are subnormal */
304		{
305			x *= D2ONP1022;
306			y *= D2ONP1022;
307
308			x_hi = (x + D2ON28) - D2ON28;
309			x_lo = x - x_hi;
310			y_hi = (y + D2ON28) - D2ON28;
311			y_lo = y - y_hi;
312			res = (x_hi * x_hi + y_hi * y_hi);
313			res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
314
315			res = sqrt(res);
316
317			res = D2ONM1022 * res;
318			*pz = res;
319			return;
320		}
321
322		HI(&scl) = (0x7fe00000 - j0);
323
324		x *= scl;
325		y *= scl;
326
327		x_hi = (x + D2ON28) - D2ON28;
328		y_hi = (y + D2ON28) - D2ON28;
329		x_lo = x - x_hi;
330		y_lo = y - y_hi;
331
332		res = (x_hi * x_hi + y_hi * y_hi);
333		res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
334
335		res = sqrt(res);
336
337		HI(&scl) = j0;
338
339		res = scl * res;
340		*pz = res;
341	}
342}
343
344static void
345__vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
346	int stridey, double * restrict pz, int stridez)
347{
348	int		hx0, hy0, j0, diff0;
349	double		x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
350	double		x0, y0, res0;
351	double		D2ON28 = ((double*)LCONST)[0];		/* 2 ** 28	*/
352
353	for(; n > 0 ; n--)
354	{
355		x0 = *px;
356		y0 = *py;
357		hx0 = HI(px);
358		hy0 = HI(py);
359
360		hx0 &= 0x7fffffff;
361		hy0 &= 0x7fffffff;
362
363		diff0 = hy0 - hx0;
364		j0 = diff0 >> 31;
365		j0 = hy0 - (diff0 & j0);
366		j0 &= 0x7ff00000;
367
368		px += stridex;
369		py += stridey;
370
371		HI(&scl0) = (0x7fe00000 - j0);
372
373		x0 *= scl0;
374		y0 *= scl0;
375
376		x_hi0 = (x0 + D2ON28) - D2ON28;
377		y_hi0 = (y0 + D2ON28) - D2ON28;
378		x_lo0 = x0 - x_hi0;
379		y_lo0 = y0 - y_hi0;
380
381		res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
382		res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
383
384		res0 = sqrt(res0);
385
386		HI(&scl0) = j0;
387
388		res0 = scl0 * res0;
389		*pz = res0;
390
391		pz += stridez;
392	}
393}
394