1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24 */
25/*
26 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27 * Use is subject to license terms.
28 */
29
30#include <sys/isa_defs.h>
31#include "libm_inlines.h"
32
33#ifdef _LITTLE_ENDIAN
34#define HI(x)	*(1+(int*)x)
35#define LO(x)	*(unsigned*)x
36#else
37#define HI(x)	*(int*)x
38#define LO(x)	*(1+(unsigned*)x)
39#endif
40
41#ifdef __RESTRICT
42#define restrict _Restrict
43#else
44#define restrict
45#endif
46
47extern const double __vlibm_TBL_atan2[];
48
49static const double
50zero	=  0.0,
51twom3	=  0.125,
52one		=  1.0,
53two110	=  1.2980742146337069071e+33,
54pio4	=  7.8539816339744827900e-01,
55pio2	=  1.5707963267948965580e+00,
56pio2_lo	=  6.1232339957367658860e-17,
57pi		=  3.1415926535897931160e+00,
58pi_lo	=  1.2246467991473531772e-16,
59p1		= -3.33333333333327571893331786354179101074860633009e-0001,
60p2		=  1.99999999942671624230086497610394721817438631379e-0001,
61p3		= -1.42856965565428636896183013324727205980484158356e-0001,
62p4		=  1.10894981496317081405107718475040168084164825641e-0001;
63
64/* Don't __ the following; acomp will handle it */
65extern double fabs(double);
66
67void
68__vatan2(int n, double * restrict y, int stridey, double * restrict x,
69	int stridex, double * restrict z, int stridez)
70{
71	double		x0, x1, x2, y0, y1, y2, *pz0, *pz1, *pz2;
72	double		ah0, ah1, ah2, al0, al1, al2, t0, t1, t2;
73	double		z0, z1, z2, sign0, sign1, sign2, xh;
74	int			i, k, hx, hy, sx, sy;
75
76	do
77	{
78loop0:
79		hy = HI(y);
80		sy = hy & 0x80000000;
81		hy &= ~0x80000000;
82		sign0 = (sy)? -one : one;
83
84		hx = HI(x);
85		sx = hx & 0x80000000;
86		hx &= ~0x80000000;
87
88		if (hy > hx || (hy == hx && LO(y) > LO(x)))
89		{
90			i = hx;
91			hx = hy;
92			hy = i;
93			x0 = fabs(*y);
94			y0 = fabs(*x);
95			if (sx)
96			{
97				ah0 = pio2;
98				al0 = pio2_lo;
99			}
100			else
101			{
102				ah0 = -pio2;
103				al0 = -pio2_lo;
104				sign0 = -sign0;
105			}
106		}
107		else
108		{
109			x0 = fabs(*x);
110			y0 = fabs(*y);
111			if (sx)
112			{
113				ah0 = -pi;
114				al0 = -pi_lo;
115				sign0 = -sign0;
116			}
117			else
118				ah0 = al0 = zero;
119		}
120
121		if (hx >= 0x7fe00000 || hx - hy >= 0x03600000)
122		{
123			if (hx >= 0x7ff00000)
124			{
125				if ((hx ^ 0x7ff00000) | LO(&x0)) /* nan */
126					ah0 =  x0 + y0;
127				else if (hy >= 0x7ff00000)
128					ah0 += pio4;
129				*z = sign0 * ah0;
130				x += stridex;
131				y += stridey;
132				z += stridez;
133				i = 0;
134				if (--n <= 0)
135					break;
136				goto loop0;
137			}
138			if (hx - hy >= 0x03600000)
139			{
140				if ((int) ah0 == 0)
141					ah0 = y0 / x0;
142				*z = sign0 * ah0;
143				x += stridex;
144				y += stridey;
145				z += stridez;
146				i = 0;
147				if (--n <= 0)
148					break;
149				goto loop0;
150			}
151			y0 *= twom3;
152			x0 *= twom3;
153			hy -= 0x00300000;
154			hx -= 0x00300000;
155		}
156		else if (hy < 0x00100000)
157		{
158			if ((hy | LO(&y0)) == 0)
159			{
160				*z = sign0 * ah0;
161				x += stridex;
162				y += stridey;
163				z += stridez;
164				i = 0;
165				if (--n <= 0)
166					break;
167				goto loop0;
168			}
169			y0 *= two110;
170			x0 *= two110;
171			hy = HI(&y0);
172			hx = HI(&x0);
173		}
174
175		k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
176		if (k > 644)
177			k = 644;
178		ah0 += __vlibm_TBL_atan2[k];
179		al0 += __vlibm_TBL_atan2[k+1];
180		t0 = __vlibm_TBL_atan2[k+2];
181
182		xh = x0;
183		LO(&xh) = 0;
184		z0 = ((y0 - t0 * xh) - t0 * (x0 - xh)) / (x0 + y0 * t0);
185		pz0 = z;
186		x += stridex;
187		y += stridey;
188		z += stridez;
189		i = 1;
190		if (--n <= 0)
191			break;
192
193loop1:
194		hy = HI(y);
195		sy = hy & 0x80000000;
196		hy &= ~0x80000000;
197		sign1 = (sy)? -one : one;
198
199		hx = HI(x);
200		sx = hx & 0x80000000;
201		hx &= ~0x80000000;
202
203		if (hy > hx || (hy == hx && LO(y) > LO(x)))
204		{
205			i = hx;
206			hx = hy;
207			hy = i;
208			x1 = fabs(*y);
209			y1 = fabs(*x);
210			if (sx)
211			{
212				ah1 = pio2;
213				al1 = pio2_lo;
214			}
215			else
216			{
217				ah1 = -pio2;
218				al1 = -pio2_lo;
219				sign1 = -sign1;
220			}
221		}
222		else
223		{
224			x1 = fabs(*x);
225			y1 = fabs(*y);
226			if (sx)
227			{
228				ah1 = -pi;
229				al1 = -pi_lo;
230				sign1 = -sign1;
231			}
232			else
233				ah1 = al1 = zero;
234		}
235
236		if (hx >= 0x7fe00000 || hx - hy >= 0x03600000)
237		{
238			if (hx >= 0x7ff00000)
239			{
240				if ((hx ^ 0x7ff00000) | LO(&x1)) /* nan */
241					ah1 =  x1 + y1;
242				else if (hy >= 0x7ff00000)
243					ah1 += pio4;
244				*z = sign1 * ah1;
245				x += stridex;
246				y += stridey;
247				z += stridez;
248				i = 1;
249				if (--n <= 0)
250					break;
251				goto loop1;
252			}
253			if (hx - hy >= 0x03600000)
254			{
255				if ((int) ah1 == 0)
256					ah1 = y1 / x1;
257				*z = sign1 * ah1;
258				x += stridex;
259				y += stridey;
260				z += stridez;
261				i = 1;
262				if (--n <= 0)
263					break;
264				goto loop1;
265			}
266			y1 *= twom3;
267			x1 *= twom3;
268			hy -= 0x00300000;
269			hx -= 0x00300000;
270		}
271		else if (hy < 0x00100000)
272		{
273			if ((hy | LO(&y1)) == 0)
274			{
275				*z = sign1 * ah1;
276				x += stridex;
277				y += stridey;
278				z += stridez;
279				i = 1;
280				if (--n <= 0)
281					break;
282				goto loop1;
283			}
284			y1 *= two110;
285			x1 *= two110;
286			hy = HI(&y1);
287			hx = HI(&x1);
288		}
289
290		k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
291		if (k > 644)
292			k = 644;
293		ah1 += __vlibm_TBL_atan2[k];
294		al1 += __vlibm_TBL_atan2[k+1];
295		t1 = __vlibm_TBL_atan2[k+2];
296
297		xh = x1;
298		LO(&xh) = 0;
299		z1 = ((y1 - t1 * xh) - t1 * (x1 - xh)) / (x1 + y1 * t1);
300		pz1 = z;
301		x += stridex;
302		y += stridey;
303		z += stridez;
304		i = 2;
305		if (--n <= 0)
306			break;
307
308loop2:
309		hy = HI(y);
310		sy = hy & 0x80000000;
311		hy &= ~0x80000000;
312		sign2 = (sy)? -one : one;
313
314		hx = HI(x);
315		sx = hx & 0x80000000;
316		hx &= ~0x80000000;
317
318		if (hy > hx || (hy == hx && LO(y) > LO(x)))
319		{
320			i = hx;
321			hx = hy;
322			hy = i;
323			x2 = fabs(*y);
324			y2 = fabs(*x);
325			if (sx)
326			{
327				ah2 = pio2;
328				al2 = pio2_lo;
329			}
330			else
331			{
332				ah2 = -pio2;
333				al2 = -pio2_lo;
334				sign2 = -sign2;
335			}
336		}
337		else
338		{
339			x2 = fabs(*x);
340			y2 = fabs(*y);
341			if (sx)
342			{
343				ah2 = -pi;
344				al2 = -pi_lo;
345				sign2 = -sign2;
346			}
347			else
348				ah2 = al2 = zero;
349		}
350
351		if (hx >= 0x7fe00000 || hx - hy >= 0x03600000)
352		{
353			if (hx >= 0x7ff00000)
354			{
355				if ((hx ^ 0x7ff00000) | LO(&x2)) /* nan */
356					ah2 =  x2 + y2;
357				else if (hy >= 0x7ff00000)
358					ah2 += pio4;
359				*z = sign2 * ah2;
360				x += stridex;
361				y += stridey;
362				z += stridez;
363				i = 2;
364				if (--n <= 0)
365					break;
366				goto loop2;
367			}
368			if (hx - hy >= 0x03600000)
369			{
370				if ((int) ah2 == 0)
371					ah2 = y2 / x2;
372				*z = sign2 * ah2;
373				x += stridex;
374				y += stridey;
375				z += stridez;
376				i = 2;
377				if (--n <= 0)
378					break;
379				goto loop2;
380			}
381			y2 *= twom3;
382			x2 *= twom3;
383			hy -= 0x00300000;
384			hx -= 0x00300000;
385		}
386		else if (hy < 0x00100000)
387		{
388			if ((hy | LO(&y2)) == 0)
389			{
390				*z = sign2 * ah2;
391				x += stridex;
392				y += stridey;
393				z += stridez;
394				i = 2;
395				if (--n <= 0)
396					break;
397				goto loop2;
398			}
399			y2 *= two110;
400			x2 *= two110;
401			hy = HI(&y2);
402			hx = HI(&x2);
403		}
404
405		k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
406		if (k > 644)
407			k = 644;
408		ah2 += __vlibm_TBL_atan2[k];
409		al2 += __vlibm_TBL_atan2[k+1];
410		t2 = __vlibm_TBL_atan2[k+2];
411
412		xh = x2;
413		LO(&xh) = 0;
414		z2 = ((y2 - t2 * xh) - t2 * (x2 - xh)) / (x2 + y2 * t2);
415		pz2 = z;
416
417		x0 = z0 * z0;
418		x1 = z1 * z1;
419		x2 = z2 * z2;
420
421		t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 *
422			(p2 + x0 * (p3 + x0 * p4)))));
423		t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 *
424			(p2 + x1 * (p3 + x1 * p4)))));
425		t2 = ah2 + (z2 + (al2 + (z2 * x2) * (p1 + x2 *
426			(p2 + x2 * (p3 + x2 * p4)))));
427
428		*pz0 = sign0 * t0;
429		*pz1 = sign1 * t1;
430		*pz2 = sign2 * t2;
431
432		x += stridex;
433		y += stridey;
434		z += stridez;
435		i = 0;
436	} while (--n > 0);
437
438	if (i > 0)
439	{
440		if (i > 1)
441		{
442			x1 = z1 * z1;
443			t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 *
444				(p2 + x1 * (p3 + x1 * p4)))));
445			*pz1 = sign1 * t1;
446		}
447
448		x0 = z0 * z0;
449		t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 *
450			(p2 + x0 * (p3 + x0 * p4)))));
451		*pz0 = sign0 * t0;
452	}
453}
454