1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24 */
25/*
26 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27 * Use is subject to license terms.
28 */
29
30#ifdef __RESTRICT
31#define restrict _Restrict
32#else
33#define restrict
34#endif
35
36extern const double __vlibm_TBL_atan1[];
37
38static const double
39pio4	=  7.8539816339744827900e-01,
40pio2	=  1.5707963267948965580e+00,
41pi	=  3.1415926535897931160e+00;
42
43static const float
44zero	=  0.0f,
45one	=  1.0f,
46q1      = -3.3333333333296428046e-01f,
47q2      =  1.9999999186853752618e-01f,
48twop24  =  16777216.0f;
49
50void
51__vatan2f(int n, float * restrict y, int stridey, float * restrict x,
52	int stridex, float * restrict z, int stridez)
53{
54	float		x0, x1, x2, y0, y1, y2, *pz0 = 0, *pz1, *pz2;
55	double		ah0, ah1, ah2;
56	double		t0, t1, t2;
57	double		sx0, sx1, sx2;
58	double		sign0, sign1, sign2;
59	int		i, k0 = 0, k1, k2, hx, sx, sy;
60	int		hy0, hy1, hy2;
61	float		base0 = 0.0, base1, base2;
62	double		num0, num1, num2;
63	double		den0, den1, den2;
64	double		dx0, dx1, dx2;
65	double		dy0, dy1, dy2;
66	double		db0, db1, db2;
67
68	do
69	{
70loop0:
71		hy0 = *(int*)y;
72		hx = *(int*)x;
73		sign0 = one;
74		sy = hy0 & 0x80000000;
75		hy0 &= ~0x80000000;
76
77		sx = hx & 0x80000000;
78		hx &= ~0x80000000;
79
80		if (hy0 > hx)
81		{
82			x0 = *y;
83			y0 = *x;
84			i = hx;
85			hx = hy0;
86			hy0 = i;
87			if (sy)
88			{
89				x0 = -x0;
90				sign0 = -sign0;
91			}
92			if (sx)
93			{
94				y0 = -y0;
95				ah0 = pio2;
96			}
97			else
98			{
99				ah0 = -pio2;
100				sign0 = -sign0;
101			}
102		}
103		else
104		{
105			y0 = *y;
106			x0 = *x;
107			if (sy)
108			{
109				y0 = -y0;
110				sign0 = -sign0;
111			}
112			if (sx)
113			{
114				x0 = -x0;
115				ah0 = -pi;
116				sign0 = -sign0;
117			}
118			else
119				ah0 = zero;
120		}
121
122		if (hx >= 0x7f800000 || hx - hy0 >= 0x0c800000)
123		{
124			if (hx >= 0x7f800000)
125			{
126				if (hx ^ 0x7f800000) /* nan */
127					ah0 =  x0 + y0;
128				else if (hy0 >= 0x7f800000)
129					ah0 += pio4;
130			}
131			else if ((int) ah0 == 0)
132				ah0 = y0 / x0;
133			*z = (sign0 == one) ? ah0 : -ah0;
134/* sign0*ah0 would change nan behavior relative to previous release */
135			x += stridex;
136			y += stridey;
137			z += stridez;
138			i = 0;
139			if (--n <= 0)
140				break;
141			goto loop0;
142		}
143		if (hy0 < 0x00800000) {
144			if (hy0 == 0)
145			{
146				*z = sign0 * (float) ah0;
147				x += stridex;
148				y += stridey;
149				z += stridez;
150				i = 0;
151				if (--n <= 0)
152					break;
153				goto loop0;
154			}
155			y0 *= twop24; /* scale subnormal y */
156			x0 *= twop24; /* scale possibly subnormal x */
157			hy0 = *(int*)&y0;
158                        hx = *(int*)&x0;
159		}
160		pz0 = z;
161
162		k0 = (hy0 - hx + 0x3f800000) & 0xfff80000;
163		if (k0 >= 0x3C800000)          /* if |x| >= (1/64)... */
164    		{
165			*(int*)&base0 = k0;
166       		 	k0 = (k0 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
167			k0 += 4;
168				/* skip over 0,0,pi/2,pi/2 */
169    		}
170    		else                            /* |x| < 1/64 */
171    		{
172			k0 = 0;
173			base0 = zero;
174    		}
175
176		x += stridex;
177		y += stridey;
178		z += stridez;
179		i = 1;
180		if (--n <= 0)
181			break;
182
183
184loop1:
185		hy1 = *(int*)y;
186		hx = *(int*)x;
187		sign1 = one;
188		sy = hy1 & 0x80000000;
189		hy1 &= ~0x80000000;
190
191		sx = hx & 0x80000000;
192		hx &= ~0x80000000;
193
194		if (hy1 > hx)
195		{
196			x1 = *y;
197			y1 = *x;
198			i = hx;
199			hx = hy1;
200			hy1 = i;
201			if (sy)
202			{
203				x1 = -x1;
204				sign1 = -sign1;
205			}
206			if (sx)
207			{
208				y1 = -y1;
209				ah1 = pio2;
210			}
211			else
212			{
213				ah1 = -pio2;
214				sign1 = -sign1;
215			}
216		}
217		else
218		{
219			y1 = *y;
220			x1 = *x;
221			if (sy)
222			{
223				y1 = -y1;
224				sign1 = -sign1;
225			}
226			if (sx)
227			{
228				x1 = -x1;
229				ah1 = -pi;
230				sign1 = -sign1;
231			}
232			else
233				ah1 = zero;
234		}
235
236		if (hx >= 0x7f800000 || hx - hy1 >= 0x0c800000)
237		{
238			if (hx >= 0x7f800000)
239			{
240				if (hx ^ 0x7f800000) /* nan */
241					ah1 =  x1 + y1;
242				else if (hy1 >= 0x7f800000)
243					ah1 += pio4;
244			}
245			else if ((int) ah1 == 0)
246				ah1 = y1 / x1;
247			*z = (sign1 == one)? ah1 : -ah1;
248			x += stridex;
249			y += stridey;
250			z += stridez;
251			i = 1;
252			if (--n <= 0)
253				break;
254			goto loop1;
255		}
256		if (hy1 < 0x00800000) {
257			if (hy1 == 0)
258			{
259				*z = sign1 * (float) ah1;
260				x += stridex;
261				y += stridey;
262				z += stridez;
263				i = 1;
264				if (--n <= 0)
265					break;
266				goto loop1;
267			}
268			y1 *= twop24; /* scale subnormal y */
269			x1 *= twop24; /* scale possibly subnormal x */
270			hy1 = *(int*)&y1;
271                        hx = *(int*)&x1;
272		}
273		pz1 = z;
274
275		k1 = (hy1 - hx + 0x3f800000) & 0xfff80000;
276		if (k1 >= 0x3C800000)          /* if |x| >= (1/64)... */
277    		{
278			*(int*)&base1 = k1;
279       		 	k1 = (k1 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
280			k1 += 4;
281				/* skip over 0,0,pi/2,pi/2 */
282    		}
283    		else                            /* |x| < 1/64 */
284    		{
285       			k1 = 0;
286			base1 = zero;
287    		}
288
289		x += stridex;
290		y += stridey;
291		z += stridez;
292		i = 2;
293		if (--n <= 0)
294			break;
295
296loop2:
297		hy2 = *(int*)y;
298		hx = *(int*)x;
299		sign2 = one;
300		sy = hy2 & 0x80000000;
301		hy2 &= ~0x80000000;
302
303		sx = hx & 0x80000000;
304		hx &= ~0x80000000;
305
306		if (hy2 > hx)
307		{
308			x2 = *y;
309			y2 = *x;
310			i = hx;
311			hx = hy2;
312			hy2 = i;
313			if (sy)
314			{
315				x2 = -x2;
316				sign2 = -sign2;
317			}
318			if (sx)
319			{
320				y2 = -y2;
321				ah2 = pio2;
322			}
323			else
324			{
325				ah2 = -pio2;
326				sign2 = -sign2;
327			}
328		}
329		else
330		{
331			y2 = *y;
332			x2 = *x;
333			if (sy)
334			{
335				y2 = -y2;
336				sign2 = -sign2;
337			}
338			if (sx)
339			{
340				x2 = -x2;
341				ah2 = -pi;
342				sign2 = -sign2;
343			}
344			else
345				ah2 = zero;
346		}
347
348		if (hx >= 0x7f800000 || hx - hy2 >= 0x0c800000)
349		{
350			if (hx >= 0x7f800000)
351			{
352				if (hx ^ 0x7f800000) /* nan */
353					ah2 =  x2 + y2;
354				else if (hy2 >= 0x7f800000)
355					ah2 += pio4;
356			}
357			else if ((int) ah2 == 0)
358				ah2 = y2 / x2;
359			*z = (sign2 == one)? ah2 : -ah2;
360			x += stridex;
361			y += stridey;
362			z += stridez;
363			i = 2;
364			if (--n <= 0)
365				break;
366			goto loop2;
367		}
368		if (hy2 < 0x00800000) {
369			if (hy2 == 0)
370			{
371				*z = sign2 * (float) ah2;
372				x += stridex;
373				y += stridey;
374				z += stridez;
375				i = 2;
376				if (--n <= 0)
377					break;
378				goto loop2;
379			}
380			y2 *= twop24; /* scale subnormal y */
381			x2 *= twop24; /* scale possibly subnormal x */
382			hy2 = *(int*)&y2;
383                        hx = *(int*)&x2;
384		}
385
386		pz2 = z;
387
388		k2 = (hy2 - hx + 0x3f800000) & 0xfff80000;
389		if (k2 >= 0x3C800000)          /* if |x| >= (1/64)... */
390    		{
391			*(int*)&base2 = k2;
392       		 	k2 = (k2 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
393			k2 += 4;
394				/* skip over 0,0,pi/2,pi/2 */
395    		}
396    		else                            /* |x| < 1/64 */
397    		{
398			k2 = 0;
399			base2 = zero;
400    		}
401
402		goto endloop;
403
404endloop:
405
406		ah2 += __vlibm_TBL_atan1[k2];
407		ah1 += __vlibm_TBL_atan1[k1];
408		ah0 += __vlibm_TBL_atan1[k0];
409
410		db2 = base2;
411		db1 = base1;
412		db0 = base0;
413		dy2 = y2;
414		dy1 = y1;
415		dy0 = y0;
416		dx2 = x2;
417		dx1 = x1;
418		dx0 = x0;
419
420		num2 = dy2 - dx2 * db2;
421		den2 = dx2 + dy2 * db2;
422
423		num1 = dy1 - dx1 * db1;
424		den1 = dx1 + dy1 * db1;
425
426		num0 = dy0 - dx0 * db0;
427		den0 = dx0 + dy0 * db0;
428
429		t2 = num2 / den2;
430		t1 = num1 / den1;
431		t0 = num0 / den0;
432
433		sx2 = t2 * t2;
434		sx1 = t1 * t1;
435		sx0 = t0 * t0;
436
437		t2 += t2 * sx2 * (q1 + sx2 * q2);
438 		t1 += t1 * sx1 * (q1 + sx1 * q2);
439 		t0 += t0 * sx0 * (q1 + sx0 * q2);
440
441		t2 += ah2;
442		t1 += ah1;
443		t0 += ah0;
444
445		*pz2 = sign2 * t2;
446		*pz1 = sign1 * t1;
447		*pz0 = sign0 * t0;
448
449		x += stridex;
450		y += stridey;
451		z += stridez;
452		i = 0;
453	} while (--n > 0);
454
455	if (i > 1)
456	{
457		ah1 += __vlibm_TBL_atan1[k1];
458		t1 = (y1 - x1 * (double)base1) /
459			(x1 + y1 * (double)base1);
460		sx1 = t1 * t1;
461 		t1 += t1 * sx1 * (q1 + sx1 * q2);
462		t1 += ah1;
463		*pz1 = sign1 * t1;
464	}
465
466	if (i > 0)
467	{
468		ah0 += __vlibm_TBL_atan1[k0];
469		t0 = (y0 - x0 * (double)base0) /
470			(x0 + y0 * (double)base0);
471		sx0 = t0 * t0;
472 		t0 += t0 * sx0 * (q1 + sx0 * q2);
473		t0 += ah0;
474		*pz0 = sign0 * t0;
475	}
476}
477