1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24 */
25/*
26 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27 * Use is subject to license terms.
28 */
29
30#include <sys/isa_defs.h>
31#include "libm_inlines.h"
32
33#ifdef _LITTLE_ENDIAN
34#define HI(x)	*(1+(int*)x)
35#define LO(x)	*(unsigned*)x
36#else
37#define HI(x)	*(int*)x
38#define LO(x)	*(1+(unsigned*)x)
39#endif
40
41#ifdef __RESTRICT
42#define restrict _Restrict
43#else
44#define restrict
45#endif
46
47void
48__vatan(int n, double * restrict x, int stridex, double * restrict y, int stridey)
49{
50  double  f, z, ans = 0.0L, ansu, ansl, tmp, poly, conup, conlo, dummy;
51  double  f1,   ans1, ansu1, ansl1, tmp1, poly1, conup1, conlo1;
52  double  f2,   ans2, ansu2, ansl2, tmp2, poly2, conup2, conlo2;
53  int index, sign, intf, intflo, intz, argcount;
54  int index1, sign1 = 0;
55  int index2, sign2 = 0;
56  double *yaddr,*yaddr1 = 0,*yaddr2 = 0;
57  extern const double __vlibm_TBL_atan1[];
58  extern double fabs(double);
59
60/*    Power series  atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
61 *    Error =  -3.08254E-18   On the interval  |x| < 1/64 */
62
63/* define dummy names for readability.  Use parray to help compiler optimize loads */
64#define p3    parray[0]
65#define p2    parray[1]
66#define p1    parray[2]
67
68  static const double parray[] = {
69   -1.428029046844299722E-01,		/* p[3]		*/
70    1.999999917247000615E-01, 		/* p[2]		*/
71   -3.333333333329292858E-01, 		/* p[1]		*/
72    1.0, 				/* not used for p[0], though		*/
73   -1.0,				/* used to flip sign of answer 		*/
74  };
75
76  if (n <= 0) return;		/* if no. of elements is 0 or neg, do nothing */
77  do
78  {
79  LOOP0:
80
81    f        = fabs(*x);			/* fetch argument		*/
82    intf     = HI(x);			/* upper half of x, as integer	*/
83    intflo   = LO(x);			/* lower half of x, as integer	*/
84    sign     = intf &  0x80000000;		/* sign of argument		*/
85    intf     = intf & ~0x80000000;		/* abs(upper argument)		*/
86
87    if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */
88    {
89      if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0)))
90      {
91	ans   = f - f; 				/* return NaN if x=NaN*/
92      }
93      else if (intf < 0x3e300000) 		/* avoid underflow for small arg */
94      {
95        dummy = 1.0e37 + f;
96        dummy = dummy;
97	ans   = f;
98      }
99      else if (intf > 0x43600000)		/* avoid underflow for big arg  */
100      {
101        index = 2;
102        ans   = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low   */
103      }
104      *y      = (sign) ? -ans: ans;		/* store answer, with sign bit 	*/
105      x      += stridex;
106      y      += stridey;
107      argcount = 0;				/* initialize argcount		*/
108      if (--n <=0) break;			/* we are done 			*/
109      goto LOOP0;				/* otherwise, examine next arg  */
110    }
111
112    index    = 0;				/* points to 0,0 in table	*/
113    if (intf > 0x40500000)			/* if (|x| > 64               	*/
114    { f = -1.0/f;
115      index  = 2; 				/* point to pi/2 upper, lower	*/
116    }
117    else if (intf >= 0x3f900000)		/* if |x| >= (1/64)... 		*/
118    {
119      intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper	*/
120      HI(&z)  = intz;				/* store as a double (z)	*/
121      LO(&z)  = 0;				/* ...lower			*/
122      f      = (f - z)/(1.0 + f*z); 		/* get reduced argument		*/
123      index  = (intz - 0x3f900000) >> 15;	/* (index >> 16) << 1)		*/
124      index  = index + 4;			/* skip over 0,0,pi/2,pi/2	*/
125    }
126    yaddr    = y;				/* address to store this answer */
127    x       += stridex;				/* point to next arg		*/
128    y       += stridey;				/* point to next result		*/
129    argcount = 1;				/* we now have 1 good argument  */
130    if (--n <=0)
131    {
132      f1      = 0.0;				/* put dummy values in args 1,2 */
133      f2      = 0.0;
134      index1  = 0;
135      index2  = 0;
136      goto UNROLL3;				/* finish up with 1 good arg 	*/
137    }
138
139    /*--------------------------------------------------------------------------*/
140    /*--------------------------------------------------------------------------*/
141    /*--------------------------------------------------------------------------*/
142
143  LOOP1:
144
145    f1       = fabs(*x);			/* fetch argument		*/
146    intf     = HI(x);			/* upper half of x, as integer	*/
147    intflo   = LO(x);			/* lower half of x, as integer	*/
148    sign1    = intf &  0x80000000;		/* sign of argument		*/
149    intf     = intf & ~0x80000000;		/* abs(upper argument)		*/
150
151    if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */
152    {
153      if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0)))
154      {
155	ans   = f1 - f1;			/* return NaN if x=NaN*/
156      }
157      else if (intf < 0x3e300000) 		/* avoid underflow for small arg */
158      {
159        dummy = 1.0e37 + f1;
160        dummy = dummy;
161	ans   = f1;
162      }
163      else if (intf > 0x43600000)		/* avoid underflow for big arg  */
164      {
165        index1 = 2;
166        ans   = __vlibm_TBL_atan1[index1] + __vlibm_TBL_atan1[index1+1];/* pi/2 up + pi/2 low   */
167      }
168      *y      = (sign1) ? -ans: ans;		/* store answer, with sign bit 	*/
169      x      += stridex;
170      y      += stridey;
171      argcount = 1;				/* we still have 1 good arg 	*/
172      if (--n <=0)
173      {
174        f1      = 0.0;				/* put dummy values in args 1,2 */
175        f2      = 0.0;
176        index1  = 0;
177        index2  = 0;
178        goto UNROLL3;				/* finish up with 1 good arg 	*/
179      }
180      goto LOOP1;				/* otherwise, examine next arg  */
181    }
182
183    index1   = 0;				/* points to 0,0 in table	*/
184    if (intf > 0x40500000)			/* if (|x| > 64               	*/
185    { f1 = -1.0/f1;
186      index1 = 2; 				/* point to pi/2 upper, lower	*/
187    }
188    else if (intf >= 0x3f900000)		/* if |x| >= (1/64)... 		*/
189    {
190      intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper	*/
191      HI(&z) = intz;				/* store as a double (z)	*/
192      LO(&z) = 0;				/* ...lower			*/
193      f1     = (f1 - z)/(1.0 + f1*z); 		/* get reduced argument		*/
194      index1 = (intz - 0x3f900000) >> 15;	/* (index >> 16) << 1)		*/
195      index1 = index1 + 4;			/* skip over 0,0,pi/2,pi/2	*/
196    }
197    yaddr1   = y;				/* address to store this answer */
198    x       += stridex;				/* point to next arg		*/
199    y       += stridey;				/* point to next result		*/
200    argcount = 2;				/* we now have 2 good arguments */
201    if (--n <=0)
202    {
203      f2      = 0.0;				/* put dummy value in arg 2 */
204      index2  = 0;
205      goto UNROLL3;				/* finish up with 2 good args 	*/
206    }
207
208    /*--------------------------------------------------------------------------*/
209    /*--------------------------------------------------------------------------*/
210    /*--------------------------------------------------------------------------*/
211
212  LOOP2:
213
214    f2       = fabs(*x);			/* fetch argument		*/
215    intf     = HI(x);			/* upper half of x, as integer	*/
216    intflo   = LO(x);			/* lower half of x, as integer	*/
217    sign2    = intf &  0x80000000;		/* sign of argument		*/
218    intf     = intf & ~0x80000000;		/* abs(upper argument)		*/
219
220    if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */
221    {
222      if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0)))
223      {
224	ans   = f2 - f2;			/* return NaN if x=NaN*/
225      }
226      else if (intf < 0x3e300000) 		/* avoid underflow for small arg */
227      {
228        dummy = 1.0e37 + f2;
229        dummy = dummy;
230	ans   = f2;
231      }
232      else if (intf > 0x43600000)		/* avoid underflow for big arg  */
233      {
234        index2 = 2;
235        ans   = __vlibm_TBL_atan1[index2] + __vlibm_TBL_atan1[index2+1];/* pi/2 up + pi/2 low   */
236      }
237      *y      = (sign2) ? -ans: ans;		/* store answer, with sign bit 	*/
238      x      += stridex;
239      y      += stridey;
240      argcount = 2;				/* we still have 2 good args 	*/
241      if (--n <=0)
242      {
243        f2      = 0.0;				/* put dummy value in arg 2 */
244        index2  = 0;
245        goto UNROLL3;				/* finish up with 2 good args 	*/
246      }
247      goto LOOP2;				/* otherwise, examine next arg  */
248    }
249
250    index2   = 0;				/* points to 0,0 in table	*/
251    if (intf > 0x40500000)			/* if (|x| > 64               	*/
252    { f2 = -1.0/f2;
253      index2 = 2; 				/* point to pi/2 upper, lower	*/
254    }
255    else if (intf >= 0x3f900000)		/* if |x| >= (1/64)... 		*/
256    {
257      intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper	*/
258      HI(&z) = intz;				/* store as a double (z)	*/
259      LO(&z) = 0;				/* ...lower			*/
260      f2     = (f2 - z)/(1.0 + f2*z); 		/* get reduced argument		*/
261      index2 = (intz - 0x3f900000) >> 15;	/* (index >> 16) << 1)		*/
262      index2 = index2 + 4;			/* skip over 0,0,pi/2,pi/2	*/
263    }
264    yaddr2   = y;				/* address to store this answer */
265    x       += stridex;				/* point to next arg		*/
266    y       += stridey;				/* point to next result		*/
267    argcount = 3;				/* we now have 3 good arguments */
268
269
270/* here is the 3 way unrolled section,
271   note, we may actually only have
272   1,2, or 3 'real' arguments at this point
273*/
274
275UNROLL3:
276
277    conup    = __vlibm_TBL_atan1[index ];	/* upper table 			*/
278    conup1   = __vlibm_TBL_atan1[index1];	/* upper table 			*/
279    conup2   = __vlibm_TBL_atan1[index2];	/* upper table 			*/
280
281    conlo    = __vlibm_TBL_atan1[index +1];	/* lower table 			*/
282    conlo1   = __vlibm_TBL_atan1[index1+1];	/* lower table 			*/
283    conlo2   = __vlibm_TBL_atan1[index2+1];	/* lower table 			*/
284
285    tmp      = f *f ;
286    tmp1     = f1*f1;
287    tmp2     = f2*f2;
288
289    poly     = f *((p3*tmp  + p2)*tmp  + p1)*tmp ;
290    poly1    = f1*((p3*tmp1 + p2)*tmp1 + p1)*tmp1;
291    poly2    = f2*((p3*tmp2 + p2)*tmp2 + p1)*tmp2;
292
293    ansu     = conup  + f ;    			/* compute atan(f)  upper 	*/
294    ansu1    = conup1 + f1;    			/* compute atan(f)  upper 	*/
295    ansu2    = conup2 + f2;    			/* compute atan(f)  upper 	*/
296
297    ansl     = (((conup  - ansu) + f) + poly) + conlo ;
298    ansl1    = (((conup1 - ansu1) + f1) + poly1) + conlo1;
299    ansl2    = (((conup2 - ansu2) + f2) + poly2) + conlo2;
300
301    ans      = ansu  + ansl ;
302    ans1     = ansu1 + ansl1;
303    ans2     = ansu2 + ansl2;
304
305/* now check to see if these are 'real' or 'dummy' arguments BEFORE storing */
306
307   *yaddr    = sign ? -ans: ans;		/* this one is always good	*/
308   if (argcount < 3) break;			/* end loop and finish up 	*/
309     *yaddr1   = sign1 ? -ans1: ans1;
310     *yaddr2   = sign2 ? -ans2: ans2;
311
312  }  while (--n > 0);
313
314 if (argcount == 2)
315   {  *yaddr1  = sign1 ? -ans1: ans1;
316   }
317}
318