1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24 */
25/*
26 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27 * Use is subject to license terms.
28 */
29
30#include <sys/isa_defs.h>
31#include <sys/ccompile.h>
32
33#ifdef _LITTLE_ENDIAN
34#define HI(x)	*(1+(int*)x)
35#define LO(x)	*(unsigned*)x
36#else
37#define HI(x)	*(int*)x
38#define LO(x)	*(1+(unsigned*)x)
39#endif
40
41#ifdef __RESTRICT
42#define restrict _Restrict
43#else
44#define restrict
45#endif
46
47/*
48 * vsincos.c
49 *
50 * Vector sine and cosine function.  Just slight modifications to vcos.c.
51 */
52
53extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
54
55static const double
56	half[2]	= { 0.5, -0.5 },
57	one		= 1.0,
58	invpio2 = 0.636619772367581343075535,  /* 53 bits of pi/2 */
59	pio2_1	= 1.570796326734125614166,  /* first 33 bits of pi/2 */
60	pio2_2	= 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
61	pio2_3	= 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
62	pio2_3t	= 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
63	pp1		= -1.666666666605760465276263943134982554676e-0001,
64	pp2		=  8.333261209690963126718376566146180944442e-0003,
65	qq1		= -4.999999999977710986407023955908711557870e-0001,
66	qq2		=  4.166654863857219350645055881018842089580e-0002,
67	poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
68				-4.999999999999931701464060878888294524481e-0001 },
69	poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
70				 4.166666666394861917535640593963708222319e-0002 },
71	poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
72				-1.388888552656142867832756687736851681462e-0003 },
73	poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
74				 2.478519423681460796618128289454530524759e-0005 };
75
76/* Don't __ the following; acomp will handle it */
77extern double fabs(double);
78extern void __vlibm_vsincos_big(int, double *, int, double *, int, double *, int, int);
79
80/*
81 * y[i*stridey] := sin( x[i*stridex] ), for i = 0..n.
82 * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
83 *
84 * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
85 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
86 *
87 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
88 */
89void
90__vsincos(int n, double * restrict x, int stridex,
91				double * restrict y, int stridey,
92				double * restrict c, int stridec)
93{
94	double		x0_or_one[4], x1_or_one[4], x2_or_one[4];
95	double		y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
96	double		x0, x1, x2,
97			*py0, *py1, *py2,
98			*pc0, *pc1, *pc2,
99			*xsave, *ysave, *csave;
100	unsigned	hx0, hx1, hx2, xsb0, xsb1, xsb2;
101	int		i, biguns, nsave, sxsave, sysave, scsave;
102	volatile int	v __unused;
103	nsave = n;
104	xsave = x;
105	sxsave = stridex;
106	ysave = y;
107	sysave = stridey;
108	csave = c;
109	scsave = stridec;
110	biguns = 0;
111
112	do /* MAIN LOOP */
113	{
114
115		/* Gotos here so _break_ exits MAIN LOOP. */
116LOOP0:  /* Find first arg in right range. */
117		xsb0 = HI(x); /* get most significant word */
118		hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
119		if (hx0 > 0x3fe921fb) {
120			/* Too big: arg reduction needed, so leave for second part */
121			biguns = 1;
122			x += stridex;
123			y += stridey;
124			c += stridec;
125			i = 0;
126			if (--n <= 0)
127				break;
128			goto LOOP0;
129		}
130		if (hx0 < 0x3e400000) {
131			/* Too small.  cos x ~ 1, sin x ~ x. */
132			v = *x;
133			*c = 1.0;
134			*y = *x;
135			x += stridex;
136			y += stridey;
137			c += stridec;
138			i = 0;
139			if (--n <= 0)
140				break;
141			goto LOOP0;
142		}
143		x0 = *x;
144		py0 = y;
145		pc0 = c;
146		x += stridex;
147		y += stridey;
148		c += stridec;
149		i = 1;
150		if (--n <= 0)
151			break;
152
153LOOP1: /* Get second arg, same as above. */
154		xsb1 = HI(x);
155		hx1 = xsb1 & ~0x80000000;
156		if (hx1 > 0x3fe921fb)
157		{
158			biguns = 1;
159			x += stridex;
160			y += stridey;
161			c += stridec;
162			i = 1;
163			if (--n <= 0)
164				break;
165			goto LOOP1;
166		}
167		if (hx1 < 0x3e400000)
168		{
169			v = *x;
170			*c = 1.0;
171			*y = *x;
172			x += stridex;
173			y += stridey;
174			c += stridec;
175			i = 1;
176			if (--n <= 0)
177				break;
178			goto LOOP1;
179		}
180		x1 = *x;
181		py1 = y;
182		pc1 = c;
183		x += stridex;
184		y += stridey;
185		c += stridec;
186		i = 2;
187		if (--n <= 0)
188			break;
189
190LOOP2: /* Get third arg, same as above. */
191		xsb2 = HI(x);
192		hx2 = xsb2 & ~0x80000000;
193		if (hx2 > 0x3fe921fb)
194		{
195			biguns = 1;
196			x += stridex;
197			y += stridey;
198			c += stridec;
199			i = 2;
200			if (--n <= 0)
201				break;
202			goto LOOP2;
203		}
204		if (hx2 < 0x3e400000)
205		{
206			v = *x;
207			*c = 1.0;
208			*y = *x;
209			x += stridex;
210			y += stridey;
211			c += stridec;
212			i = 2;
213			if (--n <= 0)
214				break;
215			goto LOOP2;
216		}
217		x2 = *x;
218		py2 = y;
219		pc2 = c;
220
221		/*
222		 * 0x3fc40000 = 5/32 ~ 0.15625
223		 * Get msb after subtraction.  Will be 1 only if
224		 * hx0 - 5/32 is negative.
225		 */
226		i = (hx2 - 0x3fc40000) >> 31;
227		i |= ((hx1 - 0x3fc40000) >> 30) & 2;
228		i |= ((hx0 - 0x3fc40000) >> 29) & 4;
229		switch (i)
230		{
231			double		a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
232			double		w0, w1, w2;
233			double		t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
234			double		z0, z1, z2;
235			unsigned	j0, j1, j2;
236
237		case 0: /* All are > 5/32 */
238			j0 = (xsb0 + 0x4000) & 0xffff8000;
239			j1 = (xsb1 + 0x4000) & 0xffff8000;
240			j2 = (xsb2 + 0x4000) & 0xffff8000;
241
242			HI(&t0) = j0;
243			HI(&t1) = j1;
244			HI(&t2) = j2;
245			LO(&t0) = 0;
246			LO(&t1) = 0;
247			LO(&t2) = 0;
248
249			x0 -= t0;
250			x1 -= t1;
251			x2 -= t2;
252
253			z0 = x0 * x0;
254			z1 = x1 * x1;
255			z2 = x2 * x2;
256
257			t0 = z0 * (qq1 + z0 * qq2);
258			t1 = z1 * (qq1 + z1 * qq2);
259			t2 = z2 * (qq1 + z2 * qq2);
260
261			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
262			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
263			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
264
265			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
266			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
267			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
268
269			xsb0 = (xsb0 >> 30) & 2;
270			xsb1 = (xsb1 >> 30) & 2;
271			xsb2 = (xsb2 >> 30) & 2;
272
273			a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
274			a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
275			a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
276
277			a2_0 = __vlibm_TBL_sincos_hi[j0+1];	/* cos_hi(t) */
278			a2_1 = __vlibm_TBL_sincos_hi[j1+1];
279			a2_2 = __vlibm_TBL_sincos_hi[j2+1];
280				/* cos_lo(t) */
281			t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
282			t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
283			t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2);
284
285			*pc0 = a2_0 + t2_0;
286			*pc1 = a2_1 + t2_1;
287			*pc2 = a2_2 + t2_2;
288
289			t1_0 = a2_0*w0 + a1_0*t0;
290			t1_1 = a2_1*w1 + a1_1*t1;
291			t1_2 = a2_2*w2 + a1_2*t2;
292
293			t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
294			t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
295			t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
296
297			*py0 = a1_0 + t1_0;
298			*py1 = a1_1 + t1_1;
299			*py2 = a1_2 + t1_2;
300
301			break;
302
303		case 1:
304			j0 = (xsb0 + 0x4000) & 0xffff8000;
305			j1 = (xsb1 + 0x4000) & 0xffff8000;
306			HI(&t0) = j0;
307			HI(&t1) = j1;
308			LO(&t0) = 0;
309			LO(&t1) = 0;
310			x0 -= t0;
311			x1 -= t1;
312			z0 = x0 * x0;
313			z1 = x1 * x1;
314			z2 = x2 * x2;
315			t0 = z0 * (qq1 + z0 * qq2);
316			t1 = z1 * (qq1 + z1 * qq2);
317			t2 = z2 * (poly3[1] + z2 * poly4[1]);
318			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
319			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
320			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
321			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
322			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
323			xsb0 = (xsb0 >> 30) & 2;
324			xsb1 = (xsb1 >> 30) & 2;
325
326			a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
327			a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
328
329			a2_0 = __vlibm_TBL_sincos_hi[j0+1];	/* cos_hi(t) */
330			a2_1 = __vlibm_TBL_sincos_hi[j1+1];
331				/* cos_lo(t) */
332			t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
333			t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
334
335			*pc0 = a2_0 + t2_0;
336			*pc1 = a2_1 + t2_1;
337			*pc2 = one + t2;
338
339			t1_0 = a2_0*w0 + a1_0*t0;
340			t1_1 = a2_1*w1 + a1_1*t1;
341			t2 = z2 * (poly3[0] + z2 * poly4[0]);
342
343			t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
344			t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
345			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
346
347			*py0 = a1_0 + t1_0;
348			*py1 = a1_1 + t1_1;
349			t2 = x2 + x2 * t2;
350			*py2 = t2;
351
352			break;
353
354		case 2:
355			j0 = (xsb0 + 0x4000) & 0xffff8000;
356			j2 = (xsb2 + 0x4000) & 0xffff8000;
357			HI(&t0) = j0;
358			HI(&t2) = j2;
359			LO(&t0) = 0;
360			LO(&t2) = 0;
361			x0 -= t0;
362			x2 -= t2;
363			z0 = x0 * x0;
364			z1 = x1 * x1;
365			z2 = x2 * x2;
366			t0 = z0 * (qq1 + z0 * qq2);
367			t1 = z1 * (poly3[1] + z1 * poly4[1]);
368			t2 = z2 * (qq1 + z2 * qq2);
369			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
370			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
371			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
372			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
373			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
374			xsb0 = (xsb0 >> 30) & 2;
375			xsb2 = (xsb2 >> 30) & 2;
376
377			a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
378			a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
379
380			a2_0 = __vlibm_TBL_sincos_hi[j0+1];	/* cos_hi(t) */
381			a2_2 = __vlibm_TBL_sincos_hi[j2+1];
382				/* cos_lo(t) */
383			t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
384			t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2);
385
386			*pc0 = a2_0 + t2_0;
387			*pc1 = one + t1;
388			*pc2 = a2_2 + t2_2;
389
390			t1_0 = a2_0*w0 + a1_0*t0;
391			t1 = z1 * (poly3[0] + z1 * poly4[0]);
392			t1_2 = a2_2*w2 + a1_2*t2;
393
394			t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
395			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
396			t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
397
398			*py0 = a1_0 + t1_0;
399			t1 = x1 + x1 * t1;
400			*py1 = t1;
401			*py2 = a1_2 + t1_2;
402
403			break;
404
405		case 3:
406			j0 = (xsb0 + 0x4000) & 0xffff8000;
407			HI(&t0) = j0;
408			LO(&t0) = 0;
409			x0 -= t0;
410			z0 = x0 * x0;
411			z1 = x1 * x1;
412			z2 = x2 * x2;
413			t0 = z0 * (qq1 + z0 * qq2);
414			t1 = z1 * (poly3[1] + z1 * poly4[1]);
415			t2 = z2 * (poly3[1] + z2 * poly4[1]);
416			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
417			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
418			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
419			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
420			xsb0 = (xsb0 >> 30) & 2;
421			a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
422
423			a2_0 = __vlibm_TBL_sincos_hi[j0+1];	/* cos_hi(t) */
424
425			t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
426
427			*pc0 = a2_0 + t2_0;
428			*pc1 = one + t1;
429			*pc2 = one + t2;
430
431			t1_0 = a2_0*w0 + a1_0*t0;
432			t1 = z1 * (poly3[0] + z1 * poly4[0]);
433			t2 = z2 * (poly3[0] + z2 * poly4[0]);
434
435			t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
436			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
437			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
438
439			*py0 = a1_0 + t1_0;
440			t1 = x1 + x1 * t1;
441			*py1 = t1;
442			t2 = x2 + x2 * t2;
443			*py2 = t2;
444
445			break;
446
447		case 4:
448			j1 = (xsb1 + 0x4000) & 0xffff8000;
449			j2 = (xsb2 + 0x4000) & 0xffff8000;
450			HI(&t1) = j1;
451			HI(&t2) = j2;
452			LO(&t1) = 0;
453			LO(&t2) = 0;
454			x1 -= t1;
455			x2 -= t2;
456			z0 = x0 * x0;
457			z1 = x1 * x1;
458			z2 = x2 * x2;
459			t0 = z0 * (poly3[1] + z0 * poly4[1]);
460			t1 = z1 * (qq1 + z1 * qq2);
461			t2 = z2 * (qq1 + z2 * qq2);
462			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
463			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
464			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
465			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
466			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
467			xsb1 = (xsb1 >> 30) & 2;
468			xsb2 = (xsb2 >> 30) & 2;
469
470			a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
471			a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
472
473			a2_1 = __vlibm_TBL_sincos_hi[j1+1];
474			a2_2 = __vlibm_TBL_sincos_hi[j2+1];
475				/* cos_lo(t) */
476			t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
477			t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2);
478
479			*pc0 = one + t0;
480			*pc1 = a2_1 + t2_1;
481			*pc2 = a2_2 + t2_2;
482
483			t0 = z0 * (poly3[0] + z0 * poly4[0]);
484			t1_1 = a2_1*w1 + a1_1*t1;
485			t1_2 = a2_2*w2 + a1_2*t2;
486
487			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
488			t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
489			t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
490
491			t0 = x0 + x0 * t0;
492			*py0 = t0;
493			*py1 = a1_1 + t1_1;
494			*py2 = a1_2 + t1_2;
495
496			break;
497
498		case 5:
499			j1 = (xsb1 + 0x4000) & 0xffff8000;
500			HI(&t1) = j1;
501			LO(&t1) = 0;
502			x1 -= t1;
503			z0 = x0 * x0;
504			z1 = x1 * x1;
505			z2 = x2 * x2;
506			t0 = z0 * (poly3[1] + z0 * poly4[1]);
507			t1 = z1 * (qq1 + z1 * qq2);
508			t2 = z2 * (poly3[1] + z2 * poly4[1]);
509			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
510			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
511			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
512			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
513			xsb1 = (xsb1 >> 30) & 2;
514
515			a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
516
517			a2_1 = __vlibm_TBL_sincos_hi[j1+1];
518
519			t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
520
521			*pc0 = one + t0;
522			*pc1 = a2_1 + t2_1;
523			*pc2 = one + t2;
524
525			t0 = z0 * (poly3[0] + z0 * poly4[0]);
526			t1_1 = a2_1*w1 + a1_1*t1;
527			t2 = z2 * (poly3[0] + z2 * poly4[0]);
528
529			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
530			t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
531			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
532
533			t0 = x0 + x0 * t0;
534			*py0 = t0;
535			*py1 = a1_1 + t1_1;
536			t2 = x2 + x2 * t2;
537			*py2 = t2;
538
539			break;
540
541		case 6:
542			j2 = (xsb2 + 0x4000) & 0xffff8000;
543			HI(&t2) = j2;
544			LO(&t2) = 0;
545			x2 -= t2;
546			z0 = x0 * x0;
547			z1 = x1 * x1;
548			z2 = x2 * x2;
549			t0 = z0 * (poly3[1] + z0 * poly4[1]);
550			t1 = z1 * (poly3[1] + z1 * poly4[1]);
551			t2 = z2 * (qq1 + z2 * qq2);
552			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
553			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
554			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
555			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
556			xsb2 = (xsb2 >> 30) & 2;
557			a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
558
559			a2_2 = __vlibm_TBL_sincos_hi[j2+1];
560
561			t2_2 = __vlibm_TBL_sincos_lo[j2+1] - (a1_2*w2 - a2_2*t2);
562
563			*pc0 = one + t0;
564			*pc1 = one + t1;
565			*pc2 = a2_2 + t2_2;
566
567			t0 = z0 * (poly3[0] + z0 * poly4[0]);
568			t1 = z1 * (poly3[0] + z1 * poly4[0]);
569			t1_2 = a2_2*w2 + a1_2*t2;
570
571			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
572			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
573			t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
574
575			t0 = x0 + x0 * t0;
576			*py0 = t0;
577			t1 = x1 + x1 * t1;
578			*py1 = t1;
579			*py2 = a1_2 + t1_2;
580
581			break;
582
583		case 7: /* All are < 5/32 */
584			z0 = x0 * x0;
585			z1 = x1 * x1;
586			z2 = x2 * x2;
587			t0 = z0 * (poly3[1] + z0 * poly4[1]);
588			t1 = z1 * (poly3[1] + z1 * poly4[1]);
589			t2 = z2 * (poly3[1] + z2 * poly4[1]);
590			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
591			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
592			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
593			*pc0 = one + t0;
594			*pc1 = one + t1;
595			*pc2 = one + t2;
596			t0 = z0 * (poly3[0] + z0 * poly4[0]);
597			t1 = z1 * (poly3[0] + z1 * poly4[0]);
598			t2 = z2 * (poly3[0] + z2 * poly4[0]);
599			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
600			t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
601			t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
602			t0 = x0 + x0 * t0;
603			t1 = x1 + x1 * t1;
604			t2 = x2 + x2 * t2;
605			*py0 = t0;
606			*py1 = t1;
607			*py2 = t2;
608			break;
609		}
610
611		x += stridex;
612		y += stridey;
613		c += stridec;
614		i = 0;
615	} while (--n > 0); /* END MAIN LOOP */
616
617	/*
618	 * CLEAN UP last 0, 1, or 2 elts.
619	 */
620	if (i > 0) /* Clean up elts at tail.  i < 3. */
621	{
622		double		a1_0, a1_1, a2_0, a2_1;
623		double		w0, w1;
624		double		t0, t1, t1_0, t1_1, t2_0, t2_1;
625		double		z0, z1;
626		unsigned	j0, j1;
627
628		if (i > 1)
629		{
630			if (hx1 < 0x3fc40000)
631			{
632				z1 = x1 * x1;
633				t1 = z1 * (poly3[1] + z1 * poly4[1]);
634				t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
635				t1 = one + t1;
636				*pc1 = t1;
637				t1 = z1 * (poly3[0] + z1 * poly4[0]);
638				t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
639				t1 = x1 + x1 * t1;
640				*py1 = t1;
641			}
642			else
643			{
644				j1 = (xsb1 + 0x4000) & 0xffff8000;
645				HI(&t1) = j1;
646				LO(&t1) = 0;
647				x1 -= t1;
648				z1 = x1 * x1;
649				t1 = z1 * (qq1 + z1 * qq2);
650				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
651				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
652				xsb1 = (xsb1 >> 30) & 2;
653				a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
654				a2_1 = __vlibm_TBL_sincos_hi[j1+1];
655				t2_1 = __vlibm_TBL_sincos_lo[j1+1] - (a1_1*w1 - a2_1*t1);
656				*pc1 = a2_1 + t2_1;
657				t1_1 = a2_1*w1 + a1_1*t1;
658				t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
659				*py1 = a1_1 + t1_1;
660			}
661		}
662		if (hx0 < 0x3fc40000)
663		{
664			z0 = x0 * x0;
665			t0 = z0 * (poly3[1] + z0 * poly4[1]);
666			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
667			t0 = one + t0;
668			*pc0 = t0;
669			t0 = z0 * (poly3[0] + z0 * poly4[0]);
670			t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
671			t0 = x0 + x0 * t0;
672			*py0 = t0;
673		}
674		else
675		{
676			j0 = (xsb0 + 0x4000) & 0xffff8000;
677			HI(&t0) = j0;
678			LO(&t0) = 0;
679			x0 -= t0;
680			z0 = x0 * x0;
681			t0 = z0 * (qq1 + z0 * qq2);
682			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
683			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
684			xsb0 = (xsb0 >> 30) & 2;
685			a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
686			a2_0 = __vlibm_TBL_sincos_hi[j0+1];	/* cos_hi(t) */
687			t2_0 = __vlibm_TBL_sincos_lo[j0+1] - (a1_0*w0 - a2_0*t0);
688			*pc0 = a2_0 + t2_0;
689			t1_0 = a2_0*w0 + a1_0*t0;
690			t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
691			*py0 = a1_0 + t1_0;
692		}
693	} /* END CLEAN UP */
694
695	if (!biguns)
696		return;
697
698	/*
699	 * Take care of BIGUNS.
700	 */
701	n = nsave;
702	x = xsave;
703	stridex = sxsave;
704	y = ysave;
705	stridey = sysave;
706	c = csave;
707	stridec = scsave;
708	biguns = 0;
709
710	x0_or_one[1] = 1.0;
711	x1_or_one[1] = 1.0;
712	x2_or_one[1] = 1.0;
713	x0_or_one[3] = -1.0;
714	x1_or_one[3] = -1.0;
715	x2_or_one[3] = -1.0;
716	y0_or_zero[1] = 0.0;
717	y1_or_zero[1] = 0.0;
718	y2_or_zero[1] = 0.0;
719	y0_or_zero[3] = 0.0;
720	y1_or_zero[3] = 0.0;
721	y2_or_zero[3] = 0.0;
722
723	do
724	{
725		double		fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
726		unsigned	hx;
727		int			n0, n1, n2;
728
729		/*
730		 * Find 3 more to work on: Not already done, not too big.
731		 */
732loop0:
733		hx = HI(x);
734		xsb0 = hx >> 31;
735		hx &= ~0x80000000;
736		if (hx <= 0x3fe921fb) /* Done above. */
737		{
738			x += stridex;
739			y += stridey;
740			c += stridec;
741			i = 0;
742			if (--n <= 0)
743				break;
744			goto loop0;
745		}
746		if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */
747		{
748			if (hx >= 0x7ff00000) /* Inf or NaN */
749			{
750				x0 = *x;
751				*y = x0 - x0;
752				*c = x0 - x0;
753			}
754			else {
755				biguns = 1;
756			}
757			x += stridex;
758			y += stridey;
759			c += stridec;
760			i = 0;
761			if (--n <= 0)
762				break;
763			goto loop0;
764		}
765		x0 = *x;
766		py0 = y;
767		pc0 = c;
768		x += stridex;
769		y += stridey;
770		c += stridec;
771		i = 1;
772		if (--n <= 0)
773			break;
774
775loop1:
776		hx = HI(x);
777		xsb1 = hx >> 31;
778		hx &= ~0x80000000;
779		if (hx <= 0x3fe921fb)
780		{
781			x += stridex;
782			y += stridey;
783			c += stridec;
784			i = 1;
785			if (--n <= 0)
786				break;
787			goto loop1;
788		}
789		if (hx > 0x413921fb)
790		{
791			if (hx >= 0x7ff00000)
792			{
793				x1 = *x;
794				*y = x1 - x1;
795				*c = x1 - x1;
796			}
797			else {
798				biguns = 1;
799			}
800			x += stridex;
801			y += stridey;
802			c += stridec;
803			i = 1;
804			if (--n <= 0)
805				break;
806			goto loop1;
807		}
808		x1 = *x;
809		py1 = y;
810		pc1 = c;
811		x += stridex;
812		y += stridey;
813		c += stridec;
814		i = 2;
815		if (--n <= 0)
816			break;
817
818loop2:
819		hx = HI(x);
820		xsb2 = hx >> 31;
821		hx &= ~0x80000000;
822		if (hx <= 0x3fe921fb)
823		{
824			x += stridex;
825			y += stridey;
826			c += stridec;
827			i = 2;
828			if (--n <= 0)
829				break;
830			goto loop2;
831		}
832		if (hx > 0x413921fb)
833		{
834			if (hx >= 0x7ff00000)
835			{
836				x2 = *x;
837				*y = x2 - x2;
838				*c = x2 - x2;
839			}
840			else {
841				biguns = 1;
842			}
843			x += stridex;
844			y += stridey;
845			c += stridec;
846			i = 2;
847			if (--n <= 0)
848				break;
849			goto loop2;
850		}
851		x2 = *x;
852		py2 = y;
853		pc2 = c;
854
855		n0 = (int) (x0 * invpio2 + half[xsb0]);
856		n1 = (int) (x1 * invpio2 + half[xsb1]);
857		n2 = (int) (x2 * invpio2 + half[xsb2]);
858		fn0 = (double) n0;
859		fn1 = (double) n1;
860		fn2 = (double) n2;
861		n0 &= 3;
862		n1 &= 3;
863		n2 &= 3;
864		a0 = x0 - fn0 * pio2_1;
865		a1 = x1 - fn1 * pio2_1;
866		a2 = x2 - fn2 * pio2_1;
867		w0 = fn0 * pio2_2;
868		w1 = fn1 * pio2_2;
869		w2 = fn2 * pio2_2;
870		x0 = a0 - w0;
871		x1 = a1 - w1;
872		x2 = a2 - w2;
873		y0 = (a0 - x0) - w0;
874		y1 = (a1 - x1) - w1;
875		y2 = (a2 - x2) - w2;
876		a0 = x0;
877		a1 = x1;
878		a2 = x2;
879		w0 = fn0 * pio2_3 - y0;
880		w1 = fn1 * pio2_3 - y1;
881		w2 = fn2 * pio2_3 - y2;
882		x0 = a0 - w0;
883		x1 = a1 - w1;
884		x2 = a2 - w2;
885		y0 = (a0 - x0) - w0;
886		y1 = (a1 - x1) - w1;
887		y2 = (a2 - x2) - w2;
888		a0 = x0;
889		a1 = x1;
890		a2 = x2;
891		w0 = fn0 * pio2_3t - y0;
892		w1 = fn1 * pio2_3t - y1;
893		w2 = fn2 * pio2_3t - y2;
894		x0 = a0 - w0;
895		x1 = a1 - w1;
896		x2 = a2 - w2;
897		y0 = (a0 - x0) - w0;
898		y1 = (a1 - x1) - w1;
899		y2 = (a2 - x2) - w2;
900		xsb2 = HI(&x2);
901		i = ((xsb2 & ~0x80000000) - 0x3fc40000) >> 31;
902		xsb1 = HI(&x1);
903		i |= (((xsb1 & ~0x80000000) - 0x3fc40000) >> 30) & 2;
904		xsb0 = HI(&x0);
905		i |= (((xsb0 & ~0x80000000) - 0x3fc40000) >> 29) & 4;
906		switch (i)
907		{
908			double		a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
909			double		t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
910			double		z0, z1, z2;
911			unsigned	j0, j1, j2;
912
913		case 0:
914			j0 = (xsb0 + 0x4000) & 0xffff8000;
915			j1 = (xsb1 + 0x4000) & 0xffff8000;
916			j2 = (xsb2 + 0x4000) & 0xffff8000;
917			HI(&t0) = j0;
918			HI(&t1) = j1;
919			HI(&t2) = j2;
920			LO(&t0) = 0;
921			LO(&t1) = 0;
922			LO(&t2) = 0;
923			x0 = (x0 - t0) + y0;
924			x1 = (x1 - t1) + y1;
925			x2 = (x2 - t2) + y2;
926			z0 = x0 * x0;
927			z1 = x1 * x1;
928			z2 = x2 * x2;
929			t0 = z0 * (qq1 + z0 * qq2);
930			t1 = z1 * (qq1 + z1 * qq2);
931			t2 = z2 * (qq1 + z2 * qq2);
932			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
933			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
934			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
935			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
936			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
937			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
938			xsb0 = (xsb0 >> 30) & 2;
939			xsb1 = (xsb1 >> 30) & 2;
940			xsb2 = (xsb2 >> 30) & 2;
941			n0 ^= (xsb0 & ~(n0 << 1));
942			n1 ^= (xsb1 & ~(n1 << 1));
943			n2 ^= (xsb2 & ~(n2 << 1));
944			xsb0 |= 1;
945			xsb1 |= 1;
946			xsb2 |= 1;
947
948			a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
949			a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
950			a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
951
952			a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
953			a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
954			a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
955
956			t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
957			t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
958			t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2);
959
960			w0 *= a2_0;
961			w1 *= a2_1;
962			w2 *= a2_2;
963
964			*pc0 = a2_0 + t2_0;
965			*pc1 = a2_1 + t2_1;
966			*pc2 = a2_2 + t2_2;
967
968			t1_0 = w0 + a1_0*t0;
969			t1_1 = w1 + a1_1*t1;
970			t1_2 = w2 + a1_2*t2;
971
972			t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
973			t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
974			t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
975
976			*py0 = a1_0 + t1_0;
977			*py1 = a1_1 + t1_1;
978			*py2 = a1_2 + t1_2;
979
980			break;
981
982		case 1:
983			j0 = (xsb0 + 0x4000) & 0xffff8000;
984			j1 = (xsb1 + 0x4000) & 0xffff8000;
985			j2 = n2 & 1;
986			HI(&t0) = j0;
987			HI(&t1) = j1;
988			LO(&t0) = 0;
989			LO(&t1) = 0;
990			x2_or_one[0] = x2;
991			x2_or_one[2] = -x2;
992			x0 = (x0 - t0) + y0;
993			x1 = (x1 - t1) + y1;
994			y2_or_zero[0] = y2;
995			y2_or_zero[2] = -y2;
996			z0 = x0 * x0;
997			z1 = x1 * x1;
998			z2 = x2 * x2;
999			t0 = z0 * (qq1 + z0 * qq2);
1000			t1 = z1 * (qq1 + z1 * qq2);
1001			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1002			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1003			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1004			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1005			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1006			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1007			xsb0 = (xsb0 >> 30) & 2;
1008			xsb1 = (xsb1 >> 30) & 2;
1009			n0 ^= (xsb0 & ~(n0 << 1));
1010			n1 ^= (xsb1 & ~(n1 << 1));
1011			xsb0 |= 1;
1012			xsb1 |= 1;
1013			a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1014			a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1015
1016			a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1017			a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1018
1019			t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
1020			t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
1021			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1022
1023			*pc0 = a2_0 + t2_0;
1024			*pc1 = a2_1 + t2_1;
1025			*py2 = t2;
1026
1027			n2 = (n2 + 1) & 3;
1028			j2 = (j2 + 1) & 1;
1029			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1030
1031			t1_0 = a2_0*w0 + a1_0*t0;
1032			t1_1 = a2_1*w1 + a1_1*t1;
1033			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1034
1035			t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1036			t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1037			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1038
1039			*py0 = a1_0 + t1_0;
1040			*py1 = a1_1 + t1_1;
1041			*pc2 = t2;
1042
1043			break;
1044
1045		case 2:
1046			j0 = (xsb0 + 0x4000) & 0xffff8000;
1047			j1 = n1 & 1;
1048			j2 = (xsb2 + 0x4000) & 0xffff8000;
1049			HI(&t0) = j0;
1050			HI(&t2) = j2;
1051			LO(&t0) = 0;
1052			LO(&t2) = 0;
1053			x1_or_one[0] = x1;
1054			x1_or_one[2] = -x1;
1055			x0 = (x0 - t0) + y0;
1056			y1_or_zero[0] = y1;
1057			y1_or_zero[2] = -y1;
1058			x2 = (x2 - t2) + y2;
1059			z0 = x0 * x0;
1060			z1 = x1 * x1;
1061			z2 = x2 * x2;
1062			t0 = z0 * (qq1 + z0 * qq2);
1063			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1064			t2 = z2 * (qq1 + z2 * qq2);
1065			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1066			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1067			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
1068			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1069			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1070			xsb0 = (xsb0 >> 30) & 2;
1071			xsb2 = (xsb2 >> 30) & 2;
1072			n0 ^= (xsb0 & ~(n0 << 1));
1073			n2 ^= (xsb2 & ~(n2 << 1));
1074			xsb0 |= 1;
1075			xsb2 |= 1;
1076
1077			a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1078			a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1079
1080			a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1081			a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1082
1083			t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
1084			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1085			t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2);
1086
1087			*pc0 = a2_0 + t2_0;
1088			*py1 = t1;
1089			*pc2 = a2_2 + t2_2;
1090
1091			n1 = (n1 + 1) & 3;
1092			j1 = (j1 + 1) & 1;
1093			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1094
1095			t1_0 = a2_0*w0 + a1_0*t0;
1096			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1097			t1_2 = a2_2*w2 + a1_2*t2;
1098
1099			t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1100			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1101			t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1102
1103			*py0 = a1_0 + t1_0;
1104			*pc1 = t1;
1105			*py2 = a1_2 + t1_2;
1106
1107			break;
1108
1109		case 3:
1110			j0 = (xsb0 + 0x4000) & 0xffff8000;
1111			j1 = n1 & 1;
1112			j2 = n2 & 1;
1113			HI(&t0) = j0;
1114			LO(&t0) = 0;
1115			x1_or_one[0] = x1;
1116			x1_or_one[2] = -x1;
1117			x2_or_one[0] = x2;
1118			x2_or_one[2] = -x2;
1119			x0 = (x0 - t0) + y0;
1120			y1_or_zero[0] = y1;
1121			y1_or_zero[2] = -y1;
1122			y2_or_zero[0] = y2;
1123			y2_or_zero[2] = -y2;
1124			z0 = x0 * x0;
1125			z1 = x1 * x1;
1126			z2 = x2 * x2;
1127			t0 = z0 * (qq1 + z0 * qq2);
1128			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1129			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1130			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1131			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1132			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1133			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1134			xsb0 = (xsb0 >> 30) & 2;
1135			n0 ^= (xsb0 & ~(n0 << 1));
1136			xsb0 |= 1;
1137
1138			a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1139			a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1140
1141			t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
1142			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1143			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1144
1145			*pc0 = a2_0 + t2_0;
1146			*py1 = t1;
1147			*py2 = t2;
1148
1149			n1 = (n1 + 1) & 3;
1150			n2 = (n2 + 1) & 3;
1151			j1 = (j1 + 1) & 1;
1152			j2 = (j2 + 1) & 1;
1153
1154			t1_0 = a2_0*w0 + a1_0*t0;
1155			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1156			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1157
1158			t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1159			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1160			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1161
1162			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1163			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1164
1165			*py0 = a1_0 + t1_0;
1166			*pc1 = t1;
1167			*pc2 = t2;
1168
1169			break;
1170
1171		case 4:
1172			j0 = n0 & 1;
1173			j1 = (xsb1 + 0x4000) & 0xffff8000;
1174			j2 = (xsb2 + 0x4000) & 0xffff8000;
1175			HI(&t1) = j1;
1176			HI(&t2) = j2;
1177			LO(&t1) = 0;
1178			LO(&t2) = 0;
1179			x0_or_one[0] = x0;
1180			x0_or_one[2] = -x0;
1181			y0_or_zero[0] = y0;
1182			y0_or_zero[2] = -y0;
1183			x1 = (x1 - t1) + y1;
1184			x2 = (x2 - t2) + y2;
1185			z0 = x0 * x0;
1186			z1 = x1 * x1;
1187			z2 = x2 * x2;
1188			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1189			t1 = z1 * (qq1 + z1 * qq2);
1190			t2 = z2 * (qq1 + z2 * qq2);
1191			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1192			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1193			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
1194			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1195			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1196			xsb1 = (xsb1 >> 30) & 2;
1197			xsb2 = (xsb2 >> 30) & 2;
1198			n1 ^= (xsb1 & ~(n1 << 1));
1199			n2 ^= (xsb2 & ~(n2 << 1));
1200			xsb1 |= 1;
1201			xsb2 |= 1;
1202
1203			a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1204			a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1205
1206			a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1207			a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1208
1209			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1210			t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
1211			t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2);
1212
1213			*py0 = t0;
1214			*pc1 = a2_1 + t2_1;
1215			*pc2 = a2_2 + t2_2;
1216
1217			n0 = (n0 + 1) & 3;
1218			j0 = (j0 + 1) & 1;
1219			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1220
1221			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1222			t1_1 = a2_1*w1 + a1_1*t1;
1223			t1_2 = a2_2*w2 + a1_2*t2;
1224
1225			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1226			t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1227			t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1228
1229			*py1 = a1_1 + t1_1;
1230			*py2 = a1_2 + t1_2;
1231			*pc0 = t0;
1232
1233			break;
1234
1235		case 5:
1236			j0 = n0 & 1;
1237			j1 = (xsb1 + 0x4000) & 0xffff8000;
1238			j2 = n2 & 1;
1239			HI(&t1) = j1;
1240			LO(&t1) = 0;
1241			x0_or_one[0] = x0;
1242			x0_or_one[2] = -x0;
1243			x2_or_one[0] = x2;
1244			x2_or_one[2] = -x2;
1245			y0_or_zero[0] = y0;
1246			y0_or_zero[2] = -y0;
1247			x1 = (x1 - t1) + y1;
1248			y2_or_zero[0] = y2;
1249			y2_or_zero[2] = -y2;
1250			z0 = x0 * x0;
1251			z1 = x1 * x1;
1252			z2 = x2 * x2;
1253			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1254			t1 = z1 * (qq1 + z1 * qq2);
1255			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1256			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1257			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1258			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1259			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1260			xsb1 = (xsb1 >> 30) & 2;
1261			n1 ^= (xsb1 & ~(n1 << 1));
1262			xsb1 |= 1;
1263
1264			a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1265			a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1266
1267			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1268			t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
1269			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1270
1271			*py0 = t0;
1272			*pc1 = a2_1 + t2_1;
1273			*py2 = t2;
1274
1275			n0 = (n0 + 1) & 3;
1276			n2 = (n2 + 1) & 3;
1277			j0 = (j0 + 1) & 1;
1278			j2 = (j2 + 1) & 1;
1279
1280			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1281			t1_1 = a2_1*w1 + a1_1*t1;
1282			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1283
1284			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1285			t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1286			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1287
1288			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1289			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1290
1291			*pc0 = t0;
1292			*py1 = a1_1 + t1_1;
1293			*pc2 = t2;
1294
1295			break;
1296
1297		case 6:
1298			j0 = n0 & 1;
1299			j1 = n1 & 1;
1300			j2 = (xsb2 + 0x4000) & 0xffff8000;
1301			HI(&t2) = j2;
1302			LO(&t2) = 0;
1303			x0_or_one[0] = x0;
1304			x0_or_one[2] = -x0;
1305			x1_or_one[0] = x1;
1306			x1_or_one[2] = -x1;
1307			y0_or_zero[0] = y0;
1308			y0_or_zero[2] = -y0;
1309			y1_or_zero[0] = y1;
1310			y1_or_zero[2] = -y1;
1311			x2 = (x2 - t2) + y2;
1312			z0 = x0 * x0;
1313			z1 = x1 * x1;
1314			z2 = x2 * x2;
1315			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1316			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1317			t2 = z2 * (qq1 + z2 * qq2);
1318			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1319			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1320			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
1321			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1322			xsb2 = (xsb2 >> 30) & 2;
1323			n2 ^= (xsb2 & ~(n2 << 1));
1324			xsb2 |= 1;
1325
1326			a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1327			a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1328
1329			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1330			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1331			t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - (a1_2*w2 - a2_2*t2);
1332
1333			*py0 = t0;
1334			*py1 = t1;
1335			*pc2 = a2_2 + t2_2;
1336
1337			n0 = (n0 + 1) & 3;
1338			n1 = (n1 + 1) & 3;
1339			j0 = (j0 + 1) & 1;
1340			j1 = (j1 + 1) & 1;
1341
1342			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1343			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1344			t1_2 = a2_2*w2 + a1_2*t2;
1345
1346			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1347			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1348			t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1349
1350			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1351			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1352
1353			*pc0 = t0;
1354			*pc1 = t1;
1355			*py2 = a1_2 + t1_2;
1356
1357			break;
1358
1359		case 7:
1360			j0 = n0 & 1;
1361			j1 = n1 & 1;
1362			j2 = n2 & 1;
1363			x0_or_one[0] = x0;
1364			x0_or_one[2] = -x0;
1365			x1_or_one[0] = x1;
1366			x1_or_one[2] = -x1;
1367			x2_or_one[0] = x2;
1368			x2_or_one[2] = -x2;
1369			y0_or_zero[0] = y0;
1370			y0_or_zero[2] = -y0;
1371			y1_or_zero[0] = y1;
1372			y1_or_zero[2] = -y1;
1373			y2_or_zero[0] = y2;
1374			y2_or_zero[2] = -y2;
1375			z0 = x0 * x0;
1376			z1 = x1 * x1;
1377			z2 = x2 * x2;
1378			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1379			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1380			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1381			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1382			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1383			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1384			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1385			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1386			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1387			*py0 = t0;
1388			*py1 = t1;
1389			*py2 = t2;
1390
1391			n0 = (n0 + 1) & 3;
1392			n1 = (n1 + 1) & 3;
1393			n2 = (n2 + 1) & 3;
1394			j0 = (j0 + 1) & 1;
1395			j1 = (j1 + 1) & 1;
1396			j2 = (j2 + 1) & 1;
1397			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1398			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1399			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
1400			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1401			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1402			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
1403			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1404			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1405			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
1406			*pc0 = t0;
1407			*pc1 = t1;
1408			*pc2 = t2;
1409			break;
1410		}
1411
1412		x += stridex;
1413		y += stridey;
1414		c += stridec;
1415		i = 0;
1416	} while (--n > 0);
1417
1418	if (i > 0)
1419	{
1420		double		a1_0, a1_1, a2_0, a2_1;
1421		double		t0, t1, t1_0, t1_1, t2_0, t2_1;
1422		double		fn0, fn1, a0, a1, w0, w1, y0, y1;
1423		double		z0, z1;
1424		unsigned	j0, j1;
1425		int		n0, n1;
1426
1427		if (i > 1)
1428		{
1429			n1 = (int) (x1 * invpio2 + half[xsb1]);
1430			fn1 = (double) n1;
1431			n1 &= 3;
1432			a1 = x1 - fn1 * pio2_1;
1433			w1 = fn1 * pio2_2;
1434			x1 = a1 - w1;
1435			y1 = (a1 - x1) - w1;
1436			a1 = x1;
1437			w1 = fn1 * pio2_3 - y1;
1438			x1 = a1 - w1;
1439			y1 = (a1 - x1) - w1;
1440			a1 = x1;
1441			w1 = fn1 * pio2_3t - y1;
1442			x1 = a1 - w1;
1443			y1 = (a1 - x1) - w1;
1444			xsb1 = HI(&x1);
1445			if ((xsb1 & ~0x80000000) < 0x3fc40000)
1446			{
1447				j1 = n1 & 1;
1448				x1_or_one[0] = x1;
1449				x1_or_one[2] = -x1;
1450				y1_or_zero[0] = y1;
1451				y1_or_zero[2] = -y1;
1452				z1 = x1 * x1;
1453				t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1454				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1455				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1456				*py1 = t1;
1457				n1 = (n1 + 1) & 3;
1458				j1 = (j1 + 1) & 1;
1459				t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1460				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1461				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1462				*pc1 = t1;
1463			}
1464			else
1465			{
1466				j1 = (xsb1 + 0x4000) & 0xffff8000;
1467				HI(&t1) = j1;
1468				LO(&t1) = 0;
1469				x1 = (x1 - t1) + y1;
1470				z1 = x1 * x1;
1471				t1 = z1 * (qq1 + z1 * qq2);
1472				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1473				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1474				xsb1 = (xsb1 >> 30) & 2;
1475				n1 ^= (xsb1 & ~(n1 << 1));
1476				xsb1 |= 1;
1477				a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1478				a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1479				t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - (a1_1*w1 - a2_1*t1);
1480				*pc1 = a2_1 + t2_1;
1481				t1_1 = a2_1*w1 + a1_1*t1;
1482				t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1483				*py1 = a1_1 + t1_1;
1484			}
1485		}
1486		n0 = (int) (x0 * invpio2 + half[xsb0]);
1487		fn0 = (double) n0;
1488		n0 &= 3;
1489		a0 = x0 - fn0 * pio2_1;
1490		w0 = fn0 * pio2_2;
1491		x0 = a0 - w0;
1492		y0 = (a0 - x0) - w0;
1493		a0 = x0;
1494		w0 = fn0 * pio2_3 - y0;
1495		x0 = a0 - w0;
1496		y0 = (a0 - x0) - w0;
1497		a0 = x0;
1498		w0 = fn0 * pio2_3t - y0;
1499		x0 = a0 - w0;
1500		y0 = (a0 - x0) - w0;
1501		xsb0 = HI(&x0);
1502		if ((xsb0 & ~0x80000000) < 0x3fc40000)
1503		{
1504			j0 = n0 & 1;
1505			x0_or_one[0] = x0;
1506			x0_or_one[2] = -x0;
1507			y0_or_zero[0] = y0;
1508			y0_or_zero[2] = -y0;
1509			z0 = x0 * x0;
1510			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1511			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1512			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1513			*py0 = t0;
1514			n0 = (n0 + 1) & 3;
1515			j0 = (j0 + 1) & 1;
1516			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1517			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1518			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1519			*pc0 = t0;
1520		}
1521		else
1522		{
1523			j0 = (xsb0 + 0x4000) & 0xffff8000;
1524			HI(&t0) = j0;
1525			LO(&t0) = 0;
1526			x0 = (x0 - t0) + y0;
1527			z0 = x0 * x0;
1528			t0 = z0 * (qq1 + z0 * qq2);
1529			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1530			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1531			xsb0 = (xsb0 >> 30) & 2;
1532			n0 ^= (xsb0 & ~(n0 << 1));
1533			xsb0 |= 1;
1534			a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1535			a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1536			t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - (a1_0*w0 - a2_0*t0);
1537			*pc0 = a2_0 + t2_0;
1538			t1_0 = a2_0*w0 + a1_0*t0;
1539			t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1540			*py0 = a1_0 + t1_0;
1541		}
1542	}
1543
1544	if (biguns) {
1545		__vlibm_vsincos_big(nsave, xsave, sxsave, ysave, sysave, csave, scsave, 0x413921fb);
1546	}
1547}
1548