1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24 */
25/*
26 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27 * Use is subject to license terms.
28 */
29
30#include <sys/isa_defs.h>
31#include <sys/ccompile.h>
32
33#ifdef _LITTLE_ENDIAN
34#define HI(x)	*(1+(int*)x)
35#define LO(x)	*(unsigned*)x
36#else
37#define HI(x)	*(int*)x
38#define LO(x)	*(1+(unsigned*)x)
39#endif
40
41#ifdef __RESTRICT
42#define restrict _Restrict
43#else
44#define restrict
45#endif
46
47/*
48 * vcos.1.c
49 *
50 * Vector cosine function.  Just slight modifications to vsin.8.c, mainly
51 * in the primary range part.
52 *
53 * Modification to primary range processing.  If an argument that does not
54 * fall in the primary range is encountered, then processing is continued
55 * in the medium range.
56 *
57 */
58
59extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
60
61static const double
62	half[2]	= { 0.5, -0.5 },
63	one		= 1.0,
64	invpio2 = 0.636619772367581343075535,  /* 53 bits of pi/2 */
65	pio2_1	= 1.570796326734125614166,	/* first 33 bits of pi/2 */
66	pio2_2	= 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
67	pio2_3	= 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
68	pio2_3t	= 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
69	pp1		= -1.666666666605760465276263943134982554676e-0001,
70	pp2		=  8.333261209690963126718376566146180944442e-0003,
71	qq1		= -4.999999999977710986407023955908711557870e-0001,
72	qq2		=  4.166654863857219350645055881018842089580e-0002,
73	poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
74				-4.999999999999931701464060878888294524481e-0001 },
75	poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
76				 4.166666666394861917535640593963708222319e-0002 },
77	poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
78				-1.388888552656142867832756687736851681462e-0003 },
79	poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
80				 2.478519423681460796618128289454530524759e-0005 };
81
82static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
83
84/* Don't __ the following; acomp will handle it */
85extern double fabs(double);
86extern void __vlibm_vcos_big(int, double *, int, double *, int, int);
87
88/*
89 * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
90 *
91 * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
92 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
93 *
94 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
95 */
96void
97__vcos(int n, double * restrict x, int stridex, double * restrict y,
98	int stridey)
99{
100	double		x0_or_one[4], x1_or_one[4], x2_or_one[4];
101	double		y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
102	double		x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
103	unsigned	hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
104	int		i, biguns, nsave, sxsave, sysave;
105	volatile int	v __unused;
106	nsave = n;
107	xsave = x;
108	sxsave = stridex;
109	ysave = y;
110	sysave = stridey;
111	biguns = 0;
112
113	x0 = *x;	/* 'x0' may be used uninitialized */
114	do /* MAIN LOOP */
115	{
116		/* Gotos here so _break_ exits MAIN LOOP. */
117LOOP0:  /* Find first arg in right range. */
118		xsb0 = HI(x); /* get most significant word */
119		hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
120		if (hx0 > 0x3fe921fb) {
121			/* Too big: arg reduction needed, so leave for second part */
122			biguns = 1;
123			goto MEDIUM;
124		}
125		if (hx0 < 0x3e400000) {
126			/* Too small.  cos x ~ 1. */
127			v = *x;
128			*y = 1.0;
129			x += stridex;
130			y += stridey;
131			i = 0;
132			if (--n <= 0)
133				break;
134			goto LOOP0;
135		}
136		x0 = *x;
137		py0 = y;
138		x += stridex;
139		y += stridey;
140		i = 1;
141		if (--n <= 0)
142			break;
143
144LOOP1: /* Get second arg, same as above. */
145		xsb1 = HI(x);
146		hx1 = xsb1 & ~0x80000000;
147		if (hx1 > 0x3fe921fb)
148		{
149			biguns = 2;
150			goto MEDIUM;
151		}
152		if (hx1 < 0x3e400000)
153		{
154			v = *x;
155			*y = 1.0;
156			x += stridex;
157			y += stridey;
158			i = 1;
159			if (--n <= 0)
160				break;
161			goto LOOP1;
162		}
163		x1 = *x;
164		py1 = y;
165		x += stridex;
166		y += stridey;
167		i = 2;
168		if (--n <= 0)
169			break;
170
171LOOP2: /* Get third arg, same as above. */
172		xsb2 = HI(x);
173		hx2 = xsb2 & ~0x80000000;
174		if (hx2 > 0x3fe921fb)
175		{
176			biguns = 3;
177			goto MEDIUM;
178		}
179		if (hx2 < 0x3e400000)
180		{
181			v = *x;
182			*y = 1.0;
183			x += stridex;
184			y += stridey;
185			i = 2;
186			if (--n <= 0)
187				break;
188			goto LOOP2;
189		}
190		x2 = *x;
191		py2 = y;
192
193		/*
194		 * 0x3fc40000 = 5/32 ~ 0.15625
195		 * Get msb after subtraction.  Will be 1 only if
196		 * hx0 - 5/32 is negative.
197		 */
198		i = (hx0 - 0x3fc40000) >> 31;
199		i |= ((hx1 - 0x3fc40000) >> 30) & 2;
200		i |= ((hx2 - 0x3fc40000) >> 29) & 4;
201		switch (i)
202		{
203			double		a0, a1, a2, w0, w1, w2;
204			double		t0, t1, t2, z0, z1, z2;
205			unsigned	j0, j1, j2;
206
207		case 0: /* All are > 5/32 */
208			j0 = (xsb0 + 0x4000) & 0xffff8000;
209			j1 = (xsb1 + 0x4000) & 0xffff8000;
210			j2 = (xsb2 + 0x4000) & 0xffff8000;
211			HI(&t0) = j0;
212			HI(&t1) = j1;
213			HI(&t2) = j2;
214			LO(&t0) = 0;
215			LO(&t1) = 0;
216			LO(&t2) = 0;
217			x0 -= t0;
218			x1 -= t1;
219			x2 -= t2;
220			z0 = x0 * x0;
221			z1 = x1 * x1;
222			z2 = x2 * x2;
223			t0 = z0 * (qq1 + z0 * qq2);
224			t1 = z1 * (qq1 + z1 * qq2);
225			t2 = z2 * (qq1 + z2 * qq2);
226			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
227			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
228			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
229			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
230			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
231			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
232			xsb0 = (xsb0 >> 30) & 2;
233			xsb1 = (xsb1 >> 30) & 2;
234			xsb2 = (xsb2 >> 30) & 2;
235			a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
236			a1 = __vlibm_TBL_sincos_hi[j1+1];
237			a2 = __vlibm_TBL_sincos_hi[j2+1];
238			   /*   cos_lo(t)			 sin_hi(t) */
239			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
240			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
241			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
242
243			*py0 = a0 + t0;
244			*py1 = a1 + t1;
245			*py2 = a2 + t2;
246			break;
247
248		case 1:
249			j1 = (xsb1 + 0x4000) & 0xffff8000;
250			j2 = (xsb2 + 0x4000) & 0xffff8000;
251			HI(&t1) = j1;
252			HI(&t2) = j2;
253			LO(&t1) = 0;
254			LO(&t2) = 0;
255			x1 -= t1;
256			x2 -= t2;
257			z0 = x0 * x0;
258			z1 = x1 * x1;
259			z2 = x2 * x2;
260			t0 = z0 * (poly3[1] + z0 * poly4[1]);
261			t1 = z1 * (qq1 + z1 * qq2);
262			t2 = z2 * (qq1 + z2 * qq2);
263			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
264			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
265			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
266			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
267			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
268			xsb1 = (xsb1 >> 30) & 2;
269			xsb2 = (xsb2 >> 30) & 2;
270			a1 = __vlibm_TBL_sincos_hi[j1+1];
271			a2 = __vlibm_TBL_sincos_hi[j2+1];
272			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
273			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
274			*py0 = one + t0;
275			*py1 = a1 + t1;
276			*py2 = a2 + t2;
277			break;
278
279		case 2:
280			j0 = (xsb0 + 0x4000) & 0xffff8000;
281			j2 = (xsb2 + 0x4000) & 0xffff8000;
282			HI(&t0) = j0;
283			HI(&t2) = j2;
284			LO(&t0) = 0;
285			LO(&t2) = 0;
286			x0 -= t0;
287			x2 -= t2;
288			z0 = x0 * x0;
289			z1 = x1 * x1;
290			z2 = x2 * x2;
291			t0 = z0 * (qq1 + z0 * qq2);
292			t1 = z1 * (poly3[1] + z1 * poly4[1]);
293			t2 = z2 * (qq1 + z2 * qq2);
294			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
295			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
296			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
297			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
298			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
299			xsb0 = (xsb0 >> 30) & 2;
300			xsb2 = (xsb2 >> 30) & 2;
301			a0 = __vlibm_TBL_sincos_hi[j0+1];
302			a2 = __vlibm_TBL_sincos_hi[j2+1];
303			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
304			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
305			*py0 = a0 + t0;
306			*py1 = one + t1;
307			*py2 = a2 + t2;
308			break;
309
310		case 3:
311			j2 = (xsb2 + 0x4000) & 0xffff8000;
312			HI(&t2) = j2;
313			LO(&t2) = 0;
314			x2 -= t2;
315			z0 = x0 * x0;
316			z1 = x1 * x1;
317			z2 = x2 * x2;
318			t0 = z0 * (poly3[1] + z0 * poly4[1]);
319			t1 = z1 * (poly3[1] + z1 * poly4[1]);
320			t2 = z2 * (qq1 + z2 * qq2);
321			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
322			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
323			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
324			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
325			xsb2 = (xsb2 >> 30) & 2;
326			a2 = __vlibm_TBL_sincos_hi[j2+1];
327			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
328			*py0 = one + t0;
329			*py1 = one + t1;
330			*py2 = a2 + t2;
331			break;
332
333		case 4:
334			j0 = (xsb0 + 0x4000) & 0xffff8000;
335			j1 = (xsb1 + 0x4000) & 0xffff8000;
336			HI(&t0) = j0;
337			HI(&t1) = j1;
338			LO(&t0) = 0;
339			LO(&t1) = 0;
340			x0 -= t0;
341			x1 -= t1;
342			z0 = x0 * x0;
343			z1 = x1 * x1;
344			z2 = x2 * x2;
345			t0 = z0 * (qq1 + z0 * qq2);
346			t1 = z1 * (qq1 + z1 * qq2);
347			t2 = z2 * (poly3[1] + z2 * poly4[1]);
348			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
349			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
350			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
351			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
352			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
353			xsb0 = (xsb0 >> 30) & 2;
354			xsb1 = (xsb1 >> 30) & 2;
355			a0 = __vlibm_TBL_sincos_hi[j0+1];
356			a1 = __vlibm_TBL_sincos_hi[j1+1];
357			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
358			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
359			*py0 = a0 + t0;
360			*py1 = a1 + t1;
361			*py2 = one + t2;
362			break;
363
364		case 5:
365			j1 = (xsb1 + 0x4000) & 0xffff8000;
366			HI(&t1) = j1;
367			LO(&t1) = 0;
368			x1 -= t1;
369			z0 = x0 * x0;
370			z1 = x1 * x1;
371			z2 = x2 * x2;
372			t0 = z0 * (poly3[1] + z0 * poly4[1]);
373			t1 = z1 * (qq1 + z1 * qq2);
374			t2 = z2 * (poly3[1] + z2 * poly4[1]);
375			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
376			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
377			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
378			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
379			xsb1 = (xsb1 >> 30) & 2;
380			a1 = __vlibm_TBL_sincos_hi[j1+1];
381			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
382			*py0 = one + t0;
383			*py1 = a1 + t1;
384			*py2 = one + t2;
385			break;
386
387		case 6:
388			j0 = (xsb0 + 0x4000) & 0xffff8000;
389			HI(&t0) = j0;
390			LO(&t0) = 0;
391			x0 -= t0;
392			z0 = x0 * x0;
393			z1 = x1 * x1;
394			z2 = x2 * x2;
395			t0 = z0 * (qq1 + z0 * qq2);
396			t1 = z1 * (poly3[1] + z1 * poly4[1]);
397			t2 = z2 * (poly3[1] + z2 * poly4[1]);
398			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
399			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
400			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
401			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
402			xsb0 = (xsb0 >> 30) & 2;
403			a0 = __vlibm_TBL_sincos_hi[j0+1];
404			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
405			*py0 = a0 + t0;
406			*py1 = one + t1;
407			*py2 = one + t2;
408			break;
409
410		case 7: /* All are < 5/32 */
411			z0 = x0 * x0;
412			z1 = x1 * x1;
413			z2 = x2 * x2;
414			t0 = z0 * (poly3[1] + z0 * poly4[1]);
415			t1 = z1 * (poly3[1] + z1 * poly4[1]);
416			t2 = z2 * (poly3[1] + z2 * poly4[1]);
417			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
418			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
419			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
420			*py0 = one + t0;
421			*py1 = one + t1;
422			*py2 = one + t2;
423			break;
424		}
425
426		x += stridex;
427		y += stridey;
428		i = 0;
429	} while (--n > 0); /* END MAIN LOOP */
430
431	/*
432	 * CLEAN UP last 0, 1, or 2 elts.
433	 */
434	if (i > 0) /* Clean up elts at tail.  i < 3. */
435	{
436		double		a0, a1, w0, w1;
437		double		t0, t1, z0, z1;
438		unsigned	j0, j1;
439
440		if (i > 1)
441		{
442			if (hx1 < 0x3fc40000)
443			{
444				z1 = x1 * x1;
445				t1 = z1 * (poly3[1] + z1 * poly4[1]);
446				t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
447				t1 = one + t1;
448				*py1 = t1;
449			}
450			else
451			{
452				j1 = (xsb1 + 0x4000) & 0xffff8000;
453				HI(&t1) = j1;
454				LO(&t1) = 0;
455				x1 -= t1;
456				z1 = x1 * x1;
457				t1 = z1 * (qq1 + z1 * qq2);
458				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
459				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
460				xsb1 = (xsb1 >> 30) & 2;
461				a1 = __vlibm_TBL_sincos_hi[j1+1];
462				t1 = __vlibm_TBL_sincos_lo[j1+1]
463					- (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
464				*py1 = a1 + t1;
465			}
466		}
467		if (hx0 < 0x3fc40000)
468		{
469			z0 = x0 * x0;
470			t0 = z0 * (poly3[1] + z0 * poly4[1]);
471			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
472			t0 = one + t0;
473			*py0 = t0;
474		}
475		else
476		{
477			j0 = (xsb0 + 0x4000) & 0xffff8000;
478			HI(&t0) = j0;
479			LO(&t0) = 0;
480			x0 -= t0;
481			z0 = x0 * x0;
482			t0 = z0 * (qq1 + z0 * qq2);
483			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
484			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
485			xsb0 = (xsb0 >> 30) & 2;
486			a0 = __vlibm_TBL_sincos_hi[j0+1];
487			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
488			*py0 = a0 + t0;
489		}
490	} /* END CLEAN UP */
491
492	return;
493
494	/*
495	 * Take care of BIGUNS.
496	 *
497	 * We have jumped here in the middle of processing after having
498	 * encountered a medium range argument.  Therefore things are in a
499	 * bit of a tizzy.
500	 */
501
502MEDIUM:
503
504	x0_or_one[1] = 1.0;
505	x1_or_one[1] = 1.0;
506	x2_or_one[1] = 1.0;
507	x0_or_one[3] = -1.0;
508	x1_or_one[3] = -1.0;
509	x2_or_one[3] = -1.0;
510	y0_or_zero[1] = 0.0;
511	y1_or_zero[1] = 0.0;
512	y2_or_zero[1] = 0.0;
513	y0_or_zero[3] = 0.0;
514	y1_or_zero[3] = 0.0;
515	y2_or_zero[3] = 0.0;
516
517	if (biguns == 3)
518	{
519		biguns = 0;
520		xsb0 = xsb0 >> 31;
521		xsb1 = xsb1 >> 31;
522		goto loop2;
523	}
524	else if (biguns == 2)
525	{
526		xsb0 = xsb0 >> 31;
527		biguns = 0;
528		goto loop1;
529	}
530	biguns = 0;
531
532	do
533	{
534		double		fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
535		unsigned	hx;
536		int			n0, n1, n2;
537
538		/*
539		 * Find 3 more to work on: Not already done, not too big.
540		 */
541
542loop0:
543		hx = HI(x);
544		xsb0 = hx >> 31;
545		hx &= ~0x80000000;
546		if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */
547		{
548			if (hx >= 0x7ff00000) /* Inf or NaN */
549			{
550				x0 = *x;
551				*y = x0 - x0;
552			}
553			else
554				biguns = 1;
555			x += stridex;
556			y += stridey;
557			i = 0;
558			if (--n <= 0)
559				break;
560			goto loop0;
561		}
562		x0 = *x;
563		py0 = y;
564		x += stridex;
565		y += stridey;
566		i = 1;
567		if (--n <= 0)
568			break;
569
570loop1:
571		hx = HI(x);
572		xsb1 = hx >> 31;
573		hx &= ~0x80000000;
574		if (hx > 0x413921fb)
575		{
576			if (hx >= 0x7ff00000)
577			{
578				x1 = *x;
579				*y = x1 - x1;
580			}
581			else
582				biguns = 1;
583			x += stridex;
584			y += stridey;
585			i = 1;
586			if (--n <= 0)
587				break;
588			goto loop1;
589		}
590		x1 = *x;
591		py1 = y;
592		x += stridex;
593		y += stridey;
594		i = 2;
595		if (--n <= 0)
596			break;
597
598loop2:
599		hx = HI(x);
600		xsb2 = hx >> 31;
601		hx &= ~0x80000000;
602		if (hx > 0x413921fb)
603		{
604			if (hx >= 0x7ff00000)
605			{
606				x2 = *x;
607				*y = x2 - x2;
608			}
609			else
610				biguns = 1;
611			x += stridex;
612			y += stridey;
613			i = 2;
614			if (--n <= 0)
615				break;
616			goto loop2;
617		}
618		x2 = *x;
619		py2 = y;
620
621		n0 = (int) (x0 * invpio2 + half[xsb0]);
622		n1 = (int) (x1 * invpio2 + half[xsb1]);
623		n2 = (int) (x2 * invpio2 + half[xsb2]);
624		fn0 = (double) n0;
625		fn1 = (double) n1;
626		fn2 = (double) n2;
627		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
628		n1 = (n1 + 1) & 3;
629		n2 = (n2 + 1) & 3;
630		a0 = x0 - fn0 * pio2_1;
631		a1 = x1 - fn1 * pio2_1;
632		a2 = x2 - fn2 * pio2_1;
633		w0 = fn0 * pio2_2;
634		w1 = fn1 * pio2_2;
635		w2 = fn2 * pio2_2;
636		x0 = a0 - w0;
637		x1 = a1 - w1;
638		x2 = a2 - w2;
639		y0 = (a0 - x0) - w0;
640		y1 = (a1 - x1) - w1;
641		y2 = (a2 - x2) - w2;
642		a0 = x0;
643		a1 = x1;
644		a2 = x2;
645		w0 = fn0 * pio2_3 - y0;
646		w1 = fn1 * pio2_3 - y1;
647		w2 = fn2 * pio2_3 - y2;
648		x0 = a0 - w0;
649		x1 = a1 - w1;
650		x2 = a2 - w2;
651		y0 = (a0 - x0) - w0;
652		y1 = (a1 - x1) - w1;
653		y2 = (a2 - x2) - w2;
654		a0 = x0;
655		a1 = x1;
656		a2 = x2;
657		w0 = fn0 * pio2_3t - y0;
658		w1 = fn1 * pio2_3t - y1;
659		w2 = fn2 * pio2_3t - y2;
660		x0 = a0 - w0;
661		x1 = a1 - w1;
662		x2 = a2 - w2;
663		y0 = (a0 - x0) - w0;
664		y1 = (a1 - x1) - w1;
665		y2 = (a2 - x2) - w2;
666		xsb0 = HI(&x0);
667		i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
668		xsb1 = HI(&x1);
669		i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
670		xsb2 = HI(&x2);
671		i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
672		switch (i)
673		{
674			double		t0, t1, t2, z0, z1, z2;
675			unsigned	j0, j1, j2;
676
677		case 0:
678			j0 = (xsb0 + 0x4000) & 0xffff8000;
679			j1 = (xsb1 + 0x4000) & 0xffff8000;
680			j2 = (xsb2 + 0x4000) & 0xffff8000;
681			HI(&t0) = j0;
682			HI(&t1) = j1;
683			HI(&t2) = j2;
684			LO(&t0) = 0;
685			LO(&t1) = 0;
686			LO(&t2) = 0;
687			x0 = (x0 - t0) + y0;
688			x1 = (x1 - t1) + y1;
689			x2 = (x2 - t2) + y2;
690			z0 = x0 * x0;
691			z1 = x1 * x1;
692			z2 = x2 * x2;
693			t0 = z0 * (qq1 + z0 * qq2);
694			t1 = z1 * (qq1 + z1 * qq2);
695			t2 = z2 * (qq1 + z2 * qq2);
696			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
697			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
698			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
699			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
700			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
701			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
702			xsb0 = (xsb0 >> 30) & 2;
703			xsb1 = (xsb1 >> 30) & 2;
704			xsb2 = (xsb2 >> 30) & 2;
705			n0 ^= (xsb0 & ~(n0 << 1));
706			n1 ^= (xsb1 & ~(n1 << 1));
707			n2 ^= (xsb2 & ~(n2 << 1));
708			xsb0 |= 1;
709			xsb1 |= 1;
710			xsb2 |= 1;
711			a0 = __vlibm_TBL_sincos_hi[j0+n0];
712			a1 = __vlibm_TBL_sincos_hi[j1+n1];
713			a2 = __vlibm_TBL_sincos_hi[j2+n2];
714			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
715			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
716			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
717			*py0 = ( a0 + t0 );
718			*py1 = ( a1 + t1 );
719			*py2 = ( a2 + t2 );
720			break;
721
722		case 1:
723			j0 = n0 & 1;
724			j1 = (xsb1 + 0x4000) & 0xffff8000;
725			j2 = (xsb2 + 0x4000) & 0xffff8000;
726			HI(&t1) = j1;
727			HI(&t2) = j2;
728			LO(&t1) = 0;
729			LO(&t2) = 0;
730			x0_or_one[0] = x0;
731			x0_or_one[2] = -x0;
732			y0_or_zero[0] = y0;
733			y0_or_zero[2] = -y0;
734			x1 = (x1 - t1) + y1;
735			x2 = (x2 - t2) + y2;
736			z0 = x0 * x0;
737			z1 = x1 * x1;
738			z2 = x2 * x2;
739			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
740			t1 = z1 * (qq1 + z1 * qq2);
741			t2 = z2 * (qq1 + z2 * qq2);
742			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
743			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
744			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
745			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
746			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
747			xsb1 = (xsb1 >> 30) & 2;
748			xsb2 = (xsb2 >> 30) & 2;
749			n1 ^= (xsb1 & ~(n1 << 1));
750			n2 ^= (xsb2 & ~(n2 << 1));
751			xsb1 |= 1;
752			xsb2 |= 1;
753			a1 = __vlibm_TBL_sincos_hi[j1+n1];
754			a2 = __vlibm_TBL_sincos_hi[j2+n2];
755			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
756			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
757			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
758			*py0 = t0;
759			*py1 = ( a1 + t1 );
760			*py2 = ( a2 + t2 );
761			break;
762
763		case 2:
764			j0 = (xsb0 + 0x4000) & 0xffff8000;
765			j1 = n1 & 1;
766			j2 = (xsb2 + 0x4000) & 0xffff8000;
767			HI(&t0) = j0;
768			HI(&t2) = j2;
769			LO(&t0) = 0;
770			LO(&t2) = 0;
771			x1_or_one[0] = x1;
772			x1_or_one[2] = -x1;
773			x0 = (x0 - t0) + y0;
774			y1_or_zero[0] = y1;
775			y1_or_zero[2] = -y1;
776			x2 = (x2 - t2) + y2;
777			z0 = x0 * x0;
778			z1 = x1 * x1;
779			z2 = x2 * x2;
780			t0 = z0 * (qq1 + z0 * qq2);
781			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
782			t2 = z2 * (qq1 + z2 * qq2);
783			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
784			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
785			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
786			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
787			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
788			xsb0 = (xsb0 >> 30) & 2;
789			xsb2 = (xsb2 >> 30) & 2;
790			n0 ^= (xsb0 & ~(n0 << 1));
791			n2 ^= (xsb2 & ~(n2 << 1));
792			xsb0 |= 1;
793			xsb2 |= 1;
794			a0 = __vlibm_TBL_sincos_hi[j0+n0];
795			a2 = __vlibm_TBL_sincos_hi[j2+n2];
796			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
797			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
798			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
799			*py0 = ( a0 + t0 );
800			*py1 = t1;
801			*py2 = ( a2 + t2 );
802			break;
803
804		case 3:
805			j0 = n0 & 1;
806			j1 = n1 & 1;
807			j2 = (xsb2 + 0x4000) & 0xffff8000;
808			HI(&t2) = j2;
809			LO(&t2) = 0;
810			x0_or_one[0] = x0;
811			x0_or_one[2] = -x0;
812			x1_or_one[0] = x1;
813			x1_or_one[2] = -x1;
814			y0_or_zero[0] = y0;
815			y0_or_zero[2] = -y0;
816			y1_or_zero[0] = y1;
817			y1_or_zero[2] = -y1;
818			x2 = (x2 - t2) + y2;
819			z0 = x0 * x0;
820			z1 = x1 * x1;
821			z2 = x2 * x2;
822			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
823			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
824			t2 = z2 * (qq1 + z2 * qq2);
825			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
826			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
827			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
828			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
829			xsb2 = (xsb2 >> 30) & 2;
830			n2 ^= (xsb2 & ~(n2 << 1));
831			xsb2 |= 1;
832			a2 = __vlibm_TBL_sincos_hi[j2+n2];
833			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
834			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
835			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
836			*py0 = t0;
837			*py1 = t1;
838			*py2 = ( a2 + t2 );
839			break;
840
841		case 4:
842			j0 = (xsb0 + 0x4000) & 0xffff8000;
843			j1 = (xsb1 + 0x4000) & 0xffff8000;
844			j2 = n2 & 1;
845			HI(&t0) = j0;
846			HI(&t1) = j1;
847			LO(&t0) = 0;
848			LO(&t1) = 0;
849			x2_or_one[0] = x2;
850			x2_or_one[2] = -x2;
851			x0 = (x0 - t0) + y0;
852			x1 = (x1 - t1) + y1;
853			y2_or_zero[0] = y2;
854			y2_or_zero[2] = -y2;
855			z0 = x0 * x0;
856			z1 = x1 * x1;
857			z2 = x2 * x2;
858			t0 = z0 * (qq1 + z0 * qq2);
859			t1 = z1 * (qq1 + z1 * qq2);
860			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
861			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
862			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
863			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
864			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
865			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
866			xsb0 = (xsb0 >> 30) & 2;
867			xsb1 = (xsb1 >> 30) & 2;
868			n0 ^= (xsb0 & ~(n0 << 1));
869			n1 ^= (xsb1 & ~(n1 << 1));
870			xsb0 |= 1;
871			xsb1 |= 1;
872			a0 = __vlibm_TBL_sincos_hi[j0+n0];
873			a1 = __vlibm_TBL_sincos_hi[j1+n1];
874			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
875			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
876			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
877			*py0 = ( a0 + t0 );
878			*py1 = ( a1 + t1 );
879			*py2 = t2;
880			break;
881
882		case 5:
883			j0 = n0 & 1;
884			j1 = (xsb1 + 0x4000) & 0xffff8000;
885			j2 = n2 & 1;
886			HI(&t1) = j1;
887			LO(&t1) = 0;
888			x0_or_one[0] = x0;
889			x0_or_one[2] = -x0;
890			x2_or_one[0] = x2;
891			x2_or_one[2] = -x2;
892			y0_or_zero[0] = y0;
893			y0_or_zero[2] = -y0;
894			x1 = (x1 - t1) + y1;
895			y2_or_zero[0] = y2;
896			y2_or_zero[2] = -y2;
897			z0 = x0 * x0;
898			z1 = x1 * x1;
899			z2 = x2 * x2;
900			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
901			t1 = z1 * (qq1 + z1 * qq2);
902			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
903			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
904			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
905			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
906			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
907			xsb1 = (xsb1 >> 30) & 2;
908			n1 ^= (xsb1 & ~(n1 << 1));
909			xsb1 |= 1;
910			a1 = __vlibm_TBL_sincos_hi[j1+n1];
911			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
912			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
913			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
914			*py0 = t0;
915			*py1 = ( a1 + t1 );
916			*py2 = t2;
917			break;
918
919		case 6:
920			j0 = (xsb0 + 0x4000) & 0xffff8000;
921			j1 = n1 & 1;
922			j2 = n2 & 1;
923			HI(&t0) = j0;
924			LO(&t0) = 0;
925			x1_or_one[0] = x1;
926			x1_or_one[2] = -x1;
927			x2_or_one[0] = x2;
928			x2_or_one[2] = -x2;
929			x0 = (x0 - t0) + y0;
930			y1_or_zero[0] = y1;
931			y1_or_zero[2] = -y1;
932			y2_or_zero[0] = y2;
933			y2_or_zero[2] = -y2;
934			z0 = x0 * x0;
935			z1 = x1 * x1;
936			z2 = x2 * x2;
937			t0 = z0 * (qq1 + z0 * qq2);
938			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
939			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
940			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
941			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
942			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
943			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
944			xsb0 = (xsb0 >> 30) & 2;
945			n0 ^= (xsb0 & ~(n0 << 1));
946			xsb0 |= 1;
947			a0 = __vlibm_TBL_sincos_hi[j0+n0];
948			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
949			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
950			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
951			*py0 = ( a0 + t0 );
952			*py1 = t1;
953			*py2 = t2;
954			break;
955
956		case 7:
957			j0 = n0 & 1;
958			j1 = n1 & 1;
959			j2 = n2 & 1;
960			x0_or_one[0] = x0;
961			x0_or_one[2] = -x0;
962			x1_or_one[0] = x1;
963			x1_or_one[2] = -x1;
964			x2_or_one[0] = x2;
965			x2_or_one[2] = -x2;
966			y0_or_zero[0] = y0;
967			y0_or_zero[2] = -y0;
968			y1_or_zero[0] = y1;
969			y1_or_zero[2] = -y1;
970			y2_or_zero[0] = y2;
971			y2_or_zero[2] = -y2;
972			z0 = x0 * x0;
973			z1 = x1 * x1;
974			z2 = x2 * x2;
975			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
976			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
977			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
978			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
979			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
980			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
981			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
982			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
983			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
984			*py0 = t0;
985			*py1 = t1;
986			*py2 = t2;
987			break;
988		}
989
990		x += stridex;
991		y += stridey;
992		i = 0;
993	} while (--n > 0);
994
995	if (i > 0)
996	{
997		double		fn0, fn1, a0, a1, w0, w1, y0, y1;
998		double		t0, t1, z0, z1;
999		unsigned	j0, j1;
1000		int			n0, n1;
1001
1002		if (i > 1)
1003		{
1004			n1 = (int) (x1 * invpio2 + half[xsb1]);
1005			fn1 = (double) n1;
1006			n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1007			a1 = x1 - fn1 * pio2_1;
1008			w1 = fn1 * pio2_2;
1009			x1 = a1 - w1;
1010			y1 = (a1 - x1) - w1;
1011			a1 = x1;
1012			w1 = fn1 * pio2_3 - y1;
1013			x1 = a1 - w1;
1014			y1 = (a1 - x1) - w1;
1015			a1 = x1;
1016			w1 = fn1 * pio2_3t - y1;
1017			x1 = a1 - w1;
1018			y1 = (a1 - x1) - w1;
1019			xsb1 = HI(&x1);
1020			if ((xsb1 & ~0x80000000) < thresh[n1&1])
1021			{
1022				j1 = n1 & 1;
1023				x1_or_one[0] = x1;
1024				x1_or_one[2] = -x1;
1025				y1_or_zero[0] = y1;
1026				y1_or_zero[2] = -y1;
1027				z1 = x1 * x1;
1028				t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1029				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1030				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1031				*py1 = t1;
1032			}
1033			else
1034			{
1035				j1 = (xsb1 + 0x4000) & 0xffff8000;
1036				HI(&t1) = j1;
1037				LO(&t1) = 0;
1038				x1 = (x1 - t1) + y1;
1039				z1 = x1 * x1;
1040				t1 = z1 * (qq1 + z1 * qq2);
1041				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1042				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1043				xsb1 = (xsb1 >> 30) & 2;
1044				n1 ^= (xsb1 & ~(n1 << 1));
1045				xsb1 |= 1;
1046				a1 = __vlibm_TBL_sincos_hi[j1+n1];
1047				t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
1048				*py1 = ( a1 + t1 );
1049			}
1050		}
1051		n0 = (int) (x0 * invpio2 + half[xsb0]);
1052		fn0 = (double) n0;
1053		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1054		a0 = x0 - fn0 * pio2_1;
1055		w0 = fn0 * pio2_2;
1056		x0 = a0 - w0;
1057		y0 = (a0 - x0) - w0;
1058		a0 = x0;
1059		w0 = fn0 * pio2_3 - y0;
1060		x0 = a0 - w0;
1061		y0 = (a0 - x0) - w0;
1062		a0 = x0;
1063		w0 = fn0 * pio2_3t - y0;
1064		x0 = a0 - w0;
1065		y0 = (a0 - x0) - w0;
1066		xsb0 = HI(&x0);
1067		if ((xsb0 & ~0x80000000) < thresh[n0&1])
1068		{
1069			j0 = n0 & 1;
1070			x0_or_one[0] = x0;
1071			x0_or_one[2] = -x0;
1072			y0_or_zero[0] = y0;
1073			y0_or_zero[2] = -y0;
1074			z0 = x0 * x0;
1075			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1076			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1077			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1078			*py0 = t0;
1079		}
1080		else
1081		{
1082			j0 = (xsb0 + 0x4000) & 0xffff8000;
1083			HI(&t0) = j0;
1084			LO(&t0) = 0;
1085			x0 = (x0 - t0) + y0;
1086			z0 = x0 * x0;
1087			t0 = z0 * (qq1 + z0 * qq2);
1088			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1089			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1090			xsb0 = (xsb0 >> 30) & 2;
1091			n0 ^= (xsb0 & ~(n0 << 1));
1092			xsb0 |= 1;
1093			a0 = __vlibm_TBL_sincos_hi[j0+n0];
1094			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
1095			*py0 = ( a0 + t0 );
1096		}
1097	}
1098
1099	if (biguns)
1100		__vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);
1101}
1102