xref: /illumos-gate/usr/src/lib/libmvec/common/__vcos.c (revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
32 
/*
 * HI(x) and LO(x) name the most- and least-significant 32-bit words of
 * the double that x points to; which word sits first in memory depends
 * on the byte order selected by _LITTLE_ENDIAN.  Both expand to lvalues:
 * the code reads them (HI(x)) and also writes them to assemble a double
 * word by word (HI(&t) = ...; LO(&t) = 0;).
 *
 * The macro argument is not parenthesized in the expansion, so only pass
 * a plain pointer name or an &variable expression.
 */
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x)	*(1+(int*)x)
35 #define LO(x)	*(unsigned*)x
36 #else
37 #define HI(x)	*(int*)x
38 #define LO(x)	*(1+(unsigned*)x)
39 #endif
40 
/*
 * The prototype below is written with `restrict`.  When __RESTRICT is
 * defined it maps to the compiler's _Restrict keyword; otherwise the
 * qualifier is compiled away.
 */
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46 
47 /*
48  * vcos.1.c
49  *
50  * Vector cosine function.  Just slight modifications to vsin.8.c, mainly
51  * in the primary range part.
52  *
53  * Modification to primary range processing.  If an argument that does not
54  * fall in the primary range is encountered, then processing is continued
55  * in the medium range.
56  *
57  */
58 
/*
 * Tables of sine/cosine values at the breakpoints used by the table-driven
 * path; defined elsewhere in the library.  The index arithmetic in __vcos
 * steps through them in groups of four doubles per breakpoint.
 */
59 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
60 
/*
 * half[]	+0.5 / -0.5, indexed by the sign bit, used to round x*2/pi
 *		to the nearest integer by truncation.
 * invpio2,	2/pi and pi/2 split into pieces for the multi-step argument
 * pio2_*	reduction in the medium range.
 * pp1, pp2	short sine polynomial in z = x*x, applied to the small
 *		remainder left after subtracting a table breakpoint.
 * qq1, qq2	short cosine-minus-one polynomial for the same remainder.
 * poly1..4	longer polynomials used without the table; element [0] of
 *		each pair holds the sine coefficient, element [1] the cosine
 *		coefficient (compare -1/6, 1/120, ... with -1/2, 1/24, ...).
 */
61 static const double
62 	half[2]	= { 0.5, -0.5 },
63 	one		= 1.0,
64 	invpio2 = 0.636619772367581343075535,  /* 53 bits of 2/pi */
65 	pio2_1	= 1.570796326734125614166,	/* first 33 bits of pi/2 */
66 	pio2_2	= 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
67 	pio2_3	= 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
68 	pio2_3t	= 8.478427660368899643959e-32, /* pi/2 - (pio2_1+pio2_2+pio2_3) */
69 	pp1		= -1.666666666605760465276263943134982554676e-0001,
70 	pp2		=  8.333261209690963126718376566146180944442e-0003,
71 	qq1		= -4.999999999977710986407023955908711557870e-0001,
72 	qq2		=  4.166654863857219350645055881018842089580e-0002,
73 	poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
74 				-4.999999999999931701464060878888294524481e-0001 },
75 	poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
76 				 4.166666666394861917535640593963708222319e-0002 },
77 	poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
78 				-1.388888552656142867832756687736851681462e-0003 },
79 	poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
80 				 2.478519423681460796618128289454530524759e-0005 };
81 
/*
 * High-word thresholds below which the reduced argument is evaluated with
 * the poly1..4 polynomials instead of the table.  Indexed by (n & 1), the
 * same index that picks the sine ([0]) or cosine ([1]) coefficients:
 * 0x3fc90000 is 25/128 (0.1953125), 0x3fc40000 is 5/32 (0.15625).
 */
82 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
83 
84 /* Don't __ the following; acomp will handle it */
85 extern double fabs(double);
86 extern void __vlibm_vcos_big(int, double *, int, double *, int, int);
87 
88 /*
89  * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n-1.
90  *
91  * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
92  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
93  *
94  * elts < 2^-27 use the approximation 1.0 ~ cos(x).
95  */
/*
 * Parameters:
 *	n		number of elements.  Both loops below are do/while
 *			loops that read x[0] before n is tested, so n must
 *			be at least 1.
 *	x, stridex	input vector and the step, in doubles, between
 *			consecutive inputs.
 *	y, stridey	output vector and the step, in doubles, between
 *			consecutive outputs.
 *
 * Structure:
 *	Elements are gathered three at a time (x0/x1/x2 with output
 *	pointers py0/py1/py2) and evaluated together in one switch case.
 *	The MAIN LOOP handles arguments whose magnitude is at most pi/4
 *	(high word <= 0x3fe921fb).  The first larger argument diverts
 *	control to MEDIUM, which finishes the rest of the vector with
 *	explicit pi/2 argument reduction and never returns to the MAIN
 *	LOOP.  Inf and NaN inputs produce x - x.  Finite elements above
 *	the 0x413921fb high-word threshold are skipped here and only set
 *	`biguns`, which triggers one call to __vlibm_vcos_big over the
 *	original arguments at the very end.
 */
96 void
97 __vcos(int n, double * restrict x, int stridex, double * restrict y,
98 	int stridey)
99 {
100 	double		x0_or_one[4], x1_or_one[4], x2_or_one[4];
101 	double		y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
102 	double		x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
103 	unsigned	hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
104 	int		i, biguns, nsave, sxsave, sysave;
	/*
	 * v only receives the value of tiny arguments (v = *x below);
	 * NOTE(review): presumably the volatile store is there so the
	 * access to *x is not optimized away -- confirm intent.
	 */
105 	volatile int	v __GNU_UNUSED;
	/* Keep the original arguments for the final __vlibm_vcos_big call. */
106 	nsave = n;
107 	xsave = x;
108 	sxsave = stridex;
109 	ysave = y;
110 	sysave = stridey;
111 	biguns = 0;
112 
113 	do /* MAIN LOOP */
114 	{
115 		/* Gotos here so _break_ exits MAIN LOOP. */
		/*
		 * In LOOP0..LOOP2, i records how many gathered-but-not-yet-
		 * evaluated elements exist when the loop is left via break;
		 * the CLEAN UP code after the loop uses it.
		 */
116 LOOP0:  /* Find first arg in right range. */
117 		xsb0 = HI(x); /* get most significant word */
118 		hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
119 		if (hx0 > 0x3fe921fb) {
120 			/* Too big: arg reduction needed, so leave for second part */
121 			biguns = 1;
122 			goto MEDIUM;
123 		}
124 		if (hx0 < 0x3e400000) {
125 			/* Too small.  cos x ~ 1. */
126 			v = *x;
127 			*y = 1.0;
128 			x += stridex;
129 			y += stridey;
130 			i = 0;
131 			if (--n <= 0)
132 				break;
133 			goto LOOP0;
134 		}
135 		x0 = *x;
136 		py0 = y;
137 		x += stridex;
138 		y += stridey;
139 		i = 1;
140 		if (--n <= 0)
141 			break;
142 
143 LOOP1: /* Get second arg, same as above. */
144 		xsb1 = HI(x);
145 		hx1 = xsb1 & ~0x80000000;
146 		if (hx1 > 0x3fe921fb)
147 		{
			/* biguns doubles as "how far we got": resume at loop1. */
148 			biguns = 2;
149 			goto MEDIUM;
150 		}
151 		if (hx1 < 0x3e400000)
152 		{
153 			v = *x;
154 			*y = 1.0;
155 			x += stridex;
156 			y += stridey;
157 			i = 1;
158 			if (--n <= 0)
159 				break;
160 			goto LOOP1;
161 		}
162 		x1 = *x;
163 		py1 = y;
164 		x += stridex;
165 		y += stridey;
166 		i = 2;
167 		if (--n <= 0)
168 			break;
169 
170 LOOP2: /* Get third arg, same as above. */
171 		xsb2 = HI(x);
172 		hx2 = xsb2 & ~0x80000000;
173 		if (hx2 > 0x3fe921fb)
174 		{
			/* Two elements already gathered: resume at loop2. */
175 			biguns = 3;
176 			goto MEDIUM;
177 		}
178 		if (hx2 < 0x3e400000)
179 		{
180 			v = *x;
181 			*y = 1.0;
182 			x += stridex;
183 			y += stridey;
184 			i = 2;
185 			if (--n <= 0)
186 				break;
187 			goto LOOP2;
188 		}
189 		x2 = *x;
190 		py2 = y;
191 
192 		/*
193 		 * 0x3fc40000 = 5/32 ~ 0.15625
194 		 * Get msb after subtraction.  Will be 1 only if
195 		 * hx0 - 5/32 is negative.
196 		 */
		/*
		 * Bit k of i (k = 0, 1, 2) is set when element k is below
		 * 5/32.  Such elements are evaluated as 1 + z*poly(z) with
		 * the cosine coefficients poly?[1]; the others use the
		 * sincos table.  The eight cases are the eight combinations.
		 */
197 		i = (hx0 - 0x3fc40000) >> 31;
198 		i |= ((hx1 - 0x3fc40000) >> 30) & 2;
199 		i |= ((hx2 - 0x3fc40000) >> 29) & 4;
200 		switch (i)
201 		{
202 			double		a0, a1, a2, w0, w1, w2;
203 			double		t0, t1, t2, z0, z1, z2;
204 			unsigned	j0, j1, j2;
205 
206 		case 0: /* All are > 5/32 */
			/*
			 * Table path, per element:
			 *  - j = high word rounded to a multiple of 0x8000;
			 *    t = the double with high word j and low word 0
			 *    (the table breakpoint), and x becomes x - t.
			 *  - with z = x*x, t = z*(qq1 + z*qq2) approximates
			 *    cos(x) - 1 and w = x*(1 + z*(pp1 + z*pp2))
			 *    approximates sin(x) for the small remainder.
			 *  - j is then turned into a table offset (a multiple
			 *    of 4) and xsb into 0 or 2 according to the sign.
			 *  - result = cos_hi + (cos_lo - (sin_hi*w - cos_hi*t)).
			 * NOTE(review): entry j+xsb is presumably the sine of
			 * the breakpoint with the sign of x applied -- confirm
			 * against the table definition.
			 */
207 			j0 = (xsb0 + 0x4000) & 0xffff8000;
208 			j1 = (xsb1 + 0x4000) & 0xffff8000;
209 			j2 = (xsb2 + 0x4000) & 0xffff8000;
210 			HI(&t0) = j0;
211 			HI(&t1) = j1;
212 			HI(&t2) = j2;
213 			LO(&t0) = 0;
214 			LO(&t1) = 0;
215 			LO(&t2) = 0;
216 			x0 -= t0;
217 			x1 -= t1;
218 			x2 -= t2;
219 			z0 = x0 * x0;
220 			z1 = x1 * x1;
221 			z2 = x2 * x2;
222 			t0 = z0 * (qq1 + z0 * qq2);
223 			t1 = z1 * (qq1 + z1 * qq2);
224 			t2 = z2 * (qq1 + z2 * qq2);
225 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
226 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
227 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
228 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
229 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
230 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
231 			xsb0 = (xsb0 >> 30) & 2;
232 			xsb1 = (xsb1 >> 30) & 2;
233 			xsb2 = (xsb2 >> 30) & 2;
234 			a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
235 			a1 = __vlibm_TBL_sincos_hi[j1+1];
236 			a2 = __vlibm_TBL_sincos_hi[j2+1];
237 			   /*   cos_lo(t)			 sin_hi(t) */
238 			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
239 			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
240 			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
241 
242 			*py0 = a0 + t0;
243 			*py1 = a1 + t1;
244 			*py2 = a2 + t2;
245 			break;
246 
		/* Cases 1-6: element-wise mix of the table and polynomial paths. */
247 		case 1:
248 			j1 = (xsb1 + 0x4000) & 0xffff8000;
249 			j2 = (xsb2 + 0x4000) & 0xffff8000;
250 			HI(&t1) = j1;
251 			HI(&t2) = j2;
252 			LO(&t1) = 0;
253 			LO(&t2) = 0;
254 			x1 -= t1;
255 			x2 -= t2;
256 			z0 = x0 * x0;
257 			z1 = x1 * x1;
258 			z2 = x2 * x2;
259 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
260 			t1 = z1 * (qq1 + z1 * qq2);
261 			t2 = z2 * (qq1 + z2 * qq2);
262 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
263 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
264 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
265 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
266 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
267 			xsb1 = (xsb1 >> 30) & 2;
268 			xsb2 = (xsb2 >> 30) & 2;
269 			a1 = __vlibm_TBL_sincos_hi[j1+1];
270 			a2 = __vlibm_TBL_sincos_hi[j2+1];
271 			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
272 			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
273 			*py0 = one + t0;
274 			*py1 = a1 + t1;
275 			*py2 = a2 + t2;
276 			break;
277 
278 		case 2:
279 			j0 = (xsb0 + 0x4000) & 0xffff8000;
280 			j2 = (xsb2 + 0x4000) & 0xffff8000;
281 			HI(&t0) = j0;
282 			HI(&t2) = j2;
283 			LO(&t0) = 0;
284 			LO(&t2) = 0;
285 			x0 -= t0;
286 			x2 -= t2;
287 			z0 = x0 * x0;
288 			z1 = x1 * x1;
289 			z2 = x2 * x2;
290 			t0 = z0 * (qq1 + z0 * qq2);
291 			t1 = z1 * (poly3[1] + z1 * poly4[1]);
292 			t2 = z2 * (qq1 + z2 * qq2);
293 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
294 			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
295 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
296 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
297 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
298 			xsb0 = (xsb0 >> 30) & 2;
299 			xsb2 = (xsb2 >> 30) & 2;
300 			a0 = __vlibm_TBL_sincos_hi[j0+1];
301 			a2 = __vlibm_TBL_sincos_hi[j2+1];
302 			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
303 			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
304 			*py0 = a0 + t0;
305 			*py1 = one + t1;
306 			*py2 = a2 + t2;
307 			break;
308 
309 		case 3:
310 			j2 = (xsb2 + 0x4000) & 0xffff8000;
311 			HI(&t2) = j2;
312 			LO(&t2) = 0;
313 			x2 -= t2;
314 			z0 = x0 * x0;
315 			z1 = x1 * x1;
316 			z2 = x2 * x2;
317 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
318 			t1 = z1 * (poly3[1] + z1 * poly4[1]);
319 			t2 = z2 * (qq1 + z2 * qq2);
320 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
321 			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
322 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
323 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
324 			xsb2 = (xsb2 >> 30) & 2;
325 			a2 = __vlibm_TBL_sincos_hi[j2+1];
326 			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
327 			*py0 = one + t0;
328 			*py1 = one + t1;
329 			*py2 = a2 + t2;
330 			break;
331 
332 		case 4:
333 			j0 = (xsb0 + 0x4000) & 0xffff8000;
334 			j1 = (xsb1 + 0x4000) & 0xffff8000;
335 			HI(&t0) = j0;
336 			HI(&t1) = j1;
337 			LO(&t0) = 0;
338 			LO(&t1) = 0;
339 			x0 -= t0;
340 			x1 -= t1;
341 			z0 = x0 * x0;
342 			z1 = x1 * x1;
343 			z2 = x2 * x2;
344 			t0 = z0 * (qq1 + z0 * qq2);
345 			t1 = z1 * (qq1 + z1 * qq2);
346 			t2 = z2 * (poly3[1] + z2 * poly4[1]);
347 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
348 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
349 			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
350 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
351 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
352 			xsb0 = (xsb0 >> 30) & 2;
353 			xsb1 = (xsb1 >> 30) & 2;
354 			a0 = __vlibm_TBL_sincos_hi[j0+1];
355 			a1 = __vlibm_TBL_sincos_hi[j1+1];
356 			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
357 			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
358 			*py0 = a0 + t0;
359 			*py1 = a1 + t1;
360 			*py2 = one + t2;
361 			break;
362 
363 		case 5:
364 			j1 = (xsb1 + 0x4000) & 0xffff8000;
365 			HI(&t1) = j1;
366 			LO(&t1) = 0;
367 			x1 -= t1;
368 			z0 = x0 * x0;
369 			z1 = x1 * x1;
370 			z2 = x2 * x2;
371 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
372 			t1 = z1 * (qq1 + z1 * qq2);
373 			t2 = z2 * (poly3[1] + z2 * poly4[1]);
374 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
375 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
376 			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
377 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
378 			xsb1 = (xsb1 >> 30) & 2;
379 			a1 = __vlibm_TBL_sincos_hi[j1+1];
380 			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
381 			*py0 = one + t0;
382 			*py1 = a1 + t1;
383 			*py2 = one + t2;
384 			break;
385 
386 		case 6:
387 			j0 = (xsb0 + 0x4000) & 0xffff8000;
388 			HI(&t0) = j0;
389 			LO(&t0) = 0;
390 			x0 -= t0;
391 			z0 = x0 * x0;
392 			z1 = x1 * x1;
393 			z2 = x2 * x2;
394 			t0 = z0 * (qq1 + z0 * qq2);
395 			t1 = z1 * (poly3[1] + z1 * poly4[1]);
396 			t2 = z2 * (poly3[1] + z2 * poly4[1]);
397 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
398 			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
399 			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
400 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
401 			xsb0 = (xsb0 >> 30) & 2;
402 			a0 = __vlibm_TBL_sincos_hi[j0+1];
403 			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
404 			*py0 = a0 + t0;
405 			*py1 = one + t1;
406 			*py2 = one + t2;
407 			break;
408 
409 		case 7: /* All are < 5/32 */
			/* Pure polynomial: cos(x) = 1 + z*(p1 + z*(p2 + z*(p3 + z*p4))). */
410 			z0 = x0 * x0;
411 			z1 = x1 * x1;
412 			z2 = x2 * x2;
413 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
414 			t1 = z1 * (poly3[1] + z1 * poly4[1]);
415 			t2 = z2 * (poly3[1] + z2 * poly4[1]);
416 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
417 			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
418 			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
419 			*py0 = one + t0;
420 			*py1 = one + t1;
421 			*py2 = one + t2;
422 			break;
423 		}
424 
		/* Step past the third element of the triple just stored. */
425 		x += stridex;
426 		y += stridey;
427 		i = 0;
428 	} while (--n > 0); /* END MAIN LOOP */
429 
430 	/*
431 	 * CLEAN UP last 0, 1, or 2 elts.
432 	 */
	/*
	 * Reached only when the whole vector stayed in the primary range.
	 * i is the number of elements gathered but not yet evaluated; they
	 * are finished one at a time with the same two formulas as above.
	 */
433 	if (i > 0) /* Clean up elts at tail.  i < 3. */
434 	{
435 		double		a0, a1, w0, w1;
436 		double		t0, t1, z0, z1;
437 		unsigned	j0, j1;
438 
439 		if (i > 1)
440 		{
441 			if (hx1 < 0x3fc40000)
442 			{
443 				z1 = x1 * x1;
444 				t1 = z1 * (poly3[1] + z1 * poly4[1]);
445 				t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
446 				t1 = one + t1;
447 				*py1 = t1;
448 			}
449 			else
450 			{
451 				j1 = (xsb1 + 0x4000) & 0xffff8000;
452 				HI(&t1) = j1;
453 				LO(&t1) = 0;
454 				x1 -= t1;
455 				z1 = x1 * x1;
456 				t1 = z1 * (qq1 + z1 * qq2);
457 				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
458 				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
459 				xsb1 = (xsb1 >> 30) & 2;
460 				a1 = __vlibm_TBL_sincos_hi[j1+1];
461 				t1 = __vlibm_TBL_sincos_lo[j1+1]
462 					- (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
463 				*py1 = a1 + t1;
464 			}
465 		}
466 		if (hx0 < 0x3fc40000)
467 		{
468 			z0 = x0 * x0;
469 			t0 = z0 * (poly3[1] + z0 * poly4[1]);
470 			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
471 			t0 = one + t0;
472 			*py0 = t0;
473 		}
474 		else
475 		{
476 			j0 = (xsb0 + 0x4000) & 0xffff8000;
477 			HI(&t0) = j0;
478 			LO(&t0) = 0;
479 			x0 -= t0;
480 			z0 = x0 * x0;
481 			t0 = z0 * (qq1 + z0 * qq2);
482 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
483 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
484 			xsb0 = (xsb0 >> 30) & 2;
485 			a0 = __vlibm_TBL_sincos_hi[j0+1];
486 			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
487 			*py0 = a0 + t0;
488 		}
489 	} /* END CLEAN UP */
490 
491 	return;
492 
493 	/*
494 	 * Take care of BIGUNS.
495 	 *
496 	 * We have jumped here in the middle of processing after having
497 	 * encountered a medium range argument.  Therefore things are in a
498 	 * bit of a tizzy.
499 	 */
500 
501 MEDIUM:
502 
	/*
	 * Selector arrays for the polynomial path of the medium range,
	 * indexed by the quadrant value n (0..3).  Slots 1 and 3 are fixed
	 * here (+1/-1 and 0); slots 0 and 2 are filled per element with
	 * +x/-x and +y/-y (y being the low-order part of the reduced
	 * argument).  Even n therefore yields +/-(x + y + x*poly_sin(z)),
	 * odd n yields +/-(1 + poly_cos(z)).
	 */
503 	x0_or_one[1] = 1.0;
504 	x1_or_one[1] = 1.0;
505 	x2_or_one[1] = 1.0;
506 	x0_or_one[3] = -1.0;
507 	x1_or_one[3] = -1.0;
508 	x2_or_one[3] = -1.0;
509 	y0_or_zero[1] = 0.0;
510 	y1_or_zero[1] = 0.0;
511 	y2_or_zero[1] = 0.0;
512 	y0_or_zero[3] = 0.0;
513 	y1_or_zero[3] = 0.0;
514 	y2_or_zero[3] = 0.0;
515 
	/*
	 * On entry biguns tells how many elements the MAIN LOOP had already
	 * gathered (value - 1).  Those keep their x?/py? values; only their
	 * saved high words are reduced to the sign bit, which is what the
	 * half[] lookup below expects.  Then biguns is reset so it can take
	 * on its real meaning: "some element must go to __vlibm_vcos_big".
	 */
516 	if (biguns == 3)
517 	{
518 		biguns = 0;
519 		xsb0 = xsb0 >> 31;
520 		xsb1 = xsb1 >> 31;
521 		goto loop2;
522 	}
523 	else if (biguns == 2)
524 	{
525 		xsb0 = xsb0 >> 31;
526 		biguns = 0;
527 		goto loop1;
528 	}
529 	biguns = 0;
530 
531 	do
532 	{
533 		double		fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
534 		unsigned	hx;
535 		int			n0, n1, n2;
536 
537 		/*
538 		 * Find 3 more to work on: Not already done, not too big.
539 		 */
540 
541 loop0:
542 		hx = HI(x);
543 		xsb0 = hx >> 31;
544 		hx &= ~0x80000000;
545 		if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */
546 		{
547 			if (hx >= 0x7ff00000) /* Inf or NaN */
548 			{
549 				x0 = *x;
550 				*y = x0 - x0;
551 			}
552 			else
553 				biguns = 1;
554 			x += stridex;
555 			y += stridey;
556 			i = 0;
557 			if (--n <= 0)
558 				break;
559 			goto loop0;
560 		}
561 		x0 = *x;
562 		py0 = y;
563 		x += stridex;
564 		y += stridey;
565 		i = 1;
566 		if (--n <= 0)
567 			break;
568 
569 loop1:
570 		hx = HI(x);
571 		xsb1 = hx >> 31;
572 		hx &= ~0x80000000;
573 		if (hx > 0x413921fb)
574 		{
575 			if (hx >= 0x7ff00000)
576 			{
577 				x1 = *x;
578 				*y = x1 - x1;
579 			}
580 			else
581 				biguns = 1;
582 			x += stridex;
583 			y += stridey;
584 			i = 1;
585 			if (--n <= 0)
586 				break;
587 			goto loop1;
588 		}
589 		x1 = *x;
590 		py1 = y;
591 		x += stridex;
592 		y += stridey;
593 		i = 2;
594 		if (--n <= 0)
595 			break;
596 
597 loop2:
598 		hx = HI(x);
599 		xsb2 = hx >> 31;
600 		hx &= ~0x80000000;
601 		if (hx > 0x413921fb)
602 		{
603 			if (hx >= 0x7ff00000)
604 			{
605 				x2 = *x;
606 				*y = x2 - x2;
607 			}
608 			else
609 				biguns = 1;
610 			x += stridex;
611 			y += stridey;
612 			i = 2;
613 			if (--n <= 0)
614 				break;
615 			goto loop2;
616 		}
617 		x2 = *x;
618 		py2 = y;
619 
		/*
		 * Argument reduction.  n = x*(2/pi) rounded to nearest (add
		 * +/-0.5 by sign, then truncate); fn is n as a double.  The
		 * quadrant kept in n is (n + 1) & 3.  fn*pi/2 is removed in
		 * stages using pio2_1, pio2_2, pio2_3 and pio2_3t; after each
		 * stage x holds the leading part of the remainder and y the
		 * rounding error of that subtraction, which is folded into
		 * the next stage.
		 */
620 		n0 = (int) (x0 * invpio2 + half[xsb0]);
621 		n1 = (int) (x1 * invpio2 + half[xsb1]);
622 		n2 = (int) (x2 * invpio2 + half[xsb2]);
623 		fn0 = (double) n0;
624 		fn1 = (double) n1;
625 		fn2 = (double) n2;
626 		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
627 		n1 = (n1 + 1) & 3;
628 		n2 = (n2 + 1) & 3;
629 		a0 = x0 - fn0 * pio2_1;
630 		a1 = x1 - fn1 * pio2_1;
631 		a2 = x2 - fn2 * pio2_1;
632 		w0 = fn0 * pio2_2;
633 		w1 = fn1 * pio2_2;
634 		w2 = fn2 * pio2_2;
635 		x0 = a0 - w0;
636 		x1 = a1 - w1;
637 		x2 = a2 - w2;
638 		y0 = (a0 - x0) - w0;
639 		y1 = (a1 - x1) - w1;
640 		y2 = (a2 - x2) - w2;
641 		a0 = x0;
642 		a1 = x1;
643 		a2 = x2;
644 		w0 = fn0 * pio2_3 - y0;
645 		w1 = fn1 * pio2_3 - y1;
646 		w2 = fn2 * pio2_3 - y2;
647 		x0 = a0 - w0;
648 		x1 = a1 - w1;
649 		x2 = a2 - w2;
650 		y0 = (a0 - x0) - w0;
651 		y1 = (a1 - x1) - w1;
652 		y2 = (a2 - x2) - w2;
653 		a0 = x0;
654 		a1 = x1;
655 		a2 = x2;
656 		w0 = fn0 * pio2_3t - y0;
657 		w1 = fn1 * pio2_3t - y1;
658 		w2 = fn2 * pio2_3t - y2;
659 		x0 = a0 - w0;
660 		x1 = a1 - w1;
661 		x2 = a2 - w2;
662 		y0 = (a0 - x0) - w0;
663 		y1 = (a1 - x1) - w1;
664 		y2 = (a2 - x2) - w2;
		/*
		 * Same case bitmask as in the MAIN LOOP, but the cut-off
		 * now depends on the parity of n (thresh[n&1]): bit k set
		 * means element k takes the polynomial path.
		 */
665 		xsb0 = HI(&x0);
666 		i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
667 		xsb1 = HI(&x1);
668 		i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
669 		xsb2 = HI(&x2);
670 		i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
671 		switch (i)
672 		{
673 			double		t0, t1, t2, z0, z1, z2;
674 			unsigned	j0, j1, j2;
675 
676 		case 0:
			/*
			 * Table path for all three.  As in the primary range,
			 * x becomes the remainder after subtracting breakpoint
			 * t, with the low-order part y added back in.  n is
			 * adjusted by the sign of the remainder and then used
			 * directly as the entry (0..3) within the 4-wide table
			 * group at offset j; entry (n + xsb) & 3 supplies the
			 * companion term multiplied by w.
			 * NOTE(review): the meaning of each of the four
			 * entries comes from the table definition, which is
			 * not visible here.
			 */
677 			j0 = (xsb0 + 0x4000) & 0xffff8000;
678 			j1 = (xsb1 + 0x4000) & 0xffff8000;
679 			j2 = (xsb2 + 0x4000) & 0xffff8000;
680 			HI(&t0) = j0;
681 			HI(&t1) = j1;
682 			HI(&t2) = j2;
683 			LO(&t0) = 0;
684 			LO(&t1) = 0;
685 			LO(&t2) = 0;
686 			x0 = (x0 - t0) + y0;
687 			x1 = (x1 - t1) + y1;
688 			x2 = (x2 - t2) + y2;
689 			z0 = x0 * x0;
690 			z1 = x1 * x1;
691 			z2 = x2 * x2;
692 			t0 = z0 * (qq1 + z0 * qq2);
693 			t1 = z1 * (qq1 + z1 * qq2);
694 			t2 = z2 * (qq1 + z2 * qq2);
695 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
696 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
697 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
698 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
699 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
700 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
701 			xsb0 = (xsb0 >> 30) & 2;
702 			xsb1 = (xsb1 >> 30) & 2;
703 			xsb2 = (xsb2 >> 30) & 2;
704 			n0 ^= (xsb0 & ~(n0 << 1));
705 			n1 ^= (xsb1 & ~(n1 << 1));
706 			n2 ^= (xsb2 & ~(n2 << 1));
707 			xsb0 |= 1;
708 			xsb1 |= 1;
709 			xsb2 |= 1;
710 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
711 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
712 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
713 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
714 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
715 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
716 			*py0 = ( a0 + t0 );
717 			*py1 = ( a1 + t1 );
718 			*py2 = ( a2 + t2 );
719 			break;
720 
		/*
		 * Cases 1-7: for an element on the polynomial path, j = n & 1
		 * selects the sine ([0]) or cosine ([1]) coefficients and the
		 * x?_or_one / y?_or_zero arrays supply the leading term and
		 * sign for quadrant n.
		 */
721 		case 1:
722 			j0 = n0 & 1;
723 			j1 = (xsb1 + 0x4000) & 0xffff8000;
724 			j2 = (xsb2 + 0x4000) & 0xffff8000;
725 			HI(&t1) = j1;
726 			HI(&t2) = j2;
727 			LO(&t1) = 0;
728 			LO(&t2) = 0;
729 			x0_or_one[0] = x0;
730 			x0_or_one[2] = -x0;
731 			y0_or_zero[0] = y0;
732 			y0_or_zero[2] = -y0;
733 			x1 = (x1 - t1) + y1;
734 			x2 = (x2 - t2) + y2;
735 			z0 = x0 * x0;
736 			z1 = x1 * x1;
737 			z2 = x2 * x2;
738 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
739 			t1 = z1 * (qq1 + z1 * qq2);
740 			t2 = z2 * (qq1 + z2 * qq2);
741 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
742 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
743 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
744 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
745 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
746 			xsb1 = (xsb1 >> 30) & 2;
747 			xsb2 = (xsb2 >> 30) & 2;
748 			n1 ^= (xsb1 & ~(n1 << 1));
749 			n2 ^= (xsb2 & ~(n2 << 1));
750 			xsb1 |= 1;
751 			xsb2 |= 1;
752 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
753 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
754 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
755 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
756 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
757 			*py0 = t0;
758 			*py1 = ( a1 + t1 );
759 			*py2 = ( a2 + t2 );
760 			break;
761 
762 		case 2:
763 			j0 = (xsb0 + 0x4000) & 0xffff8000;
764 			j1 = n1 & 1;
765 			j2 = (xsb2 + 0x4000) & 0xffff8000;
766 			HI(&t0) = j0;
767 			HI(&t2) = j2;
768 			LO(&t0) = 0;
769 			LO(&t2) = 0;
770 			x1_or_one[0] = x1;
771 			x1_or_one[2] = -x1;
772 			x0 = (x0 - t0) + y0;
773 			y1_or_zero[0] = y1;
774 			y1_or_zero[2] = -y1;
775 			x2 = (x2 - t2) + y2;
776 			z0 = x0 * x0;
777 			z1 = x1 * x1;
778 			z2 = x2 * x2;
779 			t0 = z0 * (qq1 + z0 * qq2);
780 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
781 			t2 = z2 * (qq1 + z2 * qq2);
782 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
783 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
784 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
785 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
786 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
787 			xsb0 = (xsb0 >> 30) & 2;
788 			xsb2 = (xsb2 >> 30) & 2;
789 			n0 ^= (xsb0 & ~(n0 << 1));
790 			n2 ^= (xsb2 & ~(n2 << 1));
791 			xsb0 |= 1;
792 			xsb2 |= 1;
793 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
794 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
795 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
796 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
797 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
798 			*py0 = ( a0 + t0 );
799 			*py1 = t1;
800 			*py2 = ( a2 + t2 );
801 			break;
802 
803 		case 3:
804 			j0 = n0 & 1;
805 			j1 = n1 & 1;
806 			j2 = (xsb2 + 0x4000) & 0xffff8000;
807 			HI(&t2) = j2;
808 			LO(&t2) = 0;
809 			x0_or_one[0] = x0;
810 			x0_or_one[2] = -x0;
811 			x1_or_one[0] = x1;
812 			x1_or_one[2] = -x1;
813 			y0_or_zero[0] = y0;
814 			y0_or_zero[2] = -y0;
815 			y1_or_zero[0] = y1;
816 			y1_or_zero[2] = -y1;
817 			x2 = (x2 - t2) + y2;
818 			z0 = x0 * x0;
819 			z1 = x1 * x1;
820 			z2 = x2 * x2;
821 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
822 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
823 			t2 = z2 * (qq1 + z2 * qq2);
824 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
825 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
826 			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
827 			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
828 			xsb2 = (xsb2 >> 30) & 2;
829 			n2 ^= (xsb2 & ~(n2 << 1));
830 			xsb2 |= 1;
831 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
832 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
833 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
834 			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
835 			*py0 = t0;
836 			*py1 = t1;
837 			*py2 = ( a2 + t2 );
838 			break;
839 
840 		case 4:
841 			j0 = (xsb0 + 0x4000) & 0xffff8000;
842 			j1 = (xsb1 + 0x4000) & 0xffff8000;
843 			j2 = n2 & 1;
844 			HI(&t0) = j0;
845 			HI(&t1) = j1;
846 			LO(&t0) = 0;
847 			LO(&t1) = 0;
848 			x2_or_one[0] = x2;
849 			x2_or_one[2] = -x2;
850 			x0 = (x0 - t0) + y0;
851 			x1 = (x1 - t1) + y1;
852 			y2_or_zero[0] = y2;
853 			y2_or_zero[2] = -y2;
854 			z0 = x0 * x0;
855 			z1 = x1 * x1;
856 			z2 = x2 * x2;
857 			t0 = z0 * (qq1 + z0 * qq2);
858 			t1 = z1 * (qq1 + z1 * qq2);
859 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
860 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
861 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
862 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
863 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
864 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
865 			xsb0 = (xsb0 >> 30) & 2;
866 			xsb1 = (xsb1 >> 30) & 2;
867 			n0 ^= (xsb0 & ~(n0 << 1));
868 			n1 ^= (xsb1 & ~(n1 << 1));
869 			xsb0 |= 1;
870 			xsb1 |= 1;
871 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
872 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
873 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
874 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
875 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
876 			*py0 = ( a0 + t0 );
877 			*py1 = ( a1 + t1 );
878 			*py2 = t2;
879 			break;
880 
881 		case 5:
882 			j0 = n0 & 1;
883 			j1 = (xsb1 + 0x4000) & 0xffff8000;
884 			j2 = n2 & 1;
885 			HI(&t1) = j1;
886 			LO(&t1) = 0;
887 			x0_or_one[0] = x0;
888 			x0_or_one[2] = -x0;
889 			x2_or_one[0] = x2;
890 			x2_or_one[2] = -x2;
891 			y0_or_zero[0] = y0;
892 			y0_or_zero[2] = -y0;
893 			x1 = (x1 - t1) + y1;
894 			y2_or_zero[0] = y2;
895 			y2_or_zero[2] = -y2;
896 			z0 = x0 * x0;
897 			z1 = x1 * x1;
898 			z2 = x2 * x2;
899 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
900 			t1 = z1 * (qq1 + z1 * qq2);
901 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
902 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
903 			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
904 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
905 			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
906 			xsb1 = (xsb1 >> 30) & 2;
907 			n1 ^= (xsb1 & ~(n1 << 1));
908 			xsb1 |= 1;
909 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
910 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
911 			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
912 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
913 			*py0 = t0;
914 			*py1 = ( a1 + t1 );
915 			*py2 = t2;
916 			break;
917 
918 		case 6:
919 			j0 = (xsb0 + 0x4000) & 0xffff8000;
920 			j1 = n1 & 1;
921 			j2 = n2 & 1;
922 			HI(&t0) = j0;
923 			LO(&t0) = 0;
924 			x1_or_one[0] = x1;
925 			x1_or_one[2] = -x1;
926 			x2_or_one[0] = x2;
927 			x2_or_one[2] = -x2;
928 			x0 = (x0 - t0) + y0;
929 			y1_or_zero[0] = y1;
930 			y1_or_zero[2] = -y1;
931 			y2_or_zero[0] = y2;
932 			y2_or_zero[2] = -y2;
933 			z0 = x0 * x0;
934 			z1 = x1 * x1;
935 			z2 = x2 * x2;
936 			t0 = z0 * (qq1 + z0 * qq2);
937 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
938 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
939 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
940 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
941 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
942 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
943 			xsb0 = (xsb0 >> 30) & 2;
944 			n0 ^= (xsb0 & ~(n0 << 1));
945 			xsb0 |= 1;
946 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
947 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
948 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
949 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
950 			*py0 = ( a0 + t0 );
951 			*py1 = t1;
952 			*py2 = t2;
953 			break;
954 
955 		case 7:
956 			j0 = n0 & 1;
957 			j1 = n1 & 1;
958 			j2 = n2 & 1;
959 			x0_or_one[0] = x0;
960 			x0_or_one[2] = -x0;
961 			x1_or_one[0] = x1;
962 			x1_or_one[2] = -x1;
963 			x2_or_one[0] = x2;
964 			x2_or_one[2] = -x2;
965 			y0_or_zero[0] = y0;
966 			y0_or_zero[2] = -y0;
967 			y1_or_zero[0] = y1;
968 			y1_or_zero[2] = -y1;
969 			y2_or_zero[0] = y2;
970 			y2_or_zero[2] = -y2;
971 			z0 = x0 * x0;
972 			z1 = x1 * x1;
973 			z2 = x2 * x2;
974 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
975 			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
976 			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
977 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
978 			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
979 			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
980 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
981 			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
982 			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
983 			*py0 = t0;
984 			*py1 = t1;
985 			*py2 = t2;
986 			break;
987 		}
988 
989 		x += stridex;
990 		y += stridey;
991 		i = 0;
992 	} while (--n > 0);
993 
	/*
	 * Medium-range clean up: finish the one or two elements that were
	 * gathered but not evaluated when the input ran out, using the same
	 * reduction and the same two evaluation paths one element at a time.
	 */
994 	if (i > 0)
995 	{
996 		double		fn0, fn1, a0, a1, w0, w1, y0, y1;
997 		double		t0, t1, z0, z1;
998 		unsigned	j0, j1;
999 		int			n0, n1;
1000 
1001 		if (i > 1)
1002 		{
1003 			n1 = (int) (x1 * invpio2 + half[xsb1]);
1004 			fn1 = (double) n1;
1005 			n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1006 			a1 = x1 - fn1 * pio2_1;
1007 			w1 = fn1 * pio2_2;
1008 			x1 = a1 - w1;
1009 			y1 = (a1 - x1) - w1;
1010 			a1 = x1;
1011 			w1 = fn1 * pio2_3 - y1;
1012 			x1 = a1 - w1;
1013 			y1 = (a1 - x1) - w1;
1014 			a1 = x1;
1015 			w1 = fn1 * pio2_3t - y1;
1016 			x1 = a1 - w1;
1017 			y1 = (a1 - x1) - w1;
1018 			xsb1 = HI(&x1);
1019 			if ((xsb1 & ~0x80000000) < thresh[n1&1])
1020 			{
1021 				j1 = n1 & 1;
1022 				x1_or_one[0] = x1;
1023 				x1_or_one[2] = -x1;
1024 				y1_or_zero[0] = y1;
1025 				y1_or_zero[2] = -y1;
1026 				z1 = x1 * x1;
1027 				t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1028 				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1029 				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1030 				*py1 = t1;
1031 			}
1032 			else
1033 			{
1034 				j1 = (xsb1 + 0x4000) & 0xffff8000;
1035 				HI(&t1) = j1;
1036 				LO(&t1) = 0;
1037 				x1 = (x1 - t1) + y1;
1038 				z1 = x1 * x1;
1039 				t1 = z1 * (qq1 + z1 * qq2);
1040 				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1041 				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1042 				xsb1 = (xsb1 >> 30) & 2;
1043 				n1 ^= (xsb1 & ~(n1 << 1));
1044 				xsb1 |= 1;
1045 				a1 = __vlibm_TBL_sincos_hi[j1+n1];
1046 				t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
1047 				*py1 = ( a1 + t1 );
1048 			}
1049 		}
1050 		n0 = (int) (x0 * invpio2 + half[xsb0]);
1051 		fn0 = (double) n0;
1052 		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1053 		a0 = x0 - fn0 * pio2_1;
1054 		w0 = fn0 * pio2_2;
1055 		x0 = a0 - w0;
1056 		y0 = (a0 - x0) - w0;
1057 		a0 = x0;
1058 		w0 = fn0 * pio2_3 - y0;
1059 		x0 = a0 - w0;
1060 		y0 = (a0 - x0) - w0;
1061 		a0 = x0;
1062 		w0 = fn0 * pio2_3t - y0;
1063 		x0 = a0 - w0;
1064 		y0 = (a0 - x0) - w0;
1065 		xsb0 = HI(&x0);
1066 		if ((xsb0 & ~0x80000000) < thresh[n0&1])
1067 		{
1068 			j0 = n0 & 1;
1069 			x0_or_one[0] = x0;
1070 			x0_or_one[2] = -x0;
1071 			y0_or_zero[0] = y0;
1072 			y0_or_zero[2] = -y0;
1073 			z0 = x0 * x0;
1074 			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1075 			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1076 			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1077 			*py0 = t0;
1078 		}
1079 		else
1080 		{
1081 			j0 = (xsb0 + 0x4000) & 0xffff8000;
1082 			HI(&t0) = j0;
1083 			LO(&t0) = 0;
1084 			x0 = (x0 - t0) + y0;
1085 			z0 = x0 * x0;
1086 			t0 = z0 * (qq1 + z0 * qq2);
1087 			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1088 			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1089 			xsb0 = (xsb0 >> 30) & 2;
1090 			n0 ^= (xsb0 & ~(n0 << 1));
1091 			xsb0 |= 1;
1092 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
1093 			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
1094 			*py0 = ( a0 + t0 );
1095 		}
1096 	}
1097 
	/*
	 * At least one finite element exceeded the 0x413921fb threshold and
	 * was left unwritten above.  Hand the original vector to
	 * __vlibm_vcos_big together with that threshold.
	 * NOTE(review): presumably it only writes the elements above the
	 * threshold -- its definition is not visible here.
	 */
1098 	if (biguns)
1099 		__vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);
1100 }
1101