1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24 */
25/*
26 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27 * Use is subject to license terms.
28 */
29
30#include <sys/isa_defs.h>
31
32#ifdef _LITTLE_ENDIAN
33#define HI(x)	*(1+(int*)x)
34#define LO(x)	*(unsigned*)x
35#else
36#define HI(x)	*(int*)x
37#define LO(x)	*(1+(unsigned*)x)
38#endif
39
40#ifdef __RESTRICT
41#define restrict _Restrict
42#else
43#define restrict
44#endif
45
46extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
47
48static const double
49	half[2]	= { 0.5, -0.5 },
50	one		= 1.0,
51	invpio2	= 0.636619772367581343075535,
52	pio2_1	= 1.570796326734125614166,
53	pio2_2	= 6.077100506303965976596e-11,
54	pio2_3	= 2.022266248711166455796e-21,
55	pio2_3t	= 8.478427660368899643959e-32,
56	pp1		= -1.666666666605760465276263943134982554676e-0001,
57	pp2		=  8.333261209690963126718376566146180944442e-0003,
58	qq1		= -4.999999999977710986407023955908711557870e-0001,
59	qq2		=  4.166654863857219350645055881018842089580e-0002,
60	poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
61				-4.999999999999931701464060878888294524481e-0001 },
62	poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
63				 4.166666666394861917535640593963708222319e-0002 },
64	poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
65				-1.388888552656142867832756687736851681462e-0003 },
66	poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
67				 2.478519423681460796618128289454530524759e-0005 };
68
69static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
70
71extern void __vlibm_vcos_big(int, double *, int, double *, int, int);
72
73void
74__vlibm_vcos_big_ultra3(int n, double * restrict x, int stridex, double * restrict y,
75	int stridey, int pthresh)
76{
77	double		x0_or_one[4], x1_or_one[4], x2_or_one[4];
78	double		y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
79	double		x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave;
80	unsigned	xsb0, xsb1, xsb2;
81	int			i, biguns, nsave, sxsave, sysave;
82
83	nsave = n;
84	xsave = x;
85	sxsave = stridex;
86	ysave = y;
87	sysave = stridey;
88	biguns = 0;
89
90	x0_or_one[1] = 1.0;
91	x1_or_one[1] = 1.0;
92	x2_or_one[1] = 1.0;
93	x0_or_one[3] = -1.0;
94	x1_or_one[3] = -1.0;
95	x2_or_one[3] = -1.0;
96	y0_or_zero[1] = 0.0;
97	y1_or_zero[1] = 0.0;
98	y2_or_zero[1] = 0.0;
99	y0_or_zero[3] = 0.0;
100	y1_or_zero[3] = 0.0;
101	y2_or_zero[3] = 0.0;
102
103	do
104	{
105		double		fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
106		unsigned	hx;
107		int			n0, n1, n2;
108
109loop0:
110		hx = HI(x);
111		xsb0 = hx >> 31;
112		hx &= ~0x80000000;
113		if (hx <= pthresh || hx > 0x413921fb)
114		{
115			if (hx > 0x413921fb && hx < 0x7ff00000)
116				biguns = 1;
117			x += stridex;
118			y += stridey;
119			i = 0;
120			if (--n <= 0)
121				break;
122			goto loop0;
123		}
124		x0 = *x;
125		py0 = y;
126		x += stridex;
127		y += stridey;
128		i = 1;
129		if (--n <= 0)
130			break;
131
132loop1:
133		hx = HI(x);
134		xsb1 = hx >> 31;
135		hx &= ~0x80000000;
136		if (hx <= pthresh || hx > 0x413921fb)
137		{
138			if (hx > 0x413921fb && hx < 0x7ff00000)
139				biguns = 1;
140			x += stridex;
141			y += stridey;
142			i = 1;
143			if (--n <= 0)
144				break;
145			goto loop1;
146		}
147		x1 = *x;
148		py1 = y;
149		x += stridex;
150		y += stridey;
151		i = 2;
152		if (--n <= 0)
153			break;
154
155loop2:
156		hx = HI(x);
157		xsb2 = hx >> 31;
158		hx &= ~0x80000000;
159		if (hx <= pthresh || hx > 0x413921fb)
160		{
161			if (hx > 0x413921fb && hx < 0x7ff00000)
162				biguns = 1;
163			x += stridex;
164			y += stridey;
165			i = 2;
166			if (--n <= 0)
167				break;
168			goto loop2;
169		}
170		x2 = *x;
171		py2 = y;
172
173		n0 = (int) (x0 * invpio2 + half[xsb0]);
174		n1 = (int) (x1 * invpio2 + half[xsb1]);
175		n2 = (int) (x2 * invpio2 + half[xsb2]);
176		fn0 = (double) n0;
177		fn1 = (double) n1;
178		fn2 = (double) n2;
179		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
180		n1 = (n1 + 1) & 3;
181		n2 = (n2 + 1) & 3;
182		a0 = x0 - fn0 * pio2_1;
183		a1 = x1 - fn1 * pio2_1;
184		a2 = x2 - fn2 * pio2_1;
185		w0 = fn0 * pio2_2;
186		w1 = fn1 * pio2_2;
187		w2 = fn2 * pio2_2;
188		x0 = a0 - w0;
189		x1 = a1 - w1;
190		x2 = a2 - w2;
191		y0 = (a0 - x0) - w0;
192		y1 = (a1 - x1) - w1;
193		y2 = (a2 - x2) - w2;
194		a0 = x0;
195		a1 = x1;
196		a2 = x2;
197		w0 = fn0 * pio2_3 - y0;
198		w1 = fn1 * pio2_3 - y1;
199		w2 = fn2 * pio2_3 - y2;
200		x0 = a0 - w0;
201		x1 = a1 - w1;
202		x2 = a2 - w2;
203		y0 = (a0 - x0) - w0;
204		y1 = (a1 - x1) - w1;
205		y2 = (a2 - x2) - w2;
206		a0 = x0;
207		a1 = x1;
208		a2 = x2;
209		w0 = fn0 * pio2_3t - y0;
210		w1 = fn1 * pio2_3t - y1;
211		w2 = fn2 * pio2_3t - y2;
212		x0 = a0 - w0;
213		x1 = a1 - w1;
214		x2 = a2 - w2;
215		y0 = (a0 - x0) - w0;
216		y1 = (a1 - x1) - w1;
217		y2 = (a2 - x2) - w2;
218		xsb0 = HI(&x0);
219		i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
220		xsb1 = HI(&x1);
221		i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
222		xsb2 = HI(&x2);
223		i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
224		switch (i)
225		{
226			double		t0, t1, t2, z0, z1, z2;
227			unsigned	j0, j1, j2;
228
229		case 0:
230			j0 = (xsb0 + 0x4000) & 0xffff8000;
231			j1 = (xsb1 + 0x4000) & 0xffff8000;
232			j2 = (xsb2 + 0x4000) & 0xffff8000;
233			HI(&t0) = j0;
234			HI(&t1) = j1;
235			HI(&t2) = j2;
236			LO(&t0) = 0;
237			LO(&t1) = 0;
238			LO(&t2) = 0;
239			x0 = (x0 - t0) + y0;
240			x1 = (x1 - t1) + y1;
241			x2 = (x2 - t2) + y2;
242			z0 = x0 * x0;
243			z1 = x1 * x1;
244			z2 = x2 * x2;
245			t0 = z0 * (qq1 + z0 * qq2);
246			t1 = z1 * (qq1 + z1 * qq2);
247			t2 = z2 * (qq1 + z2 * qq2);
248			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
249			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
250			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
251			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
252			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
253			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
254			xsb0 = (xsb0 >> 30) & 2;
255			xsb1 = (xsb1 >> 30) & 2;
256			xsb2 = (xsb2 >> 30) & 2;
257			n0 ^= (xsb0 & ~(n0 << 1));
258			n1 ^= (xsb1 & ~(n1 << 1));
259			n2 ^= (xsb2 & ~(n2 << 1));
260			xsb0 |= 1;
261			xsb1 |= 1;
262			xsb2 |= 1;
263			a0 = __vlibm_TBL_sincos_hi[j0+n0];
264			a1 = __vlibm_TBL_sincos_hi[j1+n1];
265			a2 = __vlibm_TBL_sincos_hi[j2+n2];
266			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
267			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
268			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
269			*py0 = ( a0 + t0 );
270			*py1 = ( a1 + t1 );
271			*py2 = ( a2 + t2 );
272			break;
273
274		case 1:
275			j0 = n0 & 1;
276			j1 = (xsb1 + 0x4000) & 0xffff8000;
277			j2 = (xsb2 + 0x4000) & 0xffff8000;
278			HI(&t1) = j1;
279			HI(&t2) = j2;
280			LO(&t1) = 0;
281			LO(&t2) = 0;
282			x0_or_one[0] = x0;
283			x0_or_one[2] = -x0;
284			y0_or_zero[0] = y0;
285			y0_or_zero[2] = -y0;
286			x1 = (x1 - t1) + y1;
287			x2 = (x2 - t2) + y2;
288			z0 = x0 * x0;
289			z1 = x1 * x1;
290			z2 = x2 * x2;
291			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
292			t1 = z1 * (qq1 + z1 * qq2);
293			t2 = z2 * (qq1 + z2 * qq2);
294			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
295			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
296			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
297			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
298			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
299			xsb1 = (xsb1 >> 30) & 2;
300			xsb2 = (xsb2 >> 30) & 2;
301			n1 ^= (xsb1 & ~(n1 << 1));
302			n2 ^= (xsb2 & ~(n2 << 1));
303			xsb1 |= 1;
304			xsb2 |= 1;
305			a1 = __vlibm_TBL_sincos_hi[j1+n1];
306			a2 = __vlibm_TBL_sincos_hi[j2+n2];
307			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
308			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
309			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
310			*py0 = t0;
311			*py1 = ( a1 + t1 );
312			*py2 = ( a2 + t2 );
313			break;
314
315		case 2:
316			j0 = (xsb0 + 0x4000) & 0xffff8000;
317			j1 = n1 & 1;
318			j2 = (xsb2 + 0x4000) & 0xffff8000;
319			HI(&t0) = j0;
320			HI(&t2) = j2;
321			LO(&t0) = 0;
322			LO(&t2) = 0;
323			x1_or_one[0] = x1;
324			x1_or_one[2] = -x1;
325			x0 = (x0 - t0) + y0;
326			y1_or_zero[0] = y1;
327			y1_or_zero[2] = -y1;
328			x2 = (x2 - t2) + y2;
329			z0 = x0 * x0;
330			z1 = x1 * x1;
331			z2 = x2 * x2;
332			t0 = z0 * (qq1 + z0 * qq2);
333			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
334			t2 = z2 * (qq1 + z2 * qq2);
335			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
336			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
337			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
338			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
339			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
340			xsb0 = (xsb0 >> 30) & 2;
341			xsb2 = (xsb2 >> 30) & 2;
342			n0 ^= (xsb0 & ~(n0 << 1));
343			n2 ^= (xsb2 & ~(n2 << 1));
344			xsb0 |= 1;
345			xsb2 |= 1;
346			a0 = __vlibm_TBL_sincos_hi[j0+n0];
347			a2 = __vlibm_TBL_sincos_hi[j2+n2];
348			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
349			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
350			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
351			*py0 = ( a0 + t0 );
352			*py1 = t1;
353			*py2 = ( a2 + t2 );
354			break;
355
356		case 3:
357			j0 = n0 & 1;
358			j1 = n1 & 1;
359			j2 = (xsb2 + 0x4000) & 0xffff8000;
360			HI(&t2) = j2;
361			LO(&t2) = 0;
362			x0_or_one[0] = x0;
363			x0_or_one[2] = -x0;
364			x1_or_one[0] = x1;
365			x1_or_one[2] = -x1;
366			y0_or_zero[0] = y0;
367			y0_or_zero[2] = -y0;
368			y1_or_zero[0] = y1;
369			y1_or_zero[2] = -y1;
370			x2 = (x2 - t2) + y2;
371			z0 = x0 * x0;
372			z1 = x1 * x1;
373			z2 = x2 * x2;
374			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
375			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
376			t2 = z2 * (qq1 + z2 * qq2);
377			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
378			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
379			w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
380			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
381			xsb2 = (xsb2 >> 30) & 2;
382			n2 ^= (xsb2 & ~(n2 << 1));
383			xsb2 |= 1;
384			a2 = __vlibm_TBL_sincos_hi[j2+n2];
385			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
386			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
387			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
388			*py0 = t0;
389			*py1 = t1;
390			*py2 = ( a2 + t2 );
391			break;
392
393		case 4:
394			j0 = (xsb0 + 0x4000) & 0xffff8000;
395			j1 = (xsb1 + 0x4000) & 0xffff8000;
396			j2 = n2 & 1;
397			HI(&t0) = j0;
398			HI(&t1) = j1;
399			LO(&t0) = 0;
400			LO(&t1) = 0;
401			x2_or_one[0] = x2;
402			x2_or_one[2] = -x2;
403			x0 = (x0 - t0) + y0;
404			x1 = (x1 - t1) + y1;
405			y2_or_zero[0] = y2;
406			y2_or_zero[2] = -y2;
407			z0 = x0 * x0;
408			z1 = x1 * x1;
409			z2 = x2 * x2;
410			t0 = z0 * (qq1 + z0 * qq2);
411			t1 = z1 * (qq1 + z1 * qq2);
412			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
413			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
414			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
415			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
416			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
417			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
418			xsb0 = (xsb0 >> 30) & 2;
419			xsb1 = (xsb1 >> 30) & 2;
420			n0 ^= (xsb0 & ~(n0 << 1));
421			n1 ^= (xsb1 & ~(n1 << 1));
422			xsb0 |= 1;
423			xsb1 |= 1;
424			a0 = __vlibm_TBL_sincos_hi[j0+n0];
425			a1 = __vlibm_TBL_sincos_hi[j1+n1];
426			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
427			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
428			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
429			*py0 = ( a0 + t0 );
430			*py1 = ( a1 + t1 );
431			*py2 = t2;
432			break;
433
434		case 5:
435			j0 = n0 & 1;
436			j1 = (xsb1 + 0x4000) & 0xffff8000;
437			j2 = n2 & 1;
438			HI(&t1) = j1;
439			LO(&t1) = 0;
440			x0_or_one[0] = x0;
441			x0_or_one[2] = -x0;
442			x2_or_one[0] = x2;
443			x2_or_one[2] = -x2;
444			y0_or_zero[0] = y0;
445			y0_or_zero[2] = -y0;
446			x1 = (x1 - t1) + y1;
447			y2_or_zero[0] = y2;
448			y2_or_zero[2] = -y2;
449			z0 = x0 * x0;
450			z1 = x1 * x1;
451			z2 = x2 * x2;
452			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
453			t1 = z1 * (qq1 + z1 * qq2);
454			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
455			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
456			w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
457			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
458			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
459			xsb1 = (xsb1 >> 30) & 2;
460			n1 ^= (xsb1 & ~(n1 << 1));
461			xsb1 |= 1;
462			a1 = __vlibm_TBL_sincos_hi[j1+n1];
463			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
464			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
465			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
466			*py0 = t0;
467			*py1 = ( a1 + t1 );
468			*py2 = t2;
469			break;
470
471		case 6:
472			j0 = (xsb0 + 0x4000) & 0xffff8000;
473			j1 = n1 & 1;
474			j2 = n2 & 1;
475			HI(&t0) = j0;
476			LO(&t0) = 0;
477			x1_or_one[0] = x1;
478			x1_or_one[2] = -x1;
479			x2_or_one[0] = x2;
480			x2_or_one[2] = -x2;
481			x0 = (x0 - t0) + y0;
482			y1_or_zero[0] = y1;
483			y1_or_zero[2] = -y1;
484			y2_or_zero[0] = y2;
485			y2_or_zero[2] = -y2;
486			z0 = x0 * x0;
487			z1 = x1 * x1;
488			z2 = x2 * x2;
489			t0 = z0 * (qq1 + z0 * qq2);
490			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
491			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
492			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
493			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
494			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
495			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
496			xsb0 = (xsb0 >> 30) & 2;
497			n0 ^= (xsb0 & ~(n0 << 1));
498			xsb0 |= 1;
499			a0 = __vlibm_TBL_sincos_hi[j0+n0];
500			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
501			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
502			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
503			*py0 = ( a0 + t0 );
504			*py1 = t1;
505			*py2 = t2;
506			break;
507
508		case 7:
509			j0 = n0 & 1;
510			j1 = n1 & 1;
511			j2 = n2 & 1;
512			x0_or_one[0] = x0;
513			x0_or_one[2] = -x0;
514			x1_or_one[0] = x1;
515			x1_or_one[2] = -x1;
516			x2_or_one[0] = x2;
517			x2_or_one[2] = -x2;
518			y0_or_zero[0] = y0;
519			y0_or_zero[2] = -y0;
520			y1_or_zero[0] = y1;
521			y1_or_zero[2] = -y1;
522			y2_or_zero[0] = y2;
523			y2_or_zero[2] = -y2;
524			z0 = x0 * x0;
525			z1 = x1 * x1;
526			z2 = x2 * x2;
527			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
528			t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
529			t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
530			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
531			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
532			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
533			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
534			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
535			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
536			*py0 = t0;
537			*py1 = t1;
538			*py2 = t2;
539			break;
540		}
541
542		x += stridex;
543		y += stridey;
544		i = 0;
545	} while (--n > 0);
546
547	if (i > 0)
548	{
549		double		fn0, fn1, a0, a1, w0, w1, y0, y1;
550		double		t0, t1, z0, z1;
551		unsigned	j0, j1;
552		int			n0, n1;
553
554		if (i > 1)
555		{
556			n1 = (int) (x1 * invpio2 + half[xsb1]);
557			fn1 = (double) n1;
558			n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
559			a1 = x1 - fn1 * pio2_1;
560			w1 = fn1 * pio2_2;
561			x1 = a1 - w1;
562			y1 = (a1 - x1) - w1;
563			a1 = x1;
564			w1 = fn1 * pio2_3 - y1;
565			x1 = a1 - w1;
566			y1 = (a1 - x1) - w1;
567			a1 = x1;
568			w1 = fn1 * pio2_3t - y1;
569			x1 = a1 - w1;
570			y1 = (a1 - x1) - w1;
571			xsb1 = HI(&x1);
572			if ((xsb1 & ~0x80000000) < thresh[n1&1])
573			{
574				j1 = n1 & 1;
575				x1_or_one[0] = x1;
576				x1_or_one[2] = -x1;
577				y1_or_zero[0] = y1;
578				y1_or_zero[2] = -y1;
579				z1 = x1 * x1;
580				t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
581				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
582				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
583				*py1 = t1;
584			}
585			else
586			{
587				j1 = (xsb1 + 0x4000) & 0xffff8000;
588				HI(&t1) = j1;
589				LO(&t1) = 0;
590				x1 = (x1 - t1) + y1;
591				z1 = x1 * x1;
592				t1 = z1 * (qq1 + z1 * qq2);
593				w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
594				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
595				xsb1 = (xsb1 >> 30) & 2;
596				n1 ^= (xsb1 & ~(n1 << 1));
597				xsb1 |= 1;
598				a1 = __vlibm_TBL_sincos_hi[j1+n1];
599				t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
600				*py1 = ( a1 + t1 );
601			}
602		}
603		n0 = (int) (x0 * invpio2 + half[xsb0]);
604		fn0 = (double) n0;
605		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
606		a0 = x0 - fn0 * pio2_1;
607		w0 = fn0 * pio2_2;
608		x0 = a0 - w0;
609		y0 = (a0 - x0) - w0;
610		a0 = x0;
611		w0 = fn0 * pio2_3 - y0;
612		x0 = a0 - w0;
613		y0 = (a0 - x0) - w0;
614		a0 = x0;
615		w0 = fn0 * pio2_3t - y0;
616		x0 = a0 - w0;
617		y0 = (a0 - x0) - w0;
618		xsb0 = HI(&x0);
619		if ((xsb0 & ~0x80000000) < thresh[n0&1])
620		{
621			j0 = n0 & 1;
622			x0_or_one[0] = x0;
623			x0_or_one[2] = -x0;
624			y0_or_zero[0] = y0;
625			y0_or_zero[2] = -y0;
626			z0 = x0 * x0;
627			t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
628			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
629			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
630			*py0 = t0;
631		}
632		else
633		{
634			j0 = (xsb0 + 0x4000) & 0xffff8000;
635			HI(&t0) = j0;
636			LO(&t0) = 0;
637			x0 = (x0 - t0) + y0;
638			z0 = x0 * x0;
639			t0 = z0 * (qq1 + z0 * qq2);
640			w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
641			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
642			xsb0 = (xsb0 >> 30) & 2;
643			n0 ^= (xsb0 & ~(n0 << 1));
644			xsb0 |= 1;
645			a0 = __vlibm_TBL_sincos_hi[j0+n0];
646			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
647			*py0 = ( a0 + t0 );
648		}
649	}
650
651	if (biguns)
652		__vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);
653}
654