1*25c28e83SPiotr Jasiukajtis /*
2*25c28e83SPiotr Jasiukajtis  * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis  *
4*25c28e83SPiotr Jasiukajtis  * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis  * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis  * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis  *
8*25c28e83SPiotr Jasiukajtis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis  * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis  * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis  * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis  *
13*25c28e83SPiotr Jasiukajtis  * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis  * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis  * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis  * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis  *
19*25c28e83SPiotr Jasiukajtis  * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis  */
21*25c28e83SPiotr Jasiukajtis /*
22*25c28e83SPiotr Jasiukajtis  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23*25c28e83SPiotr Jasiukajtis  */
24*25c28e83SPiotr Jasiukajtis /*
25*25c28e83SPiotr Jasiukajtis  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26*25c28e83SPiotr Jasiukajtis  * Use is subject to license terms.
27*25c28e83SPiotr Jasiukajtis  */
28*25c28e83SPiotr Jasiukajtis 
29*25c28e83SPiotr Jasiukajtis /*
30*25c28e83SPiotr Jasiukajtis  * __vsinf: single precision vector sin
31*25c28e83SPiotr Jasiukajtis  *
32*25c28e83SPiotr Jasiukajtis  * Algorithm:
33*25c28e83SPiotr Jasiukajtis  *
34*25c28e83SPiotr Jasiukajtis  * For |x| < pi/4, approximate sin(x) by a polynomial x+x*z*(S0+
35*25c28e83SPiotr Jasiukajtis  * z*(S1+z*S2)) and cos(x) by a polynomial 1+z*(-1/2+z*(C0+z*(C1+
36*25c28e83SPiotr Jasiukajtis  * z*C2))), where z = x*x, all evaluated in double precision.
37*25c28e83SPiotr Jasiukajtis  *
38*25c28e83SPiotr Jasiukajtis  * Accuracy:
39*25c28e83SPiotr Jasiukajtis  *
40*25c28e83SPiotr Jasiukajtis  * The largest error is less than 0.6 ulps.
41*25c28e83SPiotr Jasiukajtis  */
42*25c28e83SPiotr Jasiukajtis 
43*25c28e83SPiotr Jasiukajtis #include <sys/isa_defs.h>
44*25c28e83SPiotr Jasiukajtis 
45*25c28e83SPiotr Jasiukajtis #ifdef _LITTLE_ENDIAN
46*25c28e83SPiotr Jasiukajtis #define	HI(x)	*(1+(int *)&x)
47*25c28e83SPiotr Jasiukajtis #define	LO(x)	*(unsigned *)&x
48*25c28e83SPiotr Jasiukajtis #else
49*25c28e83SPiotr Jasiukajtis #define	HI(x)	*(int *)&x
50*25c28e83SPiotr Jasiukajtis #define	LO(x)	*(1+(unsigned *)&x)
51*25c28e83SPiotr Jasiukajtis #endif
52*25c28e83SPiotr Jasiukajtis 
53*25c28e83SPiotr Jasiukajtis #ifdef __RESTRICT
54*25c28e83SPiotr Jasiukajtis #define	restrict _Restrict
55*25c28e83SPiotr Jasiukajtis #else
56*25c28e83SPiotr Jasiukajtis #define	restrict
57*25c28e83SPiotr Jasiukajtis #endif
58*25c28e83SPiotr Jasiukajtis 
59*25c28e83SPiotr Jasiukajtis extern int __vlibm_rem_pio2m(double *, double *, int, int, int);
60*25c28e83SPiotr Jasiukajtis 
61*25c28e83SPiotr Jasiukajtis static const double C[] = {
62*25c28e83SPiotr Jasiukajtis 	-1.66666552424430847168e-01,	/* 2^ -3 * -1.5555460000000 */
63*25c28e83SPiotr Jasiukajtis 	8.33219196647405624390e-03,	/* 2^ -7 *  1.11077E0000000 */
64*25c28e83SPiotr Jasiukajtis 	-1.95187909412197768688e-04,	/* 2^-13 * -1.9956B60000000 */
65*25c28e83SPiotr Jasiukajtis 	1.0,
66*25c28e83SPiotr Jasiukajtis 	-0.5,
67*25c28e83SPiotr Jasiukajtis 	4.16666455566883087158e-02,	/* 2^ -5 *  1.55554A0000000 */
68*25c28e83SPiotr Jasiukajtis 	-1.38873036485165357590e-03,	/* 2^-10 * -1.6C0C1E0000000 */
69*25c28e83SPiotr Jasiukajtis 	2.44309903791872784495e-05,	/* 2^-16 *  1.99E24E0000000 */
70*25c28e83SPiotr Jasiukajtis 	0.636619772367581343075535,	/* 2^ -1  * 1.45F306DC9C883 */
71*25c28e83SPiotr Jasiukajtis 	6755399441055744.0,		/* 2^ 52  * 1.8000000000000 */
72*25c28e83SPiotr Jasiukajtis 	1.570796326734125614166,	/* 2^  0  * 1.921FB54400000 */
73*25c28e83SPiotr Jasiukajtis 	6.077100506506192601475e-11,	/* 2^-34  * 1.0B4611A626331 */
74*25c28e83SPiotr Jasiukajtis };
75*25c28e83SPiotr Jasiukajtis 
76*25c28e83SPiotr Jasiukajtis #define	S0	C[0]
77*25c28e83SPiotr Jasiukajtis #define	S1	C[1]
78*25c28e83SPiotr Jasiukajtis #define	S2	C[2]
79*25c28e83SPiotr Jasiukajtis #define	one	C[3]
80*25c28e83SPiotr Jasiukajtis #define	mhalf	C[4]
81*25c28e83SPiotr Jasiukajtis #define	C0	C[5]
82*25c28e83SPiotr Jasiukajtis #define	C1	C[6]
83*25c28e83SPiotr Jasiukajtis #define	C2	C[7]
84*25c28e83SPiotr Jasiukajtis #define	invpio2	C[8]
85*25c28e83SPiotr Jasiukajtis #define	c3two51	C[9]
86*25c28e83SPiotr Jasiukajtis #define	pio2_1  C[10]
87*25c28e83SPiotr Jasiukajtis #define	pio2_t	C[11]
88*25c28e83SPiotr Jasiukajtis 
89*25c28e83SPiotr Jasiukajtis #define	PREPROCESS(N, index, label)					\
90*25c28e83SPiotr Jasiukajtis 	hx = *(int *)x;							\
91*25c28e83SPiotr Jasiukajtis 	ix = hx & 0x7fffffff;						\
92*25c28e83SPiotr Jasiukajtis 	t = *x;								\
93*25c28e83SPiotr Jasiukajtis 	x += stridex;							\
94*25c28e83SPiotr Jasiukajtis 	if (ix <= 0x3f490fdb) { /* |x| < pi/4 */			\
95*25c28e83SPiotr Jasiukajtis 		if (ix == 0) {						\
96*25c28e83SPiotr Jasiukajtis 			y[index] = t;					\
97*25c28e83SPiotr Jasiukajtis 			goto label;					\
98*25c28e83SPiotr Jasiukajtis 		}							\
99*25c28e83SPiotr Jasiukajtis 		y##N = (double)t;					\
100*25c28e83SPiotr Jasiukajtis 		n##N = 0;						\
101*25c28e83SPiotr Jasiukajtis 	} else if (ix <= 0x49c90fdb) { /* |x| < 2^19*pi */		\
102*25c28e83SPiotr Jasiukajtis 		y##N = (double)t;					\
103*25c28e83SPiotr Jasiukajtis 		medium = 1;						\
104*25c28e83SPiotr Jasiukajtis 	} else {							\
105*25c28e83SPiotr Jasiukajtis 		if (ix >= 0x7f800000) { /* inf or nan */		\
106*25c28e83SPiotr Jasiukajtis 			y[index] = t / t;				\
107*25c28e83SPiotr Jasiukajtis 			goto label;					\
108*25c28e83SPiotr Jasiukajtis 		}							\
109*25c28e83SPiotr Jasiukajtis 		z##N = y##N = (double)t;				\
110*25c28e83SPiotr Jasiukajtis 		hx = HI(y##N);						\
111*25c28e83SPiotr Jasiukajtis 		n##N = ((hx >> 20) & 0x7ff) - 1046;			\
112*25c28e83SPiotr Jasiukajtis 		HI(z##N) = (hx & 0xfffff) | 0x41600000;			\
113*25c28e83SPiotr Jasiukajtis 		n##N = __vlibm_rem_pio2m(&z##N, &y##N, n##N, 1, 0);	\
114*25c28e83SPiotr Jasiukajtis 		if (hx < 0) {						\
115*25c28e83SPiotr Jasiukajtis 			y##N = -y##N;					\
116*25c28e83SPiotr Jasiukajtis 			n##N = -n##N;					\
117*25c28e83SPiotr Jasiukajtis 		}							\
118*25c28e83SPiotr Jasiukajtis 		z##N = y##N * y##N;					\
119*25c28e83SPiotr Jasiukajtis 		if (n##N & 1) { /* compute cos y */			\
120*25c28e83SPiotr Jasiukajtis 			f##N = (float)(one + z##N * (mhalf + z##N *	\
121*25c28e83SPiotr Jasiukajtis 			    (C0 + z##N * (C1 + z##N * C2))));		\
122*25c28e83SPiotr Jasiukajtis 		} else { /* compute sin y */				\
123*25c28e83SPiotr Jasiukajtis 			f##N = (float)(y##N + y##N * z##N * (S0 +	\
124*25c28e83SPiotr Jasiukajtis 			    z##N * (S1 + z##N * S2)));			\
125*25c28e83SPiotr Jasiukajtis 		}							\
126*25c28e83SPiotr Jasiukajtis 		y[index] = (n##N & 2)? -f##N : f##N;			\
127*25c28e83SPiotr Jasiukajtis 		goto label;						\
128*25c28e83SPiotr Jasiukajtis 	}
129*25c28e83SPiotr Jasiukajtis 
130*25c28e83SPiotr Jasiukajtis #define	PROCESS(N)							\
131*25c28e83SPiotr Jasiukajtis 	if (medium) {							\
132*25c28e83SPiotr Jasiukajtis 		z##N = y##N * invpio2 + c3two51;			\
133*25c28e83SPiotr Jasiukajtis 		n##N = LO(z##N);					\
134*25c28e83SPiotr Jasiukajtis 		z##N -= c3two51;					\
135*25c28e83SPiotr Jasiukajtis 		y##N = (y##N - z##N * pio2_1) - z##N * pio2_t;		\
136*25c28e83SPiotr Jasiukajtis 	}								\
137*25c28e83SPiotr Jasiukajtis 	z##N = y##N * y##N;						\
138*25c28e83SPiotr Jasiukajtis 	if (n##N & 1) { /* compute cos y */				\
139*25c28e83SPiotr Jasiukajtis 		f##N = (float)(one + z##N * (mhalf + z##N * (C0 +	\
140*25c28e83SPiotr Jasiukajtis 		    z##N * (C1 + z##N * C2))));				\
141*25c28e83SPiotr Jasiukajtis 	} else { /* compute sin y */					\
142*25c28e83SPiotr Jasiukajtis 		f##N = (float)(y##N + y##N * z##N * (S0 + z##N * (S1 +	\
143*25c28e83SPiotr Jasiukajtis 		    z##N * S2)));					\
144*25c28e83SPiotr Jasiukajtis 	}								\
145*25c28e83SPiotr Jasiukajtis 	*y = (n##N & 2)? -f##N : f##N;					\
146*25c28e83SPiotr Jasiukajtis 	y += stridey
147*25c28e83SPiotr Jasiukajtis 
148*25c28e83SPiotr Jasiukajtis void
__vsinf(int n,float * restrict x,int stridex,float * restrict y,int stridey)149*25c28e83SPiotr Jasiukajtis __vsinf(int n, float *restrict x, int stridex, float *restrict y,
150*25c28e83SPiotr Jasiukajtis     int stridey)
151*25c28e83SPiotr Jasiukajtis {
152*25c28e83SPiotr Jasiukajtis 	double		y0, y1, y2, y3;
153*25c28e83SPiotr Jasiukajtis 	double		z0, z1, z2, z3;
154*25c28e83SPiotr Jasiukajtis 	float		f0, f1, f2, f3, t;
155*25c28e83SPiotr Jasiukajtis 	int		n0 = 0, n1 = 0, n2 = 0, n3, hx, ix, medium;
156*25c28e83SPiotr Jasiukajtis 
157*25c28e83SPiotr Jasiukajtis 	y -= stridey;
158*25c28e83SPiotr Jasiukajtis 
159*25c28e83SPiotr Jasiukajtis 	for (;;) {
160*25c28e83SPiotr Jasiukajtis begin:
161*25c28e83SPiotr Jasiukajtis 		y += stridey;
162*25c28e83SPiotr Jasiukajtis 
163*25c28e83SPiotr Jasiukajtis 		if (--n < 0)
164*25c28e83SPiotr Jasiukajtis 			break;
165*25c28e83SPiotr Jasiukajtis 
166*25c28e83SPiotr Jasiukajtis 		medium = 0;
167*25c28e83SPiotr Jasiukajtis 		PREPROCESS(0, 0, begin);
168*25c28e83SPiotr Jasiukajtis 
169*25c28e83SPiotr Jasiukajtis 		if (--n < 0)
170*25c28e83SPiotr Jasiukajtis 			goto process1;
171*25c28e83SPiotr Jasiukajtis 
172*25c28e83SPiotr Jasiukajtis 		PREPROCESS(1, stridey, process1);
173*25c28e83SPiotr Jasiukajtis 
174*25c28e83SPiotr Jasiukajtis 		if (--n < 0)
175*25c28e83SPiotr Jasiukajtis 			goto process2;
176*25c28e83SPiotr Jasiukajtis 
177*25c28e83SPiotr Jasiukajtis 		PREPROCESS(2, (stridey << 1), process2);
178*25c28e83SPiotr Jasiukajtis 
179*25c28e83SPiotr Jasiukajtis 		if (--n < 0)
180*25c28e83SPiotr Jasiukajtis 			goto process3;
181*25c28e83SPiotr Jasiukajtis 
182*25c28e83SPiotr Jasiukajtis 		PREPROCESS(3, (stridey << 1) + stridey, process3);
183*25c28e83SPiotr Jasiukajtis 
184*25c28e83SPiotr Jasiukajtis 		if (medium) {
185*25c28e83SPiotr Jasiukajtis 			z0 = y0 * invpio2 + c3two51;
186*25c28e83SPiotr Jasiukajtis 			z1 = y1 * invpio2 + c3two51;
187*25c28e83SPiotr Jasiukajtis 			z2 = y2 * invpio2 + c3two51;
188*25c28e83SPiotr Jasiukajtis 			z3 = y3 * invpio2 + c3two51;
189*25c28e83SPiotr Jasiukajtis 
190*25c28e83SPiotr Jasiukajtis 			n0 = LO(z0);
191*25c28e83SPiotr Jasiukajtis 			n1 = LO(z1);
192*25c28e83SPiotr Jasiukajtis 			n2 = LO(z2);
193*25c28e83SPiotr Jasiukajtis 			n3 = LO(z3);
194*25c28e83SPiotr Jasiukajtis 
195*25c28e83SPiotr Jasiukajtis 			z0 -= c3two51;
196*25c28e83SPiotr Jasiukajtis 			z1 -= c3two51;
197*25c28e83SPiotr Jasiukajtis 			z2 -= c3two51;
198*25c28e83SPiotr Jasiukajtis 			z3 -= c3two51;
199*25c28e83SPiotr Jasiukajtis 
200*25c28e83SPiotr Jasiukajtis 			y0 = (y0 - z0 * pio2_1) - z0 * pio2_t;
201*25c28e83SPiotr Jasiukajtis 			y1 = (y1 - z1 * pio2_1) - z1 * pio2_t;
202*25c28e83SPiotr Jasiukajtis 			y2 = (y2 - z2 * pio2_1) - z2 * pio2_t;
203*25c28e83SPiotr Jasiukajtis 			y3 = (y3 - z3 * pio2_1) - z3 * pio2_t;
204*25c28e83SPiotr Jasiukajtis 		}
205*25c28e83SPiotr Jasiukajtis 
206*25c28e83SPiotr Jasiukajtis 		z0 = y0 * y0;
207*25c28e83SPiotr Jasiukajtis 		z1 = y1 * y1;
208*25c28e83SPiotr Jasiukajtis 		z2 = y2 * y2;
209*25c28e83SPiotr Jasiukajtis 		z3 = y3 * y3;
210*25c28e83SPiotr Jasiukajtis 
211*25c28e83SPiotr Jasiukajtis 		hx = (n0 & 1) | ((n1 & 1) << 1) | ((n2 & 1) << 2) |
212*25c28e83SPiotr Jasiukajtis 		    ((n3 & 1) << 3);
213*25c28e83SPiotr Jasiukajtis 		switch (hx) {
214*25c28e83SPiotr Jasiukajtis 		case 0:
215*25c28e83SPiotr Jasiukajtis 			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
216*25c28e83SPiotr Jasiukajtis 			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
217*25c28e83SPiotr Jasiukajtis 			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
218*25c28e83SPiotr Jasiukajtis 			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
219*25c28e83SPiotr Jasiukajtis 			break;
220*25c28e83SPiotr Jasiukajtis 
221*25c28e83SPiotr Jasiukajtis 		case 1:
222*25c28e83SPiotr Jasiukajtis 			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
223*25c28e83SPiotr Jasiukajtis 			    z0 * (C1 + z0 * C2))));
224*25c28e83SPiotr Jasiukajtis 			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
225*25c28e83SPiotr Jasiukajtis 			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
226*25c28e83SPiotr Jasiukajtis 			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
227*25c28e83SPiotr Jasiukajtis 			break;
228*25c28e83SPiotr Jasiukajtis 
229*25c28e83SPiotr Jasiukajtis 		case 2:
230*25c28e83SPiotr Jasiukajtis 			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
231*25c28e83SPiotr Jasiukajtis 			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
232*25c28e83SPiotr Jasiukajtis 			    z1 * (C1 + z1 * C2))));
233*25c28e83SPiotr Jasiukajtis 			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
234*25c28e83SPiotr Jasiukajtis 			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
235*25c28e83SPiotr Jasiukajtis 			break;
236*25c28e83SPiotr Jasiukajtis 
237*25c28e83SPiotr Jasiukajtis 		case 3:
238*25c28e83SPiotr Jasiukajtis 			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
239*25c28e83SPiotr Jasiukajtis 			    z0 * (C1 + z0 * C2))));
240*25c28e83SPiotr Jasiukajtis 			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
241*25c28e83SPiotr Jasiukajtis 			    z1 * (C1 + z1 * C2))));
242*25c28e83SPiotr Jasiukajtis 			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
243*25c28e83SPiotr Jasiukajtis 			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
244*25c28e83SPiotr Jasiukajtis 			break;
245*25c28e83SPiotr Jasiukajtis 
246*25c28e83SPiotr Jasiukajtis 		case 4:
247*25c28e83SPiotr Jasiukajtis 			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
248*25c28e83SPiotr Jasiukajtis 			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
249*25c28e83SPiotr Jasiukajtis 			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
250*25c28e83SPiotr Jasiukajtis 			    z2 * (C1 + z2 * C2))));
251*25c28e83SPiotr Jasiukajtis 			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
252*25c28e83SPiotr Jasiukajtis 			break;
253*25c28e83SPiotr Jasiukajtis 
254*25c28e83SPiotr Jasiukajtis 		case 5:
255*25c28e83SPiotr Jasiukajtis 			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
256*25c28e83SPiotr Jasiukajtis 			    z0 * (C1 + z0 * C2))));
257*25c28e83SPiotr Jasiukajtis 			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
258*25c28e83SPiotr Jasiukajtis 			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
259*25c28e83SPiotr Jasiukajtis 			    z2 * (C1 + z2 * C2))));
260*25c28e83SPiotr Jasiukajtis 			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
261*25c28e83SPiotr Jasiukajtis 			break;
262*25c28e83SPiotr Jasiukajtis 
263*25c28e83SPiotr Jasiukajtis 		case 6:
264*25c28e83SPiotr Jasiukajtis 			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
265*25c28e83SPiotr Jasiukajtis 			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
266*25c28e83SPiotr Jasiukajtis 			    z1 * (C1 + z1 * C2))));
267*25c28e83SPiotr Jasiukajtis 			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
268*25c28e83SPiotr Jasiukajtis 			    z2 * (C1 + z2 * C2))));
269*25c28e83SPiotr Jasiukajtis 			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
270*25c28e83SPiotr Jasiukajtis 			break;
271*25c28e83SPiotr Jasiukajtis 
272*25c28e83SPiotr Jasiukajtis 		case 7:
273*25c28e83SPiotr Jasiukajtis 			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
274*25c28e83SPiotr Jasiukajtis 			    z0 * (C1 + z0 * C2))));
275*25c28e83SPiotr Jasiukajtis 			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
276*25c28e83SPiotr Jasiukajtis 			    z1 * (C1 + z1 * C2))));
277*25c28e83SPiotr Jasiukajtis 			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
278*25c28e83SPiotr Jasiukajtis 			    z2 * (C1 + z2 * C2))));
279*25c28e83SPiotr Jasiukajtis 			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
280*25c28e83SPiotr Jasiukajtis 			break;
281*25c28e83SPiotr Jasiukajtis 
282*25c28e83SPiotr Jasiukajtis 		case 8:
283*25c28e83SPiotr Jasiukajtis 			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
284*25c28e83SPiotr Jasiukajtis 			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
285*25c28e83SPiotr Jasiukajtis 			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
286*25c28e83SPiotr Jasiukajtis 			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
287*25c28e83SPiotr Jasiukajtis 			    z3 * (C1 + z3 * C2))));
288*25c28e83SPiotr Jasiukajtis 			break;
289*25c28e83SPiotr Jasiukajtis 
290*25c28e83SPiotr Jasiukajtis 		case 9:
291*25c28e83SPiotr Jasiukajtis 			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
292*25c28e83SPiotr Jasiukajtis 			    z0 * (C1 + z0 * C2))));
293*25c28e83SPiotr Jasiukajtis 			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
294*25c28e83SPiotr Jasiukajtis 			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
295*25c28e83SPiotr Jasiukajtis 			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
296*25c28e83SPiotr Jasiukajtis 			    z3 * (C1 + z3 * C2))));
297*25c28e83SPiotr Jasiukajtis 			break;
298*25c28e83SPiotr Jasiukajtis 
299*25c28e83SPiotr Jasiukajtis 		case 10:
300*25c28e83SPiotr Jasiukajtis 			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
301*25c28e83SPiotr Jasiukajtis 			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
302*25c28e83SPiotr Jasiukajtis 			    z1 * (C1 + z1 * C2))));
303*25c28e83SPiotr Jasiukajtis 			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
304*25c28e83SPiotr Jasiukajtis 			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
305*25c28e83SPiotr Jasiukajtis 			    z3 * (C1 + z3 * C2))));
306*25c28e83SPiotr Jasiukajtis 			break;
307*25c28e83SPiotr Jasiukajtis 
308*25c28e83SPiotr Jasiukajtis 		case 11:
309*25c28e83SPiotr Jasiukajtis 			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
310*25c28e83SPiotr Jasiukajtis 			    z0 * (C1 + z0 * C2))));
311*25c28e83SPiotr Jasiukajtis 			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
312*25c28e83SPiotr Jasiukajtis 			    z1 * (C1 + z1 * C2))));
313*25c28e83SPiotr Jasiukajtis 			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
314*25c28e83SPiotr Jasiukajtis 			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
315*25c28e83SPiotr Jasiukajtis 			    z3 * (C1 + z3 * C2))));
316*25c28e83SPiotr Jasiukajtis 			break;
317*25c28e83SPiotr Jasiukajtis 
318*25c28e83SPiotr Jasiukajtis 		case 12:
319*25c28e83SPiotr Jasiukajtis 			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
320*25c28e83SPiotr Jasiukajtis 			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
321*25c28e83SPiotr Jasiukajtis 			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
322*25c28e83SPiotr Jasiukajtis 			    z2 * (C1 + z2 * C2))));
323*25c28e83SPiotr Jasiukajtis 			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
324*25c28e83SPiotr Jasiukajtis 			    z3 * (C1 + z3 * C2))));
325*25c28e83SPiotr Jasiukajtis 			break;
326*25c28e83SPiotr Jasiukajtis 
327*25c28e83SPiotr Jasiukajtis 		case 13:
328*25c28e83SPiotr Jasiukajtis 			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
329*25c28e83SPiotr Jasiukajtis 			    z0 * (C1 + z0 * C2))));
330*25c28e83SPiotr Jasiukajtis 			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
331*25c28e83SPiotr Jasiukajtis 			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
332*25c28e83SPiotr Jasiukajtis 			    z2 * (C1 + z2 * C2))));
333*25c28e83SPiotr Jasiukajtis 			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
334*25c28e83SPiotr Jasiukajtis 			    z3 * (C1 + z3 * C2))));
335*25c28e83SPiotr Jasiukajtis 			break;
336*25c28e83SPiotr Jasiukajtis 
337*25c28e83SPiotr Jasiukajtis 		case 14:
338*25c28e83SPiotr Jasiukajtis 			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
339*25c28e83SPiotr Jasiukajtis 			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
340*25c28e83SPiotr Jasiukajtis 			    z1 * (C1 + z1 * C2))));
341*25c28e83SPiotr Jasiukajtis 			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
342*25c28e83SPiotr Jasiukajtis 			    z2 * (C1 + z2 * C2))));
343*25c28e83SPiotr Jasiukajtis 			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
344*25c28e83SPiotr Jasiukajtis 			    z3 * (C1 + z3 * C2))));
345*25c28e83SPiotr Jasiukajtis 			break;
346*25c28e83SPiotr Jasiukajtis 
347*25c28e83SPiotr Jasiukajtis 		default:
348*25c28e83SPiotr Jasiukajtis 			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
349*25c28e83SPiotr Jasiukajtis 			    z0 * (C1 + z0 * C2))));
350*25c28e83SPiotr Jasiukajtis 			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
351*25c28e83SPiotr Jasiukajtis 			    z1 * (C1 + z1 * C2))));
352*25c28e83SPiotr Jasiukajtis 			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
353*25c28e83SPiotr Jasiukajtis 			    z2 * (C1 + z2 * C2))));
354*25c28e83SPiotr Jasiukajtis 			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
355*25c28e83SPiotr Jasiukajtis 			    z3 * (C1 + z3 * C2))));
356*25c28e83SPiotr Jasiukajtis 		}
357*25c28e83SPiotr Jasiukajtis 
358*25c28e83SPiotr Jasiukajtis 		*y = (n0 & 2)? -f0 : f0;
359*25c28e83SPiotr Jasiukajtis 		y += stridey;
360*25c28e83SPiotr Jasiukajtis 		*y = (n1 & 2)? -f1 : f1;
361*25c28e83SPiotr Jasiukajtis 		y += stridey;
362*25c28e83SPiotr Jasiukajtis 		*y = (n2 & 2)? -f2 : f2;
363*25c28e83SPiotr Jasiukajtis 		y += stridey;
364*25c28e83SPiotr Jasiukajtis 		*y = (n3 & 2)? -f3 : f3;
365*25c28e83SPiotr Jasiukajtis 		continue;
366*25c28e83SPiotr Jasiukajtis 
367*25c28e83SPiotr Jasiukajtis process1:
368*25c28e83SPiotr Jasiukajtis 		PROCESS(0);
369*25c28e83SPiotr Jasiukajtis 		continue;
370*25c28e83SPiotr Jasiukajtis 
371*25c28e83SPiotr Jasiukajtis process2:
372*25c28e83SPiotr Jasiukajtis 		PROCESS(0);
373*25c28e83SPiotr Jasiukajtis 		PROCESS(1);
374*25c28e83SPiotr Jasiukajtis 		continue;
375*25c28e83SPiotr Jasiukajtis 
376*25c28e83SPiotr Jasiukajtis process3:
377*25c28e83SPiotr Jasiukajtis 		PROCESS(0);
378*25c28e83SPiotr Jasiukajtis 		PROCESS(1);
379*25c28e83SPiotr Jasiukajtis 		PROCESS(2);
380*25c28e83SPiotr Jasiukajtis 	}
381*25c28e83SPiotr Jasiukajtis }
382