1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29/*
30 * __vsincosf: single precision vector sincos
31 *
32 * Algorithm:
33 *
34 * For |x| < pi/4, approximate sin(x) by a polynomial x+x*z*(S0+
35 * z*(S1+z*S2)) and cos(x) by a polynomial 1+z*(-1/2+z*(C0+z*(C1+
36 * z*C2))), where z = x*x, all evaluated in double precision.
37 *
38 * Accuracy:
39 *
40 * The largest error is less than 0.6 ulps.
41 */
42
43#include <sys/isa_defs.h>
44
45#ifdef _LITTLE_ENDIAN
46#define	HI(x)	*(1+(int *)&x)
47#define	LO(x)	*(unsigned *)&x
48#else
49#define	HI(x)	*(int *)&x
50#define	LO(x)	*(1+(unsigned *)&x)
51#endif
52
53#ifdef __RESTRICT
54#define	restrict _Restrict
55#else
56#define	restrict
57#endif
58
59extern int __vlibm_rem_pio2m(double *, double *, int, int, int);
60
61static const double C[] = {
62	-1.66666552424430847168e-01,	/* 2^ -3 * -1.5555460000000 */
63	8.33219196647405624390e-03,	/* 2^ -7 *  1.11077E0000000 */
64	-1.95187909412197768688e-04,	/* 2^-13 * -1.9956B60000000 */
65	1.0,
66	-0.5,
67	4.16666455566883087158e-02,	/* 2^ -5 *  1.55554A0000000 */
68	-1.38873036485165357590e-03,	/* 2^-10 * -1.6C0C1E0000000 */
69	2.44309903791872784495e-05,	/* 2^-16 *  1.99E24E0000000 */
70	0.636619772367581343075535,	/* 2^ -1  * 1.45F306DC9C883 */
71	6755399441055744.0,		/* 2^ 52  * 1.8000000000000 */
72	1.570796326734125614166,	/* 2^  0  * 1.921FB54400000 */
73	6.077100506506192601475e-11,	/* 2^-34  * 1.0B4611A626331 */
74};
75
76#define	S0	C[0]
77#define	S1	C[1]
78#define	S2	C[2]
79#define	one	C[3]
80#define	mhalf	C[4]
81#define	C0	C[5]
82#define	C1	C[6]
83#define	C2	C[7]
84#define	invpio2	C[8]
85#define	c3two51	C[9]
86#define	pio2_1  C[10]
87#define	pio2_t	C[11]
88
89#define	PREPROCESS(N, sindex, cindex, label)				\
90	hx = *(int *)x;							\
91	ix = hx & 0x7fffffff;						\
92	t = *x;								\
93	x += stridex;							\
94	if (ix <= 0x3f490fdb) { /* |x| < pi/4 */			\
95		if (ix == 0) {						\
96			s[sindex] = t;					\
97			c[cindex] = one;				\
98			goto label;					\
99		}							\
100		y##N = (double)t;					\
101		n##N = 0;						\
102	} else if (ix <= 0x49c90fdb) { /* |x| < 2^19*pi */		\
103		y##N = (double)t;					\
104		medium = 1;						\
105	} else {							\
106		if (ix >= 0x7f800000) { /* inf or nan */		\
107			s[sindex] = c[cindex] = t / t;			\
108			goto label;					\
109		}							\
110		z##N = y##N = (double)t;				\
111		hx = HI(y##N);						\
112		n##N = ((hx >> 20) & 0x7ff) - 1046;			\
113		HI(z##N) = (hx & 0xfffff) | 0x41600000;			\
114		n##N = __vlibm_rem_pio2m(&z##N, &y##N, n##N, 1, 0);	\
115		if (hx < 0) {						\
116			y##N = -y##N;					\
117			n##N = -n##N;					\
118		}							\
119		z##N = y##N * y##N;					\
120		f##N = (float)(y##N + y##N * z##N * (S0 + z##N *	\
121		    (S1 + z##N * S2)));					\
122		g##N = (float)(one + z##N * (mhalf + z##N * (C0 +	\
123		    z##N * (C1 + z##N * C2))));				\
124		if (n##N & 2) {						\
125			f##N = -f##N;					\
126			g##N = -g##N;					\
127		}							\
128		if (n##N & 1) {						\
129			s[sindex] = g##N;				\
130			c[cindex] = -f##N;				\
131		} else {						\
132			s[sindex] = f##N;				\
133			c[cindex] = g##N;				\
134		}							\
135		goto label;						\
136	}
137
138#define	PROCESS(N)							\
139	if (medium) {							\
140		z##N = y##N * invpio2 + c3two51;			\
141		n##N = LO(z##N);					\
142		z##N -= c3two51;					\
143		y##N = (y##N - z##N * pio2_1) - z##N * pio2_t;		\
144	}								\
145	z##N = y##N * y##N;						\
146	f##N = (float)(y##N + y##N * z##N * (S0 + z##N * (S1 + z##N * S2)));\
147	g##N = (float)(one + z##N * (mhalf + z##N * (C0 + z##N *	\
148	    (C1 + z##N * C2))));					\
149	if (n##N & 2) {							\
150		f##N = -f##N;						\
151		g##N = -g##N;						\
152	}								\
153	if (n##N & 1) {							\
154		*s = g##N;						\
155		*c = -f##N;						\
156	} else {							\
157		*s = f##N;						\
158		*c = g##N;						\
159	}								\
160	s += strides;							\
161	c += stridec
162
163void
164__vsincosf(int n, float *restrict x, int stridex,
165    float *restrict s, int strides, float *restrict c, int stridec)
166{
167	double		y0, y1, y2, y3;
168	double		z0, z1, z2, z3;
169	float		f0, f1, f2, f3, t;
170	float		g0, g1, g2, g3;
171	int		n0 = 0, n1 = 0, n2 = 0, n3, hx, ix, medium;
172
173	s -= strides;
174	c -= stridec;
175
176	for (;;) {
177begin:
178		s += strides;
179		c += stridec;
180
181		if (--n < 0)
182			break;
183
184		medium = 0;
185		PREPROCESS(0, 0, 0, begin);
186
187		if (--n < 0)
188			goto process1;
189
190		PREPROCESS(1, strides, stridec, process1);
191
192		if (--n < 0)
193			goto process2;
194
195		PREPROCESS(2, (strides << 1), (stridec << 1), process2);
196
197		if (--n < 0)
198			goto process3;
199
200		PREPROCESS(3, (strides << 1) + strides,
201		    (stridec << 1) + stridec, process3);
202
203		if (medium) {
204			z0 = y0 * invpio2 + c3two51;
205			z1 = y1 * invpio2 + c3two51;
206			z2 = y2 * invpio2 + c3two51;
207			z3 = y3 * invpio2 + c3two51;
208
209			n0 = LO(z0);
210			n1 = LO(z1);
211			n2 = LO(z2);
212			n3 = LO(z3);
213
214			z0 -= c3two51;
215			z1 -= c3two51;
216			z2 -= c3two51;
217			z3 -= c3two51;
218
219			y0 = (y0 - z0 * pio2_1) - z0 * pio2_t;
220			y1 = (y1 - z1 * pio2_1) - z1 * pio2_t;
221			y2 = (y2 - z2 * pio2_1) - z2 * pio2_t;
222			y3 = (y3 - z3 * pio2_1) - z3 * pio2_t;
223		}
224
225		z0 = y0 * y0;
226		z1 = y1 * y1;
227		z2 = y2 * y2;
228		z3 = y3 * y3;
229
230		f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
231		f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
232		f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
233		f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
234
235		g0 = (float)(one + z0 * (mhalf + z0 * (C0 + z0 *
236		    (C1 + z0 * C2))));
237		g1 = (float)(one + z1 * (mhalf + z1 * (C0 + z1 *
238		    (C1 + z1 * C2))));
239		g2 = (float)(one + z2 * (mhalf + z2 * (C0 + z2 *
240		    (C1 + z2 * C2))));
241		g3 = (float)(one + z3 * (mhalf + z3 * (C0 + z3 *
242		    (C1 + z3 * C2))));
243
244		if (n0 & 2) {
245			f0 = -f0;
246			g0 = -g0;
247		}
248		if (n1 & 2) {
249			f1 = -f1;
250			g1 = -g1;
251		}
252		if (n2 & 2) {
253			f2 = -f2;
254			g2 = -g2;
255		}
256		if (n3 & 2) {
257			f3 = -f3;
258			g3 = -g3;
259		}
260
261		if (n0 & 1) {
262			*s = g0;
263			*c = -f0;
264		} else {
265			*s = f0;
266			*c = g0;
267		}
268		s += strides;
269		c += stridec;
270
271		if (n1 & 1) {
272			*s = g1;
273			*c = -f1;
274		} else {
275			*s = f1;
276			*c = g1;
277		}
278		s += strides;
279		c += stridec;
280
281		if (n2 & 1) {
282			*s = g2;
283			*c = -f2;
284		} else {
285			*s = f2;
286			*c = g2;
287		}
288		s += strides;
289		c += stridec;
290
291		if (n3 & 1) {
292			*s = g3;
293			*c = -f3;
294		} else {
295			*s = f3;
296			*c = g3;
297		}
298		continue;
299
300process1:
301		PROCESS(0);
302		continue;
303
304process2:
305		PROCESS(0);
306		PROCESS(1);
307		continue;
308
309process3:
310		PROCESS(0);
311		PROCESS(1);
312		PROCESS(2);
313	}
314}
315