xref: /illumos-gate/usr/src/lib/libm/common/C/sincos.c (revision ddc0e0b5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23  */
24 /*
25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  */
28 
29 #pragma weak __sincos = sincos
30 
31 /* INDENT OFF */
32 /*
33  * sincos(x,s,c)
34  * Accurate Table look-up algorithm by K.C. Ng, 2000.
35  *
36  * 1. Reduce x to x>0 by cos(-x)=cos(x), sin(-x)=-sin(x).
37  * 2. For 0<= x < 8, let i = (64*x chopped)-10. Let d = x - a[i], where
38  *    a[i] is a double that is close to (i+10.5)/64 (and hence |d|< 10.5/64)
39  *    and such that sin(a[i]) and cos(a[i]) is close to a double (with error
40  *    less than 2**-8 ulp). Then
41  *
42  *	cos(x) = cos(a[i]+d) = cos(a[i])cos(d) - sin(a[i])*sin(d)
43  *	       = TBL_cos_a[i]*(1+QQ1*d^2+QQ2*d^4) -
44  *			TBL_sin_a[i]*(d+PP1*d^3+PP2*d^5)
45  *	       = TBL_cos_a[i] + (TBL_cos_a[i]*d^2*(QQ1+QQ2*d^2) -
46  *			TBL_sin_a[i]*(d+PP1*d^3+PP2*d^5))
47  *
48  *      sin(x) = sin(a[i]+d) = sin(a[i])cos(d) + cos(a[i])*sin(d)
49  *             = TBL_sin_a[i]*(1+QQ1*d^2+QQ2*d^4) +
50  *			TBL_cos_a[i]*(d+PP1*d^3+PP2*d^5)
51  *             = TBL_sin_a[i] + (TBL_sin_a[i]*d^2*(QQ1+QQ2*d^2) +
52  *			TBL_cos_a[i]*(d+PP1*d^3+PP2*d^5))
53  *
54  *    Note: for x close to n*pi/2, special treatment is need for either
55  *    sin or cos:
56  *    i in [81, 100] (  pi/2 +-10.5/64 => tiny cos(x) = sin(pi/2-x)
57  *    i in [181,200] (  pi   +-10.5/64 => tiny sin(x) = sin(pi-x)
58  *    i in [282,301] (  3pi/2+-10.5/64 => tiny cos(x) = sin(x-3pi/2)
59  *    i in [382,401] (  2pi  +-10.5/64 => tiny sin(x) = sin(x-2pi)
60  *    i in [483,502] (  5pi/2+-10.5/64 => tiny cos(x) = sin(5pi/2-x)
61  *
62  * 3. For x >= 8.0, use kernel function __rem_pio2 to perform argument
63  *    reduction and call __k_sincos_ to compute sin and cos.
64  *
65  * kernel function:
66  *	__rem_pio2	... argument reduction routine
67  *	__k_sincos_	... sine and cosine function on [-pi/4,pi/4]
68  *
69  * Method.
70  *      Let S and C denote the sin and cos respectively on [-PI/4, +PI/4].
71  *      1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in
72  *	   [-pi/2 , +pi/2], and let n = k mod 4.
73  *	2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have
74  *
75  *          n        sin(x)      cos(x)        tan(x)
76  *     ----------------------------------------------------------
77  *	    0	       S	   C		 S/C
78  *	    1	       C	  -S		-C/S
79  *	    2	      -S	  -C		 S/C
80  *	    3	      -C	   S		-C/S
81  *     ----------------------------------------------------------
82  *
83  * Special cases:
84  *      Let trig be any of sin, cos, or tan.
85  *      trig(+-INF)  is NaN, with signals;
86  *      trig(NaN)    is that NaN;
87  *
88  * Accuracy:
89  *	TRIG(x) returns trig(x) nearly rounded (less than 1 ulp)
90  */
91 
92 #include "libm.h"
93 
94 static const double sc[] = {
95 /* ONE	= */  1.0,
96 /* NONE	= */ -1.0,
97 /*
98  * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008
99  */
100 /* PP1	= */ -0.166666666666316558867252052378889521480627858683055567,
101 /* PP2	= */   .008333315652997472323564894248466758248475374977974017927,
102 /*
103  * |(sin(x) - (x+p1*x^3+...+p4*x^9)|
104  * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125
105  * |                 x             |
106  */
107 /* P1  	= */ -1.666666666666629669805215138920301589656e-0001,
108 /* P2  	= */  8.333333332390951295683993455280336376663e-0003,
109 /* P3  	= */ -1.984126237997976692791551778230098403960e-0004,
110 /* P4  	= */  2.753403624854277237649987622848330351110e-0006,
111 /*
112  * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d)
113  */
114 /* QQ1	= */ -0.4999999999975492381842911981948418542742729,
115 /* QQ2	= */  0.041666542904352059294545209158357640398771740,
116 /* Q1  	= */ -0.5,
117 /* Q2  	= */  4.166666666500350703680945520860748617445e-0002,
118 /* Q3  	= */ -1.388888596436972210694266290577848696006e-0003,
119 /* Q4  	= */  2.478563078858589473679519517892953492192e-0005,
120 /* PIO2_H    = */  1.570796326794896557999,
121 /* PIO2_L    = */  6.123233995736765886130e-17,
122 /* PIO2_L0   = */  6.123233995727922165564e-17,
123 /* PIO2_L1   = */  8.843720566135701120255e-29,
124 /* PI_H      = */  3.1415926535897931159979634685,
125 /* PI_L      = */  1.22464679914735317722606593227425e-16,
126 /* PI_L0     = */  1.22464679914558443311283879205095e-16,
127 /* PI_L1     = */  1.768744113227140223300005233735517376e-28,
128 /* PI3O2_H   = */  4.712388980384689673997,
129 /* PI3O2_L   = */  1.836970198721029765839e-16,
130 /* PI3O2_L0  = */  1.836970198720396133587e-16,
131 /* PI3O2_L1  = */  6.336322524749201142226e-29,
132 /* PI2_H     = */  6.2831853071795862319959269370,
133 /* PI2_L     = */  2.44929359829470635445213186454850e-16,
134 /* PI2_L0    = */  2.44929359829116886622567758410190e-16,
135 /* PI2_L1    = */  3.537488226454280446600010467471034752e-28,
136 /* PI5O2_H   = */  7.853981633974482789995,
137 /* PI5O2_L   = */  3.061616997868382943065e-16,
138 /* PI5O2_L0  = */  3.061616997861941598865e-16,
139 /* PI5O2_L1  = */  6.441344200433640781982e-28,
140 };
141 /* INDENT ON */
142 
143 #define	ONE		sc[0]
144 #define	PP1		sc[2]
145 #define	PP2		sc[3]
146 #define	P1		sc[4]
147 #define	P2		sc[5]
148 #define	P3		sc[6]
149 #define	P4		sc[7]
150 #define	QQ1		sc[8]
151 #define	QQ2		sc[9]
152 #define	Q1		sc[10]
153 #define	Q2		sc[11]
154 #define	Q3		sc[12]
155 #define	Q4		sc[13]
156 #define	PIO2_H		sc[14]
157 #define	PIO2_L		sc[15]
158 #define	PIO2_L0		sc[16]
159 #define	PIO2_L1		sc[17]
160 #define	PI_H		sc[18]
161 #define	PI_L		sc[19]
162 #define	PI_L0		sc[20]
163 #define	PI_L1		sc[21]
164 #define	PI3O2_H		sc[22]
165 #define	PI3O2_L		sc[23]
166 #define	PI3O2_L0	sc[24]
167 #define	PI3O2_L1	sc[25]
168 #define	PI2_H		sc[26]
169 #define	PI2_L		sc[27]
170 #define	PI2_L0		sc[28]
171 #define	PI2_L1		sc[29]
172 #define	PI5O2_H		sc[30]
173 #define	PI5O2_L		sc[31]
174 #define	PI5O2_L0	sc[32]
175 #define	PI5O2_L1	sc[33]
176 #define	PoS(x, z)	((x * z) * (PP1 + z * PP2))
177 #define	PoL(x, z)	((x * z) * ((P1 + z * P2) + (z * z) * (P3 + z * P4)))
178 
179 extern const double _TBL_sincos[], _TBL_sincosx[];
180 
181 void
sincos(double x,double * s,double * c)182 sincos(double x, double *s, double *c) {
183 	double	z, y[2], w, t, v, p, q;
184 	int	i, j, n, hx, ix, lx;
185 
186 	hx = ((int *)&x)[HIWORD];
187 	lx = ((int *)&x)[LOWORD];
188 	ix = hx & ~0x80000000;
189 
190 	if (ix <= 0x3fc50000) {	/* |x| < 10.5/64 = 0.164062500 */
191 		if (ix < 0x3e400000) {	/* |x| < 2**-27 */
192 			if ((int)x == 0)
193 				*c = ONE;
194 			*s = x;
195 		} else {
196 			z = x * x;
197 			if (ix < 0x3f800000) {	/* |x| < 0.008 */
198 				q = z * (QQ1 + z * QQ2);
199 				p = PoS(x, z);
200 			} else {
201 				q = z * ((Q1 + z * Q2) + (z * z) *
202 				    (Q3 + z * Q4));
203 				p = PoL(x, z);
204 			}
205 			*c = ONE + q;
206 			*s = x + p;
207 		}
208 		return;
209 	}
210 
211 	n = ix >> 20;
212 	i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n);
213 	j = i - 10;
214 	if (n < 0x402) {	/* |x| < 8 */
215 		x = fabs(x);
216 		v = x - _TBL_sincosx[j];
217 		t = v * v;
218 		w = _TBL_sincos[(j<<1)];
219 		z = _TBL_sincos[(j<<1)+1];
220 		p = v + PoS(v, t);
221 		q = t * (QQ1 + t * QQ2);
222 		if ((((j - 81) ^ (j - 101)) |
223 		    ((j - 282) ^ (j - 302)) |
224 		    ((j - 483) ^ (j - 503)) |
225 		    ((j - 181) ^ (j - 201)) |
226 		    ((j - 382) ^ (j - 402))) < 0) {
227 			if (j <= 101) {
228 				/* near pi/2, cos(x) = sin(pi/2-x) */
229 				t = w * q + z * p;
230 				*s = (hx >= 0)? w + t : -w - t;
231 				p = PIO2_H - x;
232 				i = ix - 0x3ff921fb;
233 				x = p + PIO2_L;
234 				if ((i | ((lx - 0x54442D00) &
235 				    0xffffff00)) == 0) {
236 					/* very close to pi/2 */
237 					x = p + PIO2_L0;
238 					*c = x + PIO2_L1;
239 				} else {
240 					z = x * x;
241 					if (((ix - 0x3ff92000) >> 12) == 0) {
242 						/* |pi/2-x|<2**-8 */
243 						w = PIO2_L + PoS(x, z);
244 					} else {
245 						w = PIO2_L + PoL(x, z);
246 					}
247 					*c = p + w;
248 				}
249 			} else if (j <= 201) {
250 				/* near pi, sin(x) = sin(pi-x) */
251 				*c = z - (w * p - z * q);
252 				p = PI_H - x;
253 				i = ix - 0x400921fb;
254 				x = p + PI_L;
255 				if ((i | ((lx - 0x54442D00) &
256 				    0xffffff00)) == 0) {
257 					/* very close to pi */
258 					x = p + PI_L0;
259 					*s = (hx >= 0)? x + PI_L1 :
260 					    -(x + PI_L1);
261 				} else {
262 					z = x * x;
263 					if (((ix - 0x40092000) >> 11) == 0) {
264 						/* |pi-x|<2**-8 */
265 						w = PI_L + PoS(x, z);
266 					} else {
267 						w = PI_L + PoL(x, z);
268 					}
269 					*s = (hx >= 0)? p + w : -p - w;
270 				}
271 			} else if (j <= 302) {
272 				/* near 3/2pi, cos(x)=sin(x-3/2pi) */
273 				t = w * q + z * p;
274 				*s = (hx >= 0)? w + t : -w - t;
275 				p = x - PI3O2_H;
276 				i = ix - 0x4012D97C;
277 				x = p - PI3O2_L;
278 				if ((i | ((lx - 0x7f332100) &
279 				    0xffffff00)) == 0) {
280 					/* very close to 3/2pi */
281 					x = p - PI3O2_L0;
282 					*c = x - PI3O2_L1;
283 				} else {
284 					z = x * x;
285 					if (((ix - 0x4012D800) >> 9) == 0) {
286 						/* |3/2pi-x|<2**-8 */
287 						w = PoS(x, z) - PI3O2_L;
288 					} else {
289 						w = PoL(x, z) - PI3O2_L;
290 					}
291 					*c = p + w;
292 				}
293 			} else if (j <= 402) {
294 				/* near 2pi, sin(x)=sin(x-2pi) */
295 				*c = z - (w * p - z * q);
296 				p = x - PI2_H;
297 				i = ix - 0x401921fb;
298 				x = p - PI2_L;
299 				if ((i | ((lx - 0x54442D00) &
300 				    0xffffff00)) == 0) {
301 					/* very close to 2pi */
302 					x = p - PI2_L0;
303 					*s = (hx >= 0)? x - PI2_L1 :
304 					    -(x - PI2_L1);
305 				} else {
306 					z = x * x;
307 					if (((ix - 0x40192000) >> 10) == 0) {
308 						/* |x-2pi|<2**-8 */
309 						w = PoS(x, z) - PI2_L;
310 					} else {
311 						w = PoL(x, z) - PI2_L;
312 					}
313 					*s = (hx >= 0)? p + w : -p - w;
314 				}
315 			} else {
316 				/* near 5pi/2, cos(x) = sin(5pi/2-x) */
317 				t = w * q + z * p;
318 				*s = (hx >= 0)? w + t : -w - t;
319 				p = PI5O2_H - x;
320 				i = ix - 0x401F6A7A;
321 				x = p + PI5O2_L;
322 				if ((i | ((lx - 0x29553800) &
323 				    0xffffff00)) == 0) {
324 					/* very close to pi/2 */
325 					x = p + PI5O2_L0;
326 					*c = x + PI5O2_L1;
327 				} else {
328 					z = x * x;
329 					if (((ix - 0x401F6A7A) >> 7) == 0) {
330 						/* |5pi/2-x|<2**-8 */
331 						w = PI5O2_L + PoS(x, z);
332 					} else {
333 						w = PI5O2_L + PoL(x, z);
334 					}
335 					*c = p + w;
336 				}
337 			}
338 		} else {
339 			*c = z - (w * p - z * q);
340 			t = w * q + z * p;
341 			*s = (hx >= 0)? w + t : -w - t;
342 		}
343 		return;
344 	}
345 
346 	if (ix >= 0x7ff00000) {
347 		*s = *c = x / x;
348 		return;
349 	}
350 
351 	/* argument reduction needed */
352 	n = __rem_pio2(x, y);
353 	switch (n & 3) {
354 	case 0:
355 		*s = __k_sincos(y[0], y[1], c);
356 		break;
357 	case 1:
358 		*c = -__k_sincos(y[0], y[1], s);
359 		break;
360 	case 2:
361 		*s = -__k_sincos(y[0], y[1], c);
362 		*c = -*c;
363 		break;
364 	default:
365 		*c = __k_sincos(y[0], y[1], s);
366 		*s = -*s;
367 	}
368 }
369