1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright 2011, Richard Lowe
29  */
30 
31 /* Functions in this file are duplicated in locallibm.il.  Keep them in sync */
32 
33 #ifndef _LIBM_INLINES_H
34 #define	_LIBM_INLINES_H
35 
36 #ifdef __GNUC__
37 
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41 
42 #include <sys/types.h>
43 #include <sys/ieeefp.h>
44 
45 #define	_LO_WORD(x)	((uint32_t *)&x)[0]
46 #define	_HI_WORD(x)	((uint32_t *)&x)[1]
47 #define	_HIER_WORD(x)	((uint32_t *)&x)[2]
48 
49 extern __inline__ double
50 __inline_sqrt(double a)
51 {
52 	double ret;
53 
54 	__asm__ __volatile__("fsqrt\n\t" : "=t" (ret) : "0" (a) : "cc");
55 	return (ret);
56 }
57 
58 extern __inline__ double
59 __ieee754_sqrt(double a)
60 {
61 	return (__inline_sqrt(a));
62 }
63 
64 extern __inline__ float
65 __inline_sqrtf(float a)
66 {
67 	float ret;
68 
69 	__asm__ __volatile__("fsqrt\n\t" : "=t" (ret) : "0" (a) : "cc");
70 	return (ret);
71 }
72 
73 extern __inline__ double
74 __inline_rint(double a)
75 {
76 	__asm__ __volatile__(
77 	    "andl $0x7fffffff,%1\n\t"
78 	    "cmpl $0x43300000,%1\n\t"
79 	    "jae  1f\n\t"
80 	    "frndint\n\t"
81 	    "1: fwait\n\t"
82 	    : "+t" (a), "+&r" (_HI_WORD(a))
83 	    :
84 	    : "cc");
85 
86 	return (a);
87 }
88 
89 /*
90  * 00 - 24 bits
91  * 01 - reserved
92  * 10 - 53 bits
93  * 11 - 64 bits
94  */
95 extern __inline__ int
96 __swapRP(int i)
97 {
98 	int ret;
99 	uint16_t cw;
100 
101 	__asm__ __volatile__("fstcw %0\n\t" : "=m" (cw));
102 
103 	ret = (cw >> 8) & 0x3;
104 	cw = (cw & 0xfcff) | ((i & 0x3) << 8);
105 
106 	__asm__ __volatile__("fldcw %0\n\t" : : "m" (cw));
107 
108 	return (ret);
109 }
110 
111 /*
112  * 00 - Round to nearest, with even preferred
113  * 01 - Round down
114  * 10 - Round up
115  * 11 - Chop
116  */
117 extern __inline__ enum fp_direction_type
118 __swap87RD(enum fp_direction_type i)
119 {
120 	int ret;
121 	uint16_t cw;
122 
123 	__asm__ __volatile__("fstcw %0\n\t" : "=m" (cw));
124 
125 	ret = (cw >> 10) & 0x3;
126 	cw = (cw & 0xf3ff) | ((i & 0x3) << 10);
127 
128 	__asm__ __volatile__("fldcw %0\n\t" : : "m" (cw));
129 
130 	return (ret);
131 }
132 
133 extern __inline__ double
134 ceil(double d)
135 {
136 	/*
137 	 * Let's set a Rounding Control (RC) bits from x87 FPU Control Word
138 	 * to fp_positive and save old bits in rd.
139 	 */
140 	short rd = __swap87RD(fp_positive);
141 
142 	/*
143 	 * The FRNDINT instruction returns a floating-point value that is the
144 	 * integral value closest to the source value in the direction of the
145 	 * rounding mode specified in the RC field of the x87 FPU control word.
146 	 *
147 	 * Rounds the source value in the ST(0) register to the nearest
148 	 * integral value, depending on the current rounding mode
149 	 * (setting of the RC field of the FPU control word),
150 	 * and stores the result in ST(0).
151 	 */
152 	__asm__ __volatile__("frndint" : "+t" (d) : : "cc");
153 
154 	/* restore old RC bits */
155 	__swap87RD(rd);
156 
157 	return (d);
158 }
159 
160 extern __inline__ double
161 copysign(double d1, double d2)
162 {
163 	__asm__ __volatile__(
164 	    "andl $0x7fffffff,%0\n\t"	/* %0 <-- hi_32(abs(d)) */
165 	    "andl $0x80000000,%1\n\t"	/* %1[31] <-- sign_bit(d2) */
166 	    "orl  %1,%0\n\t"		/* %0 <-- hi_32(copysign(x,y)) */
167 	    : "+&r" (_HI_WORD(d1)), "+r" (_HI_WORD(d2))
168 	    :
169 	    : "cc");
170 
171 	return (d1);
172 }
173 
174 extern __inline__ double
175 fabs(double d)
176 {
177 	__asm__ __volatile__("fabs\n\t" : "+t" (d) : : "cc");
178 	return (d);
179 }
180 
181 extern __inline__ float
182 fabsf(float d)
183 {
184 	__asm__ __volatile__("fabs\n\t" : "+t" (d) : : "cc");
185 	return (d);
186 }
187 
188 extern __inline__ long double
189 fabsl(long double d)
190 {
191 	__asm__ __volatile__("fabs\n\t" : "+t" (d) : : "cc");
192 	return (d);
193 }
194 
195 extern __inline__ int
196 finite(double d)
197 {
198 	int ret = _HI_WORD(d);
199 
200 	__asm__ __volatile__(
201 	    "notl %0\n\t"
202 	    "andl $0x7ff00000,%0\n\t"
203 	    "negl %0\n\t"
204 	    "shrl $31,%0\n\t"
205 	    : "+r" (ret)
206 	    :
207 	    : "cc");
208 	return (ret);
209 }
210 
211 extern __inline__ double
212 floor(double d)
213 {
214 	short rd = __swap87RD(fp_negative);
215 
216 	__asm__ __volatile__("frndint" : "+t" (d), "+r" (rd) : : "cc");
217 	__swap87RD(rd);
218 
219 	return (d);
220 }
221 
222 /*
223  *      branchless __isnan
224  *      ((0x7ff00000-[((lx|-lx)>>31)&1]|ahx)>>31)&1 = 1 iff x is NaN
225  */
226 extern __inline__ int
227 isnan(double d)
228 {
229 	int ret;
230 
231 	__asm__ __volatile__(
232 		"movl %1,%%ecx\n\t"
233 		"negl %%ecx\n\t"			/* ecx <-- -lo_32(x) */
234 		"orl  %%ecx,%1\n\t"
235 		"shrl $31,%1\n\t"			/* 1 iff lx != 0 */
236 		"andl $0x7fffffff,%2\n\t"	/* ecx <-- hi_32(abs(x)) */
237 		"orl  %2,%1\n\t"
238 		"subl $0x7ff00000,%1\n\t"
239 		"negl %1\n\t"
240 		"shrl $31,%1\n\t"
241 		: "=r" (ret)
242 		: "0" (_HI_WORD(d)), "r" (_LO_WORD(d))
243 		: "ecx");
244 
245 	return (ret);
246 }
247 
248 extern __inline__ int
249 isnanf(float f)
250 {
251 	__asm__ __volatile__(
252 	    "andl $0x7fffffff,%0\n\t"
253 	    "negl %0\n\t"
254 	    "addl $0x7f800000,%0\n\t"
255 	    "shrl $31,%0\n\t"
256 	    : "+r" (f)
257 	    :
258 	    : "cc");
259 
260 	return (f);
261 }
262 
263 extern __inline__ double
264 rint(double a) {
265     return (__inline_rint(a));
266 }
267 
268 extern __inline__ double
269 scalbn(double d, int n)
270 {
271 	double dummy;
272 
273 	__asm__ __volatile__(
274 	    "fildl %2\n\t"	/* Convert N to extended */
275 	    "fxch\n\t"
276 	    "fscale\n\t"
277 	    : "+t" (d), "=u" (dummy)
278 	    : "m" (n)
279 	    : "cc");
280 
281 	return (d);
282 }
283 
284 extern __inline__ int
285 signbit(double d)
286 {
287 	return (_HI_WORD(d) >> 31);
288 }
289 
290 extern __inline__ int
291 signbitf(float f)
292 {
293 	return ((*(uint32_t *)&f) >> 31);
294 }
295 
296 extern __inline__ double
297 sqrt(double d)
298 {
299 	return (__inline_sqrt(d));
300 }
301 
302 extern __inline__ float
303 sqrtf(float f)
304 {
305 	return (__inline_sqrtf(f));
306 }
307 
308 extern __inline__ long double
309 sqrtl(long double ld)
310 {
311 	__asm__ __volatile__("fsqrt" : "+t" (ld) : : "cc");
312 	return (ld);
313 }
314 
315 extern __inline__ int
316 isnanl(long double ld)
317 {
318 	int ret = _HIER_WORD(ld);
319 
320 	__asm__ __volatile__(
321 	    "andl  $0x00007fff,%0\n\t"
322 	    "jz	   1f\n\t"		/* jump if exp is all 0 */
323 	    "xorl  $0x00007fff,%0\n\t"
324 	    "jz	   2f\n\t"		/* jump if exp is all 1 */
325 	    "testl $0x80000000,%1\n\t"
326 	    "jz	   3f\n\t"		/* jump if leading bit is 0 */
327 	    "xorl  %0,%0\n\t"
328 	    "jmp   1f\n\t"
329 	    "2:\n\t"			/* note that %0 = 0 from before */
330 	    "cmpl  $0x80000000,%1\n\t"	/* what is first half of significand? */
331 	    "jnz   3f\n\t"		/* jump if not equal to 0x80000000 */
332 	    "testl $0xffffffff,%2\n\t"	/* is second half of significand 0? */
333 	    "jnz   3f\n\t"		/* jump if not equal to 0 */
334 	    "jmp   1f\n\t"
335 	    "3:\n\t"
336 	    "movl  $1,%0\n\t"
337 	    "1:\n\t"
338 	    : "+&r" (ret)
339 	    : "r" (_HI_WORD(ld)), "r" (_LO_WORD(ld))
340 	    : "cc");
341 
342 	return (ret);
343 }
344 
345 #ifdef __cplusplus
346 }
347 #endif
348 
349 #endif  /* __GNUC__ */
350 
351 #endif /* _LIBM_INLINES_H */
352