1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2011, Richard Lowe
14  */
15 
16 #ifndef _FENV_INLINES_H
17 #define	_FENV_INLINES_H
18 
19 #ifdef __GNUC__
20 
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24 
25 #include <sys/types.h>
26 
27 #if defined(__x86)
28 
/*
 * Floating point Control Word and Status Word, packed into the single
 * 32-bit value exchanged with __fenv_getcwsw() and __fenv_setcwsw().
 *
 * NOTE: the member names are the reverse of the contents.
 * __fenv_getcwsw() stores the status word (fstsw) into "cw" and the
 * control word (fstcw) into "sw"; __fenv_setcwsw() reads the two halves
 * the same way.
 *
 * Definition should actually be shared with x86
 * (much of this 'amd64' code can be, in fact.)
 */
union fp_cwsw {
	uint32_t cwsw;		/* both 16-bit words viewed as one value */
	struct {
		uint16_t cw;	/* first half: filled by fstsw */
		uint16_t sw;	/* second half: filled by fstcw */
	} words;
};
41 
42 extern __GNU_INLINE void
__fenv_getcwsw(unsigned int * value)43 __fenv_getcwsw(unsigned int *value)
44 {
45 	union fp_cwsw *u = (union fp_cwsw *)value;
46 
47 	__asm__ __volatile__(
48 	    "fstsw %0\n\t"
49 	    "fstcw %1\n\t"
50 	    : "=m" (u->words.cw), "=m" (u->words.sw));
51 }
52 
53 extern __GNU_INLINE void
__fenv_setcwsw(const unsigned int * value)54 __fenv_setcwsw(const unsigned int *value)
55 {
56 	union fp_cwsw cwsw;
57 	short fenv[16];
58 
59 	cwsw.cwsw = *value;
60 
61 	__asm__ __volatile__(
62 	    "fstenv %0\n\t"
63 	    "movw   %4,%1\n\t"
64 	    "movw   %3,%2\n\t"
65 	    "fldenv %0\n\t"
66 	    "fwait\n\t"
67 	    : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
68 	    : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
69 	    /* For practical purposes, we clobber the whole FPU */
70 	    : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
71 	      "st(6)", "st(7)");
72 }
73 
74 extern __GNU_INLINE void
__fenv_getmxcsr(unsigned int * value)75 __fenv_getmxcsr(unsigned int *value)
76 {
77 	__asm__ __volatile__("stmxcsr %0" : "=m" (*value));
78 }
79 
80 extern __GNU_INLINE void
__fenv_setmxcsr(const unsigned int * value)81 __fenv_setmxcsr(const unsigned int *value)
82 {
83 	__asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
84 }
85 
86 extern __GNU_INLINE long double
f2xm1(long double x)87 f2xm1(long double x)
88 {
89 	long double ret;
90 
91 	__asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
92 	return (ret);
93 }
94 
95 extern __GNU_INLINE long double
fyl2x(long double y,long double x)96 fyl2x(long double y, long double x)
97 {
98 	long double ret;
99 
100 	__asm__ __volatile__("fyl2x"
101 	    : "=t" (ret)
102 	    : "0" (x), "u" (y)
103 	    : "st(1)", "cc");
104 	return (ret);
105 }
106 
107 extern __GNU_INLINE long double
fptan(long double x)108 fptan(long double x)
109 {
110 	/*
111 	 * fptan pushes 1.0 then the result on completion, so we want to pop
112 	 * the FP stack twice, so we need a dummy value into which to pop it.
113 	 */
114 	long double ret;
115 	long double dummy;
116 
117 	__asm__ __volatile__("fptan"
118 	    : "=t" (dummy), "=u" (ret)
119 	    : "0" (x)
120 	    : "cc");
121 	return (ret);
122 }
123 
124 extern __GNU_INLINE long double
fpatan(long double x,long double y)125 fpatan(long double x, long double y)
126 {
127 	long double ret;
128 
129 	__asm__ __volatile__("fpatan"
130 	    : "=t" (ret)
131 	    : "0" (y), "u" (x)
132 	    : "st(1)", "cc");
133 	return (ret);
134 }
135 
136 extern __GNU_INLINE long double
fxtract(long double x)137 fxtract(long double x)
138 {
139 	__asm__ __volatile__("fxtract" : "+t" (x) : : "cc");
140 	return (x);
141 }
142 
143 extern __GNU_INLINE long double
fprem1(long double idend,long double div)144 fprem1(long double idend, long double div)
145 {
146 	__asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
147 	return (div);
148 }
149 
150 extern __GNU_INLINE long double
fprem(long double idend,long double div)151 fprem(long double idend, long double div)
152 {
153 	__asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
154 	return (div);
155 }
156 
157 extern __GNU_INLINE long double
fyl2xp1(long double y,long double x)158 fyl2xp1(long double y, long double x)
159 {
160 	long double ret;
161 
162 	__asm__ __volatile__("fyl2xp1"
163 	    : "=t" (ret)
164 	    : "0" (x), "u" (y)
165 	    : "st(1)", "cc");
166 	return (ret);
167 }
168 
169 extern __GNU_INLINE long double
fsqrt(long double x)170 fsqrt(long double x)
171 {
172 	__asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
173 	return (x);
174 }
175 
176 extern __GNU_INLINE long double
fsincos(long double x)177 fsincos(long double x)
178 {
179 	long double dummy;
180 
181 	__asm__ __volatile__("fsincos" : "+t" (x), "=u" (dummy) : : "cc");
182 	return (x);
183 }
184 
185 extern __GNU_INLINE long double
frndint(long double x)186 frndint(long double x)
187 {
188 	__asm__ __volatile__("frndint" : "+t" (x) : : "cc");
189 	return (x);
190 }
191 
192 extern __GNU_INLINE long double
fscale(long double x,long double y)193 fscale(long double x, long double y)
194 {
195 	long double ret;
196 
197 	__asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
198 	return (ret);
199 }
200 
201 extern __GNU_INLINE long double
fsin(long double x)202 fsin(long double x)
203 {
204 	__asm__ __volatile__("fsin" : "+t" (x) : : "cc");
205 	return (x);
206 }
207 
208 extern __GNU_INLINE long double
fcos(long double x)209 fcos(long double x)
210 {
211 	__asm__ __volatile__("fcos" : "+t" (x) : : "cc");
212 	return (x);
213 }
214 
215 extern __GNU_INLINE void
sse_cmpeqss(float * f1,float * f2,int * i1)216 sse_cmpeqss(float *f1, float *f2, int *i1)
217 {
218 	__asm__ __volatile__(
219 	    "cmpeqss %2, %1\n\t"
220 	    "movss   %1, %0"
221 	    : "=m" (*i1), "+x" (*f1)
222 	    : "x" (*f2)
223 	    : "cc");
224 }
225 
226 extern __GNU_INLINE void
sse_cmpltss(float * f1,float * f2,int * i1)227 sse_cmpltss(float *f1, float *f2, int *i1)
228 {
229 	__asm__ __volatile__(
230 	    "cmpltss %2, %1\n\t"
231 	    "movss   %1, %0"
232 	    : "=m" (*i1), "+x" (*f1)
233 	    : "x" (*f2)
234 	    : "cc");
235 }
236 
237 extern __GNU_INLINE void
sse_cmpless(float * f1,float * f2,int * i1)238 sse_cmpless(float *f1, float *f2, int *i1)
239 {
240 	__asm__ __volatile__(
241 	    "cmpless %2, %1\n\t"
242 	    "movss   %1, %0"
243 	    : "=m" (*i1), "+x" (*f1)
244 	    : "x" (*f2)
245 	    : "cc");
246 }
247 
248 extern __GNU_INLINE void
sse_cmpunordss(float * f1,float * f2,int * i1)249 sse_cmpunordss(float *f1, float *f2, int *i1)
250 {
251 	__asm__ __volatile__(
252 	    "cmpunordss %2, %1\n\t"
253 	    "movss      %1, %0"
254 	    : "=m" (*i1), "+x" (*f1)
255 	    : "x" (*f2)
256 	    : "cc");
257 }
258 
259 extern __GNU_INLINE void
sse_minss(float * f1,float * f2,float * f3)260 sse_minss(float *f1, float *f2, float *f3)
261 {
262 	__asm__ __volatile__(
263 	    "minss %2, %1\n\t"
264 	    "movss %1, %0"
265 	    : "=m" (*f3), "+x" (*f1)
266 	    : "x" (*f2));
267 }
268 
269 extern __GNU_INLINE void
sse_maxss(float * f1,float * f2,float * f3)270 sse_maxss(float *f1, float *f2, float *f3)
271 {
272 	__asm__ __volatile__(
273 	    "maxss %2, %1\n\t"
274 	    "movss %1, %0"
275 	    : "=m" (*f3), "+x" (*f1)
276 	    : "x" (*f2));
277 }
278 
279 extern __GNU_INLINE void
sse_addss(float * f1,float * f2,float * f3)280 sse_addss(float *f1, float *f2, float *f3)
281 {
282 	__asm__ __volatile__(
283 	    "addss %2, %1\n\t"
284 	    "movss %1, %0"
285 	    : "=m" (*f3), "+x" (*f1)
286 	    : "x" (*f2));
287 }
288 
289 extern __GNU_INLINE void
sse_subss(float * f1,float * f2,float * f3)290 sse_subss(float *f1, float *f2, float *f3)
291 {
292 	__asm__ __volatile__(
293 	    "subss %2, %1\n\t"
294 	    "movss %1, %0"
295 	    : "=m" (*f3), "+x" (*f1)
296 	    : "x" (*f2));
297 }
298 
299 extern __GNU_INLINE void
sse_mulss(float * f1,float * f2,float * f3)300 sse_mulss(float *f1, float *f2, float *f3)
301 {
302 	__asm__ __volatile__(
303 	    "mulss %2, %1\n\t"
304 	    "movss %1, %0"
305 	    : "=m" (*f3), "+x" (*f1)
306 	    : "x" (*f2));
307 }
308 
309 extern __GNU_INLINE void
sse_divss(float * f1,float * f2,float * f3)310 sse_divss(float *f1, float *f2, float *f3)
311 {
312 	__asm__ __volatile__(
313 	    "divss %2, %1\n\t"
314 	    "movss %1, %0"
315 	    : "=m" (*f3), "+x" (*f1)
316 	    : "x" (*f2));
317 }
318 
319 extern __GNU_INLINE void
sse_sqrtss(float * f1,float * f2)320 sse_sqrtss(float *f1, float *f2)
321 {
322 	double tmp;
323 
324 	__asm__ __volatile__(
325 	    "sqrtss %2, %1\n\t"
326 	    "movss  %1, %0"
327 	    : "=m" (*f2), "=x" (tmp)
328 	    : "m" (*f1));
329 }
330 
331 extern __GNU_INLINE void
sse_ucomiss(float * f1,float * f2)332 sse_ucomiss(float *f1, float *f2)
333 {
334 	__asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));
335 
336 }
337 
338 extern __GNU_INLINE void
sse_comiss(float * f1,float * f2)339 sse_comiss(float *f1, float *f2)
340 {
341 	__asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
342 }
343 
344 extern __GNU_INLINE void
sse_cvtss2sd(float * f1,double * d1)345 sse_cvtss2sd(float *f1, double *d1)
346 {
347 	double tmp;
348 
349 	__asm__ __volatile__(
350 	    "cvtss2sd %2, %1\n\t"
351 	    "movsd    %1, %0"
352 	    : "=m" (*d1), "=x" (tmp)
353 	    : "m" (*f1));
354 }
355 
356 extern __GNU_INLINE void
sse_cvtsi2ss(int * i1,float * f1)357 sse_cvtsi2ss(int *i1, float *f1)
358 {
359 	double tmp;
360 
361 	__asm__ __volatile__(
362 	    "cvtsi2ss %2, %1\n\t"
363 	    "movss    %1, %0"
364 	    : "=m" (*f1), "=x" (tmp)
365 	    : "m" (*i1));
366 }
367 
368 extern __GNU_INLINE void
sse_cvttss2si(float * f1,int * i1)369 sse_cvttss2si(float *f1, int *i1)
370 {
371 	int tmp;
372 
373 	__asm__ __volatile__(
374 	    "cvttss2si %2, %1\n\t"
375 	    "movl      %1, %0"
376 	    : "=m" (*i1), "=r" (tmp)
377 	    : "m" (*f1));
378 }
379 
380 extern __GNU_INLINE void
sse_cvtss2si(float * f1,int * i1)381 sse_cvtss2si(float *f1, int *i1)
382 {
383 	int tmp;
384 
385 	__asm__ __volatile__(
386 	    "cvtss2si %2, %1\n\t"
387 	    "movl     %1, %0"
388 	    : "=m" (*i1), "=r" (tmp)
389 	    : "m" (*f1));
390 }
391 
392 #if defined(__amd64)
393 extern __GNU_INLINE void
sse_cvtsi2ssq(long long * ll1,float * f1)394 sse_cvtsi2ssq(long long *ll1, float *f1)
395 {
396 	double tmp;
397 
398 	__asm__ __volatile__(
399 	    "cvtsi2ssq %2, %1\n\t"
400 	    "movss     %1, %0"
401 	    : "=m" (*f1), "=x" (tmp)
402 	    : "m" (*ll1));
403 }
404 
405 extern __GNU_INLINE void
sse_cvttss2siq(float * f1,long long * ll1)406 sse_cvttss2siq(float *f1, long long *ll1)
407 {
408 	uint64_t tmp;
409 
410 	__asm__ __volatile__(
411 	    "cvttss2siq %2, %1\n\t"
412 	    "movq       %1, %0"
413 	    : "=m" (*ll1), "=r" (tmp)
414 	    : "m" (*f1));
415 }
416 
417 extern __GNU_INLINE void
sse_cvtss2siq(float * f1,long long * ll1)418 sse_cvtss2siq(float *f1, long long *ll1)
419 {
420 	uint64_t tmp;
421 
422 	__asm__ __volatile__(
423 	    "cvtss2siq %2, %1\n\t"
424 	    "movq      %1, %0"
425 	    : "=m" (*ll1), "=r" (tmp)
426 	    : "m" (*f1));
427 }
428 
429 #endif
430 
431 extern __GNU_INLINE void
sse_cmpeqsd(double * d1,double * d2,long long * ll1)432 sse_cmpeqsd(double *d1, double *d2, long long *ll1)
433 {
434 	__asm__ __volatile__(
435 	    "cmpeqsd %2,%1\n\t"
436 	    "movsd   %1,%0"
437 	    : "=m" (*ll1), "+x" (*d1)
438 	    : "x" (*d2));
439 }
440 
441 extern __GNU_INLINE void
sse_cmpltsd(double * d1,double * d2,long long * ll1)442 sse_cmpltsd(double *d1, double *d2, long long *ll1)
443 {
444 	__asm__ __volatile__(
445 	    "cmpltsd %2,%1\n\t"
446 	    "movsd   %1,%0"
447 	    : "=m" (*ll1), "+x" (*d1)
448 	    : "x" (*d2));
449 }
450 
451 extern __GNU_INLINE void
sse_cmplesd(double * d1,double * d2,long long * ll1)452 sse_cmplesd(double *d1, double *d2, long long *ll1)
453 {
454 	__asm__ __volatile__(
455 	    "cmplesd %2,%1\n\t"
456 	    "movsd   %1,%0"
457 	    : "=m" (*ll1), "+x" (*d1)
458 	    : "x" (*d2));
459 }
460 
461 extern __GNU_INLINE void
sse_cmpunordsd(double * d1,double * d2,long long * ll1)462 sse_cmpunordsd(double *d1, double *d2, long long *ll1)
463 {
464 	__asm__ __volatile__(
465 	    "cmpunordsd %2,%1\n\t"
466 	    "movsd      %1,%0"
467 	    : "=m" (*ll1), "+x" (*d1)
468 	    : "x" (*d2));
469 }
470 
471 
472 extern __GNU_INLINE void
sse_minsd(double * d1,double * d2,double * d3)473 sse_minsd(double *d1, double *d2, double *d3)
474 {
475 	__asm__ __volatile__(
476 	    "minsd %2,%1\n\t"
477 	    "movsd %1,%0"
478 	    : "=m" (*d3), "+x" (*d1)
479 	    : "x" (*d2));
480 }
481 
482 extern __GNU_INLINE void
sse_maxsd(double * d1,double * d2,double * d3)483 sse_maxsd(double *d1, double *d2, double *d3)
484 {
485 	__asm__ __volatile__(
486 	    "maxsd %2,%1\n\t"
487 	    "movsd %1,%0"
488 	    : "=m" (*d3), "+x" (*d1)
489 	    : "x" (*d2));
490 }
491 
492 extern __GNU_INLINE void
sse_addsd(double * d1,double * d2,double * d3)493 sse_addsd(double *d1, double *d2, double *d3)
494 {
495 	__asm__ __volatile__(
496 	    "addsd %2,%1\n\t"
497 	    "movsd %1,%0"
498 	    : "=m" (*d3), "+x" (*d1)
499 	    : "x" (*d2));
500 }
501 
502 extern __GNU_INLINE void
sse_subsd(double * d1,double * d2,double * d3)503 sse_subsd(double *d1, double *d2, double *d3)
504 {
505 	__asm__ __volatile__(
506 	    "subsd %2,%1\n\t"
507 	    "movsd %1,%0"
508 	    : "=m" (*d3), "+x" (*d1)
509 	    : "x" (*d2));
510 }
511 
512 extern __GNU_INLINE void
sse_mulsd(double * d1,double * d2,double * d3)513 sse_mulsd(double *d1, double *d2, double *d3)
514 {
515 	__asm__ __volatile__(
516 	    "mulsd %2,%1\n\t"
517 	    "movsd %1,%0"
518 	    : "=m" (*d3), "+x" (*d1)
519 	    : "x" (*d2));
520 }
521 
522 extern __GNU_INLINE void
sse_divsd(double * d1,double * d2,double * d3)523 sse_divsd(double *d1, double *d2, double *d3)
524 {
525 	__asm__ __volatile__(
526 	    "divsd %2,%1\n\t"
527 	    "movsd %1,%0"
528 	    : "=m" (*d3), "+x" (*d1)
529 	    : "x" (*d2));
530 }
531 
532 extern __GNU_INLINE void
sse_sqrtsd(double * d1,double * d2)533 sse_sqrtsd(double *d1, double *d2)
534 {
535 	double tmp;
536 
537 	__asm__ __volatile__(
538 	    "sqrtsd %2, %1\n\t"
539 	    "movsd %1, %0"
540 	    : "=m" (*d2), "=x" (tmp)
541 	    : "m" (*d1));
542 }
543 
544 extern __GNU_INLINE void
sse_ucomisd(double * d1,double * d2)545 sse_ucomisd(double *d1, double *d2)
546 {
547 	__asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
548 }
549 
550 extern __GNU_INLINE void
sse_comisd(double * d1,double * d2)551 sse_comisd(double *d1, double *d2)
552 {
553 	__asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
554 }
555 
556 extern __GNU_INLINE void
sse_cvtsd2ss(double * d1,float * f1)557 sse_cvtsd2ss(double *d1, float *f1)
558 {
559 	double tmp;
560 
561 	__asm__ __volatile__(
562 	    "cvtsd2ss %2,%1\n\t"
563 	    "movss    %1,%0"
564 	    : "=m" (*f1), "=x" (tmp)
565 	    : "m" (*d1));
566 }
567 
568 extern __GNU_INLINE void
sse_cvtsi2sd(int * i1,double * d1)569 sse_cvtsi2sd(int *i1, double *d1)
570 {
571 	double tmp;
572 	__asm__ __volatile__(
573 	    "cvtsi2sd %2,%1\n\t"
574 	    "movsd    %1,%0"
575 	    : "=m" (*d1), "=x" (tmp)
576 	    : "m" (*i1));
577 }
578 
579 extern __GNU_INLINE void
sse_cvttsd2si(double * d1,int * i1)580 sse_cvttsd2si(double *d1, int *i1)
581 {
582 	int tmp;
583 
584 	__asm__ __volatile__(
585 	    "cvttsd2si %2,%1\n\t"
586 	    "movl      %1,%0"
587 	    : "=m" (*i1), "=r" (tmp)
588 	    : "m" (*d1));
589 }
590 
591 extern __GNU_INLINE void
sse_cvtsd2si(double * d1,int * i1)592 sse_cvtsd2si(double *d1, int *i1)
593 {
594 	int tmp;
595 
596 	__asm__ __volatile__(
597 	    "cvtsd2si %2,%1\n\t"
598 	    "movl     %1,%0"
599 	    : "=m" (*i1), "=r" (tmp)
600 	    : "m" (*d1));
601 }
602 
603 #if defined(__amd64)
604 extern __GNU_INLINE void
sse_cvtsi2sdq(long long * ll1,double * d1)605 sse_cvtsi2sdq(long long *ll1, double *d1)
606 {
607 	double tmp;
608 
609 	__asm__ __volatile__(
610 	    "cvtsi2sdq %2,%1\n\t"
611 	    "movsd     %1,%0"
612 	    : "=m" (*d1), "=x" (tmp)
613 	    : "m" (*ll1));
614 }
615 
616 extern __GNU_INLINE void
sse_cvttsd2siq(double * d1,long long * ll1)617 sse_cvttsd2siq(double *d1, long long *ll1)
618 {
619 	uint64_t tmp;
620 
621 	__asm__ __volatile__(
622 	    "cvttsd2siq %2,%1\n\t"
623 	    "movq       %1,%0"
624 	    : "=m" (*ll1), "=r" (tmp)
625 	    : "m" (*d1));
626 }
627 
628 extern __GNU_INLINE void
sse_cvtsd2siq(double * d1,long long * ll1)629 sse_cvtsd2siq(double *d1, long long *ll1)
630 {
631 	uint64_t tmp;
632 
633 	__asm__ __volatile__(
634 	    "cvtsd2siq %2,%1\n\t"
635 	    "movq      %1,%0"
636 	    : "=m" (*ll1), "=r" (tmp)
637 	    : "m" (*d1));
638 }
639 #endif
640 
641 #elif defined(__sparc)
642 extern __GNU_INLINE void
643 __fenv_getfsr(unsigned long *l)
644 {
645 	__asm__ __volatile__(
646 #if defined(__sparcv9)
647 	    "stx %%fsr,%0\n\t"
648 #else
649 	    "st  %%fsr,%0\n\t"
650 #endif
651 	    : "=m" (*l));
652 }
653 
654 extern __GNU_INLINE void
655 __fenv_setfsr(const unsigned long *l)
656 {
657 	__asm__ __volatile__(
658 #if defined(__sparcv9)
659 	    "ldx %0,%%fsr\n\t"
660 #else
661 	    "ld %0,%%fsr\n\t"
662 #endif
663 	    : : "m" (*l) : "cc");
664 }
665 
666 extern __GNU_INLINE void
667 __fenv_getfsr32(unsigned int *l)
668 {
669 	__asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
670 }
671 
672 extern __GNU_INLINE void
673 __fenv_setfsr32(const unsigned int *l)
674 {
675 	__asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
676 }
677 #else
678 #error "GCC FENV inlines not implemented for this platform"
679 #endif
680 
681 #ifdef __cplusplus
682 }
683 #endif
684 
685 #endif  /* __GNUC__ */
686 
687 #endif /* _FENV_INLINES_H */
688