xref: /illumos-gate/usr/src/lib/libmvec/common/__vhypotf.c (revision d626ebd403efade415c36352e0c012082508b3ca)
125c28e83SPiotr Jasiukajtis /*
225c28e83SPiotr Jasiukajtis  * CDDL HEADER START
325c28e83SPiotr Jasiukajtis  *
425c28e83SPiotr Jasiukajtis  * The contents of this file are subject to the terms of the
525c28e83SPiotr Jasiukajtis  * Common Development and Distribution License (the "License").
625c28e83SPiotr Jasiukajtis  * You may not use this file except in compliance with the License.
725c28e83SPiotr Jasiukajtis  *
825c28e83SPiotr Jasiukajtis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
925c28e83SPiotr Jasiukajtis  * or http://www.opensolaris.org/os/licensing.
1025c28e83SPiotr Jasiukajtis  * See the License for the specific language governing permissions
1125c28e83SPiotr Jasiukajtis  * and limitations under the License.
1225c28e83SPiotr Jasiukajtis  *
1325c28e83SPiotr Jasiukajtis  * When distributing Covered Code, include this CDDL HEADER in each
1425c28e83SPiotr Jasiukajtis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1525c28e83SPiotr Jasiukajtis  * If applicable, add the following below this CDDL HEADER, with the
1625c28e83SPiotr Jasiukajtis  * fields enclosed by brackets "[]" replaced with your own identifying
1725c28e83SPiotr Jasiukajtis  * information: Portions Copyright [yyyy] [name of copyright owner]
1825c28e83SPiotr Jasiukajtis  *
1925c28e83SPiotr Jasiukajtis  * CDDL HEADER END
2025c28e83SPiotr Jasiukajtis  */
2125c28e83SPiotr Jasiukajtis 
2225c28e83SPiotr Jasiukajtis /*
2325c28e83SPiotr Jasiukajtis  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
2425c28e83SPiotr Jasiukajtis  */
2525c28e83SPiotr Jasiukajtis /*
2625c28e83SPiotr Jasiukajtis  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
2725c28e83SPiotr Jasiukajtis  * Use is subject to license terms.
2825c28e83SPiotr Jasiukajtis  */
2925c28e83SPiotr Jasiukajtis 
3025c28e83SPiotr Jasiukajtis #include "libm_inlines.h"
3125c28e83SPiotr Jasiukajtis 
3225c28e83SPiotr Jasiukajtis #ifdef __RESTRICT
33*d626ebd4SToomas Soome #define	restrict _Restrict
3425c28e83SPiotr Jasiukajtis #else
35*d626ebd4SToomas Soome #define	restrict
3625c28e83SPiotr Jasiukajtis #endif
3725c28e83SPiotr Jasiukajtis 
3825c28e83SPiotr Jasiukajtis extern double sqrt(double);
3925c28e83SPiotr Jasiukajtis 
40*d626ebd4SToomas Soome /*
41*d626ebd4SToomas Soome  * Instead of type punning, use union type.
42*d626ebd4SToomas Soome  */
43*d626ebd4SToomas Soome typedef union h32 {
44*d626ebd4SToomas Soome 	float f;
45*d626ebd4SToomas Soome 	unsigned u;
46*d626ebd4SToomas Soome } h32;
47*d626ebd4SToomas Soome 
4825c28e83SPiotr Jasiukajtis void
49*d626ebd4SToomas Soome __vhypotf(int n, float *restrict x, int stridex, float *restrict y,
50*d626ebd4SToomas Soome     int stridey, float *restrict z, int stridez)
5125c28e83SPiotr Jasiukajtis {
5225c28e83SPiotr Jasiukajtis 	float		x0, x1, x2, y0, y1, y2, z0, z1, z2, *pz0, *pz1, *pz2;
53*d626ebd4SToomas Soome 	h32		hx0, hx1, hx2, hy0, hy1, hy2;
54*d626ebd4SToomas Soome 	int		i, j0, j1, j2;
5525c28e83SPiotr Jasiukajtis 
56*d626ebd4SToomas Soome 	do {
5725c28e83SPiotr Jasiukajtis LOOP0:
58*d626ebd4SToomas Soome 		hx0.f = *x;
59*d626ebd4SToomas Soome 		hy0.f = *y;
60*d626ebd4SToomas Soome 		hx0.u &= ~0x80000000;
61*d626ebd4SToomas Soome 		hy0.u &= ~0x80000000;
62*d626ebd4SToomas Soome 		x0 = hx0.f;
63*d626ebd4SToomas Soome 		y0 = hy0.f;
64*d626ebd4SToomas Soome 		if (hy0.u > hx0.u) {
65*d626ebd4SToomas Soome 			i = hy0.u - hx0.u;
66*d626ebd4SToomas Soome 			j0 = hy0.u & 0x7f800000;
67*d626ebd4SToomas Soome 			if (hx0.u == 0)
6825c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
69*d626ebd4SToomas Soome 		} else {
70*d626ebd4SToomas Soome 			i = hx0.u - hy0.u;
71*d626ebd4SToomas Soome 			j0 = hx0.u & 0x7f800000;
72*d626ebd4SToomas Soome 			if (hy0.u == 0)
7325c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
74*d626ebd4SToomas Soome 			else if (hx0.u == 0)
7525c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
7625c28e83SPiotr Jasiukajtis 		}
77*d626ebd4SToomas Soome 		if (i >= 0x0c800000 || j0 >= 0x7f800000) {
7825c28e83SPiotr Jasiukajtis 			z0 = x0 + y0;
79*d626ebd4SToomas Soome 			if (hx0.u == 0x7f800000)
8025c28e83SPiotr Jasiukajtis 				z0 = x0;
81*d626ebd4SToomas Soome 			else if (hy0.u  == 0x7f800000)
8225c28e83SPiotr Jasiukajtis 				z0 = y0;
83*d626ebd4SToomas Soome 			else if (hx0.u > 0x7f800000 || hy0.u > 0x7f800000)
8425c28e83SPiotr Jasiukajtis 				z0 = *x + *y;
8525c28e83SPiotr Jasiukajtis 			*z = z0;
8625c28e83SPiotr Jasiukajtis 			x += stridex;
8725c28e83SPiotr Jasiukajtis 			y += stridey;
8825c28e83SPiotr Jasiukajtis 			z += stridez;
8925c28e83SPiotr Jasiukajtis 			i = 0;
9025c28e83SPiotr Jasiukajtis 			if (--n <= 0)
9125c28e83SPiotr Jasiukajtis 				break;
9225c28e83SPiotr Jasiukajtis 			goto LOOP0;
9325c28e83SPiotr Jasiukajtis 		}
9425c28e83SPiotr Jasiukajtis 		pz0 = z;
9525c28e83SPiotr Jasiukajtis 		x += stridex;
9625c28e83SPiotr Jasiukajtis 		y += stridey;
9725c28e83SPiotr Jasiukajtis 		z += stridez;
9825c28e83SPiotr Jasiukajtis 		i = 1;
9925c28e83SPiotr Jasiukajtis 		if (--n <= 0)
10025c28e83SPiotr Jasiukajtis 			break;
10125c28e83SPiotr Jasiukajtis 
10225c28e83SPiotr Jasiukajtis LOOP1:
103*d626ebd4SToomas Soome 		hx1.f = *x;
104*d626ebd4SToomas Soome 		hy1.f = *y;
105*d626ebd4SToomas Soome 		hx1.u &= ~0x80000000;
106*d626ebd4SToomas Soome 		hy1.u &= ~0x80000000;
107*d626ebd4SToomas Soome 		x1 = hx1.f;
108*d626ebd4SToomas Soome 		y1 = hy1.f;
109*d626ebd4SToomas Soome 		if (hy1.u > hx1.u) {
110*d626ebd4SToomas Soome 			i = hy1.u - hx1.u;
111*d626ebd4SToomas Soome 			j1 = hy1.u & 0x7f800000;
112*d626ebd4SToomas Soome 			if (hx1.u == 0)
11325c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
114*d626ebd4SToomas Soome 		} else {
115*d626ebd4SToomas Soome 			i = hx1.u - hy1.u;
116*d626ebd4SToomas Soome 			j1 = hx1.u & 0x7f800000;
117*d626ebd4SToomas Soome 			if (hy1.u == 0)
11825c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
119*d626ebd4SToomas Soome 			else if (hx1.u == 0)
12025c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
12125c28e83SPiotr Jasiukajtis 		}
122*d626ebd4SToomas Soome 		if (i >= 0x0c800000 || j1 >= 0x7f800000) {
12325c28e83SPiotr Jasiukajtis 			z1 = x1 + y1;
124*d626ebd4SToomas Soome 			if (hx1.u == 0x7f800000)
12525c28e83SPiotr Jasiukajtis 				z1 = x1;
126*d626ebd4SToomas Soome 			else if (hy1.u == 0x7f800000)
12725c28e83SPiotr Jasiukajtis 				z1 = y1;
128*d626ebd4SToomas Soome 			else if (hx1.u > 0x7f800000 || hy1.u > 0x7f800000)
12925c28e83SPiotr Jasiukajtis 				z1 = *x + *y;
13025c28e83SPiotr Jasiukajtis 			*z = z1;
13125c28e83SPiotr Jasiukajtis 			x += stridex;
13225c28e83SPiotr Jasiukajtis 			y += stridey;
13325c28e83SPiotr Jasiukajtis 			z += stridez;
13425c28e83SPiotr Jasiukajtis 			i = 1;
13525c28e83SPiotr Jasiukajtis 			if (--n <= 0)
13625c28e83SPiotr Jasiukajtis 				break;
13725c28e83SPiotr Jasiukajtis 			goto LOOP1;
13825c28e83SPiotr Jasiukajtis 		}
13925c28e83SPiotr Jasiukajtis 		pz1 = z;
14025c28e83SPiotr Jasiukajtis 		x += stridex;
14125c28e83SPiotr Jasiukajtis 		y += stridey;
14225c28e83SPiotr Jasiukajtis 		z += stridez;
14325c28e83SPiotr Jasiukajtis 		i = 2;
14425c28e83SPiotr Jasiukajtis 		if (--n <= 0)
14525c28e83SPiotr Jasiukajtis 			break;
14625c28e83SPiotr Jasiukajtis 
14725c28e83SPiotr Jasiukajtis LOOP2:
148*d626ebd4SToomas Soome 		hx2.f = *x;
149*d626ebd4SToomas Soome 		hy2.f = *y;
150*d626ebd4SToomas Soome 		hx2.u &= ~0x80000000;
151*d626ebd4SToomas Soome 		hy2.u &= ~0x80000000;
152*d626ebd4SToomas Soome 		x2 = hx2.f;
153*d626ebd4SToomas Soome 		y2 = hy2.f;
154*d626ebd4SToomas Soome 		if (hy2.u > hx2.u) {
155*d626ebd4SToomas Soome 			i = hy2.u - hx2.u;
156*d626ebd4SToomas Soome 			j2 = hy2.u & 0x7f800000;
157*d626ebd4SToomas Soome 			if (hx2.u == 0)
15825c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
159*d626ebd4SToomas Soome 		} else {
160*d626ebd4SToomas Soome 			i = hx2.u - hy2.u;
161*d626ebd4SToomas Soome 			j2 = hx2.u & 0x7f800000;
162*d626ebd4SToomas Soome 			if (hy2.u == 0)
16325c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
164*d626ebd4SToomas Soome 			else if (hx2.u == 0)
16525c28e83SPiotr Jasiukajtis 				i = 0x7f800000;
16625c28e83SPiotr Jasiukajtis 		}
167*d626ebd4SToomas Soome 		if (i >= 0x0c800000 || j2 >= 0x7f800000) {
16825c28e83SPiotr Jasiukajtis 			z2 = x2 + y2;
169*d626ebd4SToomas Soome 			if (hx2.u == 0x7f800000)
17025c28e83SPiotr Jasiukajtis 				z2 = x2;
171*d626ebd4SToomas Soome 			else if (hy2.u == 0x7f800000)
17225c28e83SPiotr Jasiukajtis 				z2 = y2;
173*d626ebd4SToomas Soome 			else if (hx2.u > 0x7f800000 || hy2.u > 0x7f800000)
17425c28e83SPiotr Jasiukajtis 				z2 = *x + *y;
17525c28e83SPiotr Jasiukajtis 			*z = z2;
17625c28e83SPiotr Jasiukajtis 			x += stridex;
17725c28e83SPiotr Jasiukajtis 			y += stridey;
17825c28e83SPiotr Jasiukajtis 			z += stridez;
17925c28e83SPiotr Jasiukajtis 			i = 2;
18025c28e83SPiotr Jasiukajtis 			if (--n <= 0)
18125c28e83SPiotr Jasiukajtis 				break;
18225c28e83SPiotr Jasiukajtis 			goto LOOP2;
18325c28e83SPiotr Jasiukajtis 		}
18425c28e83SPiotr Jasiukajtis 		pz2 = z;
18525c28e83SPiotr Jasiukajtis 
18625c28e83SPiotr Jasiukajtis 		z0 = sqrt(x0 * (double)x0 + y0 * (double)y0);
18725c28e83SPiotr Jasiukajtis 		z1 = sqrt(x1 * (double)x1 + y1 * (double)y1);
18825c28e83SPiotr Jasiukajtis 		z2 = sqrt(x2 * (double)x2 + y2 * (double)y2);
18925c28e83SPiotr Jasiukajtis 		*pz0 = z0;
19025c28e83SPiotr Jasiukajtis 		*pz1 = z1;
19125c28e83SPiotr Jasiukajtis 		*pz2 = z2;
19225c28e83SPiotr Jasiukajtis 
19325c28e83SPiotr Jasiukajtis 		x += stridex;
19425c28e83SPiotr Jasiukajtis 		y += stridey;
19525c28e83SPiotr Jasiukajtis 		z += stridez;
19625c28e83SPiotr Jasiukajtis 		i = 0;
19725c28e83SPiotr Jasiukajtis 	} while (--n > 0);
19825c28e83SPiotr Jasiukajtis 
199*d626ebd4SToomas Soome 	if (i > 0) {
200*d626ebd4SToomas Soome 		if (i > 1) {
20125c28e83SPiotr Jasiukajtis 			z1 = sqrt(x1 * (double)x1 + y1 * (double)y1);
20225c28e83SPiotr Jasiukajtis 			*pz1 = z1;
20325c28e83SPiotr Jasiukajtis 		}
20425c28e83SPiotr Jasiukajtis 		z0 = sqrt(x0 * (double)x0 + y0 * (double)y0);
20525c28e83SPiotr Jasiukajtis 		*pz0 = z0;
20625c28e83SPiotr Jasiukajtis 	}
20725c28e83SPiotr Jasiukajtis }
208