125c28e83SPiotr Jasiukajtis /* 225c28e83SPiotr Jasiukajtis * CDDL HEADER START 325c28e83SPiotr Jasiukajtis * 425c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 525c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 625c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 725c28e83SPiotr Jasiukajtis * 825c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 925c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 1025c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 1125c28e83SPiotr Jasiukajtis * and limitations under the License. 1225c28e83SPiotr Jasiukajtis * 1325c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 1425c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1525c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 1625c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 1725c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 1825c28e83SPiotr Jasiukajtis * 1925c28e83SPiotr Jasiukajtis * CDDL HEADER END 2025c28e83SPiotr Jasiukajtis */ 2125c28e83SPiotr Jasiukajtis 2225c28e83SPiotr Jasiukajtis /* 2325c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 2425c28e83SPiotr Jasiukajtis */ 2525c28e83SPiotr Jasiukajtis /* 2625c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 2725c28e83SPiotr Jasiukajtis * Use is subject to license terms. 2825c28e83SPiotr Jasiukajtis */ 2925c28e83SPiotr Jasiukajtis 3025c28e83SPiotr Jasiukajtis #include "libm_inlines.h" 3125c28e83SPiotr Jasiukajtis 3225c28e83SPiotr Jasiukajtis #ifdef __RESTRICT 33*d626ebd4SToomas Soome #define restrict _Restrict 3425c28e83SPiotr Jasiukajtis #else 35*d626ebd4SToomas Soome #define restrict 3625c28e83SPiotr Jasiukajtis #endif 3725c28e83SPiotr Jasiukajtis 3825c28e83SPiotr Jasiukajtis extern double sqrt(double); 3925c28e83SPiotr Jasiukajtis 40*d626ebd4SToomas Soome /* 41*d626ebd4SToomas Soome * Instead of type punning, use union type. 42*d626ebd4SToomas Soome */ 43*d626ebd4SToomas Soome typedef union h32 { 44*d626ebd4SToomas Soome float f; 45*d626ebd4SToomas Soome unsigned u; 46*d626ebd4SToomas Soome } h32; 47*d626ebd4SToomas Soome 4825c28e83SPiotr Jasiukajtis void 49*d626ebd4SToomas Soome __vhypotf(int n, float *restrict x, int stridex, float *restrict y, 50*d626ebd4SToomas Soome int stridey, float *restrict z, int stridez) 5125c28e83SPiotr Jasiukajtis { 5225c28e83SPiotr Jasiukajtis float x0, x1, x2, y0, y1, y2, z0, z1, z2, *pz0, *pz1, *pz2; 53*d626ebd4SToomas Soome h32 hx0, hx1, hx2, hy0, hy1, hy2; 54*d626ebd4SToomas Soome int i, j0, j1, j2; 5525c28e83SPiotr Jasiukajtis 56*d626ebd4SToomas Soome do { 5725c28e83SPiotr Jasiukajtis LOOP0: 58*d626ebd4SToomas Soome hx0.f = *x; 59*d626ebd4SToomas Soome hy0.f = *y; 60*d626ebd4SToomas Soome hx0.u &= ~0x80000000; 61*d626ebd4SToomas Soome hy0.u &= ~0x80000000; 62*d626ebd4SToomas Soome x0 = hx0.f; 63*d626ebd4SToomas Soome y0 = hy0.f; 64*d626ebd4SToomas Soome if (hy0.u > hx0.u) { 65*d626ebd4SToomas Soome i = hy0.u - hx0.u; 66*d626ebd4SToomas Soome j0 = hy0.u & 0x7f800000; 67*d626ebd4SToomas Soome if (hx0.u == 0) 6825c28e83SPiotr Jasiukajtis i = 0x7f800000; 69*d626ebd4SToomas Soome } else { 70*d626ebd4SToomas Soome i = hx0.u - hy0.u; 71*d626ebd4SToomas Soome j0 = hx0.u & 0x7f800000; 72*d626ebd4SToomas Soome if (hy0.u == 0) 7325c28e83SPiotr Jasiukajtis i = 0x7f800000; 74*d626ebd4SToomas Soome else if (hx0.u == 0) 7525c28e83SPiotr Jasiukajtis i = 0x7f800000; 7625c28e83SPiotr Jasiukajtis } 77*d626ebd4SToomas Soome if (i >= 0x0c800000 || j0 >= 0x7f800000) { 7825c28e83SPiotr Jasiukajtis z0 = x0 + y0; 79*d626ebd4SToomas Soome if (hx0.u == 0x7f800000) 8025c28e83SPiotr Jasiukajtis z0 = x0; 81*d626ebd4SToomas Soome else if (hy0.u == 0x7f800000) 8225c28e83SPiotr Jasiukajtis z0 = y0; 83*d626ebd4SToomas Soome else if (hx0.u > 0x7f800000 || hy0.u > 0x7f800000) 8425c28e83SPiotr Jasiukajtis z0 = *x + *y; 8525c28e83SPiotr Jasiukajtis *z = z0; 8625c28e83SPiotr Jasiukajtis x += stridex; 8725c28e83SPiotr Jasiukajtis y += stridey; 8825c28e83SPiotr Jasiukajtis z += stridez; 8925c28e83SPiotr Jasiukajtis i = 0; 9025c28e83SPiotr Jasiukajtis if (--n <= 0) 9125c28e83SPiotr Jasiukajtis break; 9225c28e83SPiotr Jasiukajtis goto LOOP0; 9325c28e83SPiotr Jasiukajtis } 9425c28e83SPiotr Jasiukajtis pz0 = z; 9525c28e83SPiotr Jasiukajtis x += stridex; 9625c28e83SPiotr Jasiukajtis y += stridey; 9725c28e83SPiotr Jasiukajtis z += stridez; 9825c28e83SPiotr Jasiukajtis i = 1; 9925c28e83SPiotr Jasiukajtis if (--n <= 0) 10025c28e83SPiotr Jasiukajtis break; 10125c28e83SPiotr Jasiukajtis 10225c28e83SPiotr Jasiukajtis LOOP1: 103*d626ebd4SToomas Soome hx1.f = *x; 104*d626ebd4SToomas Soome hy1.f = *y; 105*d626ebd4SToomas Soome hx1.u &= ~0x80000000; 106*d626ebd4SToomas Soome hy1.u &= ~0x80000000; 107*d626ebd4SToomas Soome x1 = hx1.f; 108*d626ebd4SToomas Soome y1 = hy1.f; 109*d626ebd4SToomas Soome if (hy1.u > hx1.u) { 110*d626ebd4SToomas Soome i = hy1.u - hx1.u; 111*d626ebd4SToomas Soome j1 = hy1.u & 0x7f800000; 112*d626ebd4SToomas Soome if (hx1.u == 0) 11325c28e83SPiotr Jasiukajtis i = 0x7f800000; 114*d626ebd4SToomas Soome } else { 115*d626ebd4SToomas Soome i = hx1.u - hy1.u; 116*d626ebd4SToomas Soome j1 = hx1.u & 0x7f800000; 117*d626ebd4SToomas Soome if (hy1.u == 0) 11825c28e83SPiotr Jasiukajtis i = 0x7f800000; 119*d626ebd4SToomas Soome else if (hx1.u == 0) 12025c28e83SPiotr Jasiukajtis i = 0x7f800000; 12125c28e83SPiotr Jasiukajtis } 122*d626ebd4SToomas Soome if (i >= 0x0c800000 || j1 >= 0x7f800000) { 12325c28e83SPiotr Jasiukajtis z1 = x1 + y1; 124*d626ebd4SToomas Soome if (hx1.u == 0x7f800000) 12525c28e83SPiotr Jasiukajtis z1 = x1; 126*d626ebd4SToomas Soome else if (hy1.u == 0x7f800000) 12725c28e83SPiotr Jasiukajtis z1 = y1; 128*d626ebd4SToomas Soome else if (hx1.u > 0x7f800000 || hy1.u > 0x7f800000) 12925c28e83SPiotr Jasiukajtis z1 = *x + *y; 13025c28e83SPiotr Jasiukajtis *z = z1; 13125c28e83SPiotr Jasiukajtis x += stridex; 13225c28e83SPiotr Jasiukajtis y += stridey; 13325c28e83SPiotr Jasiukajtis z += stridez; 13425c28e83SPiotr Jasiukajtis i = 1; 13525c28e83SPiotr Jasiukajtis if (--n <= 0) 13625c28e83SPiotr Jasiukajtis break; 13725c28e83SPiotr Jasiukajtis goto LOOP1; 13825c28e83SPiotr Jasiukajtis } 13925c28e83SPiotr Jasiukajtis pz1 = z; 14025c28e83SPiotr Jasiukajtis x += stridex; 14125c28e83SPiotr Jasiukajtis y += stridey; 14225c28e83SPiotr Jasiukajtis z += stridez; 14325c28e83SPiotr Jasiukajtis i = 2; 14425c28e83SPiotr Jasiukajtis if (--n <= 0) 14525c28e83SPiotr Jasiukajtis break; 14625c28e83SPiotr Jasiukajtis 14725c28e83SPiotr Jasiukajtis LOOP2: 148*d626ebd4SToomas Soome hx2.f = *x; 149*d626ebd4SToomas Soome hy2.f = *y; 150*d626ebd4SToomas Soome hx2.u &= ~0x80000000; 151*d626ebd4SToomas Soome hy2.u &= ~0x80000000; 152*d626ebd4SToomas Soome x2 = hx2.f; 153*d626ebd4SToomas Soome y2 = hy2.f; 154*d626ebd4SToomas Soome if (hy2.u > hx2.u) { 155*d626ebd4SToomas Soome i = hy2.u - hx2.u; 156*d626ebd4SToomas Soome j2 = hy2.u & 0x7f800000; 157*d626ebd4SToomas Soome if (hx2.u == 0) 15825c28e83SPiotr Jasiukajtis i = 0x7f800000; 159*d626ebd4SToomas Soome } else { 160*d626ebd4SToomas Soome i = hx2.u - hy2.u; 161*d626ebd4SToomas Soome j2 = hx2.u & 0x7f800000; 162*d626ebd4SToomas Soome if (hy2.u == 0) 16325c28e83SPiotr Jasiukajtis i = 0x7f800000; 164*d626ebd4SToomas Soome else if (hx2.u == 0) 16525c28e83SPiotr Jasiukajtis i = 0x7f800000; 16625c28e83SPiotr Jasiukajtis } 167*d626ebd4SToomas Soome if (i >= 0x0c800000 || j2 >= 0x7f800000) { 16825c28e83SPiotr Jasiukajtis z2 = x2 + y2; 169*d626ebd4SToomas Soome if (hx2.u == 0x7f800000) 17025c28e83SPiotr Jasiukajtis z2 = x2; 171*d626ebd4SToomas Soome else if (hy2.u == 0x7f800000) 17225c28e83SPiotr Jasiukajtis z2 = y2; 173*d626ebd4SToomas Soome else if (hx2.u > 0x7f800000 || hy2.u > 0x7f800000) 17425c28e83SPiotr Jasiukajtis z2 = *x + *y; 17525c28e83SPiotr Jasiukajtis *z = z2; 17625c28e83SPiotr Jasiukajtis x += stridex; 17725c28e83SPiotr Jasiukajtis y += stridey; 17825c28e83SPiotr Jasiukajtis z += stridez; 17925c28e83SPiotr Jasiukajtis i = 2; 18025c28e83SPiotr Jasiukajtis if (--n <= 0) 18125c28e83SPiotr Jasiukajtis break; 18225c28e83SPiotr Jasiukajtis goto LOOP2; 18325c28e83SPiotr Jasiukajtis } 18425c28e83SPiotr Jasiukajtis pz2 = z; 18525c28e83SPiotr Jasiukajtis 18625c28e83SPiotr Jasiukajtis z0 = sqrt(x0 * (double)x0 + y0 * (double)y0); 18725c28e83SPiotr Jasiukajtis z1 = sqrt(x1 * (double)x1 + y1 * (double)y1); 18825c28e83SPiotr Jasiukajtis z2 = sqrt(x2 * (double)x2 + y2 * (double)y2); 18925c28e83SPiotr Jasiukajtis *pz0 = z0; 19025c28e83SPiotr Jasiukajtis *pz1 = z1; 19125c28e83SPiotr Jasiukajtis *pz2 = z2; 19225c28e83SPiotr Jasiukajtis 19325c28e83SPiotr Jasiukajtis x += stridex; 19425c28e83SPiotr Jasiukajtis y += stridey; 19525c28e83SPiotr Jasiukajtis z += stridez; 19625c28e83SPiotr Jasiukajtis i = 0; 19725c28e83SPiotr Jasiukajtis } while (--n > 0); 19825c28e83SPiotr Jasiukajtis 199*d626ebd4SToomas Soome if (i > 0) { 200*d626ebd4SToomas Soome if (i > 1) { 20125c28e83SPiotr Jasiukajtis z1 = sqrt(x1 * (double)x1 + y1 * (double)y1); 20225c28e83SPiotr Jasiukajtis *pz1 = z1; 20325c28e83SPiotr Jasiukajtis } 20425c28e83SPiotr Jasiukajtis z0 = sqrt(x0 * (double)x0 + y0 * (double)y0); 20525c28e83SPiotr Jasiukajtis *pz0 = z0; 20625c28e83SPiotr Jasiukajtis } 20725c28e83SPiotr Jasiukajtis } 208