125c28e83SPiotr Jasiukajtis/* 225c28e83SPiotr Jasiukajtis * CDDL HEADER START 325c28e83SPiotr Jasiukajtis * 425c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 525c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 625c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 725c28e83SPiotr Jasiukajtis * 825c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 925c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 1025c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 1125c28e83SPiotr Jasiukajtis * and limitations under the License. 1225c28e83SPiotr Jasiukajtis * 1325c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 1425c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1525c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 1625c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 1725c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 1825c28e83SPiotr Jasiukajtis * 1925c28e83SPiotr Jasiukajtis * CDDL HEADER END 2025c28e83SPiotr Jasiukajtis */ 2125c28e83SPiotr Jasiukajtis/* 2225c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 2325c28e83SPiotr Jasiukajtis */ 2425c28e83SPiotr Jasiukajtis/* 2525c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 2625c28e83SPiotr Jasiukajtis * Use is subject to license terms. 2725c28e83SPiotr Jasiukajtis */ 2825c28e83SPiotr Jasiukajtis 2925c28e83SPiotr Jasiukajtis .file "__vatan.S" 3025c28e83SPiotr Jasiukajtis 3125c28e83SPiotr Jasiukajtis#include "libm.h" 3225c28e83SPiotr Jasiukajtis 3325c28e83SPiotr Jasiukajtis RO_DATA 3425c28e83SPiotr Jasiukajtis 3525c28e83SPiotr Jasiukajtis! following is the C version of the ATAN algorithm 3625c28e83SPiotr Jasiukajtis! #include <math.h> 3725c28e83SPiotr Jasiukajtis! #include <stdio.h> 3825c28e83SPiotr Jasiukajtis! double jkatan(double *x) 3925c28e83SPiotr Jasiukajtis! { 4025c28e83SPiotr Jasiukajtis! double f, z, ans, ansu, ansl, tmp, poly, conup, conlo, dummy; 4125c28e83SPiotr Jasiukajtis! int index, sign, intf, intz; 4225c28e83SPiotr Jasiukajtis! extern const double __vlibm_TBL_atan1[]; 4325c28e83SPiotr Jasiukajtis! long *pf = (long *) &f, *pz = (long *) &z; 44*55fea89dSDan Cross! 4525c28e83SPiotr Jasiukajtis! /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7 4625c28e83SPiotr Jasiukajtis! * Error = -3.08254E-18 On the interval |x| < 1/64 */ 47*55fea89dSDan Cross! 4825c28e83SPiotr Jasiukajtis! /* define dummy names for readability. Use parray to help compiler optimize loads */ 4925c28e83SPiotr Jasiukajtis! #define p3 parray[0] 5025c28e83SPiotr Jasiukajtis! #define p2 parray[1] 5125c28e83SPiotr Jasiukajtis! #define p1 parray[2] 52*55fea89dSDan Cross! #define soffset 3 53*55fea89dSDan Cross! 54*55fea89dSDan Cross! static const double parray[] = { 5525c28e83SPiotr Jasiukajtis! -1.428029046844299722E-01, /* p[3] */ 5625c28e83SPiotr Jasiukajtis! 1.999999917247000615E-01, /* p[2] */ 5725c28e83SPiotr Jasiukajtis! -3.333333333329292858E-01, /* p[1] */ 5825c28e83SPiotr Jasiukajtis! 1.0, /* not used for p[0], though */ 5925c28e83SPiotr Jasiukajtis! -1.0, /* used to flip sign of answer */ 6025c28e83SPiotr Jasiukajtis! }; 61*55fea89dSDan Cross! 6225c28e83SPiotr Jasiukajtis! f = *x; /* fetch argument */ 6325c28e83SPiotr Jasiukajtis! intf = pf[0]; /* grab upper half */ 6425c28e83SPiotr Jasiukajtis! sign = intf & 0x80000000; /* sign of argument */ 6525c28e83SPiotr Jasiukajtis! intf ^= sign; /* abs(upper argument) */ 6625c28e83SPiotr Jasiukajtis! sign = (unsigned) sign >> 31; /* sign bit = 0 or 1 */ 6725c28e83SPiotr Jasiukajtis! pf[0] = intf; 68*55fea89dSDan Cross! 6925c28e83SPiotr Jasiukajtis! if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */ 7025c28e83SPiotr Jasiukajtis! { 71*55fea89dSDan Cross! if( (intf > 0x7ff00000) || 7225c28e83SPiotr Jasiukajtis! ((intf == 0x7ff00000) && (pf[1] !=0)) ) return (*x-*x);/* return NaN if x=NaN*/ 7325c28e83SPiotr Jasiukajtis! if( intf < 0x3e300000 ) /* avoid underflow for small arg */ 7425c28e83SPiotr Jasiukajtis! { 7525c28e83SPiotr Jasiukajtis! dummy = 1.0e37 + f; 7625c28e83SPiotr Jasiukajtis! dummy = dummy; 7725c28e83SPiotr Jasiukajtis! return (*x); 7825c28e83SPiotr Jasiukajtis! } 7925c28e83SPiotr Jasiukajtis! if( intf > 0x43600000 ) /* avoid underflow for big arg */ 8025c28e83SPiotr Jasiukajtis! { 8125c28e83SPiotr Jasiukajtis! index = 2; 8225c28e83SPiotr Jasiukajtis! f = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low */ 8325c28e83SPiotr Jasiukajtis! f = parray[soffset + sign] * f; /* put sign bit on ans */ 8425c28e83SPiotr Jasiukajtis! return (f); 8525c28e83SPiotr Jasiukajtis! } 8625c28e83SPiotr Jasiukajtis! } 87*55fea89dSDan Cross! 8825c28e83SPiotr Jasiukajtis! index = 0; /* points to 0,0 in table */ 8925c28e83SPiotr Jasiukajtis! if (intf > 0x40500000) /* if(|x| > 64 */ 9025c28e83SPiotr Jasiukajtis! { f = -1.0/f; 9125c28e83SPiotr Jasiukajtis! index = 2; /* point to pi/2 upper, lower */ 9225c28e83SPiotr Jasiukajtis! } 9325c28e83SPiotr Jasiukajtis! else if( intf >= 0x3f900000 ) /* if |x| >= (1/64)... */ 9425c28e83SPiotr Jasiukajtis! { 9525c28e83SPiotr Jasiukajtis! intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */ 9625c28e83SPiotr Jasiukajtis! pz[0] = intz; /* store as a double (z) */ 9725c28e83SPiotr Jasiukajtis! pz[1] = 0; /* ...lower */ 9825c28e83SPiotr Jasiukajtis! f = (f - z)/(1.0 + f*z); /* get reduced argument */ 9925c28e83SPiotr Jasiukajtis! index = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */ 10025c28e83SPiotr Jasiukajtis! index += 4; /* skip over 0,0,pi/2,pi/2 */ 10125c28e83SPiotr Jasiukajtis! } 10225c28e83SPiotr Jasiukajtis! conup = __vlibm_TBL_atan1[index]; /* upper table */ 10325c28e83SPiotr Jasiukajtis! conlo = __vlibm_TBL_atan1[index+1]; /* lower table */ 10425c28e83SPiotr Jasiukajtis! tmp = f*f; 10525c28e83SPiotr Jasiukajtis! poly = (f*tmp)*((p3*tmp + p2)*tmp + p1); 10625c28e83SPiotr Jasiukajtis! ansu = conup + f; /* compute atan(f) upper */ 10725c28e83SPiotr Jasiukajtis! ansl = (((conup - ansu) + f) + poly) + conlo; 10825c28e83SPiotr Jasiukajtis! ans = ansu + ansl; 10925c28e83SPiotr Jasiukajtis! ans = parray[soffset + sign] * ans; 11025c28e83SPiotr Jasiukajtis! return ans; 11125c28e83SPiotr Jasiukajtis! } 11225c28e83SPiotr Jasiukajtis 11325c28e83SPiotr Jasiukajtis/* 8 bytes = 1 double f.p. word */ 11425c28e83SPiotr Jasiukajtis#define WSIZE 8 11525c28e83SPiotr Jasiukajtis 11625c28e83SPiotr Jasiukajtis .align 32 !align with full D-cache line 11725c28e83SPiotr Jasiukajtis.COEFFS: 11825c28e83SPiotr Jasiukajtis .double 0r-1.428029046844299722E-01 !p[3] 11925c28e83SPiotr Jasiukajtis .double 0r1.999999917247000615E-01 !p[2] 12025c28e83SPiotr Jasiukajtis .double 0r-3.333333333329292858E-01 !p[1] 12125c28e83SPiotr Jasiukajtis .double 0r-1.0, !constant -1.0 12225c28e83SPiotr Jasiukajtis .word 0x00008000,0x0 !for fp rounding of reduced arg 12325c28e83SPiotr Jasiukajtis .word 0x7fff0000,0x0 !for fp truncation 12425c28e83SPiotr Jasiukajtis .word 0x47900000,0 !a number close to 1.0E37 12525c28e83SPiotr Jasiukajtis .word 0x80000000,0x0 !mask for fp sign bit 12625c28e83SPiotr Jasiukajtis .word 0x3f800000,0x0 !1.0/128.0 dummy "safe" argument 12725c28e83SPiotr Jasiukajtis .type .COEFFS,#object 12825c28e83SPiotr Jasiukajtis 12925c28e83SPiotr Jasiukajtis ENTRY(__vatan) 13025c28e83SPiotr Jasiukajtis save %sp,-SA(MINFRAME)-16,%sp 13125c28e83SPiotr Jasiukajtis PIC_SETUP(g5) 13225c28e83SPiotr Jasiukajtis PIC_SET(g5,__vlibm_TBL_atan1,o4) 13325c28e83SPiotr Jasiukajtis PIC_SET(g5,.COEFFS,o0) 13425c28e83SPiotr Jasiukajtis/* 13525c28e83SPiotr Jasiukajtis __vatan(int n, double *x, int stridex, double *y, stridey) 13625c28e83SPiotr Jasiukajtis computes y(i) = atan( x(i) ), for 1=1,n. Stridex, stridey 137*55fea89dSDan Cross are the distance between x and y elements 13825c28e83SPiotr Jasiukajtis 13925c28e83SPiotr Jasiukajtis %i0 n 14025c28e83SPiotr Jasiukajtis %i1 address of x 14125c28e83SPiotr Jasiukajtis %i2 stride x 14225c28e83SPiotr Jasiukajtis %i3 address of y 14325c28e83SPiotr Jasiukajtis %i4 stride y 14425c28e83SPiotr Jasiukajtis*/ 14525c28e83SPiotr Jasiukajtis cmp %i0,0 !if n <=0, 14625c28e83SPiotr Jasiukajtis ble,pn %icc,.RETURN !....then do nothing 14725c28e83SPiotr Jasiukajtis sll %i2,3,%i2 !convert stride to byte count 14825c28e83SPiotr Jasiukajtis sll %i4,3,%i4 !convert stride to byte count 14925c28e83SPiotr Jasiukajtis 15025c28e83SPiotr Jasiukajtis/* pre-load constants before beginning main loop */ 15125c28e83SPiotr Jasiukajtis 15225c28e83SPiotr Jasiukajtis ldd [%o0],%f58 !load p[3] 153*55fea89dSDan Cross mov 2,%i5 !argcount = 3 15425c28e83SPiotr Jasiukajtis 15525c28e83SPiotr Jasiukajtis ldd [%o0+WSIZE],%f60 !load p[2] 15625c28e83SPiotr Jasiukajtis add %fp,STACK_BIAS-8,%l1 !yaddr1 = &dummy 15725c28e83SPiotr Jasiukajtis fzero %f18 !ansu1 = 0 15825c28e83SPiotr Jasiukajtis 15925c28e83SPiotr Jasiukajtis ldd [%o0+2*WSIZE],%f62 !load p[1] 16025c28e83SPiotr Jasiukajtis add %fp,STACK_BIAS-8,%l2 !yaddr2 = &dummy 16125c28e83SPiotr Jasiukajtis fzero %f12 !(poly1) = 0 16225c28e83SPiotr Jasiukajtis 163*55fea89dSDan Cross ldd [%o0+3*WSIZE],%f56 !-1.0 16425c28e83SPiotr Jasiukajtis fzero %f14 !tmp1 = 0 16525c28e83SPiotr Jasiukajtis 16625c28e83SPiotr Jasiukajtis ldd [%o0+4*WSIZE],%f52 !load rounding mask 16725c28e83SPiotr Jasiukajtis fzero %f16 !conup1 = 0 16825c28e83SPiotr Jasiukajtis 16925c28e83SPiotr Jasiukajtis ldd [%o0+5*WSIZE],%f54 !load truncation mask 17025c28e83SPiotr Jasiukajtis fzero %f36 !f1 = 0 17125c28e83SPiotr Jasiukajtis 17225c28e83SPiotr Jasiukajtis ldd [%o0+6*WSIZE],%f50 !1.0e37 17325c28e83SPiotr Jasiukajtis fzero %f38 !f2 = 0 17425c28e83SPiotr Jasiukajtis 17525c28e83SPiotr Jasiukajtis ldd [%o0+7*WSIZE],%f32 !mask for sign bit 17625c28e83SPiotr Jasiukajtis 17725c28e83SPiotr Jasiukajtis ldd [%o4+2*WSIZE],%f46 !pi/2 upper 178*55fea89dSDan Cross ldd [%o4+(2*WSIZE+8)],%f48 !pi/2 lower 17925c28e83SPiotr Jasiukajtis sethi %hi(0x40500000),%l6 !64.0 18025c28e83SPiotr Jasiukajtis sethi %hi(0x3f900000),%l7 !1/64.0 18125c28e83SPiotr Jasiukajtis mov 0,%l4 !index1 = 0 18225c28e83SPiotr Jasiukajtis mov 0,%l5 !index2 = 0 18325c28e83SPiotr Jasiukajtis 18425c28e83SPiotr Jasiukajtis.MAINLOOP: 18525c28e83SPiotr Jasiukajtis 18625c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 18725c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 18825c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 18925c28e83SPiotr Jasiukajtis 19025c28e83SPiotr Jasiukajtis.LOOP0: 19125c28e83SPiotr Jasiukajtis deccc %i0 !--n 19225c28e83SPiotr Jasiukajtis bneg 1f 19325c28e83SPiotr Jasiukajtis mov %i1,%o5 !xuse = x (delay slot) 19425c28e83SPiotr Jasiukajtis 19525c28e83SPiotr Jasiukajtis ba 2f 19625c28e83SPiotr Jasiukajtis nop !delay slot 19725c28e83SPiotr Jasiukajtis1: 19825c28e83SPiotr Jasiukajtis PIC_SET(g5,.COEFFS+8*WSIZE,o5) 19925c28e83SPiotr Jasiukajtis dec %i5 !argcount-- 20025c28e83SPiotr Jasiukajtis2: 20125c28e83SPiotr Jasiukajtis sethi %hi(0x80000000),%o7 !mask for sign bit 20225c28e83SPiotr Jasiukajtis/*2 */ sethi %hi(0x43600000),%o1 !big = 0x43600000,0 20325c28e83SPiotr Jasiukajtis ld [%o5],%o0 !intf = pf[0] = f upper 20425c28e83SPiotr Jasiukajtis ldd [%o4+%l5],%f26 !conup2 = __vlibm_TBL_atan1[index2] 20525c28e83SPiotr Jasiukajtis 20625c28e83SPiotr Jasiukajtis sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0 20725c28e83SPiotr Jasiukajtis/*4 */ andn %o0,%o7,%o0 !intf = fabs(intf) 20825c28e83SPiotr Jasiukajtis ldd [%o5],%f34 !f = *x into f34 20925c28e83SPiotr Jasiukajtis 21025c28e83SPiotr Jasiukajtis sub %o1,%o0,%o1 !(-) if intf > big 21125c28e83SPiotr Jasiukajtis/*6 */ sub %o0,%o2,%o2 !(-) if intf < small 212*55fea89dSDan Cross fand %f34,%f32,%f40 !sign0 = sign bit 21325c28e83SPiotr Jasiukajtis fmuld %f38,%f38,%f24 !tmp2= f2*f2 21425c28e83SPiotr Jasiukajtis 21525c28e83SPiotr Jasiukajtis/*7 */ orcc %o1,%o2,%g0 !(-) if either true 21625c28e83SPiotr Jasiukajtis bneg,pn %icc,.SPECIAL0 !if (-) goto special cases below 21725c28e83SPiotr Jasiukajtis fabsd %f34,%f34 !abs(f) (delay slot) 21825c28e83SPiotr Jasiukajtis !---------------------- 21925c28e83SPiotr Jasiukajtis 22025c28e83SPiotr Jasiukajtis 22125c28e83SPiotr Jasiukajtis sethi %hi(0x8000),%o7 !rounding bit 22225c28e83SPiotr Jasiukajtis/*8 */ fpadd32 %f34,%f52,%f0 !intf + 0x00008000 (again) 22325c28e83SPiotr Jasiukajtis faddd %f26,%f38,%f28 !ansu2 = conup2 + f2 22425c28e83SPiotr Jasiukajtis 22525c28e83SPiotr Jasiukajtis add %o0,%o7,%o0 !intf + 0x00008000 (delay slot) 22625c28e83SPiotr Jasiukajtis/*9*/ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again) 22725c28e83SPiotr Jasiukajtis fmuld %f58,%f24,%f22 !p[3]*tmp2 22825c28e83SPiotr Jasiukajtis 22925c28e83SPiotr Jasiukajtis/*10 */ sethi %hi(0x7fff0000),%o7 !mask for rounding argument 23025c28e83SPiotr Jasiukajtis fmuld %f34,%f0,%f10 !f*z 23125c28e83SPiotr Jasiukajtis fsubd %f34,%f0,%f20 !f - z 23225c28e83SPiotr Jasiukajtis add %o4,%l4,%l4 !base addr + index1 23325c28e83SPiotr Jasiukajtis fmuld %f14,%f12,%f12 !poly1 = (f1*tmp1)*((p3*tmp1 + p2)*tmp1 + p1) 23425c28e83SPiotr Jasiukajtis faddd %f16,%f36,%f16 !(conup1 - ansu1) + f1 23525c28e83SPiotr Jasiukajtis 23625c28e83SPiotr Jasiukajtis/*12 */ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000 23725c28e83SPiotr Jasiukajtis faddd %f22,%f60,%f22 !p[3]*tmp2 + p[2] 23825c28e83SPiotr Jasiukajtis ldd [%l4+WSIZE],%f14 !conlo1 = __vlibm_TBL_atan1[index+1] 23925c28e83SPiotr Jasiukajtis 24025c28e83SPiotr Jasiukajtis/*13 */ sub %o0,%l7,%o2 !intz - 0x3f900000 24125c28e83SPiotr Jasiukajtis fsubd %f10,%f56,%f10 !(f*z - (-1.0)) 24225c28e83SPiotr Jasiukajtis faddd %f16,%f12,%f12 !((conup1 - ansu1) + f1) + poly1 24325c28e83SPiotr Jasiukajtis 24425c28e83SPiotr Jasiukajtis cmp %o0,%l6 !(|f| > 64) 24525c28e83SPiotr Jasiukajtis ble .ELSE0 !if(|f| > 64) then 24625c28e83SPiotr Jasiukajtis/*15 */ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15 24725c28e83SPiotr Jasiukajtis mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower 24825c28e83SPiotr Jasiukajtis ba .ENDIF0 !continue 24925c28e83SPiotr Jasiukajtis/*16 */ fdivd %f56,%f34,%f34 !f = -1.0/f (delay slot) 25025c28e83SPiotr Jasiukajtis .ELSE0: !else f( |x| >= (1/64)) 25125c28e83SPiotr Jasiukajtis cmp %o0,%l7 !if intf >= 1/64 25225c28e83SPiotr Jasiukajtis bl .ENDIF0 !if( |x| >= (1/64) ) then... 25325c28e83SPiotr Jasiukajtis mov 0,%o1 !index == 0 , point to conup,conlo = 0,0 25425c28e83SPiotr Jasiukajtis add %o3,4,%o1 !index = index + 4 25525c28e83SPiotr Jasiukajtis/*16 */ fdivd %f20,%f10,%f34 !f = (f - z)/(1.0 + f*z), reduced argument 25625c28e83SPiotr Jasiukajtis .ENDIF0: 25725c28e83SPiotr Jasiukajtis 25825c28e83SPiotr Jasiukajtis/*17*/ sll %o1,3,%l3 !index0 = index 25925c28e83SPiotr Jasiukajtis mov %i3,%l0 !yaddr0 = address of y 26025c28e83SPiotr Jasiukajtis faddd %f12,%f14,%f12 !ansl1 = (((conup1 - ansu)1 + f1) + poly1) + conlo1 26125c28e83SPiotr Jasiukajtis fmuld %f22,%f24,%f22 !(p3*tmp2 + p2)*tmp2 26225c28e83SPiotr Jasiukajtis fsubd %f26,%f28,%f26 !conup2 - ansu2 26325c28e83SPiotr Jasiukajtis 26425c28e83SPiotr Jasiukajtis/*20*/ add %i1,%i2,%i1 !x += stridex 26525c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 !y += stridey 26625c28e83SPiotr Jasiukajtis faddd %f18,%f12,%f36 !ans1 = ansu1 + ansl1 26725c28e83SPiotr Jasiukajtis fmuld %f38,%f24,%f24 !f*tmp2 26825c28e83SPiotr Jasiukajtis faddd %f22,%f62,%f22 !(p3*tmp2 + p2)*tmp2 + p1 26925c28e83SPiotr Jasiukajtis 27025c28e83SPiotr Jasiukajtis/*23*/ for %f36,%f42,%f36 !sign(ans1) = sign of argument 27125c28e83SPiotr Jasiukajtis std %f36,[%l1] !*yaddr1 = ans1 27225c28e83SPiotr Jasiukajtis add %o4,%l5,%l5 !base addr + index2 27325c28e83SPiotr Jasiukajtis fmuld %f24,%f22,%f22 !poly2 = (f2*tmp2)*((p3*tmp2 + p2)*tmp2 + p1) 27425c28e83SPiotr Jasiukajtis faddd %f26,%f38,%f26 !(conup2 - ansu2) + f2 27525c28e83SPiotr Jasiukajtis cmp %i5,0 !if argcount =0, we are done 27625c28e83SPiotr Jasiukajtis be .RETURN 27725c28e83SPiotr Jasiukajtis nop 27825c28e83SPiotr Jasiukajtis 27925c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 28025c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 28125c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 28225c28e83SPiotr Jasiukajtis 28325c28e83SPiotr Jasiukajtis.LOOP1: 28425c28e83SPiotr Jasiukajtis/*25*/ deccc %i0 !--n 28525c28e83SPiotr Jasiukajtis bneg 1f 28625c28e83SPiotr Jasiukajtis mov %i1,%o5 !xuse = x (delay slot) 28725c28e83SPiotr Jasiukajtis ba 2f 28825c28e83SPiotr Jasiukajtis nop !delay slot 28925c28e83SPiotr Jasiukajtis1: 29025c28e83SPiotr Jasiukajtis PIC_SET(g5,.COEFFS+8*WSIZE,o5) 29125c28e83SPiotr Jasiukajtis dec %i5 !argcount-- 29225c28e83SPiotr Jasiukajtis2: 29325c28e83SPiotr Jasiukajtis 29425c28e83SPiotr Jasiukajtis/*26*/ sethi %hi(0x80000000),%o7 !mask for sign bit 29525c28e83SPiotr Jasiukajtis sethi %hi(0x43600000),%o1 !big = 0x43600000,0 29625c28e83SPiotr Jasiukajtis ld [%o5],%o0 !intf = pf[0] = f upper 29725c28e83SPiotr Jasiukajtis 29825c28e83SPiotr Jasiukajtis/*28*/ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0 29925c28e83SPiotr Jasiukajtis andn %o0,%o7,%o0 !intf = fabs(intf) 30025c28e83SPiotr Jasiukajtis ldd [%o5],%f36 !f = *x into f36 30125c28e83SPiotr Jasiukajtis 30225c28e83SPiotr Jasiukajtis/*30*/ sub %o1,%o0,%o1 !(-) if intf > big 30325c28e83SPiotr Jasiukajtis sub %o0,%o2,%o2 !(-) if intf < small 304*55fea89dSDan Cross fand %f36,%f32,%f42 !sign1 = sign bit 30525c28e83SPiotr Jasiukajtis 30625c28e83SPiotr Jasiukajtis/*31*/ orcc %o1,%o2,%g0 !(-) if either true 30725c28e83SPiotr Jasiukajtis bneg,pn %icc,.SPECIAL1 !if (-) goto special cases below 30825c28e83SPiotr Jasiukajtis fabsd %f36,%f36 !abs(f) (delay slot) 30925c28e83SPiotr Jasiukajtis !---------------------- 31025c28e83SPiotr Jasiukajtis 31125c28e83SPiotr Jasiukajtis/*32*/ fpadd32 %f36,%f52,%f0 !intf + 0x00008000 (again) 31225c28e83SPiotr Jasiukajtis ldd [%l5+WSIZE],%f24 !conlo2 = __vlibm_TBL_atan1[index2+1] 31325c28e83SPiotr Jasiukajtis 31425c28e83SPiotr Jasiukajtis/*33*/ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again) 31525c28e83SPiotr Jasiukajtis sethi %hi(0x8000),%o7 !rounding bit 31625c28e83SPiotr Jasiukajtis faddd %f26,%f22,%f22 !((conup2 - ansu2) + f2) + poly2 31725c28e83SPiotr Jasiukajtis 31825c28e83SPiotr Jasiukajtis/*34*/ add %o0,%o7,%o0 !intf + 0x00008000 (delay slot) 31925c28e83SPiotr Jasiukajtis sethi %hi(0x7fff0000),%o7 !mask for rounding argument 32025c28e83SPiotr Jasiukajtis fmuld %f36,%f0,%f10 !f*z 32125c28e83SPiotr Jasiukajtis fsubd %f36,%f0,%f20 !f - z 32225c28e83SPiotr Jasiukajtis 32325c28e83SPiotr Jasiukajtis/*35*/ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000 32425c28e83SPiotr Jasiukajtis faddd %f22,%f24,%f22 !ansl2 = (((conup2 - ansu2) + f2) + poly2) + conlo2 32525c28e83SPiotr Jasiukajtis 32625c28e83SPiotr Jasiukajtis/*37*/ sub %o0,%l7,%o2 !intz - 0x3f900000 32725c28e83SPiotr Jasiukajtis fsubd %f10,%f56,%f10 !(f*z - (-1.0)) 32825c28e83SPiotr Jasiukajtis ldd [%o4+%l3],%f6 !conup0 = __vlibm_TBL_atan1[index0] 32925c28e83SPiotr Jasiukajtis 33025c28e83SPiotr Jasiukajtis cmp %o0,%l6 !(|f| > 64) 33125c28e83SPiotr Jasiukajtis ble .ELSE1 !if(|f| > 64) then 33225c28e83SPiotr Jasiukajtis/*38*/ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15 33325c28e83SPiotr Jasiukajtis mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower 33425c28e83SPiotr Jasiukajtis ba .ENDIF1 !continue 33525c28e83SPiotr Jasiukajtis/*40*/ fdivd %f56,%f36,%f36 !f = -1.0/f (delay slot) 33625c28e83SPiotr Jasiukajtis .ELSE1: !else f( |x| >= (1/64)) 33725c28e83SPiotr Jasiukajtis cmp %o0,%l7 !if intf >= 1/64 33825c28e83SPiotr Jasiukajtis bl .ENDIF1 !if( |x| >= (1/64) ) then... 33925c28e83SPiotr Jasiukajtis mov 0,%o1 !index == 0 , point to conup,conlo = 0,0 34025c28e83SPiotr Jasiukajtis add %o3,4,%o1 !index = index + 4 34125c28e83SPiotr Jasiukajtis/*40*/ fdivd %f20,%f10,%f36 !f = (f - z)/(1.0 + f*z), reduced argument 34225c28e83SPiotr Jasiukajtis .ENDIF1: 34325c28e83SPiotr Jasiukajtis 34425c28e83SPiotr Jasiukajtis/*41*/sll %o1,3,%l4 !index1 = index 34525c28e83SPiotr Jasiukajtis mov %i3,%l1 !yaddr1 = address of y 34625c28e83SPiotr Jasiukajtis fmuld %f34,%f34,%f4 !tmp0= f0*f0 34725c28e83SPiotr Jasiukajtis faddd %f28,%f22,%f38 !ans2 = ansu2 + ansl2 348*55fea89dSDan Cross 34925c28e83SPiotr Jasiukajtis/*44*/add %i1,%i2,%i1 !x += stridex 35025c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 !y += stridey 35125c28e83SPiotr Jasiukajtis fmuld %f58,%f4,%f2 !p[3]*tmp0 35225c28e83SPiotr Jasiukajtis faddd %f6,%f34,%f8 !ansu0 = conup0 + f0 35325c28e83SPiotr Jasiukajtis for %f38,%f44,%f38 !sign(ans2) = sign of argument 35425c28e83SPiotr Jasiukajtis std %f38,[%l2] !*yaddr2 = ans2 35525c28e83SPiotr Jasiukajtis cmp %i5,0 !if argcount =0, we are done 35625c28e83SPiotr Jasiukajtis be .RETURN 35725c28e83SPiotr Jasiukajtis nop 35825c28e83SPiotr Jasiukajtis 35925c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 36025c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 36125c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 36225c28e83SPiotr Jasiukajtis 36325c28e83SPiotr Jasiukajtis.LOOP2: 36425c28e83SPiotr Jasiukajtis/*46*/ deccc %i0 !--n 36525c28e83SPiotr Jasiukajtis bneg 1f 36625c28e83SPiotr Jasiukajtis mov %i1,%o5 !xuse = x (delay slot) 36725c28e83SPiotr Jasiukajtis ba 2f 36825c28e83SPiotr Jasiukajtis nop !delay slot 36925c28e83SPiotr Jasiukajtis1: 37025c28e83SPiotr Jasiukajtis PIC_SET(g5,.COEFFS+8*WSIZE,o5) 37125c28e83SPiotr Jasiukajtis dec %i5 !argcount-- 37225c28e83SPiotr Jasiukajtis2: 37325c28e83SPiotr Jasiukajtis 37425c28e83SPiotr Jasiukajtis/*47*/ sethi %hi(0x80000000),%o7 !mask for sign bit 37525c28e83SPiotr Jasiukajtis sethi %hi(0x43600000),%o1 !big = 0x43600000,0 37625c28e83SPiotr Jasiukajtis ld [%o5],%o0 !intf = pf[0] = f upper 37725c28e83SPiotr Jasiukajtis 37825c28e83SPiotr Jasiukajtis/*49*/ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0 37925c28e83SPiotr Jasiukajtis andn %o0,%o7,%o0 !intf = fabs(intf) 38025c28e83SPiotr Jasiukajtis ldd [%o5],%f38 !f = *x into f38 38125c28e83SPiotr Jasiukajtis 38225c28e83SPiotr Jasiukajtis/*51*/ sub %o1,%o0,%o1 !(-) if intf > big 38325c28e83SPiotr Jasiukajtis sub %o0,%o2,%o2 !(-) if intf < small 384*55fea89dSDan Cross fand %f38,%f32,%f44 !sign2 = sign bit 38525c28e83SPiotr Jasiukajtis 38625c28e83SPiotr Jasiukajtis/*52*/ orcc %o1,%o2,%g0 !(-) if either true 38725c28e83SPiotr Jasiukajtis bneg,pn %icc,.SPECIAL2 !if (-) goto special cases below 38825c28e83SPiotr Jasiukajtis fabsd %f38,%f38 !abs(f) (delay slot) 38925c28e83SPiotr Jasiukajtis !---------------------- 39025c28e83SPiotr Jasiukajtis 39125c28e83SPiotr Jasiukajtis/*53*/ fpadd32 %f38,%f52,%f0 !intf + 0x00008000 (again) 39225c28e83SPiotr Jasiukajtis faddd %f2,%f60,%f2 !p[3]*tmp0 + p[2] 39325c28e83SPiotr Jasiukajtis 39425c28e83SPiotr Jasiukajtis/*54*/ sethi %hi(0x8000),%o7 !rounding bit 39525c28e83SPiotr Jasiukajtis fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again) 39625c28e83SPiotr Jasiukajtis 39725c28e83SPiotr Jasiukajtis/*55*/ add %o0,%o7,%o0 !intf + 0x00008000 (delay slot) 39825c28e83SPiotr Jasiukajtis sethi %hi(0x7fff0000),%o7 !mask for rounding argument 39925c28e83SPiotr Jasiukajtis fmuld %f38,%f0,%f10 !f*z 40025c28e83SPiotr Jasiukajtis fsubd %f38,%f0,%f20 !f - z 40125c28e83SPiotr Jasiukajtis 40225c28e83SPiotr Jasiukajtis/*56*/ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000 40325c28e83SPiotr Jasiukajtis fmuld %f2,%f4,%f2 !(p3*tmp0 + p2)*tmp0 40425c28e83SPiotr Jasiukajtis fsubd %f6,%f8,%f6 !conup0 - ansu0 40525c28e83SPiotr Jasiukajtis 40625c28e83SPiotr Jasiukajtis/*58*/ sub %o0,%l7,%o2 !intz - 0x3f900000 40725c28e83SPiotr Jasiukajtis fsubd %f10,%f56,%f10 !(f*z - (-1.0)) 40825c28e83SPiotr Jasiukajtis ldd [%o4+%l4],%f16 !conup1 = __vlibm_TBL_atan1[index1] 40925c28e83SPiotr Jasiukajtis 41025c28e83SPiotr Jasiukajtis cmp %o0,%l6 !(|f| > 64) 41125c28e83SPiotr Jasiukajtis ble .ELSE2 !if(|f| > 64) then 41225c28e83SPiotr Jasiukajtis/*60*/ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15 41325c28e83SPiotr Jasiukajtis mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower 41425c28e83SPiotr Jasiukajtis ba .ENDIF2 !continue 41525c28e83SPiotr Jasiukajtis/*61*/ fdivd %f56,%f38,%f38 !f = -1.0/f (delay slot) 41625c28e83SPiotr Jasiukajtis .ELSE2: !else f( |x| >= (1/64)) 41725c28e83SPiotr Jasiukajtis cmp %o0,%l7 !if intf >= 1/64 41825c28e83SPiotr Jasiukajtis bl .ENDIF2 !if( |x| >= (1/64) ) then... 41925c28e83SPiotr Jasiukajtis mov 0,%o1 !index == 0 , point to conup,conlo = 0,0 42025c28e83SPiotr Jasiukajtis add %o3,4,%o1 !index = index + 4 42125c28e83SPiotr Jasiukajtis/*61*/ fdivd %f20,%f10,%f38 !f = (f - z)/(1.0 + f*z), reduced argument 42225c28e83SPiotr Jasiukajtis .ENDIF2: 42325c28e83SPiotr Jasiukajtis 424*55fea89dSDan Cross 42525c28e83SPiotr Jasiukajtis/*62*/ sll %o1,3,%l5 !index2 = index 42625c28e83SPiotr Jasiukajtis mov %i3,%l2 !yaddr2 = address of y 42725c28e83SPiotr Jasiukajtis fmuld %f34,%f4,%f4 !f0*tmp0 42825c28e83SPiotr Jasiukajtis faddd %f2,%f62,%f2 !(p3*tmp0 + p2)*tmp0 + p1 42925c28e83SPiotr Jasiukajtis fmuld %f36,%f36,%f14 !tmp1= f1*f1 43025c28e83SPiotr Jasiukajtis 43125c28e83SPiotr Jasiukajtis/*65*/add %o4,%l3,%l3 !base addr + index0 43225c28e83SPiotr Jasiukajtis fmuld %f4,%f2,%f2 !poly0 = (f0*tmp0)*((p3*tmp0 + p2)*tmp0 + p1) 43325c28e83SPiotr Jasiukajtis faddd %f6,%f34,%f6 !(conup0 - ansu0) + f0 43425c28e83SPiotr Jasiukajtis fmuld %f58,%f14,%f12 !p[3]*tmp1 43525c28e83SPiotr Jasiukajtis faddd %f16,%f36,%f18 !ansu1 = conup1 + f1 43625c28e83SPiotr Jasiukajtis ldd [%l3+WSIZE],%f4 !conlo0 = __vlibm_TBL_atan1[index0+1] 43725c28e83SPiotr Jasiukajtis 43825c28e83SPiotr Jasiukajtis/*68*/ add %i1,%i2,%i1 !x += stridex 43925c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 !y += stridey 44025c28e83SPiotr Jasiukajtis faddd %f6,%f2,%f2 !((conup0 - ansu0) + f0) + poly0 44125c28e83SPiotr Jasiukajtis faddd %f12,%f60,%f12 !p[3]*tmp1 + p[2] 44225c28e83SPiotr Jasiukajtis 44325c28e83SPiotr Jasiukajtis/*71*/faddd %f2,%f4,%f2 !ansl0 = (((conup0 - ansu)0 + f0) + poly0) + conlo0 44425c28e83SPiotr Jasiukajtis fmuld %f12,%f14,%f12 !(p3*tmp1 + p2)*tmp1 44525c28e83SPiotr Jasiukajtis fsubd %f16,%f18,%f16 !conup1 - ansu1 44625c28e83SPiotr Jasiukajtis 44725c28e83SPiotr Jasiukajtis/*74*/faddd %f8,%f2,%f34 !ans0 = ansu0 + ansl0 44825c28e83SPiotr Jasiukajtis fmuld %f36,%f14,%f14 !f1*tmp1 44925c28e83SPiotr Jasiukajtis faddd %f12,%f62,%f12 !(p3*tmp1 + p2)*tmp1 + p1 45025c28e83SPiotr Jasiukajtis 45125c28e83SPiotr Jasiukajtis/*77*/ for %f34,%f40,%f34 !sign(ans0) = sign of argument 45225c28e83SPiotr Jasiukajtis std %f34,[%l0] !*yaddr0 = ans, always gets stored (delay slot) 45325c28e83SPiotr Jasiukajtis cmp %i5,0 !if argcount =0, we are done 45425c28e83SPiotr Jasiukajtis bg .MAINLOOP 45525c28e83SPiotr Jasiukajtis nop 45625c28e83SPiotr Jasiukajtis 45725c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 45825c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 45925c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 46025c28e83SPiotr Jasiukajtis 46125c28e83SPiotr Jasiukajtis.RETURN: 46225c28e83SPiotr Jasiukajtis ret 46325c28e83SPiotr Jasiukajtis restore %g0,%g0,%g0 46425c28e83SPiotr Jasiukajtis 46525c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 46625c28e83SPiotr Jasiukajtis /*------------SPECIAL CASE HANDLING FOR LOOP0 ------------------------------*/ 46725c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 46825c28e83SPiotr Jasiukajtis 46925c28e83SPiotr Jasiukajtis/* at this point 47025c28e83SPiotr Jasiukajtis %i1 x address 47125c28e83SPiotr Jasiukajtis %o0 intf 47225c28e83SPiotr Jasiukajtis %o2 intf - 0x3e300000 47325c28e83SPiotr Jasiukajtis %f34,36,38 f0,f1,f2 47425c28e83SPiotr Jasiukajtis %f40,42,44 sign0,sign1,sign2 47525c28e83SPiotr Jasiukajtis*/ 47625c28e83SPiotr Jasiukajtis 47725c28e83SPiotr Jasiukajtis .align 32 !align on I-cache boundary 47825c28e83SPiotr Jasiukajtis.SPECIAL0: 47925c28e83SPiotr Jasiukajtis orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000 48025c28e83SPiotr Jasiukajtis bpos 1f !if >=...continue 48125c28e83SPiotr Jasiukajtis sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this) 48225c28e83SPiotr Jasiukajtis ba 3f 48325c28e83SPiotr Jasiukajtis faddd %f34,%f50,%f30 !dummy op just to generate exception (delay slot) 48425c28e83SPiotr Jasiukajtis1: 485*55fea89dSDan Cross ld [%o5+4],%o5 !load x lower word 48625c28e83SPiotr Jasiukajtis sllx %o0,32,%o0 !left justify intf 48725c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 !left justify Inf 48825c28e83SPiotr Jasiukajtis or %o0,%o5,%o0 !merge in lower intf 48925c28e83SPiotr Jasiukajtis cmp %o0,%g1 !if intf > 0x7ff00000 00000000 49025c28e83SPiotr Jasiukajtis ble,pt %xcc,2f !pass thru if NaN 49125c28e83SPiotr Jasiukajtis nop 49225c28e83SPiotr Jasiukajtis fmuld %f34,%f34,%f34 !...... (x*x) trigger invalid exception 49325c28e83SPiotr Jasiukajtis ba 3f 49425c28e83SPiotr Jasiukajtis nop 49525c28e83SPiotr Jasiukajtis2: 496*55fea89dSDan Cross faddd %f46,%f48,%f34 !ans = pi/2 upper + pi/2 lower 497*55fea89dSDan Cross3: 49825c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 !x += stridex 49925c28e83SPiotr Jasiukajtis for %f34,%f40,%f34 !sign(ans) = sign of argument 50025c28e83SPiotr Jasiukajtis std %f34,[%i3] !*y = ans 50125c28e83SPiotr Jasiukajtis ba .LOOP0 !keep looping 50225c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 !y += stridey (delay slot) 50325c28e83SPiotr Jasiukajtis 50425c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 50525c28e83SPiotr Jasiukajtis /*-----------SPECIAL CASE HANDLING FOR LOOP1 -------------------------------*/ 50625c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 50725c28e83SPiotr Jasiukajtis 50825c28e83SPiotr Jasiukajtis .align 32 !align on I-cache boundary 50925c28e83SPiotr Jasiukajtis.SPECIAL1: 51025c28e83SPiotr Jasiukajtis orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000 51125c28e83SPiotr Jasiukajtis bpos 1f !if >=...continue 51225c28e83SPiotr Jasiukajtis sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this) 51325c28e83SPiotr Jasiukajtis ba 3f 51425c28e83SPiotr Jasiukajtis faddd %f36,%f50,%f30 !dummy op just to generate exception (delay slot) 51525c28e83SPiotr Jasiukajtis1: 516*55fea89dSDan Cross ld [%o5+4],%o5 !load x lower word 51725c28e83SPiotr Jasiukajtis sllx %o0,32,%o0 !left justify intf 51825c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 !left justify Inf 51925c28e83SPiotr Jasiukajtis or %o0,%o5,%o0 !merge in lower intf 52025c28e83SPiotr Jasiukajtis cmp %o0,%g1 !if intf > 0x7ff00000 00000000 52125c28e83SPiotr Jasiukajtis ble,pt %xcc,2f !pass thru if NaN 52225c28e83SPiotr Jasiukajtis nop 52325c28e83SPiotr Jasiukajtis fmuld %f36,%f36,%f36 !...... (x*x) trigger invalid exception 52425c28e83SPiotr Jasiukajtis ba 3f 52525c28e83SPiotr Jasiukajtis nop 52625c28e83SPiotr Jasiukajtis2: 527*55fea89dSDan Cross faddd %f46,%f48,%f36 !ans = pi/2 upper + pi/2 lower 528*55fea89dSDan Cross3: 52925c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 !x += stridex 53025c28e83SPiotr Jasiukajtis for %f36,%f42,%f36 !sign(ans) = sign of argument 53125c28e83SPiotr Jasiukajtis std %f36,[%i3] !*y = ans 53225c28e83SPiotr Jasiukajtis ba .LOOP1 !keep looping 53325c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 !y += stridey (delay slot) 53425c28e83SPiotr Jasiukajtis 53525c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 53625c28e83SPiotr Jasiukajtis /*------------SPECIAL CASE HANDLING FOR LOOP2 ------------------------------*/ 53725c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 53825c28e83SPiotr Jasiukajtis 53925c28e83SPiotr Jasiukajtis .align 32 !align on I-cache boundary 54025c28e83SPiotr Jasiukajtis.SPECIAL2: 54125c28e83SPiotr Jasiukajtis orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000 54225c28e83SPiotr Jasiukajtis bpos 1f !if >=...continue 54325c28e83SPiotr Jasiukajtis sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this) 54425c28e83SPiotr Jasiukajtis ba 3f 54525c28e83SPiotr Jasiukajtis faddd %f38,%f50,%f30 !dummy op just to generate exception (delay slot) 54625c28e83SPiotr Jasiukajtis1: 547*55fea89dSDan Cross ld [%o5+4],%o5 !load x lower word 54825c28e83SPiotr Jasiukajtis sllx %o0,32,%o0 !left justify intf 54925c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 !left justify Inf 55025c28e83SPiotr Jasiukajtis or %o0,%o5,%o0 !merge in lower intf 55125c28e83SPiotr Jasiukajtis cmp %o0,%g1 !if intf > 0x7ff00000 00000000 55225c28e83SPiotr Jasiukajtis ble,pt %xcc,2f !pass thru if NaN 55325c28e83SPiotr Jasiukajtis nop 55425c28e83SPiotr Jasiukajtis fmuld %f38,%f38,%f38 !...... (x*x) trigger invalid exception 55525c28e83SPiotr Jasiukajtis ba 3f 55625c28e83SPiotr Jasiukajtis nop 55725c28e83SPiotr Jasiukajtis2: 558*55fea89dSDan Cross faddd %f46,%f48,%f38 !ans = pi/2 upper + pi/2 lower 559*55fea89dSDan Cross3: 56025c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 !x += stridex 56125c28e83SPiotr Jasiukajtis for %f38,%f44,%f38 !sign(ans) = sign of argument 56225c28e83SPiotr Jasiukajtis std %f38,[%i3] !*y = ans 56325c28e83SPiotr Jasiukajtis ba .LOOP2 !keep looping 56425c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 !y += stridey 56525c28e83SPiotr Jasiukajtis 56625c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 56725c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 56825c28e83SPiotr Jasiukajtis /*--------------------------------------------------------------------------*/ 56925c28e83SPiotr Jasiukajtis 57025c28e83SPiotr Jasiukajtis SET_SIZE(__vatan) 57125c28e83SPiotr Jasiukajtis 57225c28e83SPiotr Jasiukajtis! .ident "03-20-96 Sparc V9 3-way-unrolled version" 573