1*25c28e83SPiotr Jasiukajtis/* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis/* 22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23*25c28e83SPiotr Jasiukajtis */ 24*25c28e83SPiotr Jasiukajtis/* 25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 27*25c28e83SPiotr Jasiukajtis */ 28*25c28e83SPiotr Jasiukajtis 29*25c28e83SPiotr Jasiukajtis .file "__vsqrtf_ultra3.S" 30*25c28e83SPiotr Jasiukajtis 31*25c28e83SPiotr Jasiukajtis#include "libm.h" 32*25c28e83SPiotr Jasiukajtis .weak __vsqrtf 33*25c28e83SPiotr Jasiukajtis .type __vsqrtf,#function 34*25c28e83SPiotr Jasiukajtis __vsqrtf = __vsqrtf_ultra3 35*25c28e83SPiotr Jasiukajtis 36*25c28e83SPiotr Jasiukajtis RO_DATA 37*25c28e83SPiotr Jasiukajtis .align 64 38*25c28e83SPiotr Jasiukajtis 39*25c28e83SPiotr Jasiukajtis.CONST_TBL: 40*25c28e83SPiotr Jasiukajtis .word 0x3fe00001, 0x80007e00 ! K1 = 5.00000715259318464227e-01 41*25c28e83SPiotr Jasiukajtis .word 0xbfc00003, 0xc0017a01 ! K2 = -1.25000447037521686593e-01 42*25c28e83SPiotr Jasiukajtis .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff 43*25c28e83SPiotr Jasiukajtis .word 0x3ff00000, 0x00000000 ! DC1 = 0x3ff0000000000000 44*25c28e83SPiotr Jasiukajtis .word 0x7ffff000, 0x00000000 ! DC2 = 0x7ffff00000000000 45*25c28e83SPiotr Jasiukajtis 46*25c28e83SPiotr Jasiukajtis#define DC0 %f6 47*25c28e83SPiotr Jasiukajtis#define DC1 %f4 48*25c28e83SPiotr Jasiukajtis#define DC2 %f2 49*25c28e83SPiotr Jasiukajtis#define K2 %f38 50*25c28e83SPiotr Jasiukajtis#define K1 %f36 51*25c28e83SPiotr Jasiukajtis#define TBL %l2 52*25c28e83SPiotr Jasiukajtis#define stridex %l3 53*25c28e83SPiotr Jasiukajtis#define stridey %l4 54*25c28e83SPiotr Jasiukajtis#define _0x1ff0 %l5 55*25c28e83SPiotr Jasiukajtis#define counter %l6 56*25c28e83SPiotr Jasiukajtis#define _0x00800000 %l7 57*25c28e83SPiotr Jasiukajtis#define _0x7f800000 %o0 58*25c28e83SPiotr Jasiukajtis 59*25c28e83SPiotr Jasiukajtis#define tmp_px STACK_BIAS-0x40 60*25c28e83SPiotr Jasiukajtis#define tmp_counter STACK_BIAS-0x38 61*25c28e83SPiotr Jasiukajtis#define tmp0 STACK_BIAS-0x30 62*25c28e83SPiotr Jasiukajtis#define tmp1 STACK_BIAS-0x28 63*25c28e83SPiotr Jasiukajtis#define tmp2 STACK_BIAS-0x20 64*25c28e83SPiotr Jasiukajtis#define tmp3 STACK_BIAS-0x18 65*25c28e83SPiotr Jasiukajtis#define tmp4 STACK_BIAS-0x10 66*25c28e83SPiotr Jasiukajtis 67*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9 68*25c28e83SPiotr Jasiukajtis#define tmps 0x40 69*25c28e83SPiotr Jasiukajtis 70*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 71*25c28e83SPiotr Jasiukajtis! !!!!! algorithm !!!!! 72*25c28e83SPiotr Jasiukajtis! 73*25c28e83SPiotr Jasiukajtis! x0 = *px; 74*25c28e83SPiotr Jasiukajtis! ax = *(int*)px; 75*25c28e83SPiotr Jasiukajtis! px += stridex; 76*25c28e83SPiotr Jasiukajtis! 77*25c28e83SPiotr Jasiukajtis! if( ax >= 0x7f800000 ) 78*25c28e83SPiotr Jasiukajtis! { 79*25c28e83SPiotr Jasiukajtis! *py = sqrtf(x0); 80*25c28e83SPiotr Jasiukajtis! py += stridey; 81*25c28e83SPiotr Jasiukajtis! continue; 82*25c28e83SPiotr Jasiukajtis! } 83*25c28e83SPiotr Jasiukajtis! if( ax < 0x00800000 ) 84*25c28e83SPiotr Jasiukajtis! { 85*25c28e83SPiotr Jasiukajtis! *py = sqrtf(x0); 86*25c28e83SPiotr Jasiukajtis! py += stridey; 87*25c28e83SPiotr Jasiukajtis! continue; 88*25c28e83SPiotr Jasiukajtis! } 89*25c28e83SPiotr Jasiukajtis! 90*25c28e83SPiotr Jasiukajtis! db0 = (double)x0; 91*25c28e83SPiotr Jasiukajtis! iexp0 = ax >> 24; 92*25c28e83SPiotr Jasiukajtis! iexp0 += 0x3c0; 93*25c28e83SPiotr Jasiukajtis! lexp0 = (long long)iexp0 << 52; 94*25c28e83SPiotr Jasiukajtis! 95*25c28e83SPiotr Jasiukajtis! db0 = vis_fand(db0,DC0); 96*25c28e83SPiotr Jasiukajtis! db0 = vis_for(db0,DC1); 97*25c28e83SPiotr Jasiukajtis! hi0 = vis_fand(db0,DC2); 98*25c28e83SPiotr Jasiukajtis! 99*25c28e83SPiotr Jasiukajtis! ax >>= 11; 100*25c28e83SPiotr Jasiukajtis! si0 = ax & 0x1ff0; 101*25c28e83SPiotr Jasiukajtis! dtmp0 = ((double*)((char*)TBL + si0))[0]; 102*25c28e83SPiotr Jasiukajtis! xx0 = (db0 - hi0); 103*25c28e83SPiotr Jasiukajtis! xx0 *= dtmp0; 104*25c28e83SPiotr Jasiukajtis! dtmp0 = ((double*)((char*)TBL + si0))[1] 105*25c28e83SPiotr Jasiukajtis! res0 = K2 * xx0; 106*25c28e83SPiotr Jasiukajtis! res0 += K1; 107*25c28e83SPiotr Jasiukajtis! res0 *= xx0; 108*25c28e83SPiotr Jasiukajtis! res0 += DC1; 109*25c28e83SPiotr Jasiukajtis! res0 = dtmp0 * res0; 110*25c28e83SPiotr Jasiukajtis! dtmp1 = *((double*)&lexp0); 111*25c28e83SPiotr Jasiukajtis! res0 *= dtmp1; 112*25c28e83SPiotr Jasiukajtis! fres0 = (float)res0; 113*25c28e83SPiotr Jasiukajtis! *py = fres0; 114*25c28e83SPiotr Jasiukajtis! py += stridey; 115*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 116*25c28e83SPiotr Jasiukajtis 117*25c28e83SPiotr Jasiukajtis ENTRY(__vsqrtf_ultra3) 118*25c28e83SPiotr Jasiukajtis save %sp,-SA(MINFRAME)-tmps,%sp 119*25c28e83SPiotr Jasiukajtis PIC_SETUP(l7) 120*25c28e83SPiotr Jasiukajtis PIC_SET(l7,.CONST_TBL,o2) 121*25c28e83SPiotr Jasiukajtis PIC_SET(l7,__vlibm_TBL_sqrtf,l2) 122*25c28e83SPiotr Jasiukajtis 123*25c28e83SPiotr Jasiukajtis st %i0,[%fp+tmp_counter] 124*25c28e83SPiotr Jasiukajtis sll %i2,2,stridex 125*25c28e83SPiotr Jasiukajtis or %g0,0xff8,%l5 126*25c28e83SPiotr Jasiukajtis 127*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 128*25c28e83SPiotr Jasiukajtis sll %l5,1,_0x1ff0 129*25c28e83SPiotr Jasiukajtis 130*25c28e83SPiotr Jasiukajtis ldd [%o2],K1 131*25c28e83SPiotr Jasiukajtis sll %i4,2,stridey 132*25c28e83SPiotr Jasiukajtis 133*25c28e83SPiotr Jasiukajtis ldd [%o2+8],K2 134*25c28e83SPiotr Jasiukajtis or %g0,%i3,%g5 135*25c28e83SPiotr Jasiukajtis 136*25c28e83SPiotr Jasiukajtis ldd [%o2+16],DC0 137*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%o0 138*25c28e83SPiotr Jasiukajtis 139*25c28e83SPiotr Jasiukajtis ldd [%o2+24],DC1 140*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%l7 141*25c28e83SPiotr Jasiukajtis 142*25c28e83SPiotr Jasiukajtis ldd [%o2+32],DC2 143*25c28e83SPiotr Jasiukajtis 144*25c28e83SPiotr Jasiukajtis.begin: 145*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_counter],counter 146*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_px],%i1 147*25c28e83SPiotr Jasiukajtis st %g0,[%fp+tmp_counter] 148*25c28e83SPiotr Jasiukajtis.begin1: 149*25c28e83SPiotr Jasiukajtis cmp counter,0 150*25c28e83SPiotr Jasiukajtis ble,pn %icc,.exit 151*25c28e83SPiotr Jasiukajtis 152*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%o2 ! (2_0) ax = *(int*)px; 153*25c28e83SPiotr Jasiukajtis 154*25c28e83SPiotr Jasiukajtis or %g0,%i1,%o7 155*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f25 ! (2_0) x0 = *px; 156*25c28e83SPiotr Jasiukajtis 157*25c28e83SPiotr Jasiukajtis cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000 158*25c28e83SPiotr Jasiukajtis bge,pn %icc,.spec ! (2_0) if( ax >= 0x7f800000 ) 159*25c28e83SPiotr Jasiukajtis nop 160*25c28e83SPiotr Jasiukajtis 161*25c28e83SPiotr Jasiukajtis cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000 162*25c28e83SPiotr Jasiukajtis bl,pn %icc,.spec ! (2_0) if( ax < 0x00800000 ) 163*25c28e83SPiotr Jasiukajtis nop 164*25c28e83SPiotr Jasiukajtis 165*25c28e83SPiotr Jasiukajtis fstod %f25,%f56 ! (2_0) db0 = (double)x0; 166*25c28e83SPiotr Jasiukajtis 167*25c28e83SPiotr Jasiukajtis lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px; 168*25c28e83SPiotr Jasiukajtis 169*25c28e83SPiotr Jasiukajtis sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24; 170*25c28e83SPiotr Jasiukajtis 171*25c28e83SPiotr Jasiukajtis add %o7,stridex,%i1 ! px += stridex 172*25c28e83SPiotr Jasiukajtis add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0; 173*25c28e83SPiotr Jasiukajtis lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px; 174*25c28e83SPiotr Jasiukajtis fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0); 175*25c28e83SPiotr Jasiukajtis 176*25c28e83SPiotr Jasiukajtis cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000 177*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update0 ! (3_0) if( ax >= 0x7f800000 ) 178*25c28e83SPiotr Jasiukajtis nop 179*25c28e83SPiotr Jasiukajtis.cont0: 180*25c28e83SPiotr Jasiukajtis sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52; 181*25c28e83SPiotr Jasiukajtis 182*25c28e83SPiotr Jasiukajtis sra %o2,11,%i2 ! (2_0) ax >>= 11; 183*25c28e83SPiotr Jasiukajtis stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0); 184*25c28e83SPiotr Jasiukajtis for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1); 185*25c28e83SPiotr Jasiukajtis 186*25c28e83SPiotr Jasiukajtis cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000 187*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update1 ! (3_0) if( ax < 0x00800000 ) 188*25c28e83SPiotr Jasiukajtis nop 189*25c28e83SPiotr Jasiukajtis.cont1: 190*25c28e83SPiotr Jasiukajtis fstod %f0,%f48 ! (3_0) db0 = (double)x0; 191*25c28e83SPiotr Jasiukajtis 192*25c28e83SPiotr Jasiukajtis and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0; 193*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px; 194*25c28e83SPiotr Jasiukajtis 195*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 196*25c28e83SPiotr Jasiukajtis add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0 197*25c28e83SPiotr Jasiukajtis fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2); 198*25c28e83SPiotr Jasiukajtis 199*25c28e83SPiotr Jasiukajtis sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24; 200*25c28e83SPiotr Jasiukajtis 201*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f13 ! (4_0) x0 = *px; 202*25c28e83SPiotr Jasiukajtis fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0); 203*25c28e83SPiotr Jasiukajtis 204*25c28e83SPiotr Jasiukajtis add %o4,960,%i0 ! (3_0) iexp0 += 0x3c0; 205*25c28e83SPiotr Jasiukajtis 206*25c28e83SPiotr Jasiukajtis cmp %o2,_0x7f800000 ! (4_1) ax ? 0x7f800000 207*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update2 ! (4_1) if( ax >= 0x7f800000 ) 208*25c28e83SPiotr Jasiukajtis nop 209*25c28e83SPiotr Jasiukajtis.cont2: 210*25c28e83SPiotr Jasiukajtis fsubd %f40,%f46,%f44 ! (2_1) xx0 = (db0 - hi0); 211*25c28e83SPiotr Jasiukajtis sllx %i0,52,%g1 ! (3_1) lexp0 = (long long)iexp0 << 52; 212*25c28e83SPiotr Jasiukajtis ldd [%i2],%f40 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0]; 213*25c28e83SPiotr Jasiukajtis 214*25c28e83SPiotr Jasiukajtis sra %o1,11,%l0 ! (3_1) ax >>= 11; 215*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+tmp1] ! (3_1) dtmp1 = *((double*)&lexp0); 216*25c28e83SPiotr Jasiukajtis for %f58,DC1,%f48 ! (3_1) db0 = vis_for(db0,DC1); 217*25c28e83SPiotr Jasiukajtis 218*25c28e83SPiotr Jasiukajtis cmp %o2,_0x00800000 ! (4_1) ax ? 0x00800000 219*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update3 ! (4_1) if( ax < 0x00800000 ) 220*25c28e83SPiotr Jasiukajtis nop 221*25c28e83SPiotr Jasiukajtis.cont3: 222*25c28e83SPiotr Jasiukajtis fstod %f13,%f50 ! (4_1) db0 = (double)x0; 223*25c28e83SPiotr Jasiukajtis 224*25c28e83SPiotr Jasiukajtis fmuld %f44,%f40,%f46 ! (2_1) xx0 *= dtmp0; 225*25c28e83SPiotr Jasiukajtis and %l0,_0x1ff0,%i0 ! (3_1) si0 = ax & 0x1ff0; 226*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%l1 ! (0_0) ax = *(int*)px; 227*25c28e83SPiotr Jasiukajtis 228*25c28e83SPiotr Jasiukajtis add %i0,TBL,%l0 ! (3_1) (char*)TBL + si0 229*25c28e83SPiotr Jasiukajtis fand %f48,DC2,%f62 ! (3_1) hi0 = vis_fand(db0,DC2); 230*25c28e83SPiotr Jasiukajtis 231*25c28e83SPiotr Jasiukajtis sra %o2,24,%o7 ! (4_1) iexp0 = ax >> 24; 232*25c28e83SPiotr Jasiukajtis 233*25c28e83SPiotr Jasiukajtis add %i1,stridex,%o4 ! px += stridex 234*25c28e83SPiotr Jasiukajtis add %o7,960,%o7 ! (4_1) iexp0 += 0x3c0; 235*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%f17 ! (0_0) x0 = *px; 236*25c28e83SPiotr Jasiukajtis fand %f50,DC0,%f54 ! (4_1) db0 = vis_fand(db0,DC0); 237*25c28e83SPiotr Jasiukajtis 238*25c28e83SPiotr Jasiukajtis fmuld K2,%f46,%f52 ! (2_1) res0 = K2 * xx0; 239*25c28e83SPiotr Jasiukajtis cmp %l1,_0x7f800000 ! (0_0) ax ? 0x7f800000 240*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update4 ! (0_0) if( ax >= 0x7f800000 ) 241*25c28e83SPiotr Jasiukajtis fsubd %f48,%f62,%f42 ! (3_1) xx0 = (db0 - hi0); 242*25c28e83SPiotr Jasiukajtis.cont4: 243*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o1 ! (4_1) lexp0 = (long long)iexp0 << 52; 244*25c28e83SPiotr Jasiukajtis ldd [%i0+TBL],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0]; 245*25c28e83SPiotr Jasiukajtis 246*25c28e83SPiotr Jasiukajtis sra %o2,11,%i5 ! (4_1) ax >>= 11; 247*25c28e83SPiotr Jasiukajtis stx %o1,[%fp+tmp2] ! (4_1) dtmp1 = *((double*)&lexp0); 248*25c28e83SPiotr Jasiukajtis for %f54,DC1,%f34 ! (4_1) db0 = vis_for(db0,DC1); 249*25c28e83SPiotr Jasiukajtis 250*25c28e83SPiotr Jasiukajtis cmp %l1,_0x00800000 ! (0_0) ax ? 0x00800000 251*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update5 ! (0_0) if( ax < 0x00800000 ) 252*25c28e83SPiotr Jasiukajtis nop 253*25c28e83SPiotr Jasiukajtis.cont5: 254*25c28e83SPiotr Jasiukajtis fstod %f17,%f56 ! (0_0) db0 = (double)x0; 255*25c28e83SPiotr Jasiukajtis 256*25c28e83SPiotr Jasiukajtis fmuld %f42,%f40,%f42 ! (3_1) xx0 *= dtmp0; 257*25c28e83SPiotr Jasiukajtis lda [stridex+%o4]0x82,%i0 ! (1_0) ax = *(int*)px; 258*25c28e83SPiotr Jasiukajtis faddd %f52,K1,%f52 ! (2_1) res0 += K1; 259*25c28e83SPiotr Jasiukajtis 260*25c28e83SPiotr Jasiukajtis sra %l1,24,%g1 ! (0_0) iexp0 = ax >> 24; 261*25c28e83SPiotr Jasiukajtis and %i5,_0x1ff0,%i5 ! (4_1) si0 = ax & 0x1ff0; 262*25c28e83SPiotr Jasiukajtis fand %f34,DC2,%f62 ! (4_1) hi0 = vis_fand(db0,DC2); 263*25c28e83SPiotr Jasiukajtis 264*25c28e83SPiotr Jasiukajtis add %o4,stridex,%i1 ! px += stridex 265*25c28e83SPiotr Jasiukajtis 266*25c28e83SPiotr Jasiukajtis add %g1,960,%o5 ! (0_0) iexp0 += 0x3c0; 267*25c28e83SPiotr Jasiukajtis add %i5,TBL,%i3 ! (4_1) (char*)TBL + si0 268*25c28e83SPiotr Jasiukajtis lda [stridex+%o4]0x82,%f21 ! (1_0) x0 = *px; 269*25c28e83SPiotr Jasiukajtis fand %f56,DC0,%f32 ! (0_0) db0 = vis_fand(db0,DC0); 270*25c28e83SPiotr Jasiukajtis 271*25c28e83SPiotr Jasiukajtis fmuld K2,%f42,%f50 ! (3_1) res0 = K2 * xx0; 272*25c28e83SPiotr Jasiukajtis cmp %i0,_0x7f800000 ! (1_0) ax ? 0x7f800000 273*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update6 ! (1_0) if( ax >= 0x7f800000 ) 274*25c28e83SPiotr Jasiukajtis fsubd %f34,%f62,%f54 ! (4_1) xx0 = (db0 - hi0); 275*25c28e83SPiotr Jasiukajtis.cont6: 276*25c28e83SPiotr Jasiukajtis fmuld %f52,%f46,%f52 ! (2_1) res0 *= xx0; 277*25c28e83SPiotr Jasiukajtis sllx %o5,52,%o7 ! (0_0) lexp0 = (long long)iexp0 << 52; 278*25c28e83SPiotr Jasiukajtis ldd [TBL+%i5],%f62 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0]; 279*25c28e83SPiotr Jasiukajtis 280*25c28e83SPiotr Jasiukajtis sra %l1,11,%i4 ! (0_0) ax >>= 11; 281*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp3] ! (0_0) dtmp1 = *((double*)&lexp0); 282*25c28e83SPiotr Jasiukajtis for %f32,DC1,%f48 ! (0_0) db0 = vis_for(db0,DC1); 283*25c28e83SPiotr Jasiukajtis 284*25c28e83SPiotr Jasiukajtis cmp %i0,_0x00800000 ! (1_0) ax ? 0x00800000 285*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update7 ! (1_0) if( ax < 0x00800000 ) 286*25c28e83SPiotr Jasiukajtis nop 287*25c28e83SPiotr Jasiukajtis.cont7: 288*25c28e83SPiotr Jasiukajtis fstod %f21,%f56 ! (1_0) db0 = (double)x0; 289*25c28e83SPiotr Jasiukajtis 290*25c28e83SPiotr Jasiukajtis fmuld %f54,%f62,%f46 ! (4_1) xx0 *= dtmp0; 291*25c28e83SPiotr Jasiukajtis and %i4,_0x1ff0,%g1 ! (0_0) si0 = ax & 0x1ff0; 292*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%o2 ! (2_0) ax = *(int*)px; 293*25c28e83SPiotr Jasiukajtis faddd %f50,K1,%f62 ! (3_1) res0 += K1; 294*25c28e83SPiotr Jasiukajtis 295*25c28e83SPiotr Jasiukajtis add %g1,TBL,%i5 ! (0_0) (double*)((char*)TBL + si0 296*25c28e83SPiotr Jasiukajtis fand %f48,DC2,%f32 ! (0_0) hi0 = vis_fand(db0,DC2); 297*25c28e83SPiotr Jasiukajtis 298*25c28e83SPiotr Jasiukajtis sra %i0,24,%o4 ! (1_0) iexp0 = ax >> 24; 299*25c28e83SPiotr Jasiukajtis ldd [%i2+8],%f60 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1] 300*25c28e83SPiotr Jasiukajtis faddd %f52,DC1,%f58 ! (2_1) res0 += DC1; 301*25c28e83SPiotr Jasiukajtis 302*25c28e83SPiotr Jasiukajtis add %i1,stridex,%o7 ! px += stridex 303*25c28e83SPiotr Jasiukajtis add %o4,960,%i2 ! (1_0) iexp0 += 0x3c0; 304*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%f25 ! (2_0) x0 = *px; 305*25c28e83SPiotr Jasiukajtis fand %f56,DC0,%f34 ! (1_0) db0 = vis_fand(db0,DC0); 306*25c28e83SPiotr Jasiukajtis 307*25c28e83SPiotr Jasiukajtis fmuld K2,%f46,%f50 ! (4_1) res0 = K2 * xx0; 308*25c28e83SPiotr Jasiukajtis cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000 309*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update8 ! (2_0) if( ax >= 0x7f800000 ) 310*25c28e83SPiotr Jasiukajtis fsubd %f48,%f32,%f52 ! (0_0) xx0 = (db0 - hi0); 311*25c28e83SPiotr Jasiukajtis.cont8: 312*25c28e83SPiotr Jasiukajtis fmuld %f62,%f42,%f54 ! (3_1) res0 *= xx0; 313*25c28e83SPiotr Jasiukajtis sllx %i2,52,%o4 ! (1_0) lexp0 = (long long)iexp0 << 52; 314*25c28e83SPiotr Jasiukajtis ldd [TBL+%g1],%f32 ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0]; 315*25c28e83SPiotr Jasiukajtis 316*25c28e83SPiotr Jasiukajtis fmuld %f60,%f58,%f60 ! (2_1) res0 = dtmp0 * res0; 317*25c28e83SPiotr Jasiukajtis sra %i0,11,%g1 ! (1_0) ax >>= 11; 318*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+tmp4] ! (1_0) dtmp1 = *((double*)&lexp0); 319*25c28e83SPiotr Jasiukajtis for %f34,DC1,%f48 ! (1_0) db0 = vis_for(db0,DC1); 320*25c28e83SPiotr Jasiukajtis 321*25c28e83SPiotr Jasiukajtis cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000 322*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update9 ! (2_0) if( ax < 0x00800000 ) 323*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp0],%f40 ! (2_1) dtmp1 = *((double*)&lexp0); 324*25c28e83SPiotr Jasiukajtis fstod %f25,%f56 ! (2_0) db0 = (double)x0; 325*25c28e83SPiotr Jasiukajtis.cont9: 326*25c28e83SPiotr Jasiukajtis fmuld %f52,%f32,%f42 ! (0_0) xx0 *= dtmp0; 327*25c28e83SPiotr Jasiukajtis and %g1,_0x1ff0,%o5 ! (1_0) si0 = ax & 0x1ff0; 328*25c28e83SPiotr Jasiukajtis lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px; 329*25c28e83SPiotr Jasiukajtis faddd %f50,K1,%f34 ! (4_1) res0 += K1; 330*25c28e83SPiotr Jasiukajtis 331*25c28e83SPiotr Jasiukajtis add %o5,TBL,%i4 ! (1_0) (char*)TBL + si0 332*25c28e83SPiotr Jasiukajtis fand %f48,DC2,%f62 ! (1_0) hi0 = vis_fand(db0,DC2); 333*25c28e83SPiotr Jasiukajtis 334*25c28e83SPiotr Jasiukajtis fmuld %f60,%f40,%f32 ! (2_1) res0 *= dtmp1; 335*25c28e83SPiotr Jasiukajtis sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24; 336*25c28e83SPiotr Jasiukajtis ldd [%l0+8],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1] 337*25c28e83SPiotr Jasiukajtis faddd %f54,DC1,%f58 ! (3_1) res0 += DC1; 338*25c28e83SPiotr Jasiukajtis 339*25c28e83SPiotr Jasiukajtis add %o7,stridex,%i1 ! px += stridex 340*25c28e83SPiotr Jasiukajtis add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0; 341*25c28e83SPiotr Jasiukajtis lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px; 342*25c28e83SPiotr Jasiukajtis fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0); 343*25c28e83SPiotr Jasiukajtis 344*25c28e83SPiotr Jasiukajtis fmuld K2,%f42,%f50 ! (0_0) res0 = K2 * xx0; 345*25c28e83SPiotr Jasiukajtis cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000 346*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update10 ! (3_0) if( ax >= 0x7f800000 ) 347*25c28e83SPiotr Jasiukajtis fsubd %f48,%f62,%f54 ! (1_0) xx0 = (db0 - hi0); 348*25c28e83SPiotr Jasiukajtis.cont10: 349*25c28e83SPiotr Jasiukajtis fmuld %f34,%f46,%f52 ! (4_1) res0 *= xx0; 350*25c28e83SPiotr Jasiukajtis sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52; 351*25c28e83SPiotr Jasiukajtis ldd [TBL+%o5],%f56 ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0]; 352*25c28e83SPiotr Jasiukajtis 353*25c28e83SPiotr Jasiukajtis fmuld %f40,%f58,%f34 ! (3_1) res0 = dtmp0 * res0; 354*25c28e83SPiotr Jasiukajtis sra %o2,11,%i2 ! (2_0) ax >>= 11; 355*25c28e83SPiotr Jasiukajtis stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0); 356*25c28e83SPiotr Jasiukajtis for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1); 357*25c28e83SPiotr Jasiukajtis 358*25c28e83SPiotr Jasiukajtis cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000 359*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update11 ! (3_0) if( ax < 0x00800000 ) 360*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp1],%f62 ! (3_1) dtmp1 = *((double*)&lexp0); 361*25c28e83SPiotr Jasiukajtis fstod %f0,%f48 ! (3_0) db0 = (double)x0; 362*25c28e83SPiotr Jasiukajtis.cont11: 363*25c28e83SPiotr Jasiukajtis fmuld %f54,%f56,%f30 ! (1_0) xx0 *= dtmp0; 364*25c28e83SPiotr Jasiukajtis and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0; 365*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px; 366*25c28e83SPiotr Jasiukajtis faddd %f50,K1,%f56 ! (0_0) res0 += K1; 367*25c28e83SPiotr Jasiukajtis 368*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 369*25c28e83SPiotr Jasiukajtis add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0 370*25c28e83SPiotr Jasiukajtis fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2); 371*25c28e83SPiotr Jasiukajtis 372*25c28e83SPiotr Jasiukajtis fmuld %f34,%f62,%f28 ! (3_1) res0 *= dtmp1; 373*25c28e83SPiotr Jasiukajtis sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24; 374*25c28e83SPiotr Jasiukajtis ldd [%i3+8],%f50 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1] 375*25c28e83SPiotr Jasiukajtis faddd %f52,DC1,%f54 ! (4_1) res0 += DC1; 376*25c28e83SPiotr Jasiukajtis 377*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f13 ! (4_0) x0 = *px; 378*25c28e83SPiotr Jasiukajtis fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0); 379*25c28e83SPiotr Jasiukajtis 380*25c28e83SPiotr Jasiukajtis or %g0,%g5,%i3 381*25c28e83SPiotr Jasiukajtis cmp counter,5 382*25c28e83SPiotr Jasiukajtis bl,pn %icc,.tail 383*25c28e83SPiotr Jasiukajtis add %o4,960,%g5 ! (3_0) iexp0 += 0x3c0; 384*25c28e83SPiotr Jasiukajtis 385*25c28e83SPiotr Jasiukajtis ba .main_loop 386*25c28e83SPiotr Jasiukajtis sub counter,5,counter ! counter 387*25c28e83SPiotr Jasiukajtis 388*25c28e83SPiotr Jasiukajtis .align 16 389*25c28e83SPiotr Jasiukajtis.main_loop: 390*25c28e83SPiotr Jasiukajtis fmuld K2,%f30,%f60 ! (1_1) res0 = K2 * xx0; 391*25c28e83SPiotr Jasiukajtis cmp %o2,_0x7f800000 ! (4_1) ax ? 0x7f800000 392*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update12 ! (4_1) if( ax >= 0x7f800000 ) 393*25c28e83SPiotr Jasiukajtis fsubd %f40,%f46,%f44 ! (2_1) xx0 = (db0 - hi0); 394*25c28e83SPiotr Jasiukajtis.cont12: 395*25c28e83SPiotr Jasiukajtis fmuld %f56,%f42,%f52 ! (0_1) res0 *= xx0; 396*25c28e83SPiotr Jasiukajtis sllx %g5,52,%g5 ! (3_1) lexp0 = (long long)iexp0 << 52; 397*25c28e83SPiotr Jasiukajtis ldd [%i2],%f40 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0]; 398*25c28e83SPiotr Jasiukajtis fdtos %f32,%f15 ! (2_2) fres0 = (float)res0; 399*25c28e83SPiotr Jasiukajtis 400*25c28e83SPiotr Jasiukajtis fmuld %f50,%f54,%f42 ! (4_2) res0 = dtmp0 * res0; 401*25c28e83SPiotr Jasiukajtis sra %o1,11,%l0 ! (3_1) ax >>= 11; 402*25c28e83SPiotr Jasiukajtis stx %g5,[%fp+tmp1] ! (3_1) dtmp1 = *((double*)&lexp0); 403*25c28e83SPiotr Jasiukajtis for %f58,DC1,%f48 ! (3_1) db0 = vis_for(db0,DC1); 404*25c28e83SPiotr Jasiukajtis 405*25c28e83SPiotr Jasiukajtis cmp %o2,_0x00800000 ! (4_1) ax ? 0x00800000 406*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update13 ! (4_1) if( ax < 0x00800000 ) 407*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp2],%f56 ! (4_2) dtmp1 = *((double*)&lexp0); 408*25c28e83SPiotr Jasiukajtis fstod %f13,%f50 ! (4_1) db0 = (double)x0; 409*25c28e83SPiotr Jasiukajtis.cont13: 410*25c28e83SPiotr Jasiukajtis fmuld %f44,%f40,%f46 ! (2_1) xx0 *= dtmp0; 411*25c28e83SPiotr Jasiukajtis and %l0,_0x1ff0,%i0 ! (3_1) si0 = ax & 0x1ff0; 412*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%l1 ! (0_0) ax = *(int*)px; 413*25c28e83SPiotr Jasiukajtis faddd %f60,K1,%f32 ! (1_1) res0 += K1; 414*25c28e83SPiotr Jasiukajtis 415*25c28e83SPiotr Jasiukajtis add %i0,TBL,%l0 ! (3_1) (char*)TBL + si0 416*25c28e83SPiotr Jasiukajtis add %i3,stridey,%o3 ! py += stridey 417*25c28e83SPiotr Jasiukajtis st %f15,[%i3] ! (2_2) *py = fres0; 418*25c28e83SPiotr Jasiukajtis fand %f48,DC2,%f62 ! (3_1) hi0 = vis_fand(db0,DC2); 419*25c28e83SPiotr Jasiukajtis 420*25c28e83SPiotr Jasiukajtis fmuld %f42,%f56,%f44 ! (4_2) res0 *= dtmp1; 421*25c28e83SPiotr Jasiukajtis sra %o2,24,%o7 ! (4_1) iexp0 = ax >> 24; 422*25c28e83SPiotr Jasiukajtis ldd [%i5+8],%f58 ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1] 423*25c28e83SPiotr Jasiukajtis faddd %f52,DC1,%f34 ! (0_1) res0 += DC1; 424*25c28e83SPiotr Jasiukajtis 425*25c28e83SPiotr Jasiukajtis add %i1,stridex,%o4 ! px += stridex 426*25c28e83SPiotr Jasiukajtis add %o7,960,%o7 ! (4_1) iexp0 += 0x3c0; 427*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%f17 ! (0_0) x0 = *px; 428*25c28e83SPiotr Jasiukajtis fand %f50,DC0,%f54 ! (4_1) db0 = vis_fand(db0,DC0); 429*25c28e83SPiotr Jasiukajtis 430*25c28e83SPiotr Jasiukajtis fmuld K2,%f46,%f52 ! (2_1) res0 = K2 * xx0; 431*25c28e83SPiotr Jasiukajtis cmp %l1,_0x7f800000 ! (0_0) ax ? 0x7f800000 432*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update14 ! (0_0) if( ax >= 0x7f800000 ) 433*25c28e83SPiotr Jasiukajtis fsubd %f48,%f62,%f42 ! (3_1) xx0 = (db0 - hi0); 434*25c28e83SPiotr Jasiukajtis.cont14: 435*25c28e83SPiotr Jasiukajtis fmuld %f32,%f30,%f48 ! (1_1) res0 *= xx0; 436*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o1 ! (4_1) lexp0 = (long long)iexp0 << 52; 437*25c28e83SPiotr Jasiukajtis ldd [%i0+TBL],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0]; 438*25c28e83SPiotr Jasiukajtis fdtos %f28,%f19 ! (3_2) fres0 = (float)res0; 439*25c28e83SPiotr Jasiukajtis 440*25c28e83SPiotr Jasiukajtis fmuld %f58,%f34,%f32 ! (0_1) res0 = dtmp0 * res0; 441*25c28e83SPiotr Jasiukajtis sra %o2,11,%i5 ! (4_1) ax >>= 11; 442*25c28e83SPiotr Jasiukajtis stx %o1,[%fp+tmp2] ! (4_1) dtmp1 = *((double*)&lexp0); 443*25c28e83SPiotr Jasiukajtis for %f54,DC1,%f34 ! (4_1) db0 = vis_for(db0,DC1); 444*25c28e83SPiotr Jasiukajtis 445*25c28e83SPiotr Jasiukajtis cmp %l1,_0x00800000 ! (0_0) ax ? 0x00800000 446*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update15 ! (0_0) if( ax < 0x00800000 ) 447*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp3],%f60 ! (0_1) dtmp1 = *((double*)&lexp0); 448*25c28e83SPiotr Jasiukajtis fstod %f17,%f56 ! (0_0) db0 = (double)x0; 449*25c28e83SPiotr Jasiukajtis.cont15: 450*25c28e83SPiotr Jasiukajtis fmuld %f42,%f40,%f42 ! (3_1) xx0 *= dtmp0; 451*25c28e83SPiotr Jasiukajtis add %o3,stridey,%g5 ! py += stridey 452*25c28e83SPiotr Jasiukajtis lda [stridex+%o4]0x82,%i0 ! (1_0) ax = *(int*)px; 453*25c28e83SPiotr Jasiukajtis faddd %f52,K1,%f52 ! (2_1) res0 += K1; 454*25c28e83SPiotr Jasiukajtis 455*25c28e83SPiotr Jasiukajtis sra %l1,24,%g1 ! (0_0) iexp0 = ax >> 24; 456*25c28e83SPiotr Jasiukajtis and %i5,_0x1ff0,%i5 ! (4_1) si0 = ax & 0x1ff0; 457*25c28e83SPiotr Jasiukajtis st %f19,[%o3] ! (3_2) *py = fres0; 458*25c28e83SPiotr Jasiukajtis fand %f34,DC2,%f62 ! (4_1) hi0 = vis_fand(db0,DC2); 459*25c28e83SPiotr Jasiukajtis 460*25c28e83SPiotr Jasiukajtis fmuld %f32,%f60,%f40 ! (0_1) res0 *= dtmp1; 461*25c28e83SPiotr Jasiukajtis add %o4,stridex,%i1 ! px += stridex 462*25c28e83SPiotr Jasiukajtis ldd [%i4+8],%f60 ! (1_1) dtmp0 = ((double*)((char*)TBL + si0))[1] 463*25c28e83SPiotr Jasiukajtis faddd %f48,DC1,%f58 ! (1_1) res0 += DC1; 464*25c28e83SPiotr Jasiukajtis 465*25c28e83SPiotr Jasiukajtis add %g1,960,%o5 ! (0_0) iexp0 += 0x3c0; 466*25c28e83SPiotr Jasiukajtis add %i5,TBL,%i3 ! (4_1) (char*)TBL + si0 467*25c28e83SPiotr Jasiukajtis lda [stridex+%o4]0x82,%f21 ! (1_0) x0 = *px; 468*25c28e83SPiotr Jasiukajtis fand %f56,DC0,%f32 ! (0_0) db0 = vis_fand(db0,DC0); 469*25c28e83SPiotr Jasiukajtis 470*25c28e83SPiotr Jasiukajtis fmuld K2,%f42,%f50 ! (3_1) res0 = K2 * xx0; 471*25c28e83SPiotr Jasiukajtis cmp %i0,_0x7f800000 ! (1_0) ax ? 0x7f800000 472*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update16 ! (1_0) if( ax >= 0x7f800000 ) 473*25c28e83SPiotr Jasiukajtis fsubd %f34,%f62,%f54 ! (4_1) xx0 = (db0 - hi0); 474*25c28e83SPiotr Jasiukajtis.cont16: 475*25c28e83SPiotr Jasiukajtis fmuld %f52,%f46,%f52 ! (2_1) res0 *= xx0; 476*25c28e83SPiotr Jasiukajtis sllx %o5,52,%o7 ! (0_0) lexp0 = (long long)iexp0 << 52; 477*25c28e83SPiotr Jasiukajtis ldd [TBL+%i5],%f62 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0]; 478*25c28e83SPiotr Jasiukajtis fdtos %f44,%f23 ! (4_2) fres0 = (float)res0; 479*25c28e83SPiotr Jasiukajtis 480*25c28e83SPiotr Jasiukajtis fmuld %f60,%f58,%f44 ! (1_1) res0 = dtmp0 * res0; 481*25c28e83SPiotr Jasiukajtis sra %l1,11,%i4 ! (0_0) ax >>= 11; 482*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp3] ! (0_0) dtmp1 = *((double*)&lexp0); 483*25c28e83SPiotr Jasiukajtis for %f32,DC1,%f48 ! (0_0) db0 = vis_for(db0,DC1); 484*25c28e83SPiotr Jasiukajtis 485*25c28e83SPiotr Jasiukajtis cmp %i0,_0x00800000 ! (1_0) ax ? 0x00800000 486*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update17 ! (1_0) if( ax < 0x00800000 ) 487*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp4],%f34 ! (1_1) dtmp1 = *((double*)&lexp0); 488*25c28e83SPiotr Jasiukajtis fstod %f21,%f56 ! (1_0) db0 = (double)x0; 489*25c28e83SPiotr Jasiukajtis.cont17: 490*25c28e83SPiotr Jasiukajtis fmuld %f54,%f62,%f46 ! (4_1) xx0 *= dtmp0; 491*25c28e83SPiotr Jasiukajtis and %i4,_0x1ff0,%g1 ! (0_0) si0 = ax & 0x1ff0; 492*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%o2 ! (2_0) ax = *(int*)px; 493*25c28e83SPiotr Jasiukajtis faddd %f50,K1,%f62 ! (3_1) res0 += K1; 494*25c28e83SPiotr Jasiukajtis 495*25c28e83SPiotr Jasiukajtis add %g1,TBL,%i5 ! (0_0) (double*)((char*)TBL + si0 496*25c28e83SPiotr Jasiukajtis add %g5,stridey,%g5 ! py += stridey 497*25c28e83SPiotr Jasiukajtis st %f23,[stridey+%o3] ! (4_2) *py = fres0; 498*25c28e83SPiotr Jasiukajtis fand %f48,DC2,%f32 ! (0_0) hi0 = vis_fand(db0,DC2); 499*25c28e83SPiotr Jasiukajtis 500*25c28e83SPiotr Jasiukajtis fmuld %f44,%f34,%f44 ! (1_1) res0 *= dtmp1; 501*25c28e83SPiotr Jasiukajtis sra %i0,24,%o4 ! (1_0) iexp0 = ax >> 24; 502*25c28e83SPiotr Jasiukajtis ldd [%i2+8],%f60 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1] 503*25c28e83SPiotr Jasiukajtis faddd %f52,DC1,%f58 ! (2_1) res0 += DC1; 504*25c28e83SPiotr Jasiukajtis 505*25c28e83SPiotr Jasiukajtis add %i1,stridex,%o7 ! px += stridex 506*25c28e83SPiotr Jasiukajtis add %o4,960,%i2 ! (1_0) iexp0 += 0x3c0; 507*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%f25 ! (2_0) x0 = *px; 508*25c28e83SPiotr Jasiukajtis fand %f56,DC0,%f34 ! (1_0) db0 = vis_fand(db0,DC0); 509*25c28e83SPiotr Jasiukajtis 510*25c28e83SPiotr Jasiukajtis fmuld K2,%f46,%f50 ! (4_1) res0 = K2 * xx0; 511*25c28e83SPiotr Jasiukajtis cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000 512*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update18 ! (2_0) if( ax >= 0x7f800000 ) 513*25c28e83SPiotr Jasiukajtis fsubd %f48,%f32,%f52 ! (0_0) xx0 = (db0 - hi0); 514*25c28e83SPiotr Jasiukajtis.cont18: 515*25c28e83SPiotr Jasiukajtis fmuld %f62,%f42,%f54 ! (3_1) res0 *= xx0; 516*25c28e83SPiotr Jasiukajtis sllx %i2,52,%o4 ! (1_0) lexp0 = (long long)iexp0 << 52; 517*25c28e83SPiotr Jasiukajtis ldd [TBL+%g1],%f32 ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0]; 518*25c28e83SPiotr Jasiukajtis fdtos %f40,%f27 ! (0_1) fres0 = (float)res0; 519*25c28e83SPiotr Jasiukajtis 520*25c28e83SPiotr Jasiukajtis fmuld %f60,%f58,%f60 ! (2_1) res0 = dtmp0 * res0; 521*25c28e83SPiotr Jasiukajtis sra %i0,11,%g1 ! (1_0) ax >>= 11; 522*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+tmp4] ! (1_0) dtmp1 = *((double*)&lexp0); 523*25c28e83SPiotr Jasiukajtis for %f34,DC1,%f48 ! (1_0) db0 = vis_for(db0,DC1); 524*25c28e83SPiotr Jasiukajtis 525*25c28e83SPiotr Jasiukajtis cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000 526*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update19 ! (2_0) if( ax < 0x00800000 ) 527*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp0],%f40 ! (2_1) dtmp1 = *((double*)&lexp0); 528*25c28e83SPiotr Jasiukajtis fstod %f25,%f56 ! (2_0) db0 = (double)x0; 529*25c28e83SPiotr Jasiukajtis.cont19: 530*25c28e83SPiotr Jasiukajtis fmuld %f52,%f32,%f42 ! (0_0) xx0 *= dtmp0; 531*25c28e83SPiotr Jasiukajtis and %g1,_0x1ff0,%o5 ! (1_0) si0 = ax & 0x1ff0; 532*25c28e83SPiotr Jasiukajtis lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px; 533*25c28e83SPiotr Jasiukajtis faddd %f50,K1,%f34 ! (4_1) res0 += K1; 534*25c28e83SPiotr Jasiukajtis 535*25c28e83SPiotr Jasiukajtis add %o5,TBL,%i4 ! (1_0) (char*)TBL + si0 536*25c28e83SPiotr Jasiukajtis add %g5,stridey,%g1 ! py += stridey 537*25c28e83SPiotr Jasiukajtis st %f27,[%g5] ! (0_1) *py = fres0; 538*25c28e83SPiotr Jasiukajtis fand %f48,DC2,%f62 ! (1_0) hi0 = vis_fand(db0,DC2); 539*25c28e83SPiotr Jasiukajtis 540*25c28e83SPiotr Jasiukajtis fmuld %f60,%f40,%f32 ! (2_1) res0 *= dtmp1; 541*25c28e83SPiotr Jasiukajtis sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24; 542*25c28e83SPiotr Jasiukajtis ldd [%l0+8],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1] 543*25c28e83SPiotr Jasiukajtis faddd %f54,DC1,%f58 ! (3_1) res0 += DC1; 544*25c28e83SPiotr Jasiukajtis 545*25c28e83SPiotr Jasiukajtis add %o7,stridex,%i1 ! px += stridex 546*25c28e83SPiotr Jasiukajtis add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0; 547*25c28e83SPiotr Jasiukajtis lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px; 548*25c28e83SPiotr Jasiukajtis fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0); 549*25c28e83SPiotr Jasiukajtis 550*25c28e83SPiotr Jasiukajtis fmuld K2,%f42,%f50 ! (0_0) res0 = K2 * xx0; 551*25c28e83SPiotr Jasiukajtis cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000 552*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update20 ! (3_0) if( ax >= 0x7f800000 ) 553*25c28e83SPiotr Jasiukajtis fsubd %f48,%f62,%f54 ! (1_0) xx0 = (db0 - hi0); 554*25c28e83SPiotr Jasiukajtis.cont20: 555*25c28e83SPiotr Jasiukajtis fmuld %f34,%f46,%f52 ! (4_1) res0 *= xx0; 556*25c28e83SPiotr Jasiukajtis sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52; 557*25c28e83SPiotr Jasiukajtis ldd [TBL+%o5],%f56 ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0]; 558*25c28e83SPiotr Jasiukajtis fdtos %f44,%f8 ! (1_1) fres0 = (float)res0; 559*25c28e83SPiotr Jasiukajtis 560*25c28e83SPiotr Jasiukajtis fmuld %f40,%f58,%f34 ! (3_1) res0 = dtmp0 * res0; 561*25c28e83SPiotr Jasiukajtis sra %o2,11,%i2 ! (2_0) ax >>= 11; 562*25c28e83SPiotr Jasiukajtis stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0); 563*25c28e83SPiotr Jasiukajtis for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1); 564*25c28e83SPiotr Jasiukajtis 565*25c28e83SPiotr Jasiukajtis cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000 566*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update21 ! (3_0) if( ax < 0x00800000 ) 567*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp1],%f62 ! (3_1) dtmp1 = *((double*)&lexp0); 568*25c28e83SPiotr Jasiukajtis fstod %f0,%f48 ! (3_0) db0 = (double)x0; 569*25c28e83SPiotr Jasiukajtis.cont21: 570*25c28e83SPiotr Jasiukajtis fmuld %f54,%f56,%f30 ! (1_0) xx0 *= dtmp0; 571*25c28e83SPiotr Jasiukajtis and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0; 572*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px; 573*25c28e83SPiotr Jasiukajtis faddd %f50,K1,%f56 ! (0_0) res0 += K1; 574*25c28e83SPiotr Jasiukajtis 575*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 576*25c28e83SPiotr Jasiukajtis add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0 577*25c28e83SPiotr Jasiukajtis st %f8,[stridey+%g5] ! (1_1) *py = fres0; 578*25c28e83SPiotr Jasiukajtis fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2); 579*25c28e83SPiotr Jasiukajtis 580*25c28e83SPiotr Jasiukajtis fmuld %f34,%f62,%f28 ! (3_1) res0 *= dtmp1; 581*25c28e83SPiotr Jasiukajtis sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24; 582*25c28e83SPiotr Jasiukajtis ldd [%i3+8],%f50 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1] 583*25c28e83SPiotr Jasiukajtis faddd %f52,DC1,%f54 ! (4_1) res0 += DC1; 584*25c28e83SPiotr Jasiukajtis 585*25c28e83SPiotr Jasiukajtis add %g1,stridey,%i3 ! py += stridey 586*25c28e83SPiotr Jasiukajtis subcc counter,5,counter ! counter 587*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f13 ! (4_0) x0 = *px; 588*25c28e83SPiotr Jasiukajtis fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0); 589*25c28e83SPiotr Jasiukajtis 590*25c28e83SPiotr Jasiukajtis bpos,pt %icc,.main_loop 591*25c28e83SPiotr Jasiukajtis add %o4,960,%g5 ! (3_0) iexp0 += 0x3c0; 592*25c28e83SPiotr Jasiukajtis 593*25c28e83SPiotr Jasiukajtis add counter,5,counter 594*25c28e83SPiotr Jasiukajtis.tail: 595*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 596*25c28e83SPiotr Jasiukajtis bneg,a .begin 597*25c28e83SPiotr Jasiukajtis or %g0,%i3,%g5 598*25c28e83SPiotr Jasiukajtis 599*25c28e83SPiotr Jasiukajtis fmuld %f56,%f42,%f52 ! (0_1) res0 *= xx0; 600*25c28e83SPiotr Jasiukajtis fdtos %f32,%f15 ! (2_2) fres0 = (float)res0; 601*25c28e83SPiotr Jasiukajtis 602*25c28e83SPiotr Jasiukajtis fmuld %f50,%f54,%f42 ! (4_2) res0 = dtmp0 * res0; 603*25c28e83SPiotr Jasiukajtis 604*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp2],%f56 ! (4_2) dtmp1 = *((double*)&lexp0); 605*25c28e83SPiotr Jasiukajtis 606*25c28e83SPiotr Jasiukajtis add %i3,stridey,%o3 ! py += stridey 607*25c28e83SPiotr Jasiukajtis st %f15,[%i3] ! (2_2) *py = fres0; 608*25c28e83SPiotr Jasiukajtis 609*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 610*25c28e83SPiotr Jasiukajtis bneg,a .begin 611*25c28e83SPiotr Jasiukajtis or %g0,%o3,%g5 612*25c28e83SPiotr Jasiukajtis 613*25c28e83SPiotr Jasiukajtis fmuld %f42,%f56,%f44 ! (4_2) res0 *= dtmp1; 614*25c28e83SPiotr Jasiukajtis ldd [%i5+8],%f58 ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1] 615*25c28e83SPiotr Jasiukajtis faddd %f52,DC1,%f34 ! (0_1) res0 += DC1; 616*25c28e83SPiotr Jasiukajtis 617*25c28e83SPiotr Jasiukajtis fdtos %f28,%f19 ! (3_2) fres0 = (float)res0; 618*25c28e83SPiotr Jasiukajtis 619*25c28e83SPiotr Jasiukajtis fmuld %f58,%f34,%f32 ! (0_1) res0 = dtmp0 * res0; 620*25c28e83SPiotr Jasiukajtis 621*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp3],%f60 ! (0_1) dtmp1 = *((double*)&lexp0); 622*25c28e83SPiotr Jasiukajtis 623*25c28e83SPiotr Jasiukajtis add %o3,stridey,%g5 ! py += stridey 624*25c28e83SPiotr Jasiukajtis 625*25c28e83SPiotr Jasiukajtis st %f19,[%o3] ! (3_2) *py = fres0; 626*25c28e83SPiotr Jasiukajtis 627*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 628*25c28e83SPiotr Jasiukajtis bneg,a .begin 629*25c28e83SPiotr Jasiukajtis nop 630*25c28e83SPiotr Jasiukajtis 631*25c28e83SPiotr Jasiukajtis fmuld %f32,%f60,%f40 ! (0_1) res0 *= dtmp1; 632*25c28e83SPiotr Jasiukajtis 633*25c28e83SPiotr Jasiukajtis fdtos %f44,%f23 ! (4_2) fres0 = (float)res0; 634*25c28e83SPiotr Jasiukajtis 635*25c28e83SPiotr Jasiukajtis add %g5,stridey,%g5 ! py += stridey 636*25c28e83SPiotr Jasiukajtis st %f23,[stridey+%o3] ! (4_2) *py = fres0; 637*25c28e83SPiotr Jasiukajtis 638*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 639*25c28e83SPiotr Jasiukajtis bneg,a .begin 640*25c28e83SPiotr Jasiukajtis nop 641*25c28e83SPiotr Jasiukajtis 642*25c28e83SPiotr Jasiukajtis fdtos %f40,%f27 ! (0_1) fres0 = (float)res0; 643*25c28e83SPiotr Jasiukajtis 644*25c28e83SPiotr Jasiukajtis st %f27,[%g5] ! (0_1) *py = fres0; 645*25c28e83SPiotr Jasiukajtis 646*25c28e83SPiotr Jasiukajtis ba .begin 647*25c28e83SPiotr Jasiukajtis add %g5,stridey,%g5 648*25c28e83SPiotr Jasiukajtis 649*25c28e83SPiotr Jasiukajtis .align 16 650*25c28e83SPiotr Jasiukajtis.spec: 651*25c28e83SPiotr Jasiukajtis fsqrts %f25,%f25 652*25c28e83SPiotr Jasiukajtis sub counter,1,counter 653*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 654*25c28e83SPiotr Jasiukajtis st %f25,[%g5] 655*25c28e83SPiotr Jasiukajtis ba .begin1 656*25c28e83SPiotr Jasiukajtis add %g5,stridey,%g5 657*25c28e83SPiotr Jasiukajtis 658*25c28e83SPiotr Jasiukajtis .align 16 659*25c28e83SPiotr Jasiukajtis.update0: 660*25c28e83SPiotr Jasiukajtis cmp counter,1 661*25c28e83SPiotr Jasiukajtis ble .cont0 662*25c28e83SPiotr Jasiukajtis fzeros %f0 663*25c28e83SPiotr Jasiukajtis 664*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 665*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%o1 666*25c28e83SPiotr Jasiukajtis 667*25c28e83SPiotr Jasiukajtis sub counter,1,counter 668*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 669*25c28e83SPiotr Jasiukajtis 670*25c28e83SPiotr Jasiukajtis ba .cont0 671*25c28e83SPiotr Jasiukajtis or %g0,1,counter 672*25c28e83SPiotr Jasiukajtis 673*25c28e83SPiotr Jasiukajtis .align 16 674*25c28e83SPiotr Jasiukajtis.update1: 675*25c28e83SPiotr Jasiukajtis cmp counter,1 676*25c28e83SPiotr Jasiukajtis ble .cont1 677*25c28e83SPiotr Jasiukajtis fzeros %f0 678*25c28e83SPiotr Jasiukajtis 679*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 680*25c28e83SPiotr Jasiukajtis clr %o1 681*25c28e83SPiotr Jasiukajtis 682*25c28e83SPiotr Jasiukajtis sub counter,1,counter 683*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 684*25c28e83SPiotr Jasiukajtis 685*25c28e83SPiotr Jasiukajtis ba .cont1 686*25c28e83SPiotr Jasiukajtis or %g0,1,counter 687*25c28e83SPiotr Jasiukajtis 688*25c28e83SPiotr Jasiukajtis .align 16 689*25c28e83SPiotr Jasiukajtis.update2: 690*25c28e83SPiotr Jasiukajtis cmp counter,2 691*25c28e83SPiotr Jasiukajtis ble .cont2 692*25c28e83SPiotr Jasiukajtis fzeros %f13 693*25c28e83SPiotr Jasiukajtis 694*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 695*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%o2 696*25c28e83SPiotr Jasiukajtis 697*25c28e83SPiotr Jasiukajtis sub counter,2,counter 698*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 699*25c28e83SPiotr Jasiukajtis 700*25c28e83SPiotr Jasiukajtis ba .cont2 701*25c28e83SPiotr Jasiukajtis or %g0,2,counter 702*25c28e83SPiotr Jasiukajtis 703*25c28e83SPiotr Jasiukajtis .align 16 704*25c28e83SPiotr Jasiukajtis.update3: 705*25c28e83SPiotr Jasiukajtis cmp counter,2 706*25c28e83SPiotr Jasiukajtis ble .cont3 707*25c28e83SPiotr Jasiukajtis fzeros %f13 708*25c28e83SPiotr Jasiukajtis 709*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 710*25c28e83SPiotr Jasiukajtis clr %o2 711*25c28e83SPiotr Jasiukajtis 712*25c28e83SPiotr Jasiukajtis sub counter,2,counter 713*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 714*25c28e83SPiotr Jasiukajtis 715*25c28e83SPiotr Jasiukajtis ba .cont3 716*25c28e83SPiotr Jasiukajtis or %g0,2,counter 717*25c28e83SPiotr Jasiukajtis 718*25c28e83SPiotr Jasiukajtis .align 16 719*25c28e83SPiotr Jasiukajtis.update4: 720*25c28e83SPiotr Jasiukajtis cmp counter,3 721*25c28e83SPiotr Jasiukajtis ble .cont4 722*25c28e83SPiotr Jasiukajtis fzeros %f17 723*25c28e83SPiotr Jasiukajtis 724*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+tmp_px] 725*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%l1 726*25c28e83SPiotr Jasiukajtis 727*25c28e83SPiotr Jasiukajtis sub counter,3,counter 728*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 729*25c28e83SPiotr Jasiukajtis 730*25c28e83SPiotr Jasiukajtis ba .cont4 731*25c28e83SPiotr Jasiukajtis or %g0,3,counter 732*25c28e83SPiotr Jasiukajtis 733*25c28e83SPiotr Jasiukajtis .align 16 734*25c28e83SPiotr Jasiukajtis.update5: 735*25c28e83SPiotr Jasiukajtis cmp counter,3 736*25c28e83SPiotr Jasiukajtis ble .cont5 737*25c28e83SPiotr Jasiukajtis fzeros %f17 738*25c28e83SPiotr Jasiukajtis 739*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+tmp_px] 740*25c28e83SPiotr Jasiukajtis clr %l1 741*25c28e83SPiotr Jasiukajtis 742*25c28e83SPiotr Jasiukajtis sub counter,3,counter 743*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 744*25c28e83SPiotr Jasiukajtis 745*25c28e83SPiotr Jasiukajtis ba .cont5 746*25c28e83SPiotr Jasiukajtis or %g0,3,counter 747*25c28e83SPiotr Jasiukajtis 748*25c28e83SPiotr Jasiukajtis .align 16 749*25c28e83SPiotr Jasiukajtis.update6: 750*25c28e83SPiotr Jasiukajtis cmp counter,4 751*25c28e83SPiotr Jasiukajtis ble .cont6 752*25c28e83SPiotr Jasiukajtis fzeros %f21 753*25c28e83SPiotr Jasiukajtis 754*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 755*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%i0 756*25c28e83SPiotr Jasiukajtis 757*25c28e83SPiotr Jasiukajtis sub counter,4,counter 758*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 759*25c28e83SPiotr Jasiukajtis 760*25c28e83SPiotr Jasiukajtis ba .cont6 761*25c28e83SPiotr Jasiukajtis or %g0,4,counter 762*25c28e83SPiotr Jasiukajtis 763*25c28e83SPiotr Jasiukajtis .align 16 764*25c28e83SPiotr Jasiukajtis.update7: 765*25c28e83SPiotr Jasiukajtis cmp counter,4 766*25c28e83SPiotr Jasiukajtis ble .cont7 767*25c28e83SPiotr Jasiukajtis fzeros %f21 768*25c28e83SPiotr Jasiukajtis 769*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 770*25c28e83SPiotr Jasiukajtis clr %i0 771*25c28e83SPiotr Jasiukajtis 772*25c28e83SPiotr Jasiukajtis sub counter,4,counter 773*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 774*25c28e83SPiotr Jasiukajtis 775*25c28e83SPiotr Jasiukajtis ba .cont7 776*25c28e83SPiotr Jasiukajtis or %g0,4,counter 777*25c28e83SPiotr Jasiukajtis 778*25c28e83SPiotr Jasiukajtis .align 16 779*25c28e83SPiotr Jasiukajtis.update8: 780*25c28e83SPiotr Jasiukajtis cmp counter,5 781*25c28e83SPiotr Jasiukajtis ble .cont8 782*25c28e83SPiotr Jasiukajtis fzeros %f25 783*25c28e83SPiotr Jasiukajtis 784*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp_px] 785*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%o2 786*25c28e83SPiotr Jasiukajtis 787*25c28e83SPiotr Jasiukajtis sub counter,5,counter 788*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 789*25c28e83SPiotr Jasiukajtis 790*25c28e83SPiotr Jasiukajtis ba .cont8 791*25c28e83SPiotr Jasiukajtis or %g0,5,counter 792*25c28e83SPiotr Jasiukajtis 793*25c28e83SPiotr Jasiukajtis .align 16 794*25c28e83SPiotr Jasiukajtis.update9: 795*25c28e83SPiotr Jasiukajtis cmp counter,5 796*25c28e83SPiotr Jasiukajtis ble .cont9 797*25c28e83SPiotr Jasiukajtis fzeros %f25 798*25c28e83SPiotr Jasiukajtis 799*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp_px] 800*25c28e83SPiotr Jasiukajtis clr %o2 801*25c28e83SPiotr Jasiukajtis 802*25c28e83SPiotr Jasiukajtis sub counter,5,counter 803*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 804*25c28e83SPiotr Jasiukajtis 805*25c28e83SPiotr Jasiukajtis ba .cont9 806*25c28e83SPiotr Jasiukajtis or %g0,5,counter 807*25c28e83SPiotr Jasiukajtis 808*25c28e83SPiotr Jasiukajtis .align 16 809*25c28e83SPiotr Jasiukajtis.update10: 810*25c28e83SPiotr Jasiukajtis cmp counter,6 811*25c28e83SPiotr Jasiukajtis ble .cont10 812*25c28e83SPiotr Jasiukajtis fzeros %f0 813*25c28e83SPiotr Jasiukajtis 814*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 815*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%o1 816*25c28e83SPiotr Jasiukajtis 817*25c28e83SPiotr Jasiukajtis sub counter,6,counter 818*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 819*25c28e83SPiotr Jasiukajtis 820*25c28e83SPiotr Jasiukajtis ba .cont10 821*25c28e83SPiotr Jasiukajtis or %g0,6,counter 822*25c28e83SPiotr Jasiukajtis 823*25c28e83SPiotr Jasiukajtis .align 16 824*25c28e83SPiotr Jasiukajtis.update11: 825*25c28e83SPiotr Jasiukajtis cmp counter,6 826*25c28e83SPiotr Jasiukajtis ble .cont11 827*25c28e83SPiotr Jasiukajtis fzeros %f0 828*25c28e83SPiotr Jasiukajtis 829*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 830*25c28e83SPiotr Jasiukajtis clr %o1 831*25c28e83SPiotr Jasiukajtis 832*25c28e83SPiotr Jasiukajtis sub counter,6,counter 833*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 834*25c28e83SPiotr Jasiukajtis 835*25c28e83SPiotr Jasiukajtis ba .cont11 836*25c28e83SPiotr Jasiukajtis or %g0,6,counter 837*25c28e83SPiotr Jasiukajtis 838*25c28e83SPiotr Jasiukajtis .align 16 839*25c28e83SPiotr Jasiukajtis.update12: 840*25c28e83SPiotr Jasiukajtis cmp counter,2 841*25c28e83SPiotr Jasiukajtis ble .cont12 842*25c28e83SPiotr Jasiukajtis fzeros %f13 843*25c28e83SPiotr Jasiukajtis 844*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 845*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%o2 846*25c28e83SPiotr Jasiukajtis 847*25c28e83SPiotr Jasiukajtis sub counter,2,counter 848*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 849*25c28e83SPiotr Jasiukajtis 850*25c28e83SPiotr Jasiukajtis ba .cont12 851*25c28e83SPiotr Jasiukajtis or %g0,2,counter 852*25c28e83SPiotr Jasiukajtis 853*25c28e83SPiotr Jasiukajtis .align 16 854*25c28e83SPiotr Jasiukajtis.update13: 855*25c28e83SPiotr Jasiukajtis cmp counter,2 856*25c28e83SPiotr Jasiukajtis ble .cont13 857*25c28e83SPiotr Jasiukajtis fzeros %f13 858*25c28e83SPiotr Jasiukajtis 859*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 860*25c28e83SPiotr Jasiukajtis clr %o2 861*25c28e83SPiotr Jasiukajtis 862*25c28e83SPiotr Jasiukajtis sub counter,2,counter 863*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 864*25c28e83SPiotr Jasiukajtis 865*25c28e83SPiotr Jasiukajtis ba .cont13 866*25c28e83SPiotr Jasiukajtis or %g0,2,counter 867*25c28e83SPiotr Jasiukajtis 868*25c28e83SPiotr Jasiukajtis .align 16 869*25c28e83SPiotr Jasiukajtis.update14: 870*25c28e83SPiotr Jasiukajtis cmp counter,3 871*25c28e83SPiotr Jasiukajtis ble .cont14 872*25c28e83SPiotr Jasiukajtis fzeros %f17 873*25c28e83SPiotr Jasiukajtis 874*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+tmp_px] 875*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%l1 876*25c28e83SPiotr Jasiukajtis 877*25c28e83SPiotr Jasiukajtis sub counter,3,counter 878*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 879*25c28e83SPiotr Jasiukajtis 880*25c28e83SPiotr Jasiukajtis ba .cont14 881*25c28e83SPiotr Jasiukajtis or %g0,3,counter 882*25c28e83SPiotr Jasiukajtis 883*25c28e83SPiotr Jasiukajtis .align 16 884*25c28e83SPiotr Jasiukajtis.update15: 885*25c28e83SPiotr Jasiukajtis cmp counter,3 886*25c28e83SPiotr Jasiukajtis ble .cont15 887*25c28e83SPiotr Jasiukajtis fzeros %f17 888*25c28e83SPiotr Jasiukajtis 889*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+tmp_px] 890*25c28e83SPiotr Jasiukajtis clr %l1 891*25c28e83SPiotr Jasiukajtis 892*25c28e83SPiotr Jasiukajtis sub counter,3,counter 893*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 894*25c28e83SPiotr Jasiukajtis 895*25c28e83SPiotr Jasiukajtis ba .cont15 896*25c28e83SPiotr Jasiukajtis or %g0,3,counter 897*25c28e83SPiotr Jasiukajtis 898*25c28e83SPiotr Jasiukajtis .align 16 899*25c28e83SPiotr Jasiukajtis.update16: 900*25c28e83SPiotr Jasiukajtis cmp counter,4 901*25c28e83SPiotr Jasiukajtis ble .cont16 902*25c28e83SPiotr Jasiukajtis fzeros %f21 903*25c28e83SPiotr Jasiukajtis 904*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 905*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%i0 906*25c28e83SPiotr Jasiukajtis 907*25c28e83SPiotr Jasiukajtis sub counter,4,counter 908*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 909*25c28e83SPiotr Jasiukajtis 910*25c28e83SPiotr Jasiukajtis ba .cont16 911*25c28e83SPiotr Jasiukajtis or %g0,4,counter 912*25c28e83SPiotr Jasiukajtis 913*25c28e83SPiotr Jasiukajtis .align 16 914*25c28e83SPiotr Jasiukajtis.update17: 915*25c28e83SPiotr Jasiukajtis cmp counter,4 916*25c28e83SPiotr Jasiukajtis ble .cont17 917*25c28e83SPiotr Jasiukajtis fzeros %f21 918*25c28e83SPiotr Jasiukajtis 919*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 920*25c28e83SPiotr Jasiukajtis clr %i0 921*25c28e83SPiotr Jasiukajtis 922*25c28e83SPiotr Jasiukajtis sub counter,4,counter 923*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 924*25c28e83SPiotr Jasiukajtis 925*25c28e83SPiotr Jasiukajtis ba .cont17 926*25c28e83SPiotr Jasiukajtis or %g0,4,counter 927*25c28e83SPiotr Jasiukajtis 928*25c28e83SPiotr Jasiukajtis .align 16 929*25c28e83SPiotr Jasiukajtis.update18: 930*25c28e83SPiotr Jasiukajtis cmp counter,5 931*25c28e83SPiotr Jasiukajtis ble .cont18 932*25c28e83SPiotr Jasiukajtis fzeros %f25 933*25c28e83SPiotr Jasiukajtis 934*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp_px] 935*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%o2 936*25c28e83SPiotr Jasiukajtis 937*25c28e83SPiotr Jasiukajtis sub counter,5,counter 938*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 939*25c28e83SPiotr Jasiukajtis 940*25c28e83SPiotr Jasiukajtis ba .cont18 941*25c28e83SPiotr Jasiukajtis or %g0,5,counter 942*25c28e83SPiotr Jasiukajtis 943*25c28e83SPiotr Jasiukajtis .align 16 944*25c28e83SPiotr Jasiukajtis.update19: 945*25c28e83SPiotr Jasiukajtis cmp counter,5 946*25c28e83SPiotr Jasiukajtis ble .cont19 947*25c28e83SPiotr Jasiukajtis fzeros %f25 948*25c28e83SPiotr Jasiukajtis 949*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp_px] 950*25c28e83SPiotr Jasiukajtis clr %o2 951*25c28e83SPiotr Jasiukajtis 952*25c28e83SPiotr Jasiukajtis sub counter,5,counter 953*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 954*25c28e83SPiotr Jasiukajtis 955*25c28e83SPiotr Jasiukajtis ba .cont19 956*25c28e83SPiotr Jasiukajtis or %g0,5,counter 957*25c28e83SPiotr Jasiukajtis 958*25c28e83SPiotr Jasiukajtis .align 16 959*25c28e83SPiotr Jasiukajtis.update20: 960*25c28e83SPiotr Jasiukajtis cmp counter,6 961*25c28e83SPiotr Jasiukajtis ble .cont20 962*25c28e83SPiotr Jasiukajtis fzeros %f0 963*25c28e83SPiotr Jasiukajtis 964*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 965*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%o1 966*25c28e83SPiotr Jasiukajtis 967*25c28e83SPiotr Jasiukajtis sub counter,6,counter 968*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 969*25c28e83SPiotr Jasiukajtis 970*25c28e83SPiotr Jasiukajtis ba .cont20 971*25c28e83SPiotr Jasiukajtis or %g0,6,counter 972*25c28e83SPiotr Jasiukajtis 973*25c28e83SPiotr Jasiukajtis .align 16 974*25c28e83SPiotr Jasiukajtis.update21: 975*25c28e83SPiotr Jasiukajtis cmp counter,6 976*25c28e83SPiotr Jasiukajtis ble .cont21 977*25c28e83SPiotr Jasiukajtis fzeros %f0 978*25c28e83SPiotr Jasiukajtis 979*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 980*25c28e83SPiotr Jasiukajtis clr %o1 981*25c28e83SPiotr Jasiukajtis 982*25c28e83SPiotr Jasiukajtis sub counter,6,counter 983*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 984*25c28e83SPiotr Jasiukajtis 985*25c28e83SPiotr Jasiukajtis ba .cont21 986*25c28e83SPiotr Jasiukajtis or %g0,6,counter 987*25c28e83SPiotr Jasiukajtis 988*25c28e83SPiotr Jasiukajtis.exit: 989*25c28e83SPiotr Jasiukajtis ret 990*25c28e83SPiotr Jasiukajtis restore 991*25c28e83SPiotr Jasiukajtis SET_SIZE(__vsqrtf_ultra3) 992*25c28e83SPiotr Jasiukajtis 993