1*25c28e83SPiotr Jasiukajtis/* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis/* 22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23*25c28e83SPiotr Jasiukajtis */ 24*25c28e83SPiotr Jasiukajtis/* 25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 
*/

	.file	"__vrhypot.S"

#include "libm.h"

	RO_DATA
	.align	64

! .CONST_TBL layout
!
!   bytes 0 .. 511 (TBL_SHIFT)
!	128 one-word entries.  The main loop indexes this area with
!	(high word of dres >> 11) & 0x1fc and loads one word into the
!	upper half of a double register.  Entry i matches the high word
!	of 2**1023 / (1 + i/128) (checked for i = 0, 1 and 64); together
!	with the fpsub32 of the exponent bits it seeds the reciprocal
!	iteration described in the algorithm notes below.
!
!   bytes TBL_SHIFT .. TBL_SHIFT+71
!	nine doubles, 8 bytes each, in the order listed.  The first
!	five are loaded into fixed FP registers at function entry.
.CONST_TBL:
	.word	0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
	.word	0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
	.word	0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
	.word	0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
	.word	0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
	.word	0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
	.word	0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
	.word	0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
	.word	0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
	.word	0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
	.word	0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
	.word	0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
	.word	0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
	.word	0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
	.word	0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
	.word	0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
	.word	0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
	.word	0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
	.word	0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
	.word	0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
	.word	0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
	.word	0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
	.word	0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
	.word	0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
	.word	0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
	.word	0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
	.word	0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
	.word	0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
	.word	0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
	.word	0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
	.word	0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
	.word	0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,

	.word	0x42300000, 0		! +0   D2ON36   = 2**36
	.word	0xffffff00, 0		! +8   DA0      fand mask: keeps the top 24 bits
	.word	0xfff00000, 0		! +16  DA1      fand mask: sign and exponent bits
	.word	0x3ff00000, 0		! +24  DONE     = 1.0
	.word	0x40000000, 0		! +32  DTWO     = 2.0
	.word	0x7fd00000, 0		! +40  D2ON1022 = 2**1022
	.word	0x3cb00000, 0		! +48  D2ONM52  = 2**-52
	.word	0x43200000, 0		! +56  D2ON51   = 2**51
	.word	0x0007ffff, 0xffffffff	! +64  0x0007ffffffffffff (low 51 bits mask)

! Element strides, scaled to byte offsets (sll by 3) at function entry.
#define stridex		%l2
#define stridey		%l3
#define stridez		%l5

! Byte offset from TBL to the first double constant (128 words * 4).
#define TBL_SHIFT	512

#define TBL		%l1
#define counter		%l4

! Integer registers holding the bit patterns their names spell out.
#define _0x7ff00000	%l0
#define _0x00100000	%o5
#define _0x7fffffff	%l6

! FP registers holding the like-named constants from .CONST_TBL.
#define D2ON36		%f4
#define DTWO		%f6
#define DONE		%f8
#define DA0		%f58
#define DA1		%f56

! 8-byte scratch slots addressed off %fp.  The loop stores the integer
! scale pattern (scl0) with stx and reloads it into an FP register
! with ldd.
#define dtmp0		STACK_BIAS-0x80
#define dtmp1		STACK_BIAS-0x78
#define dtmp2		STACK_BIAS-0x70
#define dtmp3		STACK_BIAS-0x68
#define dtmp4		STACK_BIAS-0x60
#define dtmp5		STACK_BIAS-0x58
#define dtmp6		STACK_BIAS-0x50
#define dtmp7		STACK_BIAS-0x48
#define dtmp8		STACK_BIAS-0x40
#define dtmp9		STACK_BIAS-0x38
#define dtmp10		STACK_BIAS-0x30
#define dtmp11		STACK_BIAS-0x28
#define dtmp12		STACK_BIAS-0x20
#define dtmp13		STACK_BIAS-0x18
#define dtmp14		STACK_BIAS-0x10
#define dtmp15		STACK_BIAS-0x08

! ftmp0: word slot used to move the high word of dres from an FP
! register to an integer register (st followed by ld).
! tmp_px, tmp_py, tmp_counter: saved input pointers and element count,
! written at entry and reloaded at .begin.
#define ftmp0		STACK_BIAS-0x100
#define tmp_px		STACK_BIAS-0x98
#define tmp_py		STACK_BIAS-0x90
#define tmp_counter	STACK_BIAS-0x88

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x100

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!      !!!!!   algorithm   !!!!!
!  hx0 = *(int*)px;
!  hy0 = *(int*)py;
!
!  ((float*)&x0)[0] = ((float*)px)[0];
!  ((float*)&x0)[1] = ((float*)px)[1];
!  ((float*)&y0)[0] = ((float*)py)[0];
!  ((float*)&y0)[1] = ((float*)py)[1];
!
!  hx0 &= 0x7fffffff;
!  hy0 &= 0x7fffffff;
!
!  diff0 = hy0 - hx0;
!  j0 = diff0 >> 31;
!  j0 &= diff0;
!  j0 = hy0 - j0;
!  j0 &= 0x7ff00000;
!
!  j0 = 0x7ff00000 - j0;
!  ll = (long long)j0 << 32;
!  *(long long*)&scl0 = ll;
!
!  if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 )
!  {
!    lx = ((int*)px)[1];
!    ly = ((int*)py)[1];
!
!    if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0;
!    else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0;
!    else res0 = fabs(x0) * fabs(y0);
!
!    ((float*)pz)[0] = ((float*)&res0)[0];
!    ((float*)pz)[1] = ((float*)&res0)[1];
!
!    px += stridex;
!    py += stridey;
!    pz += stridez;
!    continue;
!  }
!  if ( hx0 < 0x00100000 && hy0 < 0x00100000 )
!  {
!    lx = ((int*)px)[1];
!    ly = ((int*)py)[1];
!    ii = hx0 | hy0;
!    ii |= lx;
!    ii |= ly;
!    if ( ii == 0 )
!    {
!      res0 = 1.0 / 0.0;
!      ((float*)pz)[0] = ((float*)&res0)[0];
!      ((float*)pz)[1] = ((float*)&res0)[1];
!
!      px += stridex;
!      py += stridey;
!      pz += stridez;
!      continue;
!    }
!    x0 = fabs(x0);
!    y0 = fabs(y0);
!    if ( hx0 < 0x00080000 )
!    {
!      x0 = *(long long*)&x0;
!    }
!    else
!    {
!      ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
!      x0 = vis_fand(x0, dtmp0);
!      x0 = *(long long*)&x0;
!      x0 += D2ON51;
!    }
!    x0 *= D2ONM52;
!    if ( hy0 < 0x00080000 )
!    {
!      y0 = *(long long*)&y0;
!    }
!    else
!    {
!      ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
!      y0 = vis_fand(y0, dtmp0);
!      y0 = *(long long*)&y0;
!      y0 += D2ON51;
!    }
!    y0 *= D2ONM52;
!    *(long long*)&scl0 = 0x7fd0000000000000ULL;
!  }
!  else
!  {
!    x0 *= scl0;
!    y0 *= scl0;
!  }
!
!  x_hi0 = x0 + D2ON36;
!  y_hi0 = y0 + D2ON36;
!  x_hi0 -= D2ON36;
!  y_hi0 -= D2ON36;
!  x_lo0 = x0 - x_hi0;
!  y_lo0 = y0 - y_hi0;
!  res0_hi = x_hi0 * x_hi0;
!  dtmp0 = y_hi0 * y_hi0;
!  res0_hi += dtmp0;
!  res0_lo = x0 + x_hi0;
!  res0_lo *= x_lo0;
!  dtmp1 = y0 + y_hi0;
!  dtmp1 *= y_lo0;
!  res0_lo += dtmp1;
!
!  dres = res0_hi + res0_lo;
!  dexp0 = vis_fand(dres,DA1);
!  iarr = ((int*)&dres)[0];
!
!  iarr >>= 11;
!  iarr &= 0x1fc;
!  dtmp0 = ((double*)((char*)dll1 + iarr))[0];
!  dd = vis_fpsub32(dtmp0, dexp0);
!
!  dtmp0 = dd * dres;
!  dtmp0 = DTWO - dtmp0;
!  dd *= dtmp0;
!  dtmp1 = dd * dres;
!  dtmp1 = DTWO - dtmp1;
!  dd *= dtmp1;
!  dtmp2 = dd * dres;
!  dtmp2 = DTWO - dtmp2;
!  dres = dd * dtmp2;
!
!  res0 = vis_fand(dres,DA0);
!
!  dtmp0 = res0_hi * res0;
!  dtmp0 = DONE - dtmp0;
!  dtmp1 = res0_lo * res0;
!  dtmp0 -= dtmp1;
!  dtmp0 *= dres;
!  res0 += dtmp0;
!
!  res0 = sqrt ( res0 );
!
!  res0 = scl0 * res0;
!
!  ((float*)pz)[0] = ((float*)&res0)[0];
!  ((float*)pz)[1] = ((float*)&res0)[1];
!
!  px += stridex;
!  py += stridey;
!  pz += stridez;
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
270*25c28e83SPiotr Jasiukajtis 271*25c28e83SPiotr Jasiukajtis ENTRY(__vrhypot) 272*25c28e83SPiotr Jasiukajtis save %sp,-SA(MINFRAME)-tmps,%sp 273*25c28e83SPiotr Jasiukajtis PIC_SETUP(l7) 274*25c28e83SPiotr Jasiukajtis PIC_SET(l7,.CONST_TBL,l1) 275*25c28e83SPiotr Jasiukajtis wr %g0,0x82,%asi 276*25c28e83SPiotr Jasiukajtis 277*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9 278*25c28e83SPiotr Jasiukajtis ldx [%fp+STACK_BIAS+176],stridez 279*25c28e83SPiotr Jasiukajtis#else 280*25c28e83SPiotr Jasiukajtis ld [%fp+STACK_BIAS+92],stridez 281*25c28e83SPiotr Jasiukajtis#endif 282*25c28e83SPiotr Jasiukajtis 283*25c28e83SPiotr Jasiukajtis sll %i2,3,stridex 284*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ff00000),_0x7ff00000 285*25c28e83SPiotr Jasiukajtis st %i0,[%fp+tmp_counter] 286*25c28e83SPiotr Jasiukajtis 287*25c28e83SPiotr Jasiukajtis sll %i4,3,stridey 288*25c28e83SPiotr Jasiukajtis sethi %hi(0x00100000),_0x00100000 289*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 290*25c28e83SPiotr Jasiukajtis 291*25c28e83SPiotr Jasiukajtis sll stridez,3,stridez 292*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),_0x7fffffff 293*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 294*25c28e83SPiotr Jasiukajtis 295*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT],D2ON36 296*25c28e83SPiotr Jasiukajtis add _0x7fffffff,1023,_0x7fffffff 297*25c28e83SPiotr Jasiukajtis 298*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+8],DA0 299*25c28e83SPiotr Jasiukajtis 300*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+16],DA1 301*25c28e83SPiotr Jasiukajtis 302*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+24],DONE 303*25c28e83SPiotr Jasiukajtis 304*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+32],DTWO 305*25c28e83SPiotr Jasiukajtis 306*25c28e83SPiotr Jasiukajtis.begin: 307*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_counter],counter 308*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_px],%i4 309*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_py],%i3 310*25c28e83SPiotr Jasiukajtis st %g0,[%fp+tmp_counter] 311*25c28e83SPiotr 
Jasiukajtis.begin1: 312*25c28e83SPiotr Jasiukajtis cmp counter,0 313*25c28e83SPiotr Jasiukajtis ble,pn %icc,.exit 314*25c28e83SPiotr Jasiukajtis 315*25c28e83SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; 316*25c28e83SPiotr Jasiukajtis add %i4,stridex,%i1 317*25c28e83SPiotr Jasiukajtis 318*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; 319*25c28e83SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 320*25c28e83SPiotr Jasiukajtis 321*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; 322*25c28e83SPiotr Jasiukajtis 323*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 324*25c28e83SPiotr Jasiukajtis bge,pn %icc,.spec0 ! (7_0) if ( hx0 >= 0x7ff00000 ) 325*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; 326*25c28e83SPiotr Jasiukajtis 327*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 328*25c28e83SPiotr Jasiukajtis bge,pn %icc,.spec0 ! (7_0) if ( hy0 >= 0x7ff00000 ) 329*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; 330*25c28e83SPiotr Jasiukajtis 331*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; 332*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 333*25c28e83SPiotr Jasiukajtis bl,pn %icc,.spec1 ! (7_0) if ( hx0 < 0x00100000 ) 334*25c28e83SPiotr Jasiukajtis 335*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (7_0) j0 &= diff0; 336*25c28e83SPiotr Jasiukajtis.cont_spec0: 337*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; 338*25c28e83SPiotr Jasiukajtis 339*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; 340*25c28e83SPiotr Jasiukajtis 341*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; 342*25c28e83SPiotr Jasiukajtis 343*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 344*25c28e83SPiotr Jasiukajtis 345*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp15] ! 
(7_0) *(long long*)&scl0 = ll; 346*25c28e83SPiotr Jasiukajtis 347*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; 348*25c28e83SPiotr Jasiukajtis.cont_spec1: 349*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; 350*25c28e83SPiotr Jasiukajtis mov %i1,%i2 351*25c28e83SPiotr Jasiukajtis 352*25c28e83SPiotr Jasiukajtis lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; 353*25c28e83SPiotr Jasiukajtis 354*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; 355*25c28e83SPiotr Jasiukajtis mov %i0,%o0 356*25c28e83SPiotr Jasiukajtis 357*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000 358*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update0 ! (0_0) if ( hx0 >= 0x7ff00000 ) 359*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; 360*25c28e83SPiotr Jasiukajtis 361*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 362*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; 363*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update0 ! (0_0) if ( hy0 >= 0x7ff00000 ) 364*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; 365*25c28e83SPiotr Jasiukajtis 366*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 367*25c28e83SPiotr Jasiukajtis 368*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (0_0) j0 &= diff0; 369*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update1 ! (0_0) if ( hx0 < 0x00100000 ) 370*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; 371*25c28e83SPiotr Jasiukajtis.cont0: 372*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; 373*25c28e83SPiotr Jasiukajtis 374*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; 375*25c28e83SPiotr Jasiukajtis.cont1: 376*25c28e83SPiotr Jasiukajtis sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; 377*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp1] ! 
(0_0) *(long long*)&scl0 = ll; 378*25c28e83SPiotr Jasiukajtis 379*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; 380*25c28e83SPiotr Jasiukajtis 381*25c28e83SPiotr Jasiukajtis lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; 382*25c28e83SPiotr Jasiukajtis 383*25c28e83SPiotr Jasiukajtis lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1]; 384*25c28e83SPiotr Jasiukajtis 385*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; 386*25c28e83SPiotr Jasiukajtis 387*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 388*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; 389*25c28e83SPiotr Jasiukajtis 390*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; 391*25c28e83SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 392*25c28e83SPiotr Jasiukajtis 393*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; 394*25c28e83SPiotr Jasiukajtis 395*25c28e83SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; 396*25c28e83SPiotr Jasiukajtis 397*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 398*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; 399*25c28e83SPiotr Jasiukajtis 400*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; 401*25c28e83SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 402*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; 403*25c28e83SPiotr Jasiukajtis 404*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; 405*25c28e83SPiotr Jasiukajtis 406*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000 407*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; 408*25c28e83SPiotr Jasiukajtis 409*25c28e83SPiotr Jasiukajtis and %g1,_0x7fffffff,%l7 ! 
(1_0) hy0 &= 0x7fffffff; 410*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update2 ! (1_0) if ( hx0 >= 0x7ff00000 ) 411*25c28e83SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; 412*25c28e83SPiotr Jasiukajtis 413*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 414*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; 415*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update3 ! (1_0) if ( hy0 >= 0x7ff00000 ) 416*25c28e83SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; 417*25c28e83SPiotr Jasiukajtis 418*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; 419*25c28e83SPiotr Jasiukajtis 420*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (1_0) j0 &= diff0; 421*25c28e83SPiotr Jasiukajtis 422*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; 423*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; 424*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 425*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; 426*25c28e83SPiotr Jasiukajtis 427*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; 428*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; 429*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update4 ! (1_0) if ( hx0 < 0x00100000 ) 430*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; 431*25c28e83SPiotr Jasiukajtis 432*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; 433*25c28e83SPiotr Jasiukajtis.cont4: 434*25c28e83SPiotr Jasiukajtis sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; 435*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; 436*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; 437*25c28e83SPiotr Jasiukajtis 438*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! 
(7_1) y_lo0 = y0 - y_hi0; 439*25c28e83SPiotr Jasiukajtis 440*25c28e83SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; 441*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll; 442*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; 443*25c28e83SPiotr Jasiukajtis 444*25c28e83SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; 445*25c28e83SPiotr Jasiukajtis 446*25c28e83SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; 447*25c28e83SPiotr Jasiukajtis 448*25c28e83SPiotr Jasiukajtis fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; 449*25c28e83SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0]; 450*25c28e83SPiotr Jasiukajtis 451*25c28e83SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; 452*25c28e83SPiotr Jasiukajtis 453*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; 454*25c28e83SPiotr Jasiukajtis 455*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; 456*25c28e83SPiotr Jasiukajtis faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; 457*25c28e83SPiotr Jasiukajtis 458*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; 459*25c28e83SPiotr Jasiukajtis mov %i1,%i2 460*25c28e83SPiotr Jasiukajtis 461*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; 462*25c28e83SPiotr Jasiukajtis 463*25c28e83SPiotr Jasiukajtis lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py; 464*25c28e83SPiotr Jasiukajtis mov %i0,%o0 465*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; 466*25c28e83SPiotr Jasiukajtis 467*25c28e83SPiotr Jasiukajtis faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; 468*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; 469*25c28e83SPiotr Jasiukajtis 470*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 
0x7ff00000 471*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update5 ! (2_0) if ( hx0 >= 0x7ff00000 ) 472*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; 473*25c28e83SPiotr Jasiukajtis 474*25c28e83SPiotr Jasiukajtis and %g1,_0x7fffffff,%l7 ! (2_0) hx0 &= 0x7fffffff; 475*25c28e83SPiotr Jasiukajtis st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; 476*25c28e83SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; 477*25c28e83SPiotr Jasiukajtis 478*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; 479*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 480*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update6 ! (2_0) if ( hy0 >= 0x7ff00000 ) 481*25c28e83SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; 482*25c28e83SPiotr Jasiukajtis 483*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; 484*25c28e83SPiotr Jasiukajtis 485*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (2_0) j0 &= diff0; 486*25c28e83SPiotr Jasiukajtis 487*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; 488*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 489*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; 490*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; 491*25c28e83SPiotr Jasiukajtis 492*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; 493*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; 494*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update7 ! (2_0) if ( hx0 < 0x00100000 ) 495*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; 496*25c28e83SPiotr Jasiukajtis.cont7: 497*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; 498*25c28e83SPiotr Jasiukajtis 499*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! 
(2_0) ll = (long long)j0 << 32; 500*25c28e83SPiotr Jasiukajtis.cont8: 501*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; 502*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; 503*25c28e83SPiotr Jasiukajtis 504*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; 505*25c28e83SPiotr Jasiukajtis 506*25c28e83SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; 507*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; 508*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0; 509*25c28e83SPiotr Jasiukajtis 510*25c28e83SPiotr Jasiukajtis lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; 511*25c28e83SPiotr Jasiukajtis 512*25c28e83SPiotr Jasiukajtis lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; 513*25c28e83SPiotr Jasiukajtis 514*25c28e83SPiotr Jasiukajtis fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; 515*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; 516*25c28e83SPiotr Jasiukajtis 517*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 518*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1]; 519*25c28e83SPiotr Jasiukajtis 520*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; 521*25c28e83SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 522*25c28e83SPiotr Jasiukajtis fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); 523*25c28e83SPiotr Jasiukajtis 524*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; 525*25c28e83SPiotr Jasiukajtis 526*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; 527*25c28e83SPiotr Jasiukajtis sra %o2,11,%i3 ! (7_1) iarr >>= 11; 528*25c28e83SPiotr Jasiukajtis faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; 529*25c28e83SPiotr Jasiukajtis 530*25c28e83SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! 
(7_1) iarr &= 0x1fc; 531*25c28e83SPiotr Jasiukajtis 532*25c28e83SPiotr Jasiukajtis add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr 533*25c28e83SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; 534*25c28e83SPiotr Jasiukajtis 535*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 536*25c28e83SPiotr Jasiukajtis ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 537*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; 538*25c28e83SPiotr Jasiukajtis 539*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; 540*25c28e83SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 541*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; 542*25c28e83SPiotr Jasiukajtis 543*25c28e83SPiotr Jasiukajtis faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; 544*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; 545*25c28e83SPiotr Jasiukajtis 546*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 547*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; 548*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update9 ! (3_0) if ( hx0 >= 0x7ff00000 ) 549*25c28e83SPiotr Jasiukajtis fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); 550*25c28e83SPiotr Jasiukajtis 551*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; 552*25c28e83SPiotr Jasiukajtis st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; 553*25c28e83SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; 554*25c28e83SPiotr Jasiukajtis 555*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; 556*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 557*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update10 ! (3_0) if ( hy0 >= 0x7ff00000 ) 558*25c28e83SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! 
(1_0) y_hi0 -= D2ON36; 559*25c28e83SPiotr Jasiukajtis 560*25c28e83SPiotr Jasiukajtis fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; 561*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; 562*25c28e83SPiotr Jasiukajtis 563*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (3_0) j0 &= diff0; 564*25c28e83SPiotr Jasiukajtis 565*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; 566*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 567*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; 568*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; 569*25c28e83SPiotr Jasiukajtis 570*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; 571*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; 572*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update11 ! (3_0) if ( hx0 < 0x00100000 ) 573*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; 574*25c28e83SPiotr Jasiukajtis.cont11: 575*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; 576*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; 577*25c28e83SPiotr Jasiukajtis.cont12: 578*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; 579*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; 580*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; 581*25c28e83SPiotr Jasiukajtis 582*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 583*25c28e83SPiotr Jasiukajtis 584*25c28e83SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0; 585*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; 586*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; 587*25c28e83SPiotr Jasiukajtis 588*25c28e83SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! 
(2_0) ((float*)&x0)[0] = ((float*)px)[0]; 589*25c28e83SPiotr Jasiukajtis fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; 590*25c28e83SPiotr Jasiukajtis 591*25c28e83SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; 592*25c28e83SPiotr Jasiukajtis 593*25c28e83SPiotr Jasiukajtis fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; 594*25c28e83SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; 595*25c28e83SPiotr Jasiukajtis 596*25c28e83SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; 597*25c28e83SPiotr Jasiukajtis 598*25c28e83SPiotr Jasiukajtis fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; 599*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; 600*25c28e83SPiotr Jasiukajtis fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); 601*25c28e83SPiotr Jasiukajtis 602*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; 603*25c28e83SPiotr Jasiukajtis 604*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; 605*25c28e83SPiotr Jasiukajtis sra %o2,11,%o4 ! (0_0) iarr >>= 11; 606*25c28e83SPiotr Jasiukajtis faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; 607*25c28e83SPiotr Jasiukajtis 608*25c28e83SPiotr Jasiukajtis and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; 609*25c28e83SPiotr Jasiukajtis 610*25c28e83SPiotr Jasiukajtis add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr 611*25c28e83SPiotr Jasiukajtis mov %i1,%i2 612*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; 613*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; 614*25c28e83SPiotr Jasiukajtis 615*25c28e83SPiotr Jasiukajtis ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 616*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; 617*25c28e83SPiotr Jasiukajtis 618*25c28e83SPiotr Jasiukajtis lda [%i0]0x82,%o4 ! 
(4_0) hy0 = *(int*)py; 619*25c28e83SPiotr Jasiukajtis mov %i0,%o0 620*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; 621*25c28e83SPiotr Jasiukajtis 622*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; 623*25c28e83SPiotr Jasiukajtis faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo; 624*25c28e83SPiotr Jasiukajtis 625*25c28e83SPiotr Jasiukajtis fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; 626*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000 627*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; 628*25c28e83SPiotr Jasiukajtis fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0); 629*25c28e83SPiotr Jasiukajtis 630*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; 631*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update13 ! (4_0) if ( hx0 >= 0x7ff00000 ) 632*25c28e83SPiotr Jasiukajtis st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; 633*25c28e83SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; 634*25c28e83SPiotr Jasiukajtis 635*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; 636*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 637*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update14 ! (4_0) if ( hy0 >= 0x7ff00000 ) 638*25c28e83SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; 639*25c28e83SPiotr Jasiukajtis 640*25c28e83SPiotr Jasiukajtis fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; 641*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; 642*25c28e83SPiotr Jasiukajtis 643*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (4_0) j0 &= diff0; 644*25c28e83SPiotr Jasiukajtis 645*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; 646*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0; 647*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (4_0) hx0 ? 
0x00100000 648*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; 649*25c28e83SPiotr Jasiukajtis 650*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; 651*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; 652*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update15 ! (4_0) if ( hx0 < 0x00100000 ) 653*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; 654*25c28e83SPiotr Jasiukajtis.cont15: 655*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; 656*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; 657*25c28e83SPiotr Jasiukajtis.cont16: 658*25c28e83SPiotr Jasiukajtis fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; 659*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; 660*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; 661*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; 662*25c28e83SPiotr Jasiukajtis 663*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; 664*25c28e83SPiotr Jasiukajtis 665*25c28e83SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; 666*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll; 667*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; 668*25c28e83SPiotr Jasiukajtis 669*25c28e83SPiotr Jasiukajtis lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; 670*25c28e83SPiotr Jasiukajtis fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; 671*25c28e83SPiotr Jasiukajtis 672*25c28e83SPiotr Jasiukajtis lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; 673*25c28e83SPiotr Jasiukajtis 674*25c28e83SPiotr Jasiukajtis fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; 675*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; 676*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f14,%f20 ! 
(7_1) dtmp2 = DTWO - dtmp2; 677*25c28e83SPiotr Jasiukajtis 678*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; 679*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 680*25c28e83SPiotr Jasiukajtis 681*25c28e83SPiotr Jasiukajtis fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; 682*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; 683*25c28e83SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 684*25c28e83SPiotr Jasiukajtis fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); 685*25c28e83SPiotr Jasiukajtis 686*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; 687*25c28e83SPiotr Jasiukajtis 688*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; 689*25c28e83SPiotr Jasiukajtis sra %o2,11,%i3 ! (1_0) iarr >>= 11; 690*25c28e83SPiotr Jasiukajtis faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; 691*25c28e83SPiotr Jasiukajtis 692*25c28e83SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; 693*25c28e83SPiotr Jasiukajtis fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; 694*25c28e83SPiotr Jasiukajtis 695*25c28e83SPiotr Jasiukajtis add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr 696*25c28e83SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; 697*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; 698*25c28e83SPiotr Jasiukajtis 699*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 700*25c28e83SPiotr Jasiukajtis ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 701*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; 702*25c28e83SPiotr Jasiukajtis 703*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; 704*25c28e83SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 705*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; 706*25c28e83SPiotr Jasiukajtis 707*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! 
(5_0) hx0 &= 0x7fffffff; 708*25c28e83SPiotr Jasiukajtis faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo; 709*25c28e83SPiotr Jasiukajtis 710*25c28e83SPiotr Jasiukajtis fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; 711*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 712*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; 713*25c28e83SPiotr Jasiukajtis fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); 714*25c28e83SPiotr Jasiukajtis 715*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; 716*25c28e83SPiotr Jasiukajtis st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0]; 717*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update17 ! (5_0) if ( hx0 >= 0x7ff00000 ) 718*25c28e83SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; 719*25c28e83SPiotr Jasiukajtis 720*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; 721*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 722*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update18 ! (5_0) if ( hy0 >= 0x7ff00000 ) 723*25c28e83SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; 724*25c28e83SPiotr Jasiukajtis 725*25c28e83SPiotr Jasiukajtis fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; 726*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; 727*25c28e83SPiotr Jasiukajtis 728*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (5_0) j0 &= diff0; 729*25c28e83SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); 730*25c28e83SPiotr Jasiukajtis 731*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; 732*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; 733*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000 734*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; 735*25c28e83SPiotr Jasiukajtis 736*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! 
(3_0) dtmp0 = y_hi0 * y_hi0; 737*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; 738*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update19 ! (5_0) if ( hx0 < 0x00100000 ) 739*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; 740*25c28e83SPiotr Jasiukajtis.cont19a: 741*25c28e83SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; 742*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; 743*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; 744*25c28e83SPiotr Jasiukajtis.cont19b: 745*25c28e83SPiotr Jasiukajtis fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; 746*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; 747*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; 748*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; 749*25c28e83SPiotr Jasiukajtis 750*25c28e83SPiotr Jasiukajtis fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; 751*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; 752*25c28e83SPiotr Jasiukajtis.cont20: 753*25c28e83SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; 754*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; 755*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; 756*25c28e83SPiotr Jasiukajtis 757*25c28e83SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; 758*25c28e83SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; 759*25c28e83SPiotr Jasiukajtis fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0; 760*25c28e83SPiotr Jasiukajtis 761*25c28e83SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; 762*25c28e83SPiotr Jasiukajtis 763*25c28e83SPiotr Jasiukajtis fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; 764*25c28e83SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! 
(4_0) ((float*)&y0)[0] = ((float*)py)[0]; 765*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; 766*25c28e83SPiotr Jasiukajtis 767*25c28e83SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; 768*25c28e83SPiotr Jasiukajtis 769*25c28e83SPiotr Jasiukajtis fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; 770*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; 771*25c28e83SPiotr Jasiukajtis fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); 772*25c28e83SPiotr Jasiukajtis 773*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; 774*25c28e83SPiotr Jasiukajtis fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; 775*25c28e83SPiotr Jasiukajtis 776*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; 777*25c28e83SPiotr Jasiukajtis sra %o2,11,%o4 ! (2_0) iarr >>= 11; 778*25c28e83SPiotr Jasiukajtis faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; 779*25c28e83SPiotr Jasiukajtis 780*25c28e83SPiotr Jasiukajtis and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; 781*25c28e83SPiotr Jasiukajtis fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2; 782*25c28e83SPiotr Jasiukajtis 783*25c28e83SPiotr Jasiukajtis add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr 784*25c28e83SPiotr Jasiukajtis mov %i1,%i2 785*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px; 786*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; 787*25c28e83SPiotr Jasiukajtis 788*25c28e83SPiotr Jasiukajtis fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; 789*25c28e83SPiotr Jasiukajtis ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 790*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; 791*25c28e83SPiotr Jasiukajtis 792*25c28e83SPiotr Jasiukajtis lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; 793*25c28e83SPiotr Jasiukajtis mov %i0,%o0 794*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! 
(4_0) y_hi0 = y0 + D2ON36; 795*25c28e83SPiotr Jasiukajtis 796*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; 797*25c28e83SPiotr Jasiukajtis faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo; 798*25c28e83SPiotr Jasiukajtis 799*25c28e83SPiotr Jasiukajtis fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; 800*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 801*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; 802*25c28e83SPiotr Jasiukajtis fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); 803*25c28e83SPiotr Jasiukajtis 804*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; 805*25c28e83SPiotr Jasiukajtis st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; 806*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update21 ! (6_0) if ( hx0 >= 0x7ff00000 ) 807*25c28e83SPiotr Jasiukajtis fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; 808*25c28e83SPiotr Jasiukajtis 809*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; 810*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 811*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update22 ! (6_0) if ( hy0 >= 0x7ff00000 ) 812*25c28e83SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; 813*25c28e83SPiotr Jasiukajtis 814*25c28e83SPiotr Jasiukajtis fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; 815*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; 816*25c28e83SPiotr Jasiukajtis faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; 817*25c28e83SPiotr Jasiukajtis 818*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (6_0) j0 &= diff0; 819*25c28e83SPiotr Jasiukajtis fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); 820*25c28e83SPiotr Jasiukajtis 821*25c28e83SPiotr Jasiukajtis fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; 822*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; 823*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (6_0) hx0 ? 
0x00100000 824*25c28e83SPiotr Jasiukajtis fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; 825*25c28e83SPiotr Jasiukajtis 826*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; 827*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; 828*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update23 ! (6_0) if ( hx0 < 0x00100000 ) 829*25c28e83SPiotr Jasiukajtis faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; 830*25c28e83SPiotr Jasiukajtis.cont23a: 831*25c28e83SPiotr Jasiukajtis fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; 832*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; 833*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; 834*25c28e83SPiotr Jasiukajtis.cont23b: 835*25c28e83SPiotr Jasiukajtis fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; 836*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; 837*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; 838*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; 839*25c28e83SPiotr Jasiukajtis 840*25c28e83SPiotr Jasiukajtis fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; 841*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; 842*25c28e83SPiotr Jasiukajtis.cont24: 843*25c28e83SPiotr Jasiukajtis fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; 844*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; 845*25c28e83SPiotr Jasiukajtis faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; 846*25c28e83SPiotr Jasiukajtis 847*25c28e83SPiotr Jasiukajtis lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; 848*25c28e83SPiotr Jasiukajtis fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; 849*25c28e83SPiotr Jasiukajtis 850*25c28e83SPiotr Jasiukajtis lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; 851*25c28e83SPiotr Jasiukajtis fsubd DONE,%f50,%f52 ! 
(0_0) dtmp0 = DONE - dtmp0; 852*25c28e83SPiotr Jasiukajtis 853*25c28e83SPiotr Jasiukajtis fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; 854*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0]; 855*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; 856*25c28e83SPiotr Jasiukajtis 857*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 858*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; 859*25c28e83SPiotr Jasiukajtis 860*25c28e83SPiotr Jasiukajtis fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; 861*25c28e83SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 862*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; 863*25c28e83SPiotr Jasiukajtis fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); 864*25c28e83SPiotr Jasiukajtis 865*25c28e83SPiotr Jasiukajtis fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; 866*25c28e83SPiotr Jasiukajtis fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; 867*25c28e83SPiotr Jasiukajtis 868*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; 869*25c28e83SPiotr Jasiukajtis sra %o2,11,%i3 ! (3_0) iarr >>= 11; 870*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; 871*25c28e83SPiotr Jasiukajtis 872*25c28e83SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; 873*25c28e83SPiotr Jasiukajtis fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; 874*25c28e83SPiotr Jasiukajtis 875*25c28e83SPiotr Jasiukajtis fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); 876*25c28e83SPiotr Jasiukajtis add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr 877*25c28e83SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; 878*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1; 879*25c28e83SPiotr Jasiukajtis 880*25c28e83SPiotr Jasiukajtis fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; 881*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i3 ! 
py += stridey 882*25c28e83SPiotr Jasiukajtis ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 883*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; 884*25c28e83SPiotr Jasiukajtis 885*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; 886*25c28e83SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 887*25c28e83SPiotr Jasiukajtis faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36; 888*25c28e83SPiotr Jasiukajtis 889*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; 890*25c28e83SPiotr Jasiukajtis faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; 891*25c28e83SPiotr Jasiukajtis 892*25c28e83SPiotr Jasiukajtis fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; 893*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 894*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; 895*25c28e83SPiotr Jasiukajtis fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); 896*25c28e83SPiotr Jasiukajtis 897*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; 898*25c28e83SPiotr Jasiukajtis st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; 899*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update25 ! (7_0) if ( hx0 >= 0x7ff00000 ) 900*25c28e83SPiotr Jasiukajtis fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; 901*25c28e83SPiotr Jasiukajtis 902*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; 903*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 904*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update26 ! (7_0) if ( hy0 >= 0x7ff00000 ) 905*25c28e83SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; 906*25c28e83SPiotr Jasiukajtis 907*25c28e83SPiotr Jasiukajtis fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; 908*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; 909*25c28e83SPiotr Jasiukajtis faddd %f28,%f48,%f48 ! 
(0_0) res0 += dtmp0; 910*25c28e83SPiotr Jasiukajtis 911*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (7_0) j0 &= diff0; 912*25c28e83SPiotr Jasiukajtis fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); 913*25c28e83SPiotr Jasiukajtis 914*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; 915*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; 916*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 917*25c28e83SPiotr Jasiukajtis fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; 918*25c28e83SPiotr Jasiukajtis 919*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; 920*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; 921*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update27 ! (7_0) if ( hx0 < 0x00100000 ) 922*25c28e83SPiotr Jasiukajtis faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; 923*25c28e83SPiotr Jasiukajtis.cont27a: 924*25c28e83SPiotr Jasiukajtis fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; 925*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; 926*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; 927*25c28e83SPiotr Jasiukajtis.cont27b: 928*25c28e83SPiotr Jasiukajtis fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; 929*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 930*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; 931*25c28e83SPiotr Jasiukajtis faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; 932*25c28e83SPiotr Jasiukajtis 933*25c28e83SPiotr Jasiukajtis fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; 934*25c28e83SPiotr Jasiukajtis fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; 935*25c28e83SPiotr Jasiukajtis.cont28: 936*25c28e83SPiotr Jasiukajtis fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; 937*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll; 938*25c28e83SPiotr Jasiukajtis faddd %f0,%f46,%f42 ! 
(5_0) res0_hi += dtmp0; 939*25c28e83SPiotr Jasiukajtis 940*25c28e83SPiotr Jasiukajtis fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; 941*25c28e83SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; 942*25c28e83SPiotr Jasiukajtis 943*25c28e83SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; 944*25c28e83SPiotr Jasiukajtis fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; 945*25c28e83SPiotr Jasiukajtis 946*25c28e83SPiotr Jasiukajtis fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; 947*25c28e83SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; 948*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; 949*25c28e83SPiotr Jasiukajtis 950*25c28e83SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1]; 951*25c28e83SPiotr Jasiukajtis 952*25c28e83SPiotr Jasiukajtis fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; 953*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; 954*25c28e83SPiotr Jasiukajtis fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); 955*25c28e83SPiotr Jasiukajtis 956*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; 957*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll; 958*25c28e83SPiotr Jasiukajtis fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; 959*25c28e83SPiotr Jasiukajtis 960*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; 961*25c28e83SPiotr Jasiukajtis sra %o2,11,%o4 ! (4_0) iarr >>= 11; 962*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1; 963*25c28e83SPiotr Jasiukajtis 964*25c28e83SPiotr Jasiukajtis and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc; 965*25c28e83SPiotr Jasiukajtis fmuld %f26,%f14,%f26 ! 
(2_0) dres = dd * dtmp2; 966*25c28e83SPiotr Jasiukajtis 967*25c28e83SPiotr Jasiukajtis cmp counter,8 968*25c28e83SPiotr Jasiukajtis bl,pn %icc,.tail 969*25c28e83SPiotr Jasiukajtis nop 970*25c28e83SPiotr Jasiukajtis 971*25c28e83SPiotr Jasiukajtis ba .main_loop 972*25c28e83SPiotr Jasiukajtis sub counter,8,counter 973*25c28e83SPiotr Jasiukajtis 974*25c28e83SPiotr Jasiukajtis .align 16 975*25c28e83SPiotr Jasiukajtis.main_loop: 976*25c28e83SPiotr Jasiukajtis fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 ); 977*25c28e83SPiotr Jasiukajtis add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr 978*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; 979*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1; 980*25c28e83SPiotr Jasiukajtis 981*25c28e83SPiotr Jasiukajtis fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres; 982*25c28e83SPiotr Jasiukajtis mov %i1,%i2 983*25c28e83SPiotr Jasiukajtis ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 984*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f50 ! (6_1) x_hi0 = x0 + D2ON36; 985*25c28e83SPiotr Jasiukajtis 986*25c28e83SPiotr Jasiukajtis nop 987*25c28e83SPiotr Jasiukajtis mov %i0,%o0 988*25c28e83SPiotr Jasiukajtis lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; 989*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f2 ! (6_1) y_hi0 = y0 + D2ON36; 990*25c28e83SPiotr Jasiukajtis 991*25c28e83SPiotr Jasiukajtis faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo; 992*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; 993*25c28e83SPiotr Jasiukajtis st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0]; 994*25c28e83SPiotr Jasiukajtis fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0; 995*25c28e83SPiotr Jasiukajtis 996*25c28e83SPiotr Jasiukajtis fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1; 997*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000 998*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! 
(7_2) ((float*)pz)[0] = ((float*)&res0)[0]; 999*25c28e83SPiotr Jasiukajtis fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0); 1000*25c28e83SPiotr Jasiukajtis 1001*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; 1002*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1]; 1003*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update29 ! (0_0) if ( hx0 >= 0x7ff00000 ) 1004*25c28e83SPiotr Jasiukajtis fsubd %f50,D2ON36,%f20 ! (6_1) x_hi0 -= D2ON36; 1005*25c28e83SPiotr Jasiukajtis 1006*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 1007*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; 1008*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update30 ! (0_0) if ( hy0 >= 0x7ff00000 ) 1009*25c28e83SPiotr Jasiukajtis fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36; 1010*25c28e83SPiotr Jasiukajtis 1011*25c28e83SPiotr Jasiukajtis fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; 1012*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; 1013*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; 1014*25c28e83SPiotr Jasiukajtis faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; 1015*25c28e83SPiotr Jasiukajtis 1016*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (0_0) j0 &= diff0; 1017*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 1018*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update31 ! (0_0) if ( hx0 < 0x00100000 ) 1019*25c28e83SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); 1020*25c28e83SPiotr Jasiukajtis.cont31: 1021*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; 1022*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; 1023*25c28e83SPiotr Jasiukajtis nop 1024*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; 1025*25c28e83SPiotr Jasiukajtis 1026*25c28e83SPiotr Jasiukajtis fmuld %f2,%f2,%f46 ! 
(6_1) dtmp0 = y_hi0 * y_hi0; 1027*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1028*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; 1029*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; 1030*25c28e83SPiotr Jasiukajtis 1031*25c28e83SPiotr Jasiukajtis fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; 1032*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; 1033*25c28e83SPiotr Jasiukajtis nop 1034*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; 1035*25c28e83SPiotr Jasiukajtis.cont32: 1036*25c28e83SPiotr Jasiukajtis fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0; 1037*25c28e83SPiotr Jasiukajtis sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; 1038*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll; 1039*25c28e83SPiotr Jasiukajtis faddd %f60,%f2,%f50 ! (6_1) dtmp1 = y0 + y_hi0; 1040*25c28e83SPiotr Jasiukajtis 1041*25c28e83SPiotr Jasiukajtis fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0; 1042*25c28e83SPiotr Jasiukajtis nop 1043*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1044*25c28e83SPiotr Jasiukajtis fsubd %f60,%f2,%f2 ! (6_1) y_lo0 = y0 - y_hi0; 1045*25c28e83SPiotr Jasiukajtis 1046*25c28e83SPiotr Jasiukajtis fmuld %f62,%f28,%f28 ! (6_1) res0_lo *= x_lo0; 1047*25c28e83SPiotr Jasiukajtis nop 1048*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; 1049*25c28e83SPiotr Jasiukajtis faddd %f0,%f46,%f30 ! (6_1) res0_hi += dtmp0; 1050*25c28e83SPiotr Jasiukajtis 1051*25c28e83SPiotr Jasiukajtis nop 1052*25c28e83SPiotr Jasiukajtis nop 1053*25c28e83SPiotr Jasiukajtis lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; 1054*25c28e83SPiotr Jasiukajtis fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0; 1055*25c28e83SPiotr Jasiukajtis 1056*25c28e83SPiotr Jasiukajtis nop 1057*25c28e83SPiotr Jasiukajtis nop 1058*25c28e83SPiotr Jasiukajtis lda [%i4+4]%asi,%f11 ! 
(7_1) ((float*)&x0)[1] = ((float*)px)[1]; 1059*25c28e83SPiotr Jasiukajtis fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0; 1060*25c28e83SPiotr Jasiukajtis 1061*25c28e83SPiotr Jasiukajtis fmuld %f50,%f2,%f46 ! (6_1) dtmp1 *= y_lo0; 1062*25c28e83SPiotr Jasiukajtis nop 1063*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; 1064*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2; 1065*25c28e83SPiotr Jasiukajtis 1066*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 1067*25c28e83SPiotr Jasiukajtis nop 1068*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; 1069*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1070*25c28e83SPiotr Jasiukajtis 1071*25c28e83SPiotr Jasiukajtis fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres; 1072*25c28e83SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 1073*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0]; 1074*25c28e83SPiotr Jasiukajtis fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1); 1075*25c28e83SPiotr Jasiukajtis 1076*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; 1077*25c28e83SPiotr Jasiukajtis nop 1078*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll; 1079*25c28e83SPiotr Jasiukajtis fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1; 1080*25c28e83SPiotr Jasiukajtis 1081*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; 1082*25c28e83SPiotr Jasiukajtis sra %o2,11,%i3 ! (5_1) iarr >>= 11; 1083*25c28e83SPiotr Jasiukajtis nop 1084*25c28e83SPiotr Jasiukajtis faddd %f28,%f46,%f40 ! (6_1) res0_lo += dtmp1; 1085*25c28e83SPiotr Jasiukajtis 1086*25c28e83SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc; 1087*25c28e83SPiotr Jasiukajtis nop 1088*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1089*25c28e83SPiotr Jasiukajtis fmuld %f18,%f22,%f28 ! 
(3_1) dres = dd * dtmp2; 1090*25c28e83SPiotr Jasiukajtis 1091*25c28e83SPiotr Jasiukajtis fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 ); 1092*25c28e83SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; 1093*25c28e83SPiotr Jasiukajtis add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr 1094*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1; 1095*25c28e83SPiotr Jasiukajtis 1096*25c28e83SPiotr Jasiukajtis fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres; 1097*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 1098*25c28e83SPiotr Jasiukajtis ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1099*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; 1100*25c28e83SPiotr Jasiukajtis 1101*25c28e83SPiotr Jasiukajtis nop 1102*25c28e83SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 1103*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; 1104*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; 1105*25c28e83SPiotr Jasiukajtis 1106*25c28e83SPiotr Jasiukajtis faddd %f30,%f40,%f18 ! (6_1) dres = res0_hi + res0_lo; 1107*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; 1108*25c28e83SPiotr Jasiukajtis st %f18,[%fp+ftmp0] ! (6_1) iarr = ((int*)&dres)[0]; 1109*25c28e83SPiotr Jasiukajtis fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0; 1110*25c28e83SPiotr Jasiukajtis 1111*25c28e83SPiotr Jasiukajtis fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1; 1112*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000 1113*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1114*25c28e83SPiotr Jasiukajtis fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0); 1115*25c28e83SPiotr Jasiukajtis 1116*25c28e83SPiotr Jasiukajtis and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff; 1117*25c28e83SPiotr Jasiukajtis nop 1118*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update33 ! 
(1_0) if ( hx0 >= 0x7ff00000 ) 1119*25c28e83SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; 1120*25c28e83SPiotr Jasiukajtis 1121*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 1122*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; 1123*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1124*25c28e83SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; 1125*25c28e83SPiotr Jasiukajtis 1126*25c28e83SPiotr Jasiukajtis fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; 1127*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; 1128*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update34 ! (1_0) if ( hy0 >= 0x7ff00000 ) 1129*25c28e83SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; 1130*25c28e83SPiotr Jasiukajtis 1131*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (1_0) j0 &= diff0; 1132*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1133*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; 1134*25c28e83SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); 1135*25c28e83SPiotr Jasiukajtis 1136*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; 1137*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; 1138*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 1139*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; 1140*25c28e83SPiotr Jasiukajtis 1141*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; 1142*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; 1143*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update35 ! (1_0) if ( hx0 < 0x00100000 ) 1144*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; 1145*25c28e83SPiotr Jasiukajtis.cont35a: 1146*25c28e83SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! 
(3_1) dtmp0 = res0_hi * res0; 1147*25c28e83SPiotr Jasiukajtis nop 1148*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; 1149*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; 1150*25c28e83SPiotr Jasiukajtis.cont35b: 1151*25c28e83SPiotr Jasiukajtis fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; 1152*25c28e83SPiotr Jasiukajtis sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; 1153*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; 1154*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; 1155*25c28e83SPiotr Jasiukajtis 1156*25c28e83SPiotr Jasiukajtis fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; 1157*25c28e83SPiotr Jasiukajtis nop 1158*25c28e83SPiotr Jasiukajtis nop 1159*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; 1160*25c28e83SPiotr Jasiukajtis.cont36: 1161*25c28e83SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; 1162*25c28e83SPiotr Jasiukajtis nop 1163*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll; 1164*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; 1165*25c28e83SPiotr Jasiukajtis 1166*25c28e83SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0; 1167*25c28e83SPiotr Jasiukajtis nop 1168*25c28e83SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; 1169*25c28e83SPiotr Jasiukajtis fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0; 1170*25c28e83SPiotr Jasiukajtis 1171*25c28e83SPiotr Jasiukajtis nop 1172*25c28e83SPiotr Jasiukajtis nop 1173*25c28e83SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; 1174*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1175*25c28e83SPiotr Jasiukajtis 1176*25c28e83SPiotr Jasiukajtis fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; 1177*25c28e83SPiotr Jasiukajtis nop 1178*25c28e83SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! 
(0_0) ((float*)&y0)[0] = ((float*)py)[0]; 1179*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2; 1180*25c28e83SPiotr Jasiukajtis 1181*25c28e83SPiotr Jasiukajtis nop 1182*25c28e83SPiotr Jasiukajtis nop 1183*25c28e83SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; 1184*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1185*25c28e83SPiotr Jasiukajtis 1186*25c28e83SPiotr Jasiukajtis fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres; 1187*25c28e83SPiotr Jasiukajtis nop 1188*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (6_1) iarr = ((int*)&dres)[0]; 1189*25c28e83SPiotr Jasiukajtis fand %f18,DA1,%f2 ! (6_1) dexp0 = vis_fand(dres,DA1); 1190*25c28e83SPiotr Jasiukajtis 1191*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; 1192*25c28e83SPiotr Jasiukajtis nop 1193*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll; 1194*25c28e83SPiotr Jasiukajtis fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1; 1195*25c28e83SPiotr Jasiukajtis 1196*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; 1197*25c28e83SPiotr Jasiukajtis sra %o2,11,%g1 ! (6_1) iarr >>= 11; 1198*25c28e83SPiotr Jasiukajtis nop 1199*25c28e83SPiotr Jasiukajtis faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; 1200*25c28e83SPiotr Jasiukajtis 1201*25c28e83SPiotr Jasiukajtis nop 1202*25c28e83SPiotr Jasiukajtis and %g1,0x1fc,%g1 ! (6_1) iarr &= 0x1fc; 1203*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1204*25c28e83SPiotr Jasiukajtis fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2; 1205*25c28e83SPiotr Jasiukajtis 1206*25c28e83SPiotr Jasiukajtis fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 ); 1207*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; 1208*25c28e83SPiotr Jasiukajtis add %g1,TBL,%g1 ! (6_1) (char*)dll1 + iarr 1209*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1; 1210*25c28e83SPiotr Jasiukajtis 1211*25c28e83SPiotr Jasiukajtis fmuld %f20,%f28,%f52 ! 
(3_1) dtmp0 *= dres; 1212*25c28e83SPiotr Jasiukajtis mov %i1,%i2 1213*25c28e83SPiotr Jasiukajtis ld [%g1],%f28 ! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1214*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; 1215*25c28e83SPiotr Jasiukajtis 1216*25c28e83SPiotr Jasiukajtis nop 1217*25c28e83SPiotr Jasiukajtis mov %i0,%o0 1218*25c28e83SPiotr Jasiukajtis lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py; 1219*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; 1220*25c28e83SPiotr Jasiukajtis 1221*25c28e83SPiotr Jasiukajtis faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; 1222*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; 1223*25c28e83SPiotr Jasiukajtis st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; 1224*25c28e83SPiotr Jasiukajtis fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0; 1225*25c28e83SPiotr Jasiukajtis 1226*25c28e83SPiotr Jasiukajtis fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1; 1227*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000 1228*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1229*25c28e83SPiotr Jasiukajtis fpsub32 %f28,%f2,%f28 ! (6_1) dd = vis_fpsub32(dtmp0, dexp0); 1230*25c28e83SPiotr Jasiukajtis 1231*25c28e83SPiotr Jasiukajtis and %g1,_0x7fffffff,%l7 ! (2_0) hy0 &= 0x7fffffff; 1232*25c28e83SPiotr Jasiukajtis nop 1233*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update37 ! (2_0) if ( hx0 >= 0x7ff00000 ) 1234*25c28e83SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; 1235*25c28e83SPiotr Jasiukajtis 1236*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; 1237*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 1238*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1239*25c28e83SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! 
(0_0) y_hi0 -= D2ON36; 1240*25c28e83SPiotr Jasiukajtis 1241*25c28e83SPiotr Jasiukajtis fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres; 1242*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; 1243*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update38 ! (2_0) if ( hy0 >= 0x7ff00000 ) 1244*25c28e83SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; 1245*25c28e83SPiotr Jasiukajtis 1246*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (2_0) j0 &= diff0; 1247*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1248*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; 1249*25c28e83SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); 1250*25c28e83SPiotr Jasiukajtis 1251*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; 1252*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 1253*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; 1254*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; 1255*25c28e83SPiotr Jasiukajtis 1256*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; 1257*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; 1258*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update39 ! (2_0) if ( hx0 < 0x00100000 ) 1259*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; 1260*25c28e83SPiotr Jasiukajtis.cont39a: 1261*25c28e83SPiotr Jasiukajtis fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; 1262*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; 1263*25c28e83SPiotr Jasiukajtis nop 1264*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; 1265*25c28e83SPiotr Jasiukajtis.cont39b: 1266*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; 1267*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; 1268*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp5] ! 
(2_0) *(long long*)&scl0 = ll; 1269*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; 1270*25c28e83SPiotr Jasiukajtis 1271*25c28e83SPiotr Jasiukajtis fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; 1272*25c28e83SPiotr Jasiukajtis nop 1273*25c28e83SPiotr Jasiukajtis nop 1274*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; 1275*25c28e83SPiotr Jasiukajtis.cont40: 1276*25c28e83SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; 1277*25c28e83SPiotr Jasiukajtis nop 1278*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; 1279*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0; 1280*25c28e83SPiotr Jasiukajtis 1281*25c28e83SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0; 1282*25c28e83SPiotr Jasiukajtis nop 1283*25c28e83SPiotr Jasiukajtis lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; 1284*25c28e83SPiotr Jasiukajtis fmuld %f28,%f20,%f54 ! (6_1) dd *= dtmp0; 1285*25c28e83SPiotr Jasiukajtis 1286*25c28e83SPiotr Jasiukajtis nop 1287*25c28e83SPiotr Jasiukajtis nop 1288*25c28e83SPiotr Jasiukajtis lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; 1289*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1290*25c28e83SPiotr Jasiukajtis 1291*25c28e83SPiotr Jasiukajtis fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; 1292*25c28e83SPiotr Jasiukajtis nop 1293*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; 1294*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2; 1295*25c28e83SPiotr Jasiukajtis 1296*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 1297*25c28e83SPiotr Jasiukajtis nop 1298*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1]; 1299*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1300*25c28e83SPiotr Jasiukajtis 1301*25c28e83SPiotr Jasiukajtis fmuld %f54,%f18,%f46 ! 
(6_1) dtmp1 = dd * dres; 1302*25c28e83SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 1303*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; 1304*25c28e83SPiotr Jasiukajtis fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); 1305*25c28e83SPiotr Jasiukajtis 1306*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; 1307*25c28e83SPiotr Jasiukajtis nop 1308*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll; 1309*25c28e83SPiotr Jasiukajtis fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1; 1310*25c28e83SPiotr Jasiukajtis 1311*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; 1312*25c28e83SPiotr Jasiukajtis sra %o2,11,%i3 ! (7_1) iarr >>= 11; 1313*25c28e83SPiotr Jasiukajtis nop 1314*25c28e83SPiotr Jasiukajtis faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; 1315*25c28e83SPiotr Jasiukajtis 1316*25c28e83SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc; 1317*25c28e83SPiotr Jasiukajtis nop 1318*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1319*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2; 1320*25c28e83SPiotr Jasiukajtis 1321*25c28e83SPiotr Jasiukajtis fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 ); 1322*25c28e83SPiotr Jasiukajtis add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr 1323*25c28e83SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; 1324*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f46,%f62 ! (6_1) dtmp1 = DTWO - dtmp1; 1325*25c28e83SPiotr Jasiukajtis 1326*25c28e83SPiotr Jasiukajtis fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres; 1327*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 1328*25c28e83SPiotr Jasiukajtis ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1329*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; 1330*25c28e83SPiotr Jasiukajtis 1331*25c28e83SPiotr Jasiukajtis nop 1332*25c28e83SPiotr Jasiukajtis add %i3,stridey,%i0 ! 
py += stridey 1333*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; 1334*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; 1335*25c28e83SPiotr Jasiukajtis 1336*25c28e83SPiotr Jasiukajtis faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; 1337*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; 1338*25c28e83SPiotr Jasiukajtis st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; 1339*25c28e83SPiotr Jasiukajtis fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0; 1340*25c28e83SPiotr Jasiukajtis 1341*25c28e83SPiotr Jasiukajtis fmuld %f54,%f62,%f24 ! (6_1) dd *= dtmp1; 1342*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 1343*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1344*25c28e83SPiotr Jasiukajtis fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); 1345*25c28e83SPiotr Jasiukajtis 1346*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; 1347*25c28e83SPiotr Jasiukajtis nop 1348*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update41 ! (3_0) if ( hx0 >= 0x7ff00000 ) 1349*25c28e83SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; 1350*25c28e83SPiotr Jasiukajtis 1351*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; 1352*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 1353*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1354*25c28e83SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; 1355*25c28e83SPiotr Jasiukajtis 1356*25c28e83SPiotr Jasiukajtis fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; 1357*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; 1358*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update42 ! (3_0) if ( hy0 >= 0x7ff00000 ) 1359*25c28e83SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! 
(4_1) res0 += dtmp0; 1360*25c28e83SPiotr Jasiukajtis 1361*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (3_0) j0 &= diff0; 1362*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1363*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; 1364*25c28e83SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); 1365*25c28e83SPiotr Jasiukajtis 1366*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; 1367*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 1368*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; 1369*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; 1370*25c28e83SPiotr Jasiukajtis 1371*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; 1372*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; 1373*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update43 ! (3_0) if ( hx0 < 0x00100000 ) 1374*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; 1375*25c28e83SPiotr Jasiukajtis.cont43a: 1376*25c28e83SPiotr Jasiukajtis fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; 1377*25c28e83SPiotr Jasiukajtis nop 1378*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; 1379*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; 1380*25c28e83SPiotr Jasiukajtis.cont43b: 1381*25c28e83SPiotr Jasiukajtis fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; 1382*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; 1383*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; 1384*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; 1385*25c28e83SPiotr Jasiukajtis 1386*25c28e83SPiotr Jasiukajtis fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; 1387*25c28e83SPiotr Jasiukajtis nop 1388*25c28e83SPiotr Jasiukajtis nop 1389*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! 
(1_0) y_lo0 = y0 - y_hi0 1390*25c28e83SPiotr Jasiukajtis.cont44: 1391*25c28e83SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0; 1392*25c28e83SPiotr Jasiukajtis nop 1393*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; 1394*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; 1395*25c28e83SPiotr Jasiukajtis 1396*25c28e83SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0; 1397*25c28e83SPiotr Jasiukajtis nop 1398*25c28e83SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0]; 1399*25c28e83SPiotr Jasiukajtis fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; 1400*25c28e83SPiotr Jasiukajtis 1401*25c28e83SPiotr Jasiukajtis nop 1402*25c28e83SPiotr Jasiukajtis nop 1403*25c28e83SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; 1404*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1405*25c28e83SPiotr Jasiukajtis 1406*25c28e83SPiotr Jasiukajtis fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; 1407*25c28e83SPiotr Jasiukajtis nop 1408*25c28e83SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; 1409*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f18,%f20 ! (6_1) dtmp2 = DTWO - dtmp2; 1410*25c28e83SPiotr Jasiukajtis 1411*25c28e83SPiotr Jasiukajtis nop 1412*25c28e83SPiotr Jasiukajtis nop 1413*25c28e83SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; 1414*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1415*25c28e83SPiotr Jasiukajtis 1416*25c28e83SPiotr Jasiukajtis fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; 1417*25c28e83SPiotr Jasiukajtis nop 1418*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; 1419*25c28e83SPiotr Jasiukajtis fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); 1420*25c28e83SPiotr Jasiukajtis 1421*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; 1422*25c28e83SPiotr Jasiukajtis nop 1423*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp8],%f18 ! 
(3_1) *(long long*)&scl0 = ll; 1424*25c28e83SPiotr Jasiukajtis fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1; 1425*25c28e83SPiotr Jasiukajtis 1426*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; 1427*25c28e83SPiotr Jasiukajtis sra %o2,11,%o4 ! (0_0) iarr >>= 11; 1428*25c28e83SPiotr Jasiukajtis nop 1429*25c28e83SPiotr Jasiukajtis faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; 1430*25c28e83SPiotr Jasiukajtis 1431*25c28e83SPiotr Jasiukajtis and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; 1432*25c28e83SPiotr Jasiukajtis nop 1433*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1434*25c28e83SPiotr Jasiukajtis fmuld %f24,%f20,%f26 ! (6_1) dres = dd * dtmp2; 1435*25c28e83SPiotr Jasiukajtis 1436*25c28e83SPiotr Jasiukajtis fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 ); 1437*25c28e83SPiotr Jasiukajtis add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr 1438*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; 1439*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; 1440*25c28e83SPiotr Jasiukajtis 1441*25c28e83SPiotr Jasiukajtis fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres; 1442*25c28e83SPiotr Jasiukajtis mov %i1,%i2 1443*25c28e83SPiotr Jasiukajtis ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1444*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; 1445*25c28e83SPiotr Jasiukajtis 1446*25c28e83SPiotr Jasiukajtis nop 1447*25c28e83SPiotr Jasiukajtis mov %i0,%o0 1448*25c28e83SPiotr Jasiukajtis lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py; 1449*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; 1450*25c28e83SPiotr Jasiukajtis 1451*25c28e83SPiotr Jasiukajtis fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0; 1452*25c28e83SPiotr Jasiukajtis nop 1453*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; 1454*25c28e83SPiotr Jasiukajtis faddd %f42,%f34,%f18 ! 
(1_0) dres = res0_hi + res0_lo; 1455*25c28e83SPiotr Jasiukajtis 1456*25c28e83SPiotr Jasiukajtis fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; 1457*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000 1458*25c28e83SPiotr Jasiukajtis st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; 1459*25c28e83SPiotr Jasiukajtis fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0); 1460*25c28e83SPiotr Jasiukajtis 1461*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; 1462*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1463*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update45 ! (4_0) if ( hx0 >= 0x7ff00000 ) 1464*25c28e83SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; 1465*25c28e83SPiotr Jasiukajtis 1466*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; 1467*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 1468*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update46 ! (4_0) if ( hy0 >= 0x7ff00000 ) 1469*25c28e83SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; 1470*25c28e83SPiotr Jasiukajtis 1471*25c28e83SPiotr Jasiukajtis fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; 1472*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; 1473*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1474*25c28e83SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; 1475*25c28e83SPiotr Jasiukajtis 1476*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (4_0) j0 &= diff0; 1477*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000 1478*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update47 ! (4_0) if ( hx0 < 0x00100000 ) 1479*25c28e83SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); 1480*25c28e83SPiotr Jasiukajtis.cont47a: 1481*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; 1482*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! 
(4_0) j0 = hy0 - j0; 1483*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; 1484*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; 1485*25c28e83SPiotr Jasiukajtis 1486*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; 1487*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; 1488*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1489*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; 1490*25c28e83SPiotr Jasiukajtis 1491*25c28e83SPiotr Jasiukajtis fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; 1492*25c28e83SPiotr Jasiukajtis nop 1493*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; 1494*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; 1495*25c28e83SPiotr Jasiukajtis.cont47b: 1496*25c28e83SPiotr Jasiukajtis fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; 1497*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; 1498*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; 1499*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; 1500*25c28e83SPiotr Jasiukajtis 1501*25c28e83SPiotr Jasiukajtis fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; 1502*25c28e83SPiotr Jasiukajtis nop 1503*25c28e83SPiotr Jasiukajtis nop 1504*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; 1505*25c28e83SPiotr Jasiukajtis.cont48: 1506*25c28e83SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; 1507*25c28e83SPiotr Jasiukajtis nop 1508*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll; 1509*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; 1510*25c28e83SPiotr Jasiukajtis 1511*25c28e83SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! 
(6_1) dtmp0 = DONE - dtmp0; 1512*25c28e83SPiotr Jasiukajtis nop 1513*25c28e83SPiotr Jasiukajtis lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; 1514*25c28e83SPiotr Jasiukajtis fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; 1515*25c28e83SPiotr Jasiukajtis 1516*25c28e83SPiotr Jasiukajtis nop 1517*25c28e83SPiotr Jasiukajtis nop 1518*25c28e83SPiotr Jasiukajtis lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; 1519*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1520*25c28e83SPiotr Jasiukajtis 1521*25c28e83SPiotr Jasiukajtis fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; 1522*25c28e83SPiotr Jasiukajtis nop 1523*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; 1524*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2; 1525*25c28e83SPiotr Jasiukajtis 1526*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; 1527*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 1528*25c28e83SPiotr Jasiukajtis nop 1529*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1530*25c28e83SPiotr Jasiukajtis 1531*25c28e83SPiotr Jasiukajtis fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; 1532*25c28e83SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 1533*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; 1534*25c28e83SPiotr Jasiukajtis fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); 1535*25c28e83SPiotr Jasiukajtis 1536*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; 1537*25c28e83SPiotr Jasiukajtis nop 1538*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; 1539*25c28e83SPiotr Jasiukajtis fsubd %f60,%f40,%f46 ! (6_1) dtmp0 -= dtmp1; 1540*25c28e83SPiotr Jasiukajtis 1541*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; 1542*25c28e83SPiotr Jasiukajtis sra %o2,11,%i3 ! 
(1_0) iarr >>= 11; 1543*25c28e83SPiotr Jasiukajtis nop 1544*25c28e83SPiotr Jasiukajtis faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; 1545*25c28e83SPiotr Jasiukajtis 1546*25c28e83SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; 1547*25c28e83SPiotr Jasiukajtis nop 1548*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1549*25c28e83SPiotr Jasiukajtis fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; 1550*25c28e83SPiotr Jasiukajtis 1551*25c28e83SPiotr Jasiukajtis fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); 1552*25c28e83SPiotr Jasiukajtis add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr 1553*25c28e83SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; 1554*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; 1555*25c28e83SPiotr Jasiukajtis 1556*25c28e83SPiotr Jasiukajtis fmuld %f46,%f26,%f52 ! (6_1) dtmp0 *= dres; 1557*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 1558*25c28e83SPiotr Jasiukajtis ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1559*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; 1560*25c28e83SPiotr Jasiukajtis 1561*25c28e83SPiotr Jasiukajtis nop 1562*25c28e83SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 1563*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; 1564*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; 1565*25c28e83SPiotr Jasiukajtis 1566*25c28e83SPiotr Jasiukajtis fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0; 1567*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff; 1568*25c28e83SPiotr Jasiukajtis nop 1569*25c28e83SPiotr Jasiukajtis faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo; 1570*25c28e83SPiotr Jasiukajtis 1571*25c28e83SPiotr Jasiukajtis fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; 1572*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 1573*25c28e83SPiotr Jasiukajtis st %f14,[%fp+ftmp0] ! 
(2_0) iarr = ((int*)&dres)[0]; 1574*25c28e83SPiotr Jasiukajtis fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); 1575*25c28e83SPiotr Jasiukajtis 1576*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; 1577*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1578*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update49 ! (5_0) if ( hx0 >= 0x7ff00000 ) 1579*25c28e83SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; 1580*25c28e83SPiotr Jasiukajtis 1581*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; 1582*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 1583*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update50 ! (5_0) if ( hy0 >= 0x7ff00000 ) 1584*25c28e83SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; 1585*25c28e83SPiotr Jasiukajtis 1586*25c28e83SPiotr Jasiukajtis fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; 1587*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; 1588*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1589*25c28e83SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; 1590*25c28e83SPiotr Jasiukajtis 1591*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (5_0) j0 &= diff0; 1592*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000 1593*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update51 ! (5_0) if ( hx0 < 0x00100000 ) 1594*25c28e83SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); 1595*25c28e83SPiotr Jasiukajtis.cont51a: 1596*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; 1597*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; 1598*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; 1599*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! 
(3_0) x_lo0 = x0 - x_hi0; 1600*25c28e83SPiotr Jasiukajtis 1601*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; 1602*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; 1603*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1604*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; 1605*25c28e83SPiotr Jasiukajtis 1606*25c28e83SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; 1607*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; 1608*25c28e83SPiotr Jasiukajtis nop 1609*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; 1610*25c28e83SPiotr Jasiukajtis.cont51b: 1611*25c28e83SPiotr Jasiukajtis fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; 1612*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; 1613*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; 1614*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; 1615*25c28e83SPiotr Jasiukajtis 1616*25c28e83SPiotr Jasiukajtis fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; 1617*25c28e83SPiotr Jasiukajtis nop 1618*25c28e83SPiotr Jasiukajtis nop 1619*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; 1620*25c28e83SPiotr Jasiukajtis.cont52: 1621*25c28e83SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; 1622*25c28e83SPiotr Jasiukajtis nop 1623*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; 1624*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; 1625*25c28e83SPiotr Jasiukajtis 1626*25c28e83SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; 1627*25c28e83SPiotr Jasiukajtis nop 1628*25c28e83SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; 1629*25c28e83SPiotr Jasiukajtis fmuld %f26,%f20,%f54 ! 
(1_0) dd *= dtmp0; 1630*25c28e83SPiotr Jasiukajtis 1631*25c28e83SPiotr Jasiukajtis nop 1632*25c28e83SPiotr Jasiukajtis nop 1633*25c28e83SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; 1634*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1635*25c28e83SPiotr Jasiukajtis 1636*25c28e83SPiotr Jasiukajtis fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; 1637*25c28e83SPiotr Jasiukajtis nop 1638*25c28e83SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0]; 1639*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; 1640*25c28e83SPiotr Jasiukajtis 1641*25c28e83SPiotr Jasiukajtis nop 1642*25c28e83SPiotr Jasiukajtis nop 1643*25c28e83SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; 1644*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1645*25c28e83SPiotr Jasiukajtis 1646*25c28e83SPiotr Jasiukajtis fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; 1647*25c28e83SPiotr Jasiukajtis nop 1648*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; 1649*25c28e83SPiotr Jasiukajtis fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); 1650*25c28e83SPiotr Jasiukajtis 1651*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; 1652*25c28e83SPiotr Jasiukajtis nop 1653*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; 1654*25c28e83SPiotr Jasiukajtis fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; 1655*25c28e83SPiotr Jasiukajtis 1656*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; 1657*25c28e83SPiotr Jasiukajtis sra %o2,11,%o4 ! (2_0) iarr >>= 11; 1658*25c28e83SPiotr Jasiukajtis nop 1659*25c28e83SPiotr Jasiukajtis faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; 1660*25c28e83SPiotr Jasiukajtis 1661*25c28e83SPiotr Jasiukajtis and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; 1662*25c28e83SPiotr Jasiukajtis nop 1663*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1664*25c28e83SPiotr Jasiukajtis fmuld %f24,%f20,%f26 ! 
(0_0) dres = dd * dtmp2; 1665*25c28e83SPiotr Jasiukajtis 1666*25c28e83SPiotr Jasiukajtis fsqrtd %f52,%f24 ! (6_1) res0 = sqrt ( res0 ); 1667*25c28e83SPiotr Jasiukajtis add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr 1668*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px; 1669*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; 1670*25c28e83SPiotr Jasiukajtis 1671*25c28e83SPiotr Jasiukajtis fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; 1672*25c28e83SPiotr Jasiukajtis mov %i1,%i2 1673*25c28e83SPiotr Jasiukajtis ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1674*25c28e83SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; 1675*25c28e83SPiotr Jasiukajtis 1676*25c28e83SPiotr Jasiukajtis nop 1677*25c28e83SPiotr Jasiukajtis mov %i0,%o0 1678*25c28e83SPiotr Jasiukajtis lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; 1679*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36; 1680*25c28e83SPiotr Jasiukajtis 1681*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; 1682*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; 1683*25c28e83SPiotr Jasiukajtis nop 1684*25c28e83SPiotr Jasiukajtis faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo; 1685*25c28e83SPiotr Jasiukajtis 1686*25c28e83SPiotr Jasiukajtis fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; 1687*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 1688*25c28e83SPiotr Jasiukajtis st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; 1689*25c28e83SPiotr Jasiukajtis fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); 1690*25c28e83SPiotr Jasiukajtis 1691*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; 1692*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1693*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update53 ! 
(6_0) if ( hx0 >= 0x7ff00000 ) 1694*25c28e83SPiotr Jasiukajtis fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; 1695*25c28e83SPiotr Jasiukajtis 1696*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; 1697*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 1698*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update54 ! (6_0) if ( hy0 >= 0x7ff00000 ) 1699*25c28e83SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; 1700*25c28e83SPiotr Jasiukajtis 1701*25c28e83SPiotr Jasiukajtis fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; 1702*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; 1703*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1704*25c28e83SPiotr Jasiukajtis faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; 1705*25c28e83SPiotr Jasiukajtis 1706*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (6_0) j0 &= diff0; 1707*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000 1708*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update55 ! (6_0) if ( hx0 < 0x00100000 ) 1709*25c28e83SPiotr Jasiukajtis fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); 1710*25c28e83SPiotr Jasiukajtis.cont55a: 1711*25c28e83SPiotr Jasiukajtis fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; 1712*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; 1713*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; 1714*25c28e83SPiotr Jasiukajtis fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; 1715*25c28e83SPiotr Jasiukajtis 1716*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; 1717*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; 1718*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1719*25c28e83SPiotr Jasiukajtis faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; 1720*25c28e83SPiotr Jasiukajtis 1721*25c28e83SPiotr Jasiukajtis fmuld %f16,%f18,%f18 ! 
(1_0) dtmp2 = dd * dres; 1722*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; 1723*25c28e83SPiotr Jasiukajtis nop 1724*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; 1725*25c28e83SPiotr Jasiukajtis.cont55b: 1726*25c28e83SPiotr Jasiukajtis fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; 1727*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; 1728*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; 1729*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; 1730*25c28e83SPiotr Jasiukajtis 1731*25c28e83SPiotr Jasiukajtis fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; 1732*25c28e83SPiotr Jasiukajtis nop 1733*25c28e83SPiotr Jasiukajtis nop 1734*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; 1735*25c28e83SPiotr Jasiukajtis.cont56: 1736*25c28e83SPiotr Jasiukajtis fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; 1737*25c28e83SPiotr Jasiukajtis nop 1738*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; 1739*25c28e83SPiotr Jasiukajtis faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; 1740*25c28e83SPiotr Jasiukajtis 1741*25c28e83SPiotr Jasiukajtis lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; 1742*25c28e83SPiotr Jasiukajtis nop 1743*25c28e83SPiotr Jasiukajtis nop 1744*25c28e83SPiotr Jasiukajtis fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; 1745*25c28e83SPiotr Jasiukajtis 1746*25c28e83SPiotr Jasiukajtis lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; 1747*25c28e83SPiotr Jasiukajtis nop 1748*25c28e83SPiotr Jasiukajtis nop 1749*25c28e83SPiotr Jasiukajtis fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0; 1750*25c28e83SPiotr Jasiukajtis 1751*25c28e83SPiotr Jasiukajtis fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; 1752*25c28e83SPiotr Jasiukajtis nop 1753*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! 
(5_0) ((float*)&y0)[0] = ((float*)py)[0]; 1754*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; 1755*25c28e83SPiotr Jasiukajtis 1756*25c28e83SPiotr Jasiukajtis nop 1757*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 1758*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; 1759*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1760*25c28e83SPiotr Jasiukajtis 1761*25c28e83SPiotr Jasiukajtis fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; 1762*25c28e83SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 1763*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; 1764*25c28e83SPiotr Jasiukajtis fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); 1765*25c28e83SPiotr Jasiukajtis 1766*25c28e83SPiotr Jasiukajtis fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; 1767*25c28e83SPiotr Jasiukajtis nop 1768*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp14],%f0 ! (6_1) *(long long*)&scl0 = ll; 1769*25c28e83SPiotr Jasiukajtis fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; 1770*25c28e83SPiotr Jasiukajtis 1771*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; 1772*25c28e83SPiotr Jasiukajtis sra %o2,11,%i3 ! (3_0) iarr >>= 11; 1773*25c28e83SPiotr Jasiukajtis nop 1774*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; 1775*25c28e83SPiotr Jasiukajtis 1776*25c28e83SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; 1777*25c28e83SPiotr Jasiukajtis nop 1778*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1779*25c28e83SPiotr Jasiukajtis fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; 1780*25c28e83SPiotr Jasiukajtis 1781*25c28e83SPiotr Jasiukajtis fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); 1782*25c28e83SPiotr Jasiukajtis add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr 1783*25c28e83SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; 1784*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f46 ! 
(2_0) dtmp1 = DTWO - dtmp1; 1785*25c28e83SPiotr Jasiukajtis 1786*25c28e83SPiotr Jasiukajtis fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; 1787*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 1788*25c28e83SPiotr Jasiukajtis ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1789*25c28e83SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; 1790*25c28e83SPiotr Jasiukajtis 1791*25c28e83SPiotr Jasiukajtis nop 1792*25c28e83SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 1793*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; 1794*25c28e83SPiotr Jasiukajtis faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36; 1795*25c28e83SPiotr Jasiukajtis 1796*25c28e83SPiotr Jasiukajtis fmuld %f0,%f24,%f2 ! (6_1) res0 = scl0 * res0; 1797*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; 1798*25c28e83SPiotr Jasiukajtis nop 1799*25c28e83SPiotr Jasiukajtis faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; 1800*25c28e83SPiotr Jasiukajtis 1801*25c28e83SPiotr Jasiukajtis fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; 1802*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 1803*25c28e83SPiotr Jasiukajtis st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; 1804*25c28e83SPiotr Jasiukajtis fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); 1805*25c28e83SPiotr Jasiukajtis 1806*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; 1807*25c28e83SPiotr Jasiukajtis st %f2,[%i5] ! (6_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1808*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update57 ! (7_0) if ( hx0 >= 0x7ff00000 ) 1809*25c28e83SPiotr Jasiukajtis fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; 1810*25c28e83SPiotr Jasiukajtis 1811*25c28e83SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; 1812*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 1813*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update58 ! 
(7_0) if ( hy0 >= 0x7ff00000 ) 1814*25c28e83SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; 1815*25c28e83SPiotr Jasiukajtis 1816*25c28e83SPiotr Jasiukajtis fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; 1817*25c28e83SPiotr Jasiukajtis sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; 1818*25c28e83SPiotr Jasiukajtis st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1819*25c28e83SPiotr Jasiukajtis faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; 1820*25c28e83SPiotr Jasiukajtis 1821*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! (7_0) j0 &= diff0; 1822*25c28e83SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 1823*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update59 ! (7_0) if ( hx0 < 0x00100000 ) 1824*25c28e83SPiotr Jasiukajtis fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); 1825*25c28e83SPiotr Jasiukajtis.cont59a: 1826*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; 1827*25c28e83SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; 1828*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; 1829*25c28e83SPiotr Jasiukajtis fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; 1830*25c28e83SPiotr Jasiukajtis 1831*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; 1832*25c28e83SPiotr Jasiukajtis and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; 1833*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1834*25c28e83SPiotr Jasiukajtis faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; 1835*25c28e83SPiotr Jasiukajtis 1836*25c28e83SPiotr Jasiukajtis fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; 1837*25c28e83SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; 1838*25c28e83SPiotr Jasiukajtis nop 1839*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; 1840*25c28e83SPiotr Jasiukajtis.cont59b: 1841*25c28e83SPiotr Jasiukajtis fmuld %f42,%f28,%f60 ! 
(1_0) dtmp0 = res0_hi * res0; 1842*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 1843*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; 1844*25c28e83SPiotr Jasiukajtis faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; 1845*25c28e83SPiotr Jasiukajtis 1846*25c28e83SPiotr Jasiukajtis fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; 1847*25c28e83SPiotr Jasiukajtis nop 1848*25c28e83SPiotr Jasiukajtis nop 1849*25c28e83SPiotr Jasiukajtis fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; 1850*25c28e83SPiotr Jasiukajtis.cont60: 1851*25c28e83SPiotr Jasiukajtis fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; 1852*25c28e83SPiotr Jasiukajtis nop 1853*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll; 1854*25c28e83SPiotr Jasiukajtis faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0; 1855*25c28e83SPiotr Jasiukajtis 1856*25c28e83SPiotr Jasiukajtis fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; 1857*25c28e83SPiotr Jasiukajtis nop 1858*25c28e83SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; 1859*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1860*25c28e83SPiotr Jasiukajtis 1861*25c28e83SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; 1862*25c28e83SPiotr Jasiukajtis nop 1863*25c28e83SPiotr Jasiukajtis nop 1864*25c28e83SPiotr Jasiukajtis fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; 1865*25c28e83SPiotr Jasiukajtis 1866*25c28e83SPiotr Jasiukajtis fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; 1867*25c28e83SPiotr Jasiukajtis nop 1868*25c28e83SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; 1869*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; 1870*25c28e83SPiotr Jasiukajtis 1871*25c28e83SPiotr Jasiukajtis nop 1872*25c28e83SPiotr Jasiukajtis nop 1873*25c28e83SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! 
(6_0) ((float*)&y0)[1] = ((float*)py)[1]; 1874*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1875*25c28e83SPiotr Jasiukajtis 1876*25c28e83SPiotr Jasiukajtis fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; 1877*25c28e83SPiotr Jasiukajtis nop 1878*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; 1879*25c28e83SPiotr Jasiukajtis fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); 1880*25c28e83SPiotr Jasiukajtis 1881*25c28e83SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; 1882*25c28e83SPiotr Jasiukajtis nop 1883*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll; 1884*25c28e83SPiotr Jasiukajtis fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; 1885*25c28e83SPiotr Jasiukajtis 1886*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; 1887*25c28e83SPiotr Jasiukajtis sra %o2,11,%o4 ! (4_0) iarr >>= 11; 1888*25c28e83SPiotr Jasiukajtis nop 1889*25c28e83SPiotr Jasiukajtis faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1; 1890*25c28e83SPiotr Jasiukajtis 1891*25c28e83SPiotr Jasiukajtis and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc; 1892*25c28e83SPiotr Jasiukajtis subcc counter,8,counter ! counter -= 8; 1893*25c28e83SPiotr Jasiukajtis bpos,pt %icc,.main_loop ! repeat while the decremented counter is non-negative 1894*25c28e83SPiotr Jasiukajtis fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2; 1895*25c28e83SPiotr Jasiukajtis 1896*25c28e83SPiotr Jasiukajtis add counter,8,counter ! undo the last counter -= 8 1897*25c28e83SPiotr Jasiukajtis 1898*25c28e83SPiotr Jasiukajtis.tail: ! tail: each stage below checks counter, then finishes and stores one more result through %i5; exits to .begin when counter goes negative 1899*25c28e83SPiotr Jasiukajtis subcc counter,1,counter ! counter -= 1; 1900*25c28e83SPiotr Jasiukajtis bneg .begin ! if ( counter < 0 ) leave the tail 1901*25c28e83SPiotr Jasiukajtis nop ! (delay slot) 1902*25c28e83SPiotr Jasiukajtis 1903*25c28e83SPiotr Jasiukajtis fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 ); 1904*25c28e83SPiotr Jasiukajtis add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr 1905*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1; 1906*25c28e83SPiotr Jasiukajtis 1907*25c28e83SPiotr Jasiukajtis fmuld %f20,%f16,%f48 !
(1_1) dtmp0 *= dres; 1908*25c28e83SPiotr Jasiukajtis ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1909*25c28e83SPiotr Jasiukajtis 1910*25c28e83SPiotr Jasiukajtis fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0; 1911*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0]; 1912*25c28e83SPiotr Jasiukajtis faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo; 1913*25c28e83SPiotr Jasiukajtis 1914*25c28e83SPiotr Jasiukajtis subcc counter,1,counter ! counter -= 1; 1915*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1]; 1916*25c28e83SPiotr Jasiukajtis bneg .begin ! if ( counter < 0 ) leave the tail 1917*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez (delay slot) 1918*25c28e83SPiotr Jasiukajtis 1919*25c28e83SPiotr Jasiukajtis fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1; 1920*25c28e83SPiotr Jasiukajtis st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0]; 1921*25c28e83SPiotr Jasiukajtis fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0); 1922*25c28e83SPiotr Jasiukajtis 1923*25c28e83SPiotr Jasiukajtis fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; 1924*25c28e83SPiotr Jasiukajtis faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; 1925*25c28e83SPiotr Jasiukajtis 1926*25c28e83SPiotr Jasiukajtis 1927*25c28e83SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); 1928*25c28e83SPiotr Jasiukajtis 1929*25c28e83SPiotr Jasiukajtis fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; 1930*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; 1931*25c28e83SPiotr Jasiukajtis 1932*25c28e83SPiotr Jasiukajtis fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0; 1933*25c28e83SPiotr Jasiukajtis 1934*25c28e83SPiotr Jasiukajtis fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0; 1935*25c28e83SPiotr Jasiukajtis 1936*25c28e83SPiotr Jasiukajtis fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0; 1937*25c28e83SPiotr Jasiukajtis 1938*25c28e83SPiotr Jasiukajtis fsubd DONE,%f12,%f60 !
(2_1) dtmp0 = DONE - dtmp0; 1939*25c28e83SPiotr Jasiukajtis 1940*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2; 1941*25c28e83SPiotr Jasiukajtis 1942*25c28e83SPiotr Jasiukajtis fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres; 1943*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0]; 1944*25c28e83SPiotr Jasiukajtis fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1); 1945*25c28e83SPiotr Jasiukajtis 1946*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll; 1947*25c28e83SPiotr Jasiukajtis fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1; 1948*25c28e83SPiotr Jasiukajtis 1949*25c28e83SPiotr Jasiukajtis sra %o2,11,%i3 ! (5_1) iarr >>= 11; 1950*25c28e83SPiotr Jasiukajtis 1951*25c28e83SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc; 1952*25c28e83SPiotr Jasiukajtis fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2; 1953*25c28e83SPiotr Jasiukajtis 1954*25c28e83SPiotr Jasiukajtis fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 ); 1955*25c28e83SPiotr Jasiukajtis add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr 1956*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1; 1957*25c28e83SPiotr Jasiukajtis 1958*25c28e83SPiotr Jasiukajtis fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres; 1959*25c28e83SPiotr Jasiukajtis ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1960*25c28e83SPiotr Jasiukajtis 1961*25c28e83SPiotr Jasiukajtis fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0; 1962*25c28e83SPiotr Jasiukajtis 1963*25c28e83SPiotr Jasiukajtis fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1; 1964*25c28e83SPiotr Jasiukajtis fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0); 1965*25c28e83SPiotr Jasiukajtis 1966*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1967*25c28e83SPiotr Jasiukajtis 1968*25c28e83SPiotr Jasiukajtis fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; 1969*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] !
(0_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1970*25c28e83SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; 1971*25c28e83SPiotr Jasiukajtis 1972*25c28e83SPiotr Jasiukajtis subcc counter,1,counter ! counter -= 1; 1973*25c28e83SPiotr Jasiukajtis bneg .begin ! if ( counter < 0 ) leave the tail 1974*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez (delay slot) 1975*25c28e83SPiotr Jasiukajtis 1976*25c28e83SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); 1977*25c28e83SPiotr Jasiukajtis 1978*25c28e83SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; 1979*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; 1980*25c28e83SPiotr Jasiukajtis 1981*25c28e83SPiotr Jasiukajtis fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; 1982*25c28e83SPiotr Jasiukajtis 1983*25c28e83SPiotr Jasiukajtis fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; 1984*25c28e83SPiotr Jasiukajtis 1985*25c28e83SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0; 1986*25c28e83SPiotr Jasiukajtis fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0; 1987*25c28e83SPiotr Jasiukajtis 1988*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2; 1989*25c28e83SPiotr Jasiukajtis 1990*25c28e83SPiotr Jasiukajtis fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres; 1991*25c28e83SPiotr Jasiukajtis 1992*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll; 1993*25c28e83SPiotr Jasiukajtis fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1; 1994*25c28e83SPiotr Jasiukajtis 1995*25c28e83SPiotr Jasiukajtis fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2; 1996*25c28e83SPiotr Jasiukajtis 1997*25c28e83SPiotr Jasiukajtis fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 ); 1998*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1; 1999*25c28e83SPiotr Jasiukajtis 2000*25c28e83SPiotr Jasiukajtis fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres; 2001*25c28e83SPiotr Jasiukajtis 2002*25c28e83SPiotr Jasiukajtis fmuld %f50,%f22,%f0 !
(1_1) res0 = scl0 * res0; 2003*25c28e83SPiotr Jasiukajtis 2004*25c28e83SPiotr Jasiukajtis fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1; 2005*25c28e83SPiotr Jasiukajtis 2006*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0]; 2007*25c28e83SPiotr Jasiukajtis 2008*25c28e83SPiotr Jasiukajtis subcc counter,1,counter ! counter -= 1; 2009*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; 2010*25c28e83SPiotr Jasiukajtis bneg .begin ! if ( counter < 0 ) leave the tail 2011*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez (delay slot) 2012*25c28e83SPiotr Jasiukajtis 2013*25c28e83SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; 2014*25c28e83SPiotr Jasiukajtis 2015*25c28e83SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); 2016*25c28e83SPiotr Jasiukajtis 2017*25c28e83SPiotr Jasiukajtis fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; 2018*25c28e83SPiotr Jasiukajtis 2019*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; 2020*25c28e83SPiotr Jasiukajtis 2021*25c28e83SPiotr Jasiukajtis fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; 2022*25c28e83SPiotr Jasiukajtis 2023*25c28e83SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0; 2024*25c28e83SPiotr Jasiukajtis 2025*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2; 2026*25c28e83SPiotr Jasiukajtis 2027*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll; 2028*25c28e83SPiotr Jasiukajtis fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1; 2029*25c28e83SPiotr Jasiukajtis 2030*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2; 2031*25c28e83SPiotr Jasiukajtis 2032*25c28e83SPiotr Jasiukajtis fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 ); 2033*25c28e83SPiotr Jasiukajtis 2034*25c28e83SPiotr Jasiukajtis fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres; 2035*25c28e83SPiotr Jasiukajtis 2036*25c28e83SPiotr Jasiukajtis fmuld %f50,%f24,%f0 !
(2_1) res0 = scl0 * res0; 2037*25c28e83SPiotr Jasiukajtis 2038*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0]; 2039*25c28e83SPiotr Jasiukajtis 2040*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; 2041*25c28e83SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0; 2042*25c28e83SPiotr Jasiukajtis 2043*25c28e83SPiotr Jasiukajtis subcc counter,1,counter ! counter -= 1; 2044*25c28e83SPiotr Jasiukajtis bneg .begin ! if ( counter < 0 ) leave the tail 2045*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez (delay slot) 2046*25c28e83SPiotr Jasiukajtis 2047*25c28e83SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); 2048*25c28e83SPiotr Jasiukajtis 2049*25c28e83SPiotr Jasiukajtis fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; 2050*25c28e83SPiotr Jasiukajtis 2051*25c28e83SPiotr Jasiukajtis fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; 2052*25c28e83SPiotr Jasiukajtis 2053*25c28e83SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0; 2054*25c28e83SPiotr Jasiukajtis 2055*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll; 2056*25c28e83SPiotr Jasiukajtis fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1; 2057*25c28e83SPiotr Jasiukajtis 2058*25c28e83SPiotr Jasiukajtis fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 ); 2059*25c28e83SPiotr Jasiukajtis 2060*25c28e83SPiotr Jasiukajtis fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres; 2061*25c28e83SPiotr Jasiukajtis 2062*25c28e83SPiotr Jasiukajtis fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0; 2063*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0]; 2064*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; 2065*25c28e83SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; 2066*25c28e83SPiotr Jasiukajtis 2067*25c28e83SPiotr Jasiukajtis subcc counter,1,counter ! counter -= 1; 2068*25c28e83SPiotr Jasiukajtis bneg .begin ! if ( counter < 0 ) leave the tail 2069*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 !
pz += stridez (delay slot) 2070*25c28e83SPiotr Jasiukajtis 2071*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; 2072*25c28e83SPiotr Jasiukajtis 2073*25c28e83SPiotr Jasiukajtis fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); 2074*25c28e83SPiotr Jasiukajtis 2075*25c28e83SPiotr Jasiukajtis fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0; 2076*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; 2077*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; 2078*25c28e83SPiotr Jasiukajtis 2079*25c28e83SPiotr Jasiukajtis subcc counter,1,counter ! counter -= 1; 2080*25c28e83SPiotr Jasiukajtis bneg .begin ! if ( counter < 0 ) leave the tail 2081*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez (delay slot) 2082*25c28e83SPiotr Jasiukajtis 2083*25c28e83SPiotr Jasiukajtis ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; 2084*25c28e83SPiotr Jasiukajtis 2085*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; 2086*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; 2087*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; 2088*25c28e83SPiotr Jasiukajtis 2089*25c28e83SPiotr Jasiukajtis ba .begin ! last tail stage: unconditionally back to .begin 2090*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez (delay slot) 2091*25c28e83SPiotr Jasiukajtis 2092*25c28e83SPiotr Jasiukajtis .align 16 2093*25c28e83SPiotr Jasiukajtis.spec0: ! special operands: result is 0 when x0 or y0 has high word 0x7ff00000 (%o7 / %l7) and a zero low word, otherwise fabs(x0) * fabs(y0) 2094*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! hx0 ? 0x7ff00000 2095*25c28e83SPiotr Jasiukajtis bne 1f ! if ( hx0 != 0x7ff00000 ) 2096*25c28e83SPiotr Jasiukajtis ld [%i4+4],%i2 ! lx = ((int*)px)[1]; 2097*25c28e83SPiotr Jasiukajtis 2098*25c28e83SPiotr Jasiukajtis cmp %i2,0 ! lx ? 0 2099*25c28e83SPiotr Jasiukajtis be 3f ! if ( lx == 0 ) 2100*25c28e83SPiotr Jasiukajtis nop ! (delay slot) 2101*25c28e83SPiotr Jasiukajtis1: 2102*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! hy0 ? 0x7ff00000 2103*25c28e83SPiotr Jasiukajtis bne 2f ! if ( hy0 != 0x7ff00000 ) 2104*25c28e83SPiotr Jasiukajtis ld [%i3+4],%o2 !
ly = ((int*)py)[1]; 2105*25c28e83SPiotr Jasiukajtis 2106*25c28e83SPiotr Jasiukajtis cmp %o2,0 ! ly ? 0 2107*25c28e83SPiotr Jasiukajtis be 3f ! if ( ly == 0 ) 2108*25c28e83SPiotr Jasiukajtis2: 2109*25c28e83SPiotr Jasiukajtis ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0]; 2110*25c28e83SPiotr Jasiukajtis ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1]; 2111*25c28e83SPiotr Jasiukajtis 2112*25c28e83SPiotr Jasiukajtis ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0]; 2113*25c28e83SPiotr Jasiukajtis add %i4,stridex,%i4 ! px += stridex 2114*25c28e83SPiotr Jasiukajtis ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1]; 2115*25c28e83SPiotr Jasiukajtis 2116*25c28e83SPiotr Jasiukajtis fabsd %f0,%f0 2117*25c28e83SPiotr Jasiukajtis 2118*25c28e83SPiotr Jasiukajtis fabsd %f2,%f2 2119*25c28e83SPiotr Jasiukajtis 2120*25c28e83SPiotr Jasiukajtis fmuld %f0,%f2,%f0 ! res0 = fabs(x0) * fabs(y0); 2121*25c28e83SPiotr Jasiukajtis add %i3,stridey,%i3 ! py += stridey; 2122*25c28e83SPiotr Jasiukajtis st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0]; 2123*25c28e83SPiotr Jasiukajtis 2124*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1]; 2125*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 2126*25c28e83SPiotr Jasiukajtis ba .begin1 2127*25c28e83SPiotr Jasiukajtis sub counter,1,counter 2128*25c28e83SPiotr Jasiukajtis3: 2129*25c28e83SPiotr Jasiukajtis add %i4,stridex,%i4 ! px += stridex 2130*25c28e83SPiotr Jasiukajtis add %i3,stridey,%i3 ! py += stridey 2131*25c28e83SPiotr Jasiukajtis st %g0,[%i5] ! ((int*)pz)[0] = 0; 2132*25c28e83SPiotr Jasiukajtis 2133*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez; 2134*25c28e83SPiotr Jasiukajtis st %g0,[%i5+4] ! ((int*)pz)[1] = 0; 2135*25c28e83SPiotr Jasiukajtis ba .begin1 2136*25c28e83SPiotr Jasiukajtis sub counter,1,counter 2137*25c28e83SPiotr Jasiukajtis 2138*25c28e83SPiotr Jasiukajtis .align 16 2139*25c28e83SPiotr Jasiukajtis.spec1: 2140*25c28e83SPiotr Jasiukajtis and %o1,%o3,%o1 ! 
! (7_0) j0 = j0 AND diff0   (tail of the comment on the preceding and)

!
! Body of .spec1 (the label and its first instruction precede this span).
!
! Reads:  %o7 = hx0, %l7 = hy0, %i4 = px, %i3 = py, %i5 = pz.
! Flow:   hy0 not below 0x00100000  -> back to .cont_spec0.
!         hx0|hy0|lx|ly == 0        -> store 1.0/0.0 to *pz, advance
!                                      px/py/pz, decrement counter, go to
!                                      .begin1.
!         otherwise                 -> convert |x| and |y| from their raw
!                                      bit patterns with fxtod (adding D2ON51
!                                      back when the high word is not below
!                                      0x00080000), park them in dtmp2/dtmp3,
!                                      point %i4/%i3 at those slots, store the
!                                      D2ONM52 / D2ON1022 table entries to
!                                      dtmp15 / dtmp0, go to .cont_spec1.
! NOTE(review): presumably the subnormal-operand path - confirm the entry
!         condition at the branch site, which is outside this view.
!
	cmp	%l7,_0x00100000		! (7_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont_spec0	! (7_0) leave unless hy0 is below 0x00100000

	ld	[%i4+4],%i2		! (delay slot, runs on both paths) lx = ((int*)px)[1]
	or	%o7,%l7,%g5		! ii = hx0 | hy0
	fzero	%f0			! f0 = 0.0, divisor for the all-zero case

	ld	[%i3+4],%o2		! ly = ((int*)py)[1]
	or	%i2,%g5,%g5		! ii |= lx

	orcc	%o2,%g5,%g5		! ii |= ly, setting icc
	bnz,a,pn %icc,1f		! if ( ii != 0 ) go convert the operands
	sethi	%hi(0x00080000),%i2	! (annulled delay slot: only when taken) i2 = 0x00080000

	fdivd	DONE,%f0,%f0		! res0 = 1.0 / 0.0

	st	%f0,[%i5]		! pz word 0 = high word of res0

	add	%i4,stridex,%i4		! px += stridex
	add	%i3,stridey,%i3		! py += stridey
	st	%f1,[%i5+4]		! pz word 1 = low word of res0

	add	%i5,stridez,%i5		! pz += stridez
	ba	.begin1
	sub	counter,1,counter	! (delay slot)
1:
	ld	[%i4],%f0		! x0 high word

	ld	[%i4+4],%f1		! x0 low word

	ld	[%i3],%f2		! y0 high word

	fabsd	%f0,%f0			! x0 = fabs(x0)
	ld	[%i3+4],%f3		! y0 low word

	ldd	[TBL+TBL_SHIFT+64],%f12	! dtmp0 = mask 0x0007ffffffffffff
	add	%fp,dtmp2,%i4		! px now points at the dtmp2 slot
	add	%fp,dtmp3,%i3		! py now points at the dtmp3 slot

	fabsd	%f2,%f2			! y0 = fabs(y0)
	ldd	[TBL+TBL_SHIFT+56],%f10	! D2ON51

	ldx	[TBL+TBL_SHIFT+48],%g5	! D2ONM52
	cmp	%o7,%i2			! hx0 ? 0x00080000
	bl,a	1f			! if hx0 is below 0x00080000 ...
	fxtod	%f0,%f0			! (annulled delay slot) ... x0 = (double) raw bits of x0

	fand	%f0,%f12,%f0		! x0 = vis_fand(x0, dtmp0)
	fxtod	%f0,%f0			! x0 = (double) raw bits of x0
	faddd	%f0,%f10,%f0		! x0 += D2ON51
1:
	std	%f0,[%i4]		! park converted x0 in dtmp2

	ldx	[TBL+TBL_SHIFT+40],%g1	! D2ON1022
	cmp	%l7,%i2			! hy0 ? 0x00080000
	bl,a	1f			! if hy0 is below 0x00080000 ...
	fxtod	%f2,%f2			! (annulled delay slot) ... y0 = (double) raw bits of y0

	fand	%f2,%f12,%f2		! y0 = vis_fand(y0, dtmp0)
	fxtod	%f2,%f2			! y0 = (double) raw bits of y0
	faddd	%f2,%f10,%f2		! y0 += D2ON51
1:
	std	%f2,[%i3]		! park converted y0 in dtmp3

	stx	%g5,[%fp+dtmp15]	! D2ONM52

	ba	.cont_spec1
	stx	%g1,[%fp+dtmp0]		! 
! D2ON1022   (tail of the comment on the preceding delay-slot stx)

!
! .update0 .. .update4 - out-of-line fix-ups; their branch sites are in the
! main loop, outside this view.  Common shape, as visible here:
!   * if counter exceeds N, the remainder (counter - N) is parked in
!     tmp_counter, the two pointer registers are parked in tmp_px/tmp_py,
!     and counter is clamped to N;
!   * the floating-point work shown after label 1 is executed, the constant
!     0x3ff00000 is placed in %o4 and both pointer registers are redirected
!     to TBL+TBL_SHIFT+24;
!   * control returns to the named .contK label.
! NOTE(review): presumably TBL+TBL_SHIFT+24 is a benign stand-in operand so
! the pipeline can drain; confirm against the table layout.
!
	.align	16
.update0:				! N = 1, pointers %i2/%o0, resumes at .cont1
	cmp	counter,1
	ble	1f			! nothing to park when counter is 1 or less
	nop

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 1

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	1,counter
1:
	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i2
	ba	.cont1
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot)

	.align	16
.update1:				! as .update0, but first re-checks hy0
	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont0		! (0_0) resume at .cont0 unless hy0 is below 0x00100000

	cmp	counter,1		! (delay slot, runs on both paths)
	ble,a	1f
	nop				! (annulled delay slot)

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 1

	stx	%i2,[%fp+tmp_px]

	mov	1,counter
	stx	%o0,[%fp+tmp_py]
1:
	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i2
	ba	.cont1
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot)

	.align	16
.update2:				! N = 2, pointers %i4/%i3, resumes at .cont4
	cmp	counter,2
	ble	1f
	nop

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 2

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	2,counter
1:
	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36

	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0
	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0

	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0
	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0

	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i4
	ba	.cont4
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot)

	.align	16
.update3:				! as .update2 without the y_hi0 adjustment
	cmp	counter,2
	ble	1f
	nop

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 2

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	2,counter
1:
	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0
	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0

	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0
	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0

	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i4
	ba	.cont4
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot)

	.align	16
.update4:				! re-checks hy0, then as .update2 without FP work
	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
	bge,a,pn %icc,.cont4		! (0_0) resume at .cont4 unless hy0 is below 0x00100000
	sub	%l0,%o4,%o4		! 
! (1_0) j0 = 0x7ff00000 - j0   (tail of the comment on the preceding
!                               annulled delay-slot sub of .update4)

	cmp	counter,2
	ble,a	1f
	nop				! (annulled delay slot)

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 2

	stx	%i4,[%fp+tmp_px]

	mov	2,counter
	stx	%i3,[%fp+tmp_py]
1:
	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i4
	ba	.cont4
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot)

!
! .update5 .. .update10 - out-of-line fix-ups reached from the main loop
! (branch sites outside this view).  Each one parks counter - N and the two
! pointer registers in tmp_counter/tmp_px/tmp_py when counter exceeds N,
! clamps counter to N, executes the floating-point work shown after label 1,
! loads 0x3ff00000 into %g1, redirects both pointer registers to
! TBL+TBL_SHIFT+24 and resumes at the named .contK label.
! .update5/6/7 additionally shift %g1 left by 32 before resuming.
!
	.align	16
.update5:				! N = 3, pointers %i2/%o0, resumes at .cont8
	cmp	counter,3
	ble	1f
	nop

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 3

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	3,counter
1:
	st	%f14,[%fp+ftmp0]	! (7_1) iarr = high word of dres
	fsubd	%f46,D2ON36,%f20	! (0_0) x_hi0 -= D2ON36

	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36

	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0
	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0

	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0
	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2

	sllx	%g1,32,%g1
	ba	.cont8
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot)

	.align	16
.update6:				! as .update5 without the store and hi adjustments
	cmp	counter,3
	ble	1f
	nop

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 3

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	3,counter
1:
	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0
	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0

	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0
	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2

	sllx	%g1,32,%g1
	ba	.cont8
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot)

	.align	16
.update7:				! re-checks hy0, then as .update5 without FP work
	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont7		! (0_0) resume at .cont7 unless hy0 is below 0x00100000

	cmp	counter,3		! (delay slot, runs on both paths)
	ble,a	1f
	nop				! (annulled delay slot)

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 3

	stx	%i2,[%fp+tmp_px]

	mov	3,counter
	stx	%o0,[%fp+tmp_py]
1:
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2

	sllx	%g1,32,%g1
	ba	.cont8
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot)

	.align	16
.update9:				! N = 4, pointers %i4/%i3, resumes at .cont12
	cmp	counter,4
	ble	1f
	nop

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 4

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	4,counter
1:
	st	%f22,[%fp+ftmp0]	! (0_0) iarr = high word of dres
	fsubd	%f46,D2ON36,%f20	! (1_0) x_hi0 -= D2ON36

	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36

	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres


	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0
	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0

	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0
	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0

	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	ba	.cont12
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot)

	.align	16
.update10:				! as .update9 without the store and hi adjustments
	cmp	counter,4
	ble	1f
	nop

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 4

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	4,counter
1:
	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres


	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0
	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0

	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0
	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0

	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	ba	.cont12
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot)

	.align	16
.update11:
	cmp	%l7,_0x00100000		! (0_0) hy0 ? 
! 0x00100000   (tail of the comment on the preceding cmp of .update11)

!
! Remainder of .update11, then .update13 .. .update15 - out-of-line fix-ups
! reached from the main loop (branch sites outside this view).  Each one
! parks counter - N and the two pointer registers in
! tmp_counter/tmp_px/tmp_py when counter exceeds N, clamps counter to N,
! executes the floating-point work shown after label 1, loads 0x3ff00000
! into %g1, redirects both pointer registers to TBL+TBL_SHIFT+24 and resumes
! at the named .contK label.
!
	bge,pn	%icc,.cont11		! (0_0) resume at .cont11 unless hy0 is below 0x00100000

	cmp	counter,4		! (delay slot, runs on both paths)
	ble,a	1f
	nop				! (annulled delay slot)

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 4

	stx	%i4,[%fp+tmp_px]

	mov	4,counter
	stx	%i3,[%fp+tmp_py]
1:
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4

	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0
	ba	.cont12
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot)

	.align	16
.update13:				! N = 5, pointers %i2/%o0, resumes at .cont16
	cmp	counter,5
	ble	1f
	nop

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 5

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	5,counter
1:
	fsubd	%f46,D2ON36,%f20	! (2_0) x_hi0 -= D2ON36

	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36

	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres

	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0
	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0

	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0
	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0

	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	ba	.cont16
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot)

	.align	16
.update14:				! as .update13 without the hi adjustments
	cmp	counter,5
	ble	1f
	nop

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 5

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	5,counter
1:
	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres

	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0
	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0

	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0
	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0

	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	ba	.cont16
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot)

	.align	16
.update15:				! re-checks hy0, then as .update13 with one FP op
	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont15		! (0_0) resume at .cont15 unless hy0 is below 0x00100000

	cmp	counter,5		! (delay slot, runs on both paths)
	ble,a	1f
	nop				! (annulled delay slot)

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 5

	stx	%i2,[%fp+tmp_px]

	mov	5,counter
	stx	%o0,[%fp+tmp_py]
1:
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2

	fsubd	DTWO,%f50,%f20		! 
! (0_0) dtmp0 = DTWO - dtmp0   (tail of the comment on the preceding fsubd
!                               of .update15)
	ba	.cont16
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot)

!
! .update17 .. .update19 - out-of-line fix-ups reached from the main loop
! (branch sites outside this view), N = 6, pointers %i4/%i3.  Each one parks
! counter - 6 and the pointer registers in tmp_counter/tmp_px/tmp_py when
! counter exceeds 6, clamps counter to 6, executes the floating-point work
! shown after label 1, loads 0x3ff00000 into %g1 and redirects both pointer
! registers to TBL+TBL_SHIFT+24.  .update17/18 also store %g1 shifted left
! by 32 to dtmp11 and resume at .cont20; .update19 resumes at .cont19b.
!
	.align	16
.update17:
	cmp	counter,6
	ble	1f
	nop

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 6

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	6,counter
1:
	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36

	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres

	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0)

	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0
	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0

	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0
	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0

	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0
	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0

	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres
	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0

	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0
	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4

	sllx	%g1,32,%g1		! (5_0) ll = j0 shifted left by 32
	stx	%g1,[%fp+dtmp11]	! (5_0) scl0 bit pattern = ll
	ba	.cont20
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot)

	.align	16
.update18:				! as .update17 without the y_hi0 adjustment
	cmp	counter,6
	ble	1f
	nop

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 6

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	6,counter
1:
	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres

	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0)

	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0
	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0

	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0
	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0

	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0
	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0

	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres
	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0

	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0
	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4

	sllx	%g1,32,%g1		! (5_0) ll = j0 shifted left by 32
	stx	%g1,[%fp+dtmp11]	! (5_0) scl0 bit pattern = ll
	ba	.cont20
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot)

	.align	16
.update19:				! re-checks hy0 before doing the fix-up
	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont19a		! (0_0) resume at .cont19a unless hy0 is below 0x00100000

	cmp	counter,6		! (delay slot, runs on both paths)
	ble,a	1f
	nop				! (annulled delay slot)

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 6

	stx	%i4,[%fp+tmp_px]

	mov	6,counter
	stx	%i3,[%fp+tmp_py]
1:
	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	fsubd	DTWO,%f50,%f20		! 
! (1_0) dtmp0 = DTWO - dtmp0   (tail of the comment on the preceding fsubd
!                               of .update19)

	ba	.cont19b
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot)

!
! .update21 / .update22 - out-of-line fix-ups reached from the main loop
! (branch sites outside this view), N = 7, pointers %i2/%o0.  Each one parks
! counter - 7 and the pointer registers in tmp_counter/tmp_px/tmp_py when
! counter exceeds 7, clamps counter to 7, executes the floating-point work
! shown after label 1, stores 0x3ff00000 shifted left by 32 to dtmp13,
! redirects both pointer registers to TBL+TBL_SHIFT+24 and resumes at
! .cont24.  .update23 starts the same way after re-checking hy0; its tail
! lies beyond this view.
!
	.align	16
.update21:
	cmp	counter,7
	ble	1f
	nop

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 7

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	7,counter
1:
	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36

	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres
	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0

	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0)

	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0
	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0

	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0
	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0

	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres
	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0

	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0
	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0

	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0

	sllx	%g1,32,%g1		! (6_0) ll = j0 shifted left by 32
	stx	%g1,[%fp+dtmp13]	! (6_0) scl0 bit pattern = ll
	ba	.cont24
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot)

	.align	16
.update22:				! as .update21 without the y_hi0 adjustment
	cmp	counter,7
	ble	1f
	nop

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 7

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	7,counter
1:
	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres
	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0

	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0)

	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0
	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0

	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0
	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0

	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres
	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0

	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0
	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0

	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0

	sllx	%g1,32,%g1		! (6_0) ll = j0 shifted left by 32
	stx	%g1,[%fp+dtmp13]	! (6_0) scl0 bit pattern = ll
	ba	.cont24
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot)

	.align	16
.update23:
	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont23a		! (0_0) resume at .cont23a unless hy0 is below 0x00100000

	cmp	counter,7		! (delay slot, runs on both paths)
	ble,a	1f
	nop				! (annulled delay slot)

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 7

	stx	%i2,[%fp+tmp_px]

	mov	7,counter
	stx	%o0,[%fp+tmp_py]
1:
	fmuld	%f16,%f18,%f18		! 
(1_0) dtmp2 = dd * dres; 2809*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2810*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 2811*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; 2812*25c28e83SPiotr Jasiukajtis 2813*25c28e83SPiotr Jasiukajtis ba .cont23b 2814*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 2815*25c28e83SPiotr Jasiukajtis 2816*25c28e83SPiotr Jasiukajtis .align 16 2817*25c28e83SPiotr Jasiukajtis.update25: 2818*25c28e83SPiotr Jasiukajtis cmp counter,8 2819*25c28e83SPiotr Jasiukajtis ble 1f 2820*25c28e83SPiotr Jasiukajtis nop 2821*25c28e83SPiotr Jasiukajtis 2822*25c28e83SPiotr Jasiukajtis sub counter,8,counter 2823*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2824*25c28e83SPiotr Jasiukajtis 2825*25c28e83SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 2826*25c28e83SPiotr Jasiukajtis 2827*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 2828*25c28e83SPiotr Jasiukajtis 2829*25c28e83SPiotr Jasiukajtis mov 8,counter 2830*25c28e83SPiotr Jasiukajtis1: 2831*25c28e83SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; 2832*25c28e83SPiotr Jasiukajtis 2833*25c28e83SPiotr Jasiukajtis fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; 2834*25c28e83SPiotr Jasiukajtis faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; 2835*25c28e83SPiotr Jasiukajtis 2836*25c28e83SPiotr Jasiukajtis fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); 2837*25c28e83SPiotr Jasiukajtis 2838*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; 2839*25c28e83SPiotr Jasiukajtis fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; 2840*25c28e83SPiotr Jasiukajtis 2841*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; 2842*25c28e83SPiotr Jasiukajtis faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; 2843*25c28e83SPiotr Jasiukajtis 2844*25c28e83SPiotr Jasiukajtis fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; 2845*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! 
(3_0) dtmp0 = DTWO - dtmp0; 2846*25c28e83SPiotr Jasiukajtis 2847*25c28e83SPiotr Jasiukajtis fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; 2848*25c28e83SPiotr Jasiukajtis faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; 2849*25c28e83SPiotr Jasiukajtis 2850*25c28e83SPiotr Jasiukajtis fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; 2851*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2852*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 2853*25c28e83SPiotr Jasiukajtis fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; 2854*25c28e83SPiotr Jasiukajtis 2855*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 2856*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; 2857*25c28e83SPiotr Jasiukajtis ba .cont28 2858*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 2859*25c28e83SPiotr Jasiukajtis 2860*25c28e83SPiotr Jasiukajtis .align 16 2861*25c28e83SPiotr Jasiukajtis.update26: 2862*25c28e83SPiotr Jasiukajtis cmp counter,8 2863*25c28e83SPiotr Jasiukajtis ble 1f 2864*25c28e83SPiotr Jasiukajtis nop 2865*25c28e83SPiotr Jasiukajtis 2866*25c28e83SPiotr Jasiukajtis sub counter,8,counter 2867*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2868*25c28e83SPiotr Jasiukajtis 2869*25c28e83SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 2870*25c28e83SPiotr Jasiukajtis 2871*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 2872*25c28e83SPiotr Jasiukajtis 2873*25c28e83SPiotr Jasiukajtis mov 8,counter 2874*25c28e83SPiotr Jasiukajtis1: 2875*25c28e83SPiotr Jasiukajtis fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; 2876*25c28e83SPiotr Jasiukajtis faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; 2877*25c28e83SPiotr Jasiukajtis 2878*25c28e83SPiotr Jasiukajtis fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); 2879*25c28e83SPiotr Jasiukajtis 2880*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; 2881*25c28e83SPiotr Jasiukajtis fsubd %f60,%f20,%f2 ! 
(5_0) x_lo0 = x0 - x_hi0; 2882*25c28e83SPiotr Jasiukajtis 2883*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; 2884*25c28e83SPiotr Jasiukajtis faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; 2885*25c28e83SPiotr Jasiukajtis 2886*25c28e83SPiotr Jasiukajtis fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; 2887*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; 2888*25c28e83SPiotr Jasiukajtis 2889*25c28e83SPiotr Jasiukajtis fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; 2890*25c28e83SPiotr Jasiukajtis faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; 2891*25c28e83SPiotr Jasiukajtis 2892*25c28e83SPiotr Jasiukajtis fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; 2893*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2894*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 2895*25c28e83SPiotr Jasiukajtis fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; 2896*25c28e83SPiotr Jasiukajtis 2897*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 2898*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; 2899*25c28e83SPiotr Jasiukajtis ba .cont28 2900*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 2901*25c28e83SPiotr Jasiukajtis 2902*25c28e83SPiotr Jasiukajtis .align 16 2903*25c28e83SPiotr Jasiukajtis.update27: 2904*25c28e83SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 2905*25c28e83SPiotr Jasiukajtis bge,pn %icc,.cont27a ! 
(0_0) if ( hy0 < 0x00100000 ) 2906*25c28e83SPiotr Jasiukajtis 2907*25c28e83SPiotr Jasiukajtis cmp counter,8 2908*25c28e83SPiotr Jasiukajtis ble,a 1f 2909*25c28e83SPiotr Jasiukajtis nop 2910*25c28e83SPiotr Jasiukajtis 2911*25c28e83SPiotr Jasiukajtis sub counter,8,counter 2912*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2913*25c28e83SPiotr Jasiukajtis 2914*25c28e83SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 2915*25c28e83SPiotr Jasiukajtis 2916*25c28e83SPiotr Jasiukajtis mov 8,counter 2917*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 2918*25c28e83SPiotr Jasiukajtis1: 2919*25c28e83SPiotr Jasiukajtis fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; 2920*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2921*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 2922*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; 2923*25c28e83SPiotr Jasiukajtis 2924*25c28e83SPiotr Jasiukajtis ba .cont27b 2925*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 2926*25c28e83SPiotr Jasiukajtis 2927*25c28e83SPiotr Jasiukajtis .align 16 2928*25c28e83SPiotr Jasiukajtis.update29: 2929*25c28e83SPiotr Jasiukajtis cmp counter,1 2930*25c28e83SPiotr Jasiukajtis ble 1f 2931*25c28e83SPiotr Jasiukajtis nop 2932*25c28e83SPiotr Jasiukajtis 2933*25c28e83SPiotr Jasiukajtis sub counter,1,counter 2934*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2935*25c28e83SPiotr Jasiukajtis 2936*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 2937*25c28e83SPiotr Jasiukajtis 2938*25c28e83SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 2939*25c28e83SPiotr Jasiukajtis 2940*25c28e83SPiotr Jasiukajtis mov 1,counter 2941*25c28e83SPiotr Jasiukajtis1: 2942*25c28e83SPiotr Jasiukajtis fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36; 2943*25c28e83SPiotr Jasiukajtis 2944*25c28e83SPiotr Jasiukajtis fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; 2945*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; 2946*25c28e83SPiotr Jasiukajtis faddd %f28,%f48,%f52 ! 
(1_1) res0 += dtmp0; 2947*25c28e83SPiotr Jasiukajtis 2948*25c28e83SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); 2949*25c28e83SPiotr Jasiukajtis 2950*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; 2951*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; 2952*25c28e83SPiotr Jasiukajtis 2953*25c28e83SPiotr Jasiukajtis fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; 2954*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 2955*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; 2956*25c28e83SPiotr Jasiukajtis 2957*25c28e83SPiotr Jasiukajtis fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; 2958*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%o4 2959*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 2960*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; 2961*25c28e83SPiotr Jasiukajtis 2962*25c28e83SPiotr Jasiukajtis ba .cont32 2963*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 2964*25c28e83SPiotr Jasiukajtis 2965*25c28e83SPiotr Jasiukajtis .align 16 2966*25c28e83SPiotr Jasiukajtis.update30: 2967*25c28e83SPiotr Jasiukajtis cmp counter,1 2968*25c28e83SPiotr Jasiukajtis ble 1f 2969*25c28e83SPiotr Jasiukajtis nop 2970*25c28e83SPiotr Jasiukajtis 2971*25c28e83SPiotr Jasiukajtis sub counter,1,counter 2972*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2973*25c28e83SPiotr Jasiukajtis 2974*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 2975*25c28e83SPiotr Jasiukajtis 2976*25c28e83SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 2977*25c28e83SPiotr Jasiukajtis 2978*25c28e83SPiotr Jasiukajtis mov 1,counter 2979*25c28e83SPiotr Jasiukajtis1: 2980*25c28e83SPiotr Jasiukajtis fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; 2981*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; 2982*25c28e83SPiotr Jasiukajtis faddd %f28,%f48,%f52 ! 
(1_1) res0 += dtmp0; 2983*25c28e83SPiotr Jasiukajtis 2984*25c28e83SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); 2985*25c28e83SPiotr Jasiukajtis 2986*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; 2987*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; 2988*25c28e83SPiotr Jasiukajtis 2989*25c28e83SPiotr Jasiukajtis fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; 2990*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 2991*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; 2992*25c28e83SPiotr Jasiukajtis 2993*25c28e83SPiotr Jasiukajtis fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; 2994*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%o4 2995*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 2996*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; 2997*25c28e83SPiotr Jasiukajtis 2998*25c28e83SPiotr Jasiukajtis ba .cont32 2999*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 3000*25c28e83SPiotr Jasiukajtis 3001*25c28e83SPiotr Jasiukajtis .align 16 3002*25c28e83SPiotr Jasiukajtis.update31: 3003*25c28e83SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 3004*25c28e83SPiotr Jasiukajtis bge,pn %icc,.cont31 ! (0_0) if ( hy0 < 0x00100000 ) 3005*25c28e83SPiotr Jasiukajtis 3006*25c28e83SPiotr Jasiukajtis cmp counter,1 3007*25c28e83SPiotr Jasiukajtis ble,a 1f 3008*25c28e83SPiotr Jasiukajtis nop 3009*25c28e83SPiotr Jasiukajtis 3010*25c28e83SPiotr Jasiukajtis sub counter,1,counter 3011*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3012*25c28e83SPiotr Jasiukajtis 3013*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 3014*25c28e83SPiotr Jasiukajtis 3015*25c28e83SPiotr Jasiukajtis mov 1,counter 3016*25c28e83SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 3017*25c28e83SPiotr Jasiukajtis1: 3018*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! 
(6_1) res0_hi = x_hi0 * x_hi0; 3019*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; 3020*25c28e83SPiotr Jasiukajtis 3021*25c28e83SPiotr Jasiukajtis fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; 3022*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3023*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; 3024*25c28e83SPiotr Jasiukajtis 3025*25c28e83SPiotr Jasiukajtis fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; 3026*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%o4 3027*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 3028*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; 3029*25c28e83SPiotr Jasiukajtis 3030*25c28e83SPiotr Jasiukajtis ba .cont32 3031*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 3032*25c28e83SPiotr Jasiukajtis 3033*25c28e83SPiotr Jasiukajtis .align 16 3034*25c28e83SPiotr Jasiukajtis.update33: 3035*25c28e83SPiotr Jasiukajtis cmp counter,2 3036*25c28e83SPiotr Jasiukajtis ble 1f 3037*25c28e83SPiotr Jasiukajtis nop 3038*25c28e83SPiotr Jasiukajtis 3039*25c28e83SPiotr Jasiukajtis sub counter,2,counter 3040*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3041*25c28e83SPiotr Jasiukajtis 3042*25c28e83SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 3043*25c28e83SPiotr Jasiukajtis 3044*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 3045*25c28e83SPiotr Jasiukajtis 3046*25c28e83SPiotr Jasiukajtis mov 2,counter 3047*25c28e83SPiotr Jasiukajtis1: 3048*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; 3049*25c28e83SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; 3050*25c28e83SPiotr Jasiukajtis 3051*25c28e83SPiotr Jasiukajtis fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; 3052*25c28e83SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; 3053*25c28e83SPiotr Jasiukajtis 3054*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3055*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp2] ! 
(0_0) *(long long*)&scl0 = ll; 3056*25c28e83SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); 3057*25c28e83SPiotr Jasiukajtis 3058*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; 3059*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; 3060*25c28e83SPiotr Jasiukajtis 3061*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; 3062*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; 3063*25c28e83SPiotr Jasiukajtis 3064*25c28e83SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; 3065*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; 3066*25c28e83SPiotr Jasiukajtis 3067*25c28e83SPiotr Jasiukajtis fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; 3068*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; 3069*25c28e83SPiotr Jasiukajtis 3070*25c28e83SPiotr Jasiukajtis fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; 3071*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%o4 3072*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 3073*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; 3074*25c28e83SPiotr Jasiukajtis 3075*25c28e83SPiotr Jasiukajtis sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; 3076*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp3] ! 
(1_0) *(long long*)&scl0 = ll; 3077*25c28e83SPiotr Jasiukajtis ba .cont36 3078*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 3079*25c28e83SPiotr Jasiukajtis 3080*25c28e83SPiotr Jasiukajtis .align 16 3081*25c28e83SPiotr Jasiukajtis.update34: 3082*25c28e83SPiotr Jasiukajtis cmp counter,2 3083*25c28e83SPiotr Jasiukajtis ble 1f 3084*25c28e83SPiotr Jasiukajtis nop 3085*25c28e83SPiotr Jasiukajtis 3086*25c28e83SPiotr Jasiukajtis sub counter,2,counter 3087*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3088*25c28e83SPiotr Jasiukajtis 3089*25c28e83SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 3090*25c28e83SPiotr Jasiukajtis 3091*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 3092*25c28e83SPiotr Jasiukajtis 3093*25c28e83SPiotr Jasiukajtis mov 2,counter 3094*25c28e83SPiotr Jasiukajtis1: 3095*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3096*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; 3097*25c28e83SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); 3098*25c28e83SPiotr Jasiukajtis 3099*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; 3100*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; 3101*25c28e83SPiotr Jasiukajtis 3102*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; 3103*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; 3104*25c28e83SPiotr Jasiukajtis 3105*25c28e83SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; 3106*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; 3107*25c28e83SPiotr Jasiukajtis 3108*25c28e83SPiotr Jasiukajtis fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; 3109*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; 3110*25c28e83SPiotr Jasiukajtis 3111*25c28e83SPiotr Jasiukajtis fmuld %f38,%f48,%f38 ! 
(3_1) dtmp1 = res0_lo * res0; 3112*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%o4 3113*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 3114*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; 3115*25c28e83SPiotr Jasiukajtis 3116*25c28e83SPiotr Jasiukajtis sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; 3117*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; 3118*25c28e83SPiotr Jasiukajtis ba .cont36 3119*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 3120*25c28e83SPiotr Jasiukajtis 3121*25c28e83SPiotr Jasiukajtis .align 16 3122*25c28e83SPiotr Jasiukajtis.update35: 3123*25c28e83SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 3124*25c28e83SPiotr Jasiukajtis bge,pn %icc,.cont35a ! (0_0) if ( hy0 < 0x00100000 ) 3125*25c28e83SPiotr Jasiukajtis 3126*25c28e83SPiotr Jasiukajtis cmp counter,2 3127*25c28e83SPiotr Jasiukajtis ble,a 1f 3128*25c28e83SPiotr Jasiukajtis nop 3129*25c28e83SPiotr Jasiukajtis 3130*25c28e83SPiotr Jasiukajtis sub counter,2,counter 3131*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3132*25c28e83SPiotr Jasiukajtis 3133*25c28e83SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 3134*25c28e83SPiotr Jasiukajtis 3135*25c28e83SPiotr Jasiukajtis mov 2,counter 3136*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 3137*25c28e83SPiotr Jasiukajtis1: 3138*25c28e83SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; 3139*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%o4 3140*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 3141*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! 
(5_1) dtmp0 = DTWO - dtmp0; 3142*25c28e83SPiotr Jasiukajtis 3143*25c28e83SPiotr Jasiukajtis ba .cont35b 3144*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 3145*25c28e83SPiotr Jasiukajtis 3146*25c28e83SPiotr Jasiukajtis .align 16 3147*25c28e83SPiotr Jasiukajtis.update37: 3148*25c28e83SPiotr Jasiukajtis cmp counter,3 3149*25c28e83SPiotr Jasiukajtis ble 1f 3150*25c28e83SPiotr Jasiukajtis nop 3151*25c28e83SPiotr Jasiukajtis 3152*25c28e83SPiotr Jasiukajtis sub counter,3,counter 3153*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3154*25c28e83SPiotr Jasiukajtis 3155*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 3156*25c28e83SPiotr Jasiukajtis 3157*25c28e83SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 3158*25c28e83SPiotr Jasiukajtis 3159*25c28e83SPiotr Jasiukajtis mov 3,counter 3160*25c28e83SPiotr Jasiukajtis1: 3161*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; 3162*25c28e83SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; 3163*25c28e83SPiotr Jasiukajtis 3164*25c28e83SPiotr Jasiukajtis fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres; 3165*25c28e83SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; 3166*25c28e83SPiotr Jasiukajtis 3167*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3168*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; 3169*25c28e83SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); 3170*25c28e83SPiotr Jasiukajtis 3171*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; 3172*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; 3173*25c28e83SPiotr Jasiukajtis 3174*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; 3175*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; 3176*25c28e83SPiotr Jasiukajtis 3177*25c28e83SPiotr Jasiukajtis fmuld %f32,%f48,%f10 ! 
(4_1) dtmp0 = res0_hi * res0; 3178*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; 3179*25c28e83SPiotr Jasiukajtis 3180*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; 3181*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; 3182*25c28e83SPiotr Jasiukajtis 3183*25c28e83SPiotr Jasiukajtis fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; 3184*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3185*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 3186*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; 3187*25c28e83SPiotr Jasiukajtis 3188*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; 3189*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; 3190*25c28e83SPiotr Jasiukajtis ba .cont40 3191*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 3192*25c28e83SPiotr Jasiukajtis 3193*25c28e83SPiotr Jasiukajtis .align 16 3194*25c28e83SPiotr Jasiukajtis.update38: 3195*25c28e83SPiotr Jasiukajtis cmp counter,3 3196*25c28e83SPiotr Jasiukajtis ble 1f 3197*25c28e83SPiotr Jasiukajtis nop 3198*25c28e83SPiotr Jasiukajtis 3199*25c28e83SPiotr Jasiukajtis sub counter,3,counter 3200*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3201*25c28e83SPiotr Jasiukajtis 3202*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 3203*25c28e83SPiotr Jasiukajtis 3204*25c28e83SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 3205*25c28e83SPiotr Jasiukajtis 3206*25c28e83SPiotr Jasiukajtis mov 3,counter 3207*25c28e83SPiotr Jasiukajtis1: 3208*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3209*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; 3210*25c28e83SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); 3211*25c28e83SPiotr Jasiukajtis 3212*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; 3213*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! 
(0_0) x_lo0 = x0 - x_hi0; 3214*25c28e83SPiotr Jasiukajtis 3215*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; 3216*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; 3217*25c28e83SPiotr Jasiukajtis 3218*25c28e83SPiotr Jasiukajtis fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; 3219*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; 3220*25c28e83SPiotr Jasiukajtis 3221*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; 3222*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; 3223*25c28e83SPiotr Jasiukajtis 3224*25c28e83SPiotr Jasiukajtis fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; 3225*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3226*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 3227*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; 3228*25c28e83SPiotr Jasiukajtis 3229*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; 3230*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; 3231*25c28e83SPiotr Jasiukajtis ba .cont40 3232*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 3233*25c28e83SPiotr Jasiukajtis 3234*25c28e83SPiotr Jasiukajtis .align 16 3235*25c28e83SPiotr Jasiukajtis.update39: 3236*25c28e83SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 3237*25c28e83SPiotr Jasiukajtis bge,pn %icc,.cont39a ! 
(0_0) if ( hy0 < 0x00100000 ) 3238*25c28e83SPiotr Jasiukajtis 3239*25c28e83SPiotr Jasiukajtis cmp counter,3 3240*25c28e83SPiotr Jasiukajtis ble,a 1f 3241*25c28e83SPiotr Jasiukajtis nop 3242*25c28e83SPiotr Jasiukajtis 3243*25c28e83SPiotr Jasiukajtis sub counter,3,counter 3244*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3245*25c28e83SPiotr Jasiukajtis 3246*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 3247*25c28e83SPiotr Jasiukajtis 3248*25c28e83SPiotr Jasiukajtis mov 3,counter 3249*25c28e83SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 3250*25c28e83SPiotr Jasiukajtis1: 3251*25c28e83SPiotr Jasiukajtis fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; 3252*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3253*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 3254*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; 3255*25c28e83SPiotr Jasiukajtis 3256*25c28e83SPiotr Jasiukajtis ba .cont39b 3257*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 3258*25c28e83SPiotr Jasiukajtis 3259*25c28e83SPiotr Jasiukajtis .align 16 3260*25c28e83SPiotr Jasiukajtis.update41: 3261*25c28e83SPiotr Jasiukajtis cmp counter,4 3262*25c28e83SPiotr Jasiukajtis ble 1f 3263*25c28e83SPiotr Jasiukajtis nop 3264*25c28e83SPiotr Jasiukajtis 3265*25c28e83SPiotr Jasiukajtis sub counter,4,counter 3266*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3267*25c28e83SPiotr Jasiukajtis 3268*25c28e83SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 3269*25c28e83SPiotr Jasiukajtis 3270*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 3271*25c28e83SPiotr Jasiukajtis 3272*25c28e83SPiotr Jasiukajtis mov 4,counter 3273*25c28e83SPiotr Jasiukajtis1: 3274*25c28e83SPiotr Jasiukajtis st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; 3275*25c28e83SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; 3276*25c28e83SPiotr Jasiukajtis 3277*25c28e83SPiotr Jasiukajtis fmuld %f26,%f14,%f50 ! 
(7_1) dtmp0 = dd * dres; 3278*25c28e83SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0; 3279*25c28e83SPiotr Jasiukajtis 3280*25c28e83SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3281*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; 3282*25c28e83SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); 3283*25c28e83SPiotr Jasiukajtis 3284*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; 3285*25c28e83SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; 3286*25c28e83SPiotr Jasiukajtis 3287*25c28e83SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; 3288*25c28e83SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; 3289*25c28e83SPiotr Jasiukajtis 3290*25c28e83SPiotr Jasiukajtis fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; 3291*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; 3292*25c28e83SPiotr Jasiukajtis 3293*25c28e83SPiotr Jasiukajtis fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; 3294*25c28e83SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; 3295*25c28e83SPiotr Jasiukajtis 3296*25c28e83SPiotr Jasiukajtis fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; 3297*25c28e83SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3298*25c28e83SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 3299*25c28e83SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 3300*25c28e83SPiotr Jasiukajtis 3301*25c28e83SPiotr Jasiukajtis sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; 3302*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+dtmp7] ! 
! (3_0) *(long long*)&scl0 = ll;	(text of the comment opened by the
!					 trailing '!' on the preceding line)
	ba	.cont44
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: executes before .cont44

!
! Out-of-line ".updateNN" handlers, end of __vrhypot.
!
! Common shape of the handlers that follow:
!
!   * Clamp step.  If counter > K, the handler stores counter-K at
!     [%fp+tmp_counter], stores its two pointer registers at [%fp+tmp_px]
!     and [%fp+tmp_py], and then sets counter = K.  If counter <= K the
!     clamp is skipped (branch to the local label 1).
!     NOTE(review): presumably this shortens the current pass so the outer
!     loop can restart from the saved pointers; the code that reloads
!     tmp_counter/tmp_px/tmp_py is not visible in this part of the file.
!
!   * Replay step.  The instructions after label 1 carry the original
!     author's "(s_p)" stage tags.
!     NOTE(review): the tags appear to name the in-flight element/pass each
!     instruction belongs to - confirm against the main loop.  Instruction
!     order is kept exactly as in the original; registers are live across
!     code outside this chunk.
!
!   * Redirect step.  Both pointer registers are set to TBL+TBL_SHIFT+24 and
!     control returns to a .contNN label; the second `add` always sits in
!     the delay slot of the `ba`, so it executes before the target.
!
!   * sethi %hi(0x3ff00000),%g1 followed by sllx %g1,32,%g1 yields
!     0x3ff0000000000000, the bit pattern of the double 1.0.
!
! Branch details worth knowing when reading the code:
!   - `ble 1f` / `nop`   : plain branch, delay slot is a nop.
!   - `ble,a 1f` / `nop` : annulled form; the slot is a nop either way.
!   - `bge,pn %icc,...`  : not annulled, so the instruction that follows it
!                          (`cmp counter,K`) executes on both paths.
!

!
! .update42 : K = 4, pointer pair %i4 / %i3, leaves via .cont44.
!
	.align	16
.update42:
	cmp	counter,4
	ble	1f
	nop

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]
	stx	%i4,[%fp+tmp_px]
	stx	%i3,[%fp+tmp_py]
	mov	4,counter
1:
	add	%i5,stridez,%i5		! pz += stridez
	stx	%g1,[%fp+dtmp6]		! (2_0) *(long long*)&scl0 = ll;
	fand	%f28,DA0,%f48		! (5_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;

	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f24,%f18,%f18		! (6_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50		! (1_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f48,%f34		! (5_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	fsubd	%f60,%f54,%f12		! (1_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1		! (3_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp7]		! (3_0) *(long long*)&scl0 = ll;
	ba	.cont44
	add	TBL,TBL_SHIFT+24,%i3	! delay slot

!
! .update43 : K = 4, pointer pair %i4 / %i3.
! Leaves via .cont43a when %l7 >= _0x00100000, otherwise clamps and leaves
! via .cont43b.  Only the sethi half of the 1.0 constant is formed here.
!
	.align	16
.update43:
	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont43a		! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,4		! delay slot of the bge above
	ble,a	1f
	nop

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]
	stx	%i4,[%fp+tmp_px]
	mov	4,counter
	stx	%i3,[%fp+tmp_py]
1:
	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;

	ba	.cont43b
	add	TBL,TBL_SHIFT+24,%i3	! delay slot

!
! .update45 : K = 5, pointer pair %i2 / %o0, leaves via .cont48.
! Differs from .update46 only by the leading y_hi0 adjustment.
!
	.align	16
.update45:
	cmp	counter,5
	ble	1f
	nop

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]
	stx	%i2,[%fp+tmp_px]
	stx	%o0,[%fp+tmp_py]
	mov	5,counter
1:
	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;

	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
	st	%f1,[%i5+4]		! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52		! (5_1) res0 += dtmp0;

	fand	%f26,DA0,%f48		! (6_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;

	fmuld	%f30,%f48,%f10		! (6_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f16,%f14,%f14		! (7_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50		! (2_0) dtmp1 = y0 + y_hi0;

	fmuld	%f40,%f48,%f40		! (6_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	%f60,%f54,%f12		! (2_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1		! (4_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp9]		! (4_0) *(long long*)&scl0 = ll;
	ba	.cont48
	add	TBL,TBL_SHIFT+24,%o0	! delay slot

!
! .update46 : K = 5, pointer pair %i2 / %o0, leaves via .cont48.
!
	.align	16
.update46:
	cmp	counter,5
	ble	1f
	nop

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]
	stx	%i2,[%fp+tmp_px]
	stx	%o0,[%fp+tmp_py]
	mov	5,counter
1:
	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
	st	%f1,[%i5+4]		! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52		! (5_1) res0 += dtmp0;

	fand	%f26,DA0,%f48		! (6_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;

	fmuld	%f30,%f48,%f10		! (6_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f16,%f14,%f14		! (7_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50		! (2_0) dtmp1 = y0 + y_hi0;

	fmuld	%f40,%f48,%f40		! (6_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	%f60,%f54,%f12		! (2_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1		! (4_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp9]		! (4_0) *(long long*)&scl0 = ll;
	ba	.cont48
	add	TBL,TBL_SHIFT+24,%o0	! delay slot

!
! .update47 : K = 5, pointer pair %i2 / %o0.
! Leaves via .cont47a when %l7 >= _0x00100000, otherwise clamps and leaves
! via .cont47b.  Only the sethi half of the 1.0 constant is formed here.
!
	.align	16
.update47:
	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont47a		! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,5		! delay slot of the bge above
	ble,a	1f
	nop

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]
	stx	%i2,[%fp+tmp_px]
	mov	5,counter
	stx	%o0,[%fp+tmp_py]
1:
	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;

	fmuld	%f30,%f48,%f10		! (6_1) dtmp0 = res0_hi * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;

	ba	.cont47b
	add	TBL,TBL_SHIFT+24,%o0	! delay slot

!
! .update49 : K = 6, pointer pair %i4 / %i3, leaves via .cont52.
! Differs from .update50 only by the leading y_hi0 adjustment.
!
	.align	16
.update49:
	cmp	counter,6
	ble	1f
	nop

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]
	stx	%i4,[%fp+tmp_px]
	stx	%i3,[%fp+tmp_py]
	mov	6,counter
1:
	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;

	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
	st	%f1,[%i5+4]		! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52		! (6_1) res0 += dtmp0;

	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;

	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
	ba	.cont52
	add	TBL,TBL_SHIFT+24,%i3	! delay slot

!
! .update50 : K = 6, pointer pair %i4 / %i3, leaves via .cont52.
!
	.align	16
.update50:
	cmp	counter,6
	ble	1f
	nop

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]
	stx	%i4,[%fp+tmp_px]
	stx	%i3,[%fp+tmp_py]
	mov	6,counter
1:
	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
	st	%f1,[%i5+4]		! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52		! (6_1) res0 += dtmp0;

	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;

	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
	ba	.cont52
	add	TBL,TBL_SHIFT+24,%i3	! delay slot

!
! .update51 : K = 6, pointer pair %i4 / %i3.
! Leaves via .cont51a when %l7 >= _0x00100000, otherwise clamps and leaves
! via .cont51b.  Only the sethi half of the 1.0 constant is formed here.
!
	.align	16
.update51:
	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont51a		! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,6		! delay slot of the bge above
	ble,a	1f
	nop

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]
	stx	%i4,[%fp+tmp_px]
	mov	6,counter
	stx	%i3,[%fp+tmp_py]
1:
	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;

	ba	.cont51b
	add	TBL,TBL_SHIFT+24,%i3	! delay slot

!
! .update53 : K = 7, pointer pair %i2 / %o0, leaves via .cont56.
! Differs from .update54 only by the leading y_hi0 adjustment.
!
	.align	16
.update53:
	cmp	counter,7
	ble	1f
	nop

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]
	stx	%i2,[%fp+tmp_px]
	stx	%o0,[%fp+tmp_py]
	mov	7,counter
1:
	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36;

	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
	st	%f1,[%i5+4]		! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;

	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);

	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;

	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
	ba	.cont56
	add	TBL,TBL_SHIFT+24,%o0	! delay slot

!
! .update54 : K = 7, pointer pair %i2 / %o0, leaves via .cont56.
!
	.align	16
.update54:
	cmp	counter,7
	ble	1f
	nop

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]
	stx	%i2,[%fp+tmp_px]
	stx	%o0,[%fp+tmp_py]
	mov	7,counter
1:
	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
	st	%f1,[%i5+4]		! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;

	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);

	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;

	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
	ba	.cont56
	add	TBL,TBL_SHIFT+24,%o0	! delay slot

!
! .update55 : K = 7, pointer pair %i2 / %o0.
! Leaves via .cont55a when %l7 >= _0x00100000, otherwise clamps and leaves
! via .cont55b.  Only the sethi half of the 1.0 constant is formed here.
!
	.align	16
.update55:
	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont55a		! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,7		! delay slot of the bge above
	ble,a	1f
	nop

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]
	stx	%i2,[%fp+tmp_px]
	mov	7,counter
	stx	%o0,[%fp+tmp_py]
1:
	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;

	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;

	ba	.cont55b
	add	TBL,TBL_SHIFT+24,%o0	! delay slot

!
! .update57 : K = 8, pointer pair %i4 / %i3, leaves via .cont60.
! Differs from .update58 only by the leading y_hi0 adjustment.
!
	.align	16
.update57:
	cmp	counter,8
	ble	1f
	nop

	sub	counter,8,counter
	st	counter,[%fp+tmp_counter]
	stx	%i4,[%fp+tmp_px]
	stx	%i3,[%fp+tmp_py]
	mov	8,counter
1:
	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;

	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
	st	%f3,[%i5+4]		! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;

	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;

	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4

	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
	ba	.cont60
	add	TBL,TBL_SHIFT+24,%i3	! delay slot

!
! .update58 : K = 8, pointer pair %i4 / %i3, leaves via .cont60.
!
	.align	16
.update58:
	cmp	counter,8
	ble	1f
	nop

	sub	counter,8,counter
	st	counter,[%fp+tmp_counter]
	stx	%i4,[%fp+tmp_px]
	stx	%i3,[%fp+tmp_py]
	mov	8,counter
1:
	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
	st	%f3,[%i5+4]		! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;

	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;

	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4

	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
	ba	.cont60
	add	TBL,TBL_SHIFT+24,%i3	! delay slot

!
! .update59 : K = 8, pointer pair %i4 / %i3.
! Leaves via .cont59a when %l7 >= _0x00100000, otherwise clamps and leaves
! via .cont59b.  Only the sethi half of the 1.0 constant is formed here.
!
	.align	16
.update59:
	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont59a		! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,8		! delay slot of the bge above
	ble,a	1f
	nop

	sub	counter,8,counter
	st	counter,[%fp+tmp_counter]
	stx	%i4,[%fp+tmp_px]
	mov	8,counter
	stx	%i3,[%fp+tmp_py]
1:
	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;

	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;

	ba	.cont59b
	add	TBL,TBL_SHIFT+24,%i3	! delay slot

!
! .exit : function epilogue.  `restore` sits in the delay slot of `ret`, so
! the register window is popped as control returns to the caller.
! SET_SIZE is a macro defined outside this chunk.
!
	.align	16
.exit:
	ret
	restore
	SET_SIZE(__vrhypot)
