/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ .file "__vrhypot.S" #include "libm.h" RO_DATA .align 64 .CONST_TBL: .word 0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465, .word 0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a, .word 0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6, .word 0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3, .word 0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b, .word 0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036, .word 0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01, .word 0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1, .word 0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb, .word 0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5, .word 0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405, .word 0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc, .word 0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7, .word 0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec, .word 0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b, .word 0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed, .word 0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150, .word 0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 
0x7fd49539, .word 0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66, .word 0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995, .word 0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d, .word 0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19, .word 0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404, .word 0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22, .word 0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47, .word 0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a, .word 0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06, .word 0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358, .word 0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20, .word 0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f, .word 0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197, .word 0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010, .word 0x42300000, 0 ! D2ON36 = 2**36 .word 0xffffff00, 0 ! DA0 .word 0xfff00000, 0 ! DA1 .word 0x3ff00000, 0 ! DONE = 1.0 .word 0x40000000, 0 ! DTWO = 2.0 .word 0x7fd00000, 0 ! D2ON1022 .word 0x3cb00000, 0 ! D2ONM52 .word 0x43200000, 0 ! D2ON51 .word 0x0007ffff, 0xffffffff ! 0x0007ffffffffffff #define stridex %l2 #define stridey %l3 #define stridez %l5 #define TBL_SHIFT 512 #define TBL %l1 #define counter %l4 #define _0x7ff00000 %l0 #define _0x00100000 %o5 #define _0x7fffffff %l6 #define D2ON36 %f4 #define DTWO %f6 #define DONE %f8 #define DA0 %f58 #define DA1 %f56 #define dtmp0 STACK_BIAS-0x80 #define dtmp1 STACK_BIAS-0x78 #define dtmp2 STACK_BIAS-0x70 #define dtmp3 STACK_BIAS-0x68 #define dtmp4 STACK_BIAS-0x60 #define dtmp5 STACK_BIAS-0x58 #define dtmp6 STACK_BIAS-0x50 #define dtmp7 STACK_BIAS-0x48 #define dtmp8 STACK_BIAS-0x40 #define dtmp9 STACK_BIAS-0x38 #define dtmp10 STACK_BIAS-0x30 #define dtmp11 STACK_BIAS-0x28 #define dtmp12 STACK_BIAS-0x20 #define dtmp13 STACK_BIAS-0x18 #define dtmp14 STACK_BIAS-0x10 #define dtmp15 STACK_BIAS-0x08 #define ftmp0 STACK_BIAS-0x100 #define tmp_px STACK_BIAS-0x98 #define tmp_py STACK_BIAS-0x90 #define tmp_counter STACK_BIAS-0x88 ! 
sizeof temp storage - must be a multiple of 16 for V9 #define tmps 0x100 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! !!!!! algorithm !!!!! ! hx0 = *(int*)px; ! hy0 = *(int*)py; ! ! ((float*)&x0)[0] = ((float*)px)[0]; ! ((float*)&x0)[1] = ((float*)px)[1]; ! ((float*)&y0)[0] = ((float*)py)[0]; ! ((float*)&y0)[1] = ((float*)py)[1]; ! ! hx0 &= 0x7fffffff; ! hy0 &= 0x7fffffff; ! ! diff0 = hy0 - hx0; ! j0 = diff0 >> 31; ! j0 &= diff0; ! j0 = hy0 - j0; ! j0 &= 0x7ff00000; ! ! j0 = 0x7ff00000 - j0; ! ll = (long long)j0 << 32; ! *(long long*)&scl0 = ll; ! ! if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 ) ! { ! lx = ((int*)px)[1]; ! ly = ((int*)py)[1]; ! ! if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0; ! else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0; ! else res0 = fabs(x0) * fabs(y0); ! ! ((float*)pz)[0] = ((float*)&res0)[0]; ! ((float*)pz)[1] = ((float*)&res0)[1]; ! ! px += stridex; ! py += stridey; ! pz += stridez; ! continue; ! } ! if ( hx0 < 0x00100000 && hy0 < 0x00100000 ) ! { ! lx = ((int*)px)[1]; ! ly = ((int*)py)[1]; ! ii = hx0 | hy0; ! ii |= lx; ! ii |= ly; ! if ( ii == 0 ) ! { ! res0 = 1.0 / 0.0; ! ((float*)pz)[0] = ((float*)&res0)[0]; ! ((float*)pz)[1] = ((float*)&res0)[1]; ! ! px += stridex; ! py += stridey; ! pz += stridez; ! continue; ! } ! x0 = fabs(x0); ! y0 = fabs(y0); ! if ( hx0 < 0x00080000 ) ! { ! x0 = *(long long*)&x0; ! } ! else ! { ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; ! x0 = vis_fand(x0, dtmp0); ! x0 = *(long long*)&x0; ! x0 += D2ON51; ! } ! x0 *= D2ONM52; ! if ( hy0 < 0x00080000 ) ! { ! y0 = *(long long*)&y0; ! } ! else ! { ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; ! y0 = vis_fand(y0, dtmp0); ! y0 = *(long long*)&y0; ! y0 += D2ON51; ! } ! y0 *= D2ONM52; ! *(long long*)&scl0 = 0x7fd0000000000000ULL; ! } ! else ! { ! x0 *= scl0; ! y0 *= scl0; ! } ! ! x_hi0 = x0 + D2ON36; ! y_hi0 = y0 + D2ON36; ! x_hi0 -= D2ON36; ! y_hi0 -= D2ON36; ! x_lo0 = x0 - x_hi0; ! y_lo0 = y0 - y_hi0; ! 
res0_hi = x_hi0 * x_hi0; ! dtmp0 = y_hi0 * y_hi0; ! res0_hi += dtmp0; ! res0_lo = x0 + x_hi0; ! res0_lo *= x_lo0; ! dtmp1 = y0 + y_hi0; ! dtmp1 *= y_lo0; ! res0_lo += dtmp1; ! ! dres = res0_hi + res0_lo; ! dexp0 = vis_fand(dres,DA1); ! iarr = ((int*)&dres)[0]; ! ! iarr >>= 11; ! iarr &= 0x1fc; ! dtmp0 = ((double*)((char*)dll1 + iarr))[0]; ! dd = vis_fpsub32(dtmp0, dexp0); ! ! dtmp0 = dd * dres; ! dtmp0 = DTWO - dtmp0; ! dd *= dtmp0; ! dtmp1 = dd * dres; ! dtmp1 = DTWO - dtmp1; ! dd *= dtmp1; ! dtmp2 = dd * dres; ! dtmp2 = DTWO - dtmp2; ! dres = dd * dtmp2; ! ! res0 = vis_fand(dres,DA0); ! ! dtmp0 = res0_hi * res0; ! dtmp0 = DONE - dtmp0; ! dtmp1 = res0_lo * res0; ! dtmp0 -= dtmp1; ! dtmp0 *= dres; ! res0 += dtmp0; ! ! res0 = sqrt ( res0 ); ! ! res0 = scl0 * res0; ! ! ((float*)pz)[0] = ((float*)&res0)[0]; ! ((float*)pz)[1] = ((float*)&res0)[1]; ! ! px += stridex; ! py += stridey; ! pz += stridez; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ENTRY(__vrhypot) save %sp,-SA(MINFRAME)-tmps,%sp PIC_SETUP(l7) PIC_SET(l7,.CONST_TBL,l1) wr %g0,0x82,%asi #ifdef __sparcv9 ldx [%fp+STACK_BIAS+176],stridez #else ld [%fp+STACK_BIAS+92],stridez #endif sll %i2,3,stridex sethi %hi(0x7ff00000),_0x7ff00000 st %i0,[%fp+tmp_counter] sll %i4,3,stridey sethi %hi(0x00100000),_0x00100000 stx %i1,[%fp+tmp_px] sll stridez,3,stridez sethi %hi(0x7ffffc00),_0x7fffffff stx %i3,[%fp+tmp_py] ldd [TBL+TBL_SHIFT],D2ON36 add _0x7fffffff,1023,_0x7fffffff ldd [TBL+TBL_SHIFT+8],DA0 ldd [TBL+TBL_SHIFT+16],DA1 ldd [TBL+TBL_SHIFT+24],DONE ldd [TBL+TBL_SHIFT+32],DTWO .begin: ld [%fp+tmp_counter],counter ldx [%fp+tmp_px],%i4 ldx [%fp+tmp_py],%i3 st %g0,[%fp+tmp_counter] .begin1: cmp counter,0 ble,pn %icc,.exit lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; add %i4,stridex,%i1 lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; add %i3,stridey,%i0 ! py += stridey and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 bge,pn %icc,.spec0 ! 
(7_0) if ( hx0 >= 0x7ff00000 ) and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 bge,pn %icc,.spec0 ! (7_0) if ( hy0 >= 0x7ff00000 ) sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 bl,pn %icc,.spec1 ! (7_0) if ( hx0 < 0x00100000 ) and %o1,%o3,%o1 ! (7_0) j0 &= diff0; .cont_spec0: sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; .cont_spec1: lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; mov %i1,%i2 lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; mov %i0,%o0 cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000 bge,pn %icc,.update0 ! (0_0) if ( hx0 >= 0x7ff00000 ) and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; bge,pn %icc,.update0 ! (0_0) if ( hy0 >= 0x7ff00000 ) sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 and %o1,%o3,%o1 ! (0_0) j0 &= diff0; bl,pn %icc,.update1 ! (0_0) if ( hx0 < 0x00100000 ) sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; .cont0: and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; .cont1: sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll; ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1]; lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; add %i1,stridex,%i4 ! px += stridex lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; add %i4,stridex,%i1 ! 
px += stridex fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; add %i0,stridey,%i3 ! py += stridey faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; add %i3,stridey,%i0 ! py += stridey faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000 stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff; bge,pn %icc,.update2 ! (1_0) if ( hx0 >= 0x7ff00000 ) fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; bge,pn %icc,.update3 ! (1_0) if ( hy0 >= 0x7ff00000 ) fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; and %o1,%o3,%o1 ! (1_0) j0 &= diff0; fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; bl,pn %icc,.update4 ! (1_0) if ( hx0 < 0x00100000 ) faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; .cont4: sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll; faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0]; lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; fmuld %f10,%f62,%f10 ! 
(0_0) x0 *= scl0; fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; mov %i1,%i2 faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py; mov %i0,%o0 faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000 bge,pn %icc,.update5 ! (2_0) if ( hx0 >= 0x7ff00000 ) stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; and %g1,_0x7fffffff,%l7 ! (2_0) hx0 &= 0x7fffffff; st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 bge,pn %icc,.update6 ! (2_0) if ( hy0 >= 0x7ff00000 ) fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; and %o1,%o3,%o1 ! (2_0) j0 &= diff0; fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; bl,pn %icc,.update7 ! (2_0) if ( hx0 < 0x00100000 ) faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; .cont7: sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; .cont8: stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0; lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; lda [%i3]%asi,%f12 ! 
(1_0) ((float*)&y0)[0] = ((float*)py)[0]; add %i1,stridex,%i4 ! px += stridex lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1]; ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; add %i4,stridex,%i1 ! px += stridex fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; sra %o2,11,%i3 ! (7_1) iarr >>= 11; faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc; add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; add %i0,stridey,%i3 ! py += stridey ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; add %i3,stridey,%i0 ! py += stridey faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; bge,pn %icc,.update9 ! (3_0) if ( hx0 >= 0x7ff00000 ) fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 bge,pn %icc,.update10 ! (3_0) if ( hy0 >= 0x7ff00000 ) fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; and %o1,%o3,%o1 ! (3_0) j0 &= diff0; fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; bl,pn %icc,.update11 ! 
(3_0) if ( hx0 < 0x00100000 ) faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; .cont11: sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; .cont12: sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0; ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0]; fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; sra %o2,11,%o4 ! (0_0) iarr >>= 11; faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr mov %i1,%i2 lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py; mov %i0,%o0 faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo; fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000 stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0); and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; bge,pn %icc,.update13 ! 
(4_0) if ( hx0 >= 0x7ff00000 ) st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 bge,pn %icc,.update14 ! (4_0) if ( hy0 >= 0x7ff00000 ) fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; and %o1,%o3,%o1 ! (4_0) j0 &= diff0; fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0; cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000 fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; bl,pn %icc,.update15 ! (4_0) if ( hx0 < 0x00100000 ) faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; .cont15: sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; .cont16: fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll; faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2; lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; add %i1,stridex,%i4 ! px += stridex fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; add %i4,stridex,%i1 ! px += stridex fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); fmuld %f10,%f62,%f10 ! 
(3_0) x0 *= scl0; fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; sra %o2,11,%i3 ! (1_0) iarr >>= 11; faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; add %i0,stridey,%i3 ! py += stridey ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; add %i3,stridey,%i0 ! py += stridey faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff; faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo; fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0]; bge,pn %icc,.update17 ! (5_0) if ( hx0 >= 0x7ff00000 ) fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 bge,pn %icc,.update18 ! (5_0) if ( hy0 >= 0x7ff00000 ) fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; and %o1,%o3,%o1 ! (5_0) j0 &= diff0; fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000 fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; bl,pn %icc,.update19 ! (5_0) if ( hx0 < 0x00100000 ) faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; .cont19a: fmuld %f44,%f48,%f10 ! 
(7_1) dtmp0 = res0_hi * res0; sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; .cont19b: fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; .cont20: fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0; lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0]; fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; sra %o2,11,%o4 ! (2_0) iarr >>= 11; faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2; add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr mov %i1,%i2 lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px; fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; mov %i0,%o0 faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36; and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; faddd %f44,%f38,%f22 ! 
(3_0) dres = res0_hi + res0_lo; fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; bge,pn %icc,.update21 ! (6_0) if ( hx0 >= 0x7ff00000 ) fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 bge,pn %icc,.update22 ! (6_0) if ( hy0 >= 0x7ff00000 ) fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; and %o1,%o3,%o1 ! (6_0) j0 &= diff0; fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000 fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; bl,pn %icc,.update23 ! (6_0) if ( hx0 < 0x00100000 ) faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; .cont23a: fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; .cont23b: fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; .cont24: fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; lda [%i4+4]%asi,%f1 ! 
(5_0) ((float*)&x0)[1] = ((float*)px)[1]; fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0; fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0]; fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; add %i1,stridex,%i4 ! px += stridex lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; add %i4,stridex,%i1 ! px += stridex ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; sra %o2,11,%i3 ! (3_0) iarr >>= 11; faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1; fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; add %i0,stridey,%i3 ! py += stridey ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; add %i3,stridey,%i0 ! py += stridey faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36; and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; bge,pn %icc,.update25 ! (7_0) if ( hx0 >= 0x7ff00000 ) fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 bge,pn %icc,.update26 ! 
(7_0) if ( hy0 >= 0x7ff00000 ) fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; and %o1,%o3,%o1 ! (7_0) j0 &= diff0; fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; bl,pn %icc,.update27 ! (7_0) if ( hx0 < 0x00100000 ) faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; .cont27a: fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; .cont27b: fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; .cont28: fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll; faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0; fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1]; fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; ldd [%fp+dtmp0],%f0 ! 
(7_1) *(long long*)&scl0 = ll; fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; sra %o2,11,%o4 ! (4_0) iarr >>= 11; faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1; and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc; fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2; cmp counter,8 bl,pn %icc,.tail nop ba .main_loop sub counter,8,counter .align 16 .main_loop: fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 ); add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1; fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres; mov %i1,%i2 ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f10,D2ON36,%f50 ! (6_1) x_hi0 = x0 + D2ON36; nop mov %i0,%o0 lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; faddd %f60,D2ON36,%f2 ! (6_1) y_hi0 = y0 + D2ON36; faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo; and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0]; fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0; fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1; cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000 st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0]; fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0); and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1]; bge,pn %icc,.update29 ! (0_0) if ( hx0 >= 0x7ff00000 ) fsubd %f50,D2ON36,%f20 ! (6_1) x_hi0 -= D2ON36; cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; bge,pn %icc,.update30 ! (0_0) if ( hy0 >= 0x7ff00000 ) fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36; fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; and %o1,%o3,%o1 ! (0_0) j0 &= diff0; cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 bl,pn %icc,.update31 ! 
(0_0) if ( hx0 < 0x00100000 ) fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); .cont31: fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; nop fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; nop fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; .cont32: fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0; sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll; faddd %f60,%f2,%f50 ! (6_1) dtmp1 = y0 + y_hi0; fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0; nop bn,pn %icc,.exit fsubd %f60,%f2,%f2 ! (6_1) y_lo0 = y0 - y_hi0; fmuld %f62,%f28,%f28 ! (6_1) res0_lo *= x_lo0; nop ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; faddd %f0,%f46,%f30 ! (6_1) res0_hi += dtmp0; nop nop lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0; nop nop lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1]; fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0; fmuld %f50,%f2,%f46 ! (6_1) dtmp1 *= y_lo0; nop lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2; add %i1,stridex,%i4 ! px += stridex nop lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; bn,pn %icc,.exit fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres; add %i4,stridex,%i1 ! px += stridex ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0]; fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1); fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; nop ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll; fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1; fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; sra %o2,11,%i3 ! 
(5_1) iarr >>= 11; nop faddd %f28,%f46,%f40 ! (6_1) res0_lo += dtmp1; and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc; nop bn,pn %icc,.exit fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2; fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 ); lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1; fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres; add %i0,stridey,%i3 ! py += stridey ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; nop add %i3,stridey,%i0 ! py += stridey lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; faddd %f30,%f40,%f18 ! (6_1) dres = res0_hi + res0_lo; and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; st %f18,[%fp+ftmp0] ! (6_1) iarr = ((int*)&dres)[0]; fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0; fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1; cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000 st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0]; fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0); and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff; nop bge,pn %icc,.update33 ! (1_0) if ( hx0 >= 0x7ff00000 ) fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; bge,pn %icc,.update34 ! (1_0) if ( hy0 >= 0x7ff00000 ) faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; and %o1,%o3,%o1 ! (1_0) j0 &= diff0; add %i5,stridez,%i5 ! pz += stridez stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 fsubd %f10,%f20,%f0 ! 
(7_1) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; bl,pn %icc,.update35 ! (1_0) if ( hx0 < 0x00100000 ) faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; .cont35a: fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; nop sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; .cont35b: fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; nop nop fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; .cont36: fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; nop ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll; faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0; nop lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0; nop nop lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; bn,pn %icc,.exit fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; nop lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0]; fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2; nop nop lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; bn,pn %icc,.exit fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres; nop ld [%fp+ftmp0],%o2 ! (6_1) iarr = ((int*)&dres)[0]; fand %f18,DA1,%f2 ! (6_1) dexp0 = vis_fand(dres,DA1); fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; nop ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll; fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1; fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; sra %o2,11,%g1 ! (6_1) iarr >>= 11; nop faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; nop and %g1,0x1fc,%g1 ! (6_1) iarr &= 0x1fc; bn,pn %icc,.exit fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2; fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 ); lda [%i1]0x82,%o1 ! 
(2_0) hx0 = *(int*)px; add %g1,TBL,%g1 ! (6_1) (char*)dll1 + iarr fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1; fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres; mov %i1,%i2 ld [%g1],%f28 ! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; nop mov %i0,%o0 lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py; faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0; fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1; cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000 st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0]; fpsub32 %f28,%f2,%f28 ! (6_1) dd = vis_fpsub32(dtmp0, dexp0); and %g1,_0x7fffffff,%l7 ! (2_0) hy0 &= 0x7fffffff; nop bge,pn %icc,.update37 ! (2_0) if ( hx0 >= 0x7ff00000 ) fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres; sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; bge,pn %icc,.update38 ! (2_0) if ( hy0 >= 0x7ff00000 ) faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; and %o1,%o3,%o1 ! (2_0) j0 &= diff0; add %i5,stridez,%i5 ! pz += stridez stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; bl,pn %icc,.update39 ! (2_0) if ( hx0 < 0x00100000 ) faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; .cont39a: fmuld %f32,%f48,%f10 !
(4_1) dtmp0 = res0_hi * res0; sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; nop fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; .cont39b: fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; nop nop fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; .cont40: fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; nop ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0; fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0; nop lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; fmuld %f28,%f20,%f54 ! (6_1) dd *= dtmp0; nop nop lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; bn,pn %icc,.exit fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; nop lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2; add %i1,stridex,%i4 ! px += stridex nop lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1]; bn,pn %icc,.exit fmuld %f54,%f18,%f46 ! (6_1) dtmp1 = dd * dres; add %i4,stridex,%i1 ! px += stridex ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; nop ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll; fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1; fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; sra %o2,11,%i3 ! (7_1) iarr >>= 11; nop faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc; nop bn,pn %icc,.exit fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2; fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 ); add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; fsubd DTWO,%f46,%f62 ! (6_1) dtmp1 = DTWO - dtmp1; fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres; add %i0,stridey,%i3 ! py += stridey ld [%o4],%f26 ! 
(7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; nop add %i3,stridey,%i0 ! py += stridey lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0; fmuld %f54,%f62,%f24 ! (6_1) dd *= dtmp1; cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0]; fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; nop bge,pn %icc,.update41 ! (3_0) if ( hx0 >= 0x7ff00000 ) fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; bge,pn %icc,.update42 ! (3_0) if ( hy0 >= 0x7ff00000 ) faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0; and %o1,%o3,%o1 ! (3_0) j0 &= diff0; add %i5,stridez,%i5 ! pz += stridez stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; bl,pn %icc,.update43 ! (3_0) if ( hx0 < 0x00100000 ) faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; .cont43a: fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; nop sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; .cont43b: fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; sllx %g1,32,%g1 ! 
(3_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; nop nop fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0; .cont44: fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0; nop ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0; nop lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0]; fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; nop nop lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; bn,pn %icc,.exit fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; nop lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; fsubd DTWO,%f18,%f20 ! (6_1) dtmp2 = DTWO - dtmp2; nop nop lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; bn,pn %icc,.exit fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; nop ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; nop ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll; fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1; fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; sra %o2,11,%o4 ! (0_0) iarr >>= 11; nop faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; nop bn,pn %icc,.exit fmuld %f24,%f20,%f26 ! (6_1) dres = dd * dtmp2; fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 ); add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres; mov %i1,%i2 ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; nop mov %i0,%o0 lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py; faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0; nop and %o1,_0x7fffffff,%o7 !
(4_0) hx0 &= 0x7fffffff; faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo; fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000 st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0); and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0]; bge,pn %icc,.update45 ! (4_0) if ( hx0 >= 0x7ff00000 ) fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 bge,pn %icc,.update46 ! (4_0) if ( hy0 >= 0x7ff00000 ) fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; and %o1,%o3,%o1 ! (4_0) j0 &= diff0; cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000 bl,pn %icc,.update47 ! (4_0) if ( hx0 < 0x00100000 ) fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); .cont47a: fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0; stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; nop sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; .cont47b: fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; nop nop fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; .cont48: fmuld %f62,%f0,%f0 ! 
(2_0) res0_lo *= x_lo0; nop ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll; faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; fsubd DONE,%f10,%f60 ! (6_1) dtmp0 = DONE - dtmp0; nop lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; nop nop lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; bn,pn %icc,.exit fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; nop lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2; lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; add %i1,stridex,%i4 ! px += stridex nop bn,pn %icc,.exit fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; add %i4,stridex,%i1 ! px += stridex ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; nop ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; fsubd %f60,%f40,%f46 ! (6_1) dtmp0 -= dtmp1; fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; sra %o2,11,%i3 ! (1_0) iarr >>= 11; nop faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; nop bn,pn %icc,.exit fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; fmuld %f46,%f26,%f52 ! (6_1) dtmp0 *= dres; add %i0,stridey,%i3 ! py += stridey ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; nop add %i3,stridey,%i0 ! py += stridey lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0; and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff; nop faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo; fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 
0x7ff00000 st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0]; fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; bge,pn %icc,.update49 ! (5_0) if ( hx0 >= 0x7ff00000 ) fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 bge,pn %icc,.update50 ! (5_0) if ( hy0 >= 0x7ff00000 ) fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; and %o1,%o3,%o1 ! (5_0) j0 &= diff0; cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000 bl,pn %icc,.update51 ! (5_0) if ( hx0 < 0x00100000 ) fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); .cont51a: fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; nop fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; .cont51b: fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; nop nop fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; .cont52: fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; nop ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; fsubd DONE,%f10,%f60 ! 
(7_1) dtmp0 = DONE - dtmp0; nop lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0; nop nop lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; bn,pn %icc,.exit fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; nop lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0]; fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; nop nop lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; bn,pn %icc,.exit fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; nop ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; nop ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; sra %o2,11,%o4 ! (2_0) iarr >>= 11; nop faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; nop bn,pn %icc,.exit fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2; fsqrtd %f52,%f24 ! (6_1) res0 = sqrt ( res0 ); add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px; fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; mov %i1,%i2 ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; nop mov %i0,%o0 lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36; fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; nop faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo; fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; bge,pn %icc,.update53 ! 
(6_0) if ( hx0 >= 0x7ff00000 ) fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 bge,pn %icc,.update54 ! (6_0) if ( hy0 >= 0x7ff00000 ) fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; and %o1,%o3,%o1 ! (6_0) j0 &= diff0; cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000 bl,pn %icc,.update55 ! (6_0) if ( hx0 < 0x00100000 ) fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); .cont55a: fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; nop fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; .cont55b: fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; nop nop fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; .cont56: fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; nop ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; nop nop fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; nop nop fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0; fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; nop lda [%i3]%asi,%f12 ! 
(5_0) ((float*)&y0)[0] = ((float*)py)[0]; fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; nop add %i1,stridex,%i4 ! px += stridex lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; bn,pn %icc,.exit fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; add %i4,stridex,%i1 ! px += stridex ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; nop ldd [%fp+dtmp14],%f0 ! (6_1) *(long long*)&scl0 = ll; fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; sra %o2,11,%i3 ! (3_0) iarr >>= 11; nop faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; nop bn,pn %icc,.exit fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1; fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; add %i0,stridey,%i3 ! py += stridey ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; nop add %i3,stridey,%i0 ! py += stridey lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36; fmuld %f0,%f24,%f2 ! (6_1) res0 = scl0 * res0; and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; nop faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; st %f2,[%i5] ! (6_1) ((float*)pz)[0] = ((float*)&res0)[0]; bge,pn %icc,.update57 ! (7_0) if ( hx0 >= 0x7ff00000 ) fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 bge,pn %icc,.update58 ! 
(7_0) if ( hy0 >= 0x7ff00000 ) fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; and %o1,%o3,%o1 ! (7_0) j0 &= diff0; cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 bl,pn %icc,.update59 ! (7_0) if ( hx0 < 0x00100000 ) fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); .cont59a: fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; add %i5,stridez,%i5 ! pz += stridez faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; nop fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; .cont59b: fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; nop nop fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; .cont60: fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; nop ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll; faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0; fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; nop lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; bn,pn %icc,.exit lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; nop nop fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; nop lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; nop nop lda [%o0+4]%asi,%f13 ! 
(6_0) ((float*)&y0)[1] = ((float*)py)[1]; bn,pn %icc,.exit fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; nop ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; nop ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll; fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; sra %o2,11,%o4 ! (4_0) iarr >>= 11; nop faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1; and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc; subcc counter,8,counter ! counter -= 8; bpos,pt %icc,.main_loop fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2; add counter,8,counter .tail: subcc counter,1,counter bneg .begin nop fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 ); add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1; fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres; ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0; st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0]; faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo; subcc counter,1,counter st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1]; bneg .begin add %i5,stridez,%i5 ! pz += stridez fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1; st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0]; fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0); fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0; fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0; fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0; fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0; fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2; fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres; ld [%fp+ftmp0],%o2 ! 
(5_1) iarr = ((int*)&dres)[0]; fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1); ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll; fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1; sra %o2,11,%i3 ! (5_1) iarr >>= 11; and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc; fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2; fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 ); add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1; fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres; ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0; fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1; fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0); st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0]; fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; subcc counter,1,counter bneg .begin add %i5,stridez,%i5 ! pz += stridez fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0; fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0; fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2; fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres; ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll; fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1; fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2; fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 ); fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1; fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres; fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0; fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1; st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0]; subcc counter,1,counter st %f1,[%i5+4] ! 
(1_1) ((float*)pz)[1] = ((float*)&res0)[1]; bneg .begin add %i5,stridez,%i5 ! pz += stridez faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0; fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2; ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll; fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1; fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2; fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 ); fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres; fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0; st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0]; st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0; subcc counter,1,counter bneg .begin add %i5,stridez,%i5 ! pz += stridez fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0; ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll; fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1; fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 ); fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres; fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0; st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0]; st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; subcc counter,1,counter bneg .begin add %i5,stridez,%i5 ! pz += stridez ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0; st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; subcc counter,1,counter bneg .begin add %i5,stridez,%i5 !
pz += stridez ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; ba .begin add %i5,stridez,%i5 .align 16 .spec0: cmp %o7,_0x7ff00000 ! hx0 ? 0x7ff00000 bne 1f ! if ( hx0 != 0x7ff00000 ) ld [%i4+4],%i2 ! lx = ((int*)px)[1]; cmp %i2,0 ! lx ? 0 be 3f ! if ( lx == 0 ) nop 1: cmp %l7,_0x7ff00000 ! hy0 ? 0x7ff00000 bne 2f ! if ( hy0 != 0x7ff00000 ) ld [%i3+4],%o2 ! ly = ((int*)py)[1]; cmp %o2,0 ! ly ? 0 be 3f ! if ( ly == 0 ) 2: ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0]; ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1]; ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0]; add %i4,stridex,%i4 ! px += stridex ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1]; fabsd %f0,%f0 fabsd %f2,%f2 fmuld %f0,%f2,%f0 ! res0 = fabs(x0) * fabs(y0); add %i3,stridey,%i3 ! py += stridey; st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0]; st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1]; add %i5,stridez,%i5 ! pz += stridez ba .begin1 sub counter,1,counter 3: add %i4,stridex,%i4 ! px += stridex add %i3,stridey,%i3 ! py += stridey st %g0,[%i5] ! ((int*)pz)[0] = 0; add %i5,stridez,%i5 ! pz += stridez; st %g0,[%i5+4] ! ((int*)pz)[1] = 0; ba .begin1 sub counter,1,counter .align 16 .spec1: and %o1,%o3,%o1 ! (7_0) j0 &= diff0; cmp %l7,_0x00100000 ! (7_0) hy0 ? 0x00100000 bge,pn %icc,.cont_spec0 ! (7_0) if ( hy0 < 0x00100000 ) ld [%i4+4],%i2 ! lx = ((int*)px)[1]; or %o7,%l7,%g5 ! ii = hx0 | hy0; fzero %f0 ld [%i3+4],%o2 ! ly = ((int*)py)[1]; or %i2,%g5,%g5 ! ii |= lx; orcc %o2,%g5,%g5 ! ii |= ly; bnz,a,pn %icc,1f ! if ( ii != 0 ) sethi %hi(0x00080000),%i2 fdivd DONE,%f0,%f0 ! res0 = 1.0 / 0.0; st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0]; add %i4,stridex,%i4 ! px += stridex; add %i3,stridey,%i3 ! py += stridey; st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1]; add %i5,stridez,%i5 ! 
pz += stridez; ba .begin1 sub counter,1,counter 1: ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0]; ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1]; ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0]; fabsd %f0,%f0 ! x0 = fabs(x0); ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1]; ldd [TBL+TBL_SHIFT+64],%f12 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; add %fp,dtmp2,%i4 add %fp,dtmp3,%i3 fabsd %f2,%f2 ! y0 = fabs(y0); ldd [TBL+TBL_SHIFT+56],%f10 ! D2ON51 ldx [TBL+TBL_SHIFT+48],%g5 ! D2ONM52 cmp %o7,%i2 ! hx0 ? 0x00080000 bl,a 1f ! if ( hx0 < 0x00080000 ) fxtod %f0,%f0 ! x0 = *(long long*)&x0; fand %f0,%f12,%f0 ! x0 = vis_fand(x0, dtmp0); fxtod %f0,%f0 ! x0 = *(long long*)&x0; faddd %f0,%f10,%f0 ! x0 += D2ON51; 1: std %f0,[%i4] ldx [TBL+TBL_SHIFT+40],%g1 ! D2ON1022 cmp %l7,%i2 ! hy0 ? 0x00080000 bl,a 1f ! if ( hy0 < 0x00080000 ) fxtod %f2,%f2 ! y0 = *(long long*)&y0; fand %f2,%f12,%f2 ! y0 = vis_fand(y0, dtmp0); fxtod %f2,%f2 ! y0 = *(long long*)&y0; faddd %f2,%f10,%f2 ! y0 += D2ON51; 1: std %f2,[%i3] stx %g5,[%fp+dtmp15] ! D2ONM52 ba .cont_spec1 stx %g1,[%fp+dtmp0] ! D2ON1022 .align 16 .update0: cmp counter,1 ble 1f nop sub counter,1,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 1,counter 1: sethi %hi(0x3ff00000),%o4 add TBL,TBL_SHIFT+24,%i2 ba .cont1 add TBL,TBL_SHIFT+24,%o0 .align 16 .update1: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont0 ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,1 ble,a 1f nop sub counter,1,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] mov 1,counter stx %o0,[%fp+tmp_py] 1: sethi %hi(0x3ff00000),%o4 add TBL,TBL_SHIFT+24,%i2 ba .cont1 add TBL,TBL_SHIFT+24,%o0 .align 16 .update2: cmp counter,2 ble 1f nop sub counter,2,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 2,counter 1: fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! 
(7_1) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; sethi %hi(0x3ff00000),%o4 add TBL,TBL_SHIFT+24,%i4 ba .cont4 add TBL,TBL_SHIFT+24,%i3 .align 16 .update3: cmp counter,2 ble 1f nop sub counter,2,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 2,counter 1: fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; sethi %hi(0x3ff00000),%o4 add TBL,TBL_SHIFT+24,%i4 ba .cont4 add TBL,TBL_SHIFT+24,%i3 .align 16 .update4: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,a,pn %icc,.cont4 ! (0_0) if ( hy0 < 0x00100000 ) sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; cmp counter,2 ble,a 1f nop sub counter,2,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] mov 2,counter stx %i3,[%fp+tmp_py] 1: sethi %hi(0x3ff00000),%o4 add TBL,TBL_SHIFT+24,%i4 ba .cont4 add TBL,TBL_SHIFT+24,%i3 .align 16 .update5: cmp counter,3 ble 1f nop sub counter,3,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 3,counter 1: st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 sllx %g1,32,%g1 ba .cont8 add TBL,TBL_SHIFT+24,%o0 .align 16 .update6: cmp counter,3 ble 1f nop sub counter,3,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 3,counter 1: fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! 
(0_0) res0_lo = x0 + x_hi0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 sllx %g1,32,%g1 ba .cont8 add TBL,TBL_SHIFT+24,%o0 .align 16 .update7: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont7 ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,3 ble,a 1f nop sub counter,3,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] mov 3,counter stx %o0,[%fp+tmp_py] 1: sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 sllx %g1,32,%g1 ba .cont8 add TBL,TBL_SHIFT+24,%o0 .align 16 .update9: cmp counter,4 ble 1f nop sub counter,4,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 4,counter 1: st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 ba .cont12 add TBL,TBL_SHIFT+24,%i3 .align 16 .update10: cmp counter,4 ble 1f nop sub counter,4,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 4,counter 1: fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 ba .cont12 add TBL,TBL_SHIFT+24,%i3 .align 16 .update11: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont11 ! 
(0_0) if ( hy0 < 0x00100000 ) cmp counter,4 ble,a 1f nop sub counter,4,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] mov 4,counter stx %i3,[%fp+tmp_py] 1: sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; ba .cont12 add TBL,TBL_SHIFT+24,%i3 .align 16 .update13: cmp counter,5 ble 1f nop sub counter,5,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 5,counter 1: fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 ba .cont16 add TBL,TBL_SHIFT+24,%o0 .align 16 .update14: cmp counter,5 ble 1f nop sub counter,5,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 5,counter 1: fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 ba .cont16 add TBL,TBL_SHIFT+24,%o0 .align 16 .update15: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont15 ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,5 ble,a 1f nop sub counter,5,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] mov 5,counter stx %o0,[%fp+tmp_py] 1: sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd DTWO,%f50,%f20 ! 
(0_0) dtmp0 = DTWO - dtmp0; ba .cont16 add TBL,TBL_SHIFT+24,%o0 .align 16 .update17: cmp counter,6 ble 1f nop sub counter,6,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 6,counter 1: fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; ba .cont20 add TBL,TBL_SHIFT+24,%i3 .align 16 .update18: cmp counter,6 ble 1f nop sub counter,6,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 6,counter 1: fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp11] ! 
(5_0) *(long long*)&scl0 = ll; ba .cont20 add TBL,TBL_SHIFT+24,%i3 .align 16 .update19: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont19a ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,6 ble,a 1f nop sub counter,6,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] mov 6,counter stx %i3,[%fp+tmp_py] 1: fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; ba .cont19b add TBL,TBL_SHIFT+24,%i3 .align 16 .update21: cmp counter,7 ble 1f nop sub counter,7,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 7,counter 1: fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; ba .cont24 add TBL,TBL_SHIFT+24,%o0 .align 16 .update22: cmp counter,7 ble 1f nop sub counter,7,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 7,counter 1: fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f20 ! 
(4_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; ba .cont24 add TBL,TBL_SHIFT+24,%o0 .align 16 .update23: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont23a ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,7 ble,a 1f nop sub counter,7,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] mov 7,counter stx %o0,[%fp+tmp_py] 1: fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; ba .cont23b add TBL,TBL_SHIFT+24,%o0 .align 16 .update25: cmp counter,8 ble 1f nop sub counter,8,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 8,counter 1: fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp15] ! 
(7_0) *(long long*)&scl0 = ll; ba .cont28 add TBL,TBL_SHIFT+24,%i3 .align 16 .update26: cmp counter,8 ble 1f nop sub counter,8,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 8,counter 1: fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; ba .cont28 add TBL,TBL_SHIFT+24,%i3 .align 16 .update27: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont27a ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,8 ble,a 1f nop sub counter,8,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] mov 8,counter stx %i3,[%fp+tmp_py] 1: fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; ba .cont27b add TBL,TBL_SHIFT+24,%i3 .align 16 .update29: cmp counter,1 ble 1f nop sub counter,1,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 1,counter 1: fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36; fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f28 ! 
(6_1) x_lo0 = x0 - x_hi0; fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; sethi %hi(0x3ff00000),%o4 add TBL,TBL_SHIFT+24,%i2 fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; ba .cont32 add TBL,TBL_SHIFT+24,%o0 .align 16 .update30: cmp counter,1 ble 1f nop sub counter,1,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 1,counter 1: fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; sethi %hi(0x3ff00000),%o4 add TBL,TBL_SHIFT+24,%i2 fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; ba .cont32 add TBL,TBL_SHIFT+24,%o0 .align 16 .update31: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont31 ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,1 ble,a 1f nop sub counter,1,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] mov 1,counter stx %o0,[%fp+tmp_py] 1: fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; sethi %hi(0x3ff00000),%o4 add TBL,TBL_SHIFT+24,%i2 fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; ba .cont32 add TBL,TBL_SHIFT+24,%o0 .align 16 .update33: cmp counter,2 ble 1f nop sub counter,2,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 2,counter 1: st %f1,[%i5+4] ! 
(0_1) ((float*)pz)[1] = ((float*)&res0)[1]; fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; add %i5,stridez,%i5 ! pz += stridez stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%o4 add TBL,TBL_SHIFT+24,%i4 fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; ba .cont36 add TBL,TBL_SHIFT+24,%i3 .align 16 .update34: cmp counter,2 ble 1f nop sub counter,2,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 2,counter 1: add %i5,stridez,%i5 ! pz += stridez stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%o4 add TBL,TBL_SHIFT+24,%i4 fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; stx %o4,[%fp+dtmp3] ! 
(1_0) *(long long*)&scl0 = ll; ba .cont36 add TBL,TBL_SHIFT+24,%i3 .align 16 .update35: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont35a ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,2 ble,a 1f nop sub counter,2,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] mov 2,counter stx %i3,[%fp+tmp_py] 1: fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; sethi %hi(0x3ff00000),%o4 add TBL,TBL_SHIFT+24,%i4 fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; ba .cont35b add TBL,TBL_SHIFT+24,%i3 .align 16 .update37: cmp counter,3 ble 1f nop sub counter,3,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 3,counter 1: st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres; faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; add %i5,stridez,%i5 ! pz += stridez stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; ba .cont40 add TBL,TBL_SHIFT+24,%o0 .align 16 .update38: cmp counter,3 ble 1f nop sub counter,3,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 3,counter 1: add %i5,stridez,%i5 ! pz += stridez stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; fand %f26,DA0,%f48 ! 
(4_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; ba .cont40 add TBL,TBL_SHIFT+24,%o0 .align 16 .update39: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont39a ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,3 ble,a 1f nop sub counter,3,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] mov 3,counter stx %o0,[%fp+tmp_py] 1: fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; ba .cont39b add TBL,TBL_SHIFT+24,%o0 .align 16 .update41: cmp counter,4 ble 1f nop sub counter,4,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 4,counter 1: st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0; add %i5,stridez,%i5 ! pz += stridez stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! 
(7_1) dtmp0 = DTWO - dtmp0; fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; ba .cont44 add TBL,TBL_SHIFT+24,%i3 .align 16 .update42: cmp counter,4 ble 1f nop sub counter,4,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 4,counter 1: add %i5,stridez,%i5 ! pz += stridez stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; ba .cont44 add TBL,TBL_SHIFT+24,%i3 .align 16 .update43: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont43a ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,4 ble,a 1f nop sub counter,4,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] mov 4,counter stx %i3,[%fp+tmp_py] 1: fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 fsubd DTWO,%f50,%f20 ! 
(7_1) dtmp0 = DTWO - dtmp0; ba .cont43b add TBL,TBL_SHIFT+24,%i3 .align 16 .update45: cmp counter,5 ble 1f nop sub counter,5,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 5,counter 1: fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; ba .cont48 add TBL,TBL_SHIFT+24,%o0 .align 16 .update46: cmp counter,5 ble 1f nop sub counter,5,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 5,counter 1: fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! 
(0_0) dtmp0 = DTWO - dtmp0; fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; ba .cont48 add TBL,TBL_SHIFT+24,%o0 .align 16 .update47: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont47a ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,5 ble,a 1f nop sub counter,5,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] mov 5,counter stx %o0,[%fp+tmp_py] 1: fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; ba .cont47b add TBL,TBL_SHIFT+24,%o0 .align 16 .update49: cmp counter,6 ble 1f nop sub counter,6,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 6,counter 1: fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; fmuld %f24,%f22,%f22 ! 
(0_0) dtmp2 = dd * dres; faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; ba .cont52 add TBL,TBL_SHIFT+24,%i3 .align 16 .update50: cmp counter,6 ble 1f nop sub counter,6,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 6,counter 1: fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; ba .cont52 add TBL,TBL_SHIFT+24,%i3 .align 16 .update51: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont51a ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,6 ble,a 1f nop sub counter,6,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] mov 6,counter stx %i3,[%fp+tmp_py] 1: fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! 
(3_0) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; ba .cont51b add TBL,TBL_SHIFT+24,%i3 .align 16 .update53: cmp counter,7 ble 1f nop sub counter,7,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 7,counter 1: fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; ba .cont56 add TBL,TBL_SHIFT+24,%o0 .align 16 .update54: cmp counter,7 ble 1f nop sub counter,7,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] stx %o0,[%fp+tmp_py] mov 7,counter 1: fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; fsubd %f10,%f46,%f2 ! 
(4_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; ba .cont56 add TBL,TBL_SHIFT+24,%o0 .align 16 .update55: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont55a ! (0_0) if ( hy0 < 0x00100000 ) cmp counter,7 ble,a 1f nop sub counter,7,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] mov 7,counter stx %o0,[%fp+tmp_py] 1: fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i2 fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; ba .cont55b add TBL,TBL_SHIFT+24,%o0 .align 16 .update57: cmp counter,8 ble 1f nop sub counter,8,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 8,counter 1: fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! 
(5_0) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; ba .cont60 add TBL,TBL_SHIFT+24,%i3 .align 16 .update58: cmp counter,8 ble 1f nop sub counter,8,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] stx %i3,[%fp+tmp_py] mov 8,counter 1: fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; ba .cont60 add TBL,TBL_SHIFT+24,%i3 .align 16 .update59: cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 bge,pn %icc,.cont59a ! 
(0_0) if ( hy0 < 0x00100000 ) cmp counter,8 ble,a 1f nop sub counter,8,counter st counter,[%fp+tmp_counter] stx %i4,[%fp+tmp_px] mov 8,counter stx %i3,[%fp+tmp_py] 1: fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; add %i5,stridez,%i5 ! pz += stridez faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; sethi %hi(0x3ff00000),%g1 add TBL,TBL_SHIFT+24,%i4 fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; ba .cont59b add TBL,TBL_SHIFT+24,%i3 .align 16 .exit: ret restore SET_SIZE(__vrhypot)