/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ .file "__vatanf.S" #include "libm.h" RO_DATA .align 64 .CONST_TBL: .word 0x3fefffff, 0xfffccbbc ! K0 = 9.99999999976686608841e-01 .word 0xbfd55554, 0x51c6b90f ! K1 = -3.33333091601972730504e-01 .word 0x3fc98d6d, 0x926596cc ! K2 = 1.99628540499523379702e-01 .word 0x00020000, 0x00000000 ! DC1 .word 0xfffc0000, 0x00000000 ! DC2 .word 0x7ff00000, 0x00000000 ! DC3 .word 0x3ff00000, 0x00000000 ! DONE = 1.0 .word 0x40000000, 0x00000000 ! DTWO = 2.0 ! parr0 = *(int*)&(1.0 / *(double*)&(((long long)i << 45) | 0x3ff0100000000000ULL)) + 0x3ff00000, i = [0, 127] .word 0x7fdfe01f, 0x7fdfa11c, 0x7fdf6310, 0x7fdf25f6 .word 0x7fdee9c7, 0x7fdeae80, 0x7fde741a, 0x7fde3a91 .word 0x7fde01e0, 0x7fddca01, 0x7fdd92f2, 0x7fdd5cac .word 0x7fdd272c, 0x7fdcf26e, 0x7fdcbe6d, 0x7fdc8b26 .word 0x7fdc5894, 0x7fdc26b5, 0x7fdbf583, 0x7fdbc4fd .word 0x7fdb951e, 0x7fdb65e2, 0x7fdb3748, 0x7fdb094b .word 0x7fdadbe8, 0x7fdaaf1d, 0x7fda82e6, 0x7fda5741 .word 0x7fda2c2a, 0x7fda01a0, 0x7fd9d79f, 0x7fd9ae24 .word 0x7fd9852f, 0x7fd95cbb, 0x7fd934c6, 0x7fd90d4f .word 0x7fd8e652, 0x7fd8bfce, 0x7fd899c0, 0x7fd87427 .word 0x7fd84f00, 0x7fd82a4a, 0x7fd80601, 0x7fd7e225 .word 0x7fd7beb3, 0x7fd79baa, 0x7fd77908, 0x7fd756ca .word 0x7fd734f0, 0x7fd71378, 0x7fd6f260, 0x7fd6d1a6 .word 0x7fd6b149, 0x7fd69147, 0x7fd6719f, 0x7fd6524f .word 0x7fd63356, 0x7fd614b3, 0x7fd5f664, 0x7fd5d867 .word 0x7fd5babc, 0x7fd59d61, 0x7fd58056, 0x7fd56397 .word 0x7fd54725, 0x7fd52aff, 0x7fd50f22, 0x7fd4f38f .word 0x7fd4d843, 0x7fd4bd3e, 0x7fd4a27f, 0x7fd48805 .word 0x7fd46dce, 0x7fd453d9, 0x7fd43a27, 0x7fd420b5 .word 0x7fd40782, 0x7fd3ee8f, 0x7fd3d5d9, 0x7fd3bd60 .word 0x7fd3a524, 0x7fd38d22, 0x7fd3755b, 0x7fd35dce .word 0x7fd34679, 0x7fd32f5c, 0x7fd31877, 0x7fd301c8 .word 0x7fd2eb4e, 0x7fd2d50a, 0x7fd2bef9, 0x7fd2a91c .word 0x7fd29372, 0x7fd27dfa, 0x7fd268b3, 0x7fd2539d .word 0x7fd23eb7, 0x7fd22a01, 0x7fd21579, 0x7fd20120 .word 0x7fd1ecf4, 0x7fd1d8f5, 0x7fd1c522, 0x7fd1b17c .word 0x7fd19e01, 0x7fd18ab0, 0x7fd1778a, 0x7fd1648d .word 0x7fd151b9, 0x7fd13f0e, 0x7fd12c8b, 0x7fd11a30 .word 0x7fd107fb, 0x7fd0f5ed, 0x7fd0e406, 0x7fd0d244 .word 0x7fd0c0a7, 0x7fd0af2f, 0x7fd09ddb, 0x7fd08cab .word 0x7fd07b9f, 0x7fd06ab5, 0x7fd059ee, 0x7fd04949 .word 0x7fd038c6, 0x7fd02864, 0x7fd01824, 0x7fd00804 .word 0x3ff00000, 0x00000000 ! 1.0 .word 0xbff00000, 0x00000000 ! -1.0 ! 
parr1[i] = atan((double)*(float*)&((i + 460) << 21)), i = [0, 155] .word 0x3f2fffff, 0xf555555c, 0x3f33ffff, 0xf595555f .word 0x3f37ffff, 0xee000018, 0x3f3bffff, 0xe36aaadf .word 0x3f3fffff, 0xd55555bc, 0x3f43ffff, 0xd65555f2 .word 0x3f47ffff, 0xb8000185, 0x3f4bffff, 0x8daaadf3 .word 0x3f4fffff, 0x55555bbc, 0x3f53ffff, 0x59555f19 .word 0x3f57fffe, 0xe000184d, 0x3f5bfffe, 0x36aadf30 .word 0x3f5ffffd, 0x5555bbbc, 0x3f63fffd, 0x6555f195 .word 0x3f67fffb, 0x800184cc, 0x3f6bfff8, 0xdaadf302 .word 0x3f6ffff5, 0x555bbbb7, 0x3f73fff5, 0x955f194a .word 0x3f77ffee, 0x00184ca6, 0x3f7bffe3, 0x6adf2fd1 .word 0x3f7fffd5, 0x55bbba97, 0x3f83ffd6, 0x55f1929c .word 0x3f87ffb8, 0x0184c30a, 0x3f8bff8d, 0xadf2e78c .word 0x3f8fff55, 0x5bbb729b, 0x3f93ff59, 0x5f18a700 .word 0x3f97fee0, 0x184a5c36, 0x3f9bfe36, 0xdf291712 .word 0x3f9ffd55, 0xbba97625, 0x3fa3fd65, 0xf169c9d9 .word 0x3fa7fb81, 0x8430da2a, 0x3fabf8dd, 0xf139c444 .word 0x3faff55b, 0xb72cfdea, 0x3fb3f59f, 0x0e7c559d .word 0x3fb7ee18, 0x2602f10f, 0x3fbbe39e, 0xbe6f07c4 .word 0x3fbfd5ba, 0x9aac2f6e, 0x3fc3d6ee, 0xe8c6626c .word 0x3fc7b97b, 0x4bce5b02, 0x3fcb90d7, 0x529260a2 .word 0x3fcf5b75, 0xf92c80dd, 0x3fd36277, 0x3707ebcc .word 0x3fd6f619, 0x41e4def1, 0x3fda64ee, 0xc3cc23fd .word 0x3fddac67, 0x0561bb4f, 0x3fe1e00b, 0xabdefeb4 .word 0x3fe4978f, 0xa3269ee1, 0x3fe700a7, 0xc5784634 .word 0x3fe921fb, 0x54442d18, 0x3fecac7c, 0x57846f9e .word 0x3fef730b, 0xd281f69b, 0x3ff0d38f, 0x2c5ba09f .word 0x3ff1b6e1, 0x92ebbe44, 0x3ff30b6d, 0x796a4da8 .word 0x3ff3fc17, 0x6b7a8560, 0x3ff4ae10, 0xfc6589a5 .word 0x3ff5368c, 0x951e9cfd, 0x3ff5f973, 0x15254857 .word 0x3ff67d88, 0x63bc99bd, 0x3ff6dcc5, 0x7bb565fd .word 0x3ff7249f, 0xaa996a21, 0x3ff789bd, 0x2c160054 .word 0x3ff7cd6f, 0x6dc59db4, 0x3ff7fde8, 0x0870c2a0 .word 0x3ff82250, 0x768ac529, 0x3ff8555a, 0x2787981f .word 0x3ff87769, 0xeb8e956b, 0x3ff88fc2, 0x18ace9dc .word 0x3ff8a205, 0xfd558740, 0x3ff8bb9a, 0x63718f45 .word 0x3ff8cca9, 0x27cf0b3d, 0x3ff8d8d8, 0xbf65316f .word 0x3ff8e1fc, 0xa98cb633, 0x3ff8eec8, 0xcfd00665 .word 0x3ff8f751, 0x0eba96e6, 0x3ff8fd69, 0x4acf36b0 .word 0x3ff901fb, 0x7eee715e, 0x3ff90861, 0xd082d9b5 .word 0x3ff90ca6, 0x0b9322c5, 0x3ff90fb2, 0x37a7ea27 .word 0x3ff911fb, 0x59997f3a, 0x3ff9152e, 0x8a326c38 .word 0x3ff91750, 0xab2e0d12, 0x3ff918d6, 0xc2f9c9e2 .word 0x3ff919fb, 0x54eed7a9, 0x3ff91b94, 0xee352849 .word 0x3ff91ca5, 0xff216922, 0x3ff91d69, 0x0b3f72ff .word 0x3ff91dfb, 0x5459826d, 0x3ff91ec8, 0x211be619 .word 0x3ff91f50, 0xa99fd49a, 0x3ff91fb2, 0x2fb5defa .word 0x3ff91ffb, 0x5446d7c3, 0x3ff92061, 0xbaabf105 .word 0x3ff920a5, 0xfeefa208, 0x3ff920d6, 0xc1fb87e7 .word 0x3ff920fb, 0x5444826e, 0x3ff9212e, 0x87778bfc .word 0x3ff92150, 0xa9999bb6, 0x3ff92169, 0x0b1faabb .word 0x3ff9217b, 0x544437c3, 0x3ff92194, 0xedddcc28 .word 0x3ff921a5, 0xfeeedaec, 0x3ff921b2, 0x2fb1e5f1 .word 0x3ff921bb, 0x54442e6e, 0x3ff921c8, 0x2110fa94 .word 0x3ff921d0, 0xa99982d3, 0x3ff921d6, 0xc1fb08c6 .word 0x3ff921db, 0x54442d43, 0x3ff921e1, 0xbaaa9395 .word 0x3ff921e5, 0xfeeed7d0, 0x3ff921e9, 0x0b1f9ad7 .word 0x3ff921eb, 0x54442d1e, 0x3ff921ee, 0x8777604e .word 0x3ff921f0, 0xa999826f, 0x3ff921f2, 0x2fb1e3f5 .word 0x3ff921f3, 0x54442d19, 0x3ff921f4, 0xedddc6b2 .word 0x3ff921f5, 0xfeeed7c3, 0x3ff921f6, 0xc1fb0886 .word 0x3ff921f7, 0x54442d18, 0x3ff921f8, 0x2110f9e5 .word 0x3ff921f8, 0xa999826e, 0x3ff921f9, 0x0b1f9acf .word 0x3ff921f9, 0x54442d18, 0x3ff921f9, 0xbaaa937f .word 0x3ff921f9, 0xfeeed7c3, 0x3ff921fa, 0x2fb1e3f4 .word 0x3ff921fa, 0x54442d18, 0x3ff921fa, 0x8777604b .word 0x3ff921fa, 0xa999826e, 0x3ff921fa, 
0xc1fb0886
	.word	0x3ff921fa, 0xd4442d18, 0x3ff921fa, 0xedddc6b2
	.word	0x3ff921fa, 0xfeeed7c3, 0x3ff921fb, 0x0b1f9acf
	.word	0x3ff921fb, 0x14442d18, 0x3ff921fb, 0x2110f9e5
	.word	0x3ff921fb, 0x2999826e, 0x3ff921fb, 0x2fb1e3f4
	.word	0x3ff921fb, 0x34442d18, 0x3ff921fb, 0x3aaa937f
	.word	0x3ff921fb, 0x3eeed7c3, 0x3ff921fb, 0x41fb0886
	.word	0x3ff921fb, 0x44442d18, 0x3ff921fb, 0x4777604b
	.word	0x3ff921fb, 0x4999826e, 0x3ff921fb, 0x4b1f9acf
	.word	0x3ff921fb, 0x4c442d18, 0x3ff921fb, 0x4dddc6b2
	.word	0x3ff921fb, 0x4eeed7c3, 0x3ff921fb, 0x4fb1e3f4
	.word	0x3ff921fb, 0x50442d18, 0x3ff921fb, 0x5110f9e5
	.word	0x3ff921fb, 0x5199826e, 0x3ff921fb, 0x51fb0886

#define DC2		%f2
#define DTWO		%f6
#define DONE		%f52
#define K0		%f54
#define K1		%f56
#define K2		%f58
#define DC1		%f60
#define DC3		%f62

#define stridex		%o2
#define stridey		%o3

#define MASK_0x7fffffff	%i1
#define MASK_0x100000	%i5

#define tmp_px		STACK_BIAS-32
#define tmp_counter	STACK_BIAS-24
#define tmp0		STACK_BIAS-16
#define tmp1		STACK_BIAS-8

#define counter		%l1

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x20

!--------------------------------------------------------------------
!		!!!!! vatanf algorithm !!!!!
!	ux = ((int*)px)[0];
!	ax = ux & 0x7fffffff;
!
!	if ( ax < 0x39b89c55 )
!	{
!		*(int*)py = ux;
!		goto next;
!	}
!
!	if ( ax > 0x4c700518 )
!	{
!		if ( ax > 0x7f800000 )
!		{
!			float fpx = fabsf(*px);
!			fpx *= fpx;
!			*py = fpx;
!			goto next;
!		}
!
!		sign = ux & 0x80000000;
!		sign |= pi_2;
!		*(int*)py = sign;
!		goto next;
!	}
!
!	ftmp0 = *px;
!	x = (double)ftmp0;
!	px += stridex;
!	y = vis_fpadd32(x,DC1);
!	y = vis_fand(y,DC2);
!	div = x * y;
!	xx = x - y;
!	div += DONE;
!	i = ((unsigned long long*)&div)[0];
!	y0 = vis_fand(div,DC3);
!	i >>= 43;
!	i &= 508;
!	*(float*)&dtmp0 = *(float*)((char*)parr0 + i);
!	y0 = vis_fpsub32(dtmp0, y0);
!	dtmp0 = div * y0;
!	dtmp0 = DTWO - dtmp0;
!	y0 *= dtmp0;
!	dtmp1 = div * y0;
!	dtmp1 = DTWO - dtmp1;
!	y0 *= dtmp1;
!	ax = ux & 0x7fffffff;
!	ax += 0x00100000;
!	ax >>= 18;
!	ax &= -8;
!	res = *(double*)((char*)parr1 + ax);
!	ux >>= 28;
!	ux &= -8;
!	dtmp0 = *(double*)((char*)sign_arr + ux);
!	res *= dtmp0;
!	xx *= y0;
!	x2 = xx * xx;
!	dtmp0 = K2 * x2;
!	dtmp0 += K1;
!	dtmp0 *= x2;
!	dtmp0 += K0;
!	dtmp0 *= xx;
!	res += dtmp0;
!	ftmp0 = (float)res;
!	py[0] = ftmp0;
!	py += stridey;
!--------------------------------------------------------------------

	ENTRY(__vatanf)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,.CONST_TBL,l2)
	st	%i0,[%fp+tmp_counter]
	sllx	%i2,2,stridex
	sllx	%i4,2,stridey
	or	%g0,%i3,%o1
	stx	%i1,[%fp+tmp_px]
	ldd	[%l2],K0
	ldd	[%l2+8],K1
	ldd	[%l2+16],K2
	ldd	[%l2+24],DC1
	ldd	[%l2+32],DC2
	ldd	[%l2+40],DC3
	ldd	[%l2+48],DONE
	ldd	[%l2+56],DTWO
	add	%l2,64,%i4
	add	%l2,64+512,%l0
	add	%l2,64+512+16-0x1cc*8,%l7
	sethi	%hi(0x100000),MASK_0x100000
	sethi	%hi(0x7ffffc00),MASK_0x7fffffff
	add	MASK_0x7fffffff,1023,MASK_0x7fffffff
	sethi	%hi(0x39b89c00),%o4
	add	%o4,0x55,%o4
	sethi	%hi(0x4c700400),%o5
	add	%o5,0x118,%o5
.begin:
	ld	[%fp+tmp_counter],counter
	ldx	[%fp+tmp_px],%i3
	st	%g0,[%fp+tmp_counter]
.begin1:
	cmp	counter,0
	ble,pn	%icc,.exit
	nop

	lda	[%i3]0x82,%l6		! (0_0) ux = ((int*)px)[0];

	and	%l6,MASK_0x7fffffff,%l5	! (0_0) ax = ux & 0x7fffffff;
	lda	[%i3]0x82,%f0		! (0_0) ftmp0 = *px;

	cmp	%l5,%o4			! (0_0) ax ? 0x39b89c55
	bl,pn	%icc,.spec0		! (0_0) if ( ax < 0x39b89c55 )
	nop

	cmp	%l5,%o5			! (0_0) ax ? 0x4c700518
	bg,pn	%icc,.spec1		! (0_0) if ( ax > 0x4c700518 )
	nop

	add	%i3,stridex,%l5		! px += stridex;
	fstod	%f0,%f22		! (0_0) x = (double)ftmp0;

	mov	%l6,%i3

	lda	[%l5]0x82,%l6		! 
(1_0) ux = ((int*)px)[0]; and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff; lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px; add %l5,stridex,%l4 ! px += stridex; fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1); cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55 bl,pn %icc,.update0 ! (1_0) if ( ax < 0x39b89c55 ) nop .cont0: cmp %o7,%o5 ! (1_0) ax ? 0x4c700518 bg,pn %icc,.update1 ! (1_0) if ( ax > 0x4c700518 ) nop .cont1: fstod %f0,%f20 ! (1_0) x = (double)ftmp0; mov %l6,%l5 fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2); fmuld %f22,%f26,%f32 ! (0_0) div = x * y; lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0]; fsubd %f22,%f26,%f22 ! (0_0) xx = x - y; and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff; lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px; add %l4,stridex,%l3 ! px += stridex; fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1); cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55 bl,pn %icc,.update2 ! (2_0) if ( ax < 0x39b89c55 ) faddd DONE,%f32,%f32 ! (0_0) div += done; .cont2: cmp %o7,%o5 ! (2_0) ax ? 0x4c700518 bg,pn %icc,.update3 ! (2_0) if ( ax > 0x4c700518 ) nop .cont3: std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0]; mov %l6,%l4 fstod %f0,%f18 ! (2_0) x = (double)ftmp0; fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2); fmuld %f20,%f26,%f30 ! (1_0) div = x * y; lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0]; fsubd %f20,%f26,%f20 ! (1_0) xx = x - y; and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff; lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px; add %l3,stridex,%i0 ! px += stridex; fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1); cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55 bl,pn %icc,.update4 ! (3_0) if ( ax < 0x39b89c55 ) faddd DONE,%f30,%f30 ! (1_0) div += done; .cont4: cmp %o7,%o5 ! (3_0) ax ? 0x4c700518 bg,pn %icc,.update5 ! (3_0) if ( ax > 0x4c700518 ) nop .cont5: std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0]; mov %l6,%l3 fstod %f0,%f16 ! (3_0) x = (double)ftmp0; ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0]; fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2); fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3); srlx %o0,43,%o0 ! (0_0) i >>= 43; and %o0,508,%l6 ! (0_0) i &= 508; ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fmuld %f18,%f26,%f28 ! (2_0) div = x * y; lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0]; fsubd %f18,%f26,%f18 ! (2_0) xx = x - y; fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0); and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff; lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px; add %i0,stridex,%i2 ! px += stridex; fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1); cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55 bl,pn %icc,.update6 ! (4_0) if ( ax < 0x39b89c55 ) faddd DONE,%f28,%f28 ! (2_0) div += done; .cont6: fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0; cmp %o7,%o5 ! (4_0) ax ? 0x4c700518 bg,pn %icc,.update7 ! (4_0) if ( ax > 0x4c700518 ) nop .cont7: std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0]; mov %l6,%i0 fstod %f0,%f14 ! (4_0) x = (double)ftmp0; ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0]; fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2); fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3); fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0; srlx %g1,43,%g1 ! (1_0) i >>= 43; and %g1,508,%l6 ! (1_0) i &= 508; ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fmuld %f16,%f26,%f34 ! (3_0) div = x * y; lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0]; fsubd %f16,%f26,%f16 ! (3_0) xx = x - y; fpsub32 %f0,%f24,%f38 ! 
(1_0) y0 = vis_fpsub32(dtmp0, y0); add %i2,stridex,%l2 ! px += stridex; fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0; and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff; lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px; fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1); cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55 bl,pn %icc,.update8 ! (5_0) if ( ax < 0x39b89c55 ) faddd DONE,%f34,%f34 ! (3_0) div += done; .cont8: fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0; cmp %o7,%o5 ! (5_0) ax ? 0x4c700518 bg,pn %icc,.update9 ! (5_0) if ( ax > 0x4c700518 ) nop .cont9: std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0]; mov %l6,%i2 fstod %f0,%f36 ! (5_0) x = (double)ftmp0; fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0; ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0]; fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2); fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3); fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0; srlx %o0,43,%o0 ! (2_0) i >>= 43; and %o0,508,%l6 ! (2_0) i &= 508; fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1; ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fmuld %f14,%f26,%f32 ! (4_0) div = x * y; lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0]; fsubd %f14,%f26,%f14 ! (4_0) xx = x - y; fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1; add %l2,stridex,%g5 ! px += stridex; fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0); fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0; and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff; lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px; fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1); cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55 bl,pn %icc,.update10 ! (6_0) if ( ax < 0x39b89c55 ) faddd DONE,%f32,%f32 ! (4_0) div += done; .cont10: fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0; cmp %o7,%o5 ! (6_0) ax ? 0x4c700518 bg,pn %icc,.update11 ! (6_0) if ( ax > 0x4c700518 ) nop .cont11: fmuld %f22,%f26,%f22 ! (0_0) xx *= y0; mov %l6,%l2 std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0]; fstod %f0,%f10 ! (6_0) x = (double)ftmp0; fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0; ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0]; fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2); fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3); fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx; srlx %g1,43,%g1 ! (3_0) i >>= 43; fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0; and %g1,508,%l6 ! (3_0) i &= 508; mov %i3,%o7 fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1; ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fmuld %f36,%f26,%f30 ! (5_0) div = x * y; srl %o7,28,%g1 ! (0_0) ux >>= 28; add %g5,stridex,%i3 ! px += stridex; fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%o0 ! (0_0) ax = ux & 0x7fffffff; lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0]; fsubd %f36,%f26,%f36 ! (5_0) xx = x - y; fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1; add %o0,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000; and %g1,-8,%g1 ! (0_0) ux &= -8; fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0); fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0; and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff; lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px; fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1); cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55 bl,pn %icc,.update12 ! (7_0) if ( ax < 0x39b89c55 ) faddd DONE,%f30,%f30 ! (5_0) div += done; .cont12: fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0; cmp %o7,%o5 ! (7_0) ax ? 0x4c700518 bg,pn %icc,.update13 ! (7_0) if ( ax > 0x4c700518 ) faddd %f4,K1,%f4 ! 
(0_0) dtmp0 += K1; .cont13: fmuld %f20,%f26,%f20 ! (1_0) xx *= y0; srl %o0,18,%o7 ! (0_0) ax >>= 18; std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0]; fstod %f0,%f8 ! (7_0) x = (double)ftmp0; fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0; and %o7,-8,%o7 ! (0_0) ux &= -8; ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0]; fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2); add %o7,%l7,%o7 ! (0_0) (char*)parr1 + ax; mov %l6,%g5 ldd [%l0+%g1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux); fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2; srlx %o0,43,%o0 ! (4_0) i >>= 43; ldd [%o7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax); fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3); fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx; and %o0,508,%l6 ! (4_0) i &= 508; mov %l5,%o7 fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0; fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0; srl %o7,28,%l5 ! (1_0) ux >>= 28; ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fmuld %f10,%f26,%f28 ! (6_0) div = x * y; faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0; subcc counter,8,counter bneg,pn %icc,.tail or %g0,%o1,%o0 add %fp,tmp0,%g1 lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; ba .main_loop add %i3,stridex,%l5 ! px += stridex; .align 16 .main_loop: fsubd %f10,%f26,%f10 ! (6_1) xx = x - y; and %o7,MASK_0x7fffffff,%o1 ! (1_1) ax = ux & 0x7fffffff; st %f12,[%g1] ! (7_1) py[0] = ftmp0; fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2; fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1; srl %o7,28,%o7 ! (1_0) ux >>= 28; add %o1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000; fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0); fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0; and %l6,MASK_0x7fffffff,%o1 ! (0_0) ax = ux & 0x7fffffff; lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px; fpadd32 %f8,DC1,%f24 ! (7_1) y = vis_fpadd32(x,dconst1); fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx; cmp %o1,%o4 ! (0_0) ax ? 0x39b89c55 bl,pn %icc,.update14 ! (0_0) if ( ax < 0x39b89c55 ) faddd DONE,%f28,%f28 ! (6_1) div += done; .cont14: fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0; cmp %o1,%o5 ! (0_0) ax ? 0x4c700518 bg,pn %icc,.update15 ! (0_0) if ( ax > 0x4c700518 ) faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1; .cont15: fmuld %f18,%f26,%f18 ! (2_1) xx *= y0; srl %g1,18,%o1 ! (1_1) ax >>= 18; std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0]; fstod %f0,%f22 ! (0_0) ftmp0 = *px; fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0; and %o1,-8,%o1 ! (1_1) ax &= -8; ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0]; fand %f24,DC2,%f26 ! (7_1) y = vis_fand(y,dconst2); ldd [%o1+%l7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax); and %o7,-8,%o7 ! (1_1) ux &= -8; mov %l6,%i3 faddd %f48,%f44,%f12 ! (0_1) res += dtmp0; fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2; nop ldd [%l0+%o7],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux); fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3); fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx; srlx %g1,43,%g1 ! (5_1) i >>= 43; mov %l4,%o7 fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0; and %g1,508,%l6 ! (5_1) i &= 508; nop bn,pn %icc,.exit fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0; add %o0,stridey,%g1 ! py += stridey; ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res; fmuld %f8,%f26,%f34 ! (7_1) div = x * y; srl %o7,28,%o1 ! (2_1) ux >>= 28; lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0]; faddd %f4,K0,%f42 ! 
(1_1) dtmp0 += K0; fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%o7 ! (2_1) ax = ux & 0x7fffffff; st %f12,[%o0] ! (0_1) py[0] = ftmp0; fsubd %f8,%f26,%f8 ! (7_1) xx = x - y; fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1; add %l5,stridex,%l4 ! px += stridex; add %o7,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000; fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0); fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0; and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff; lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px; fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1); fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx; cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55 bl,pn %icc,.update16 ! (1_0) if ( ax < 0x39b89c55 ) faddd DONE,%f34,%f34 ! (7_1) div += done; .cont16: fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0; cmp %o7,%o5 ! (1_0) ax ? 0x4c700518 bg,pn %icc,.update17 ! (1_0) if ( ax > 0x4c700518 ) faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1; .cont17: fmuld %f16,%f26,%f16 ! (3_1) xx *= y0; srl %o0,18,%o7 ! (2_1) ax >>= 18; std %f34,[%fp+tmp1] ! (7_1) i = ((unsigned long long*)&div)[0]; fstod %f0,%f20 ! (1_0) x = (double)ftmp0; fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0; ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0]; and %o1,-8,%o1 ! (2_1) ux &= -8; fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2); faddd %f48,%f44,%f12 ! (1_1) res += dtmp0; and %o7,-8,%o7 ! (2_1) ax &= -8; ldd [%l0+%o1],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux); bn,pn %icc,.exit ldd [%o7+%l7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax); mov %l6,%l5 fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2; fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3); fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx; srlx %o0,43,%o0 ! (6_1) i >>= 43; mov %l3,%o7 fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0; and %o0,508,%l6 ! (6_1) i &= 508; add %l4,stridex,%l3 ! px += stridex; bn,pn %icc,.exit fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0; add %g1,stridey,%o0 ! py += stridey; ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res; fmuld %f22,%f26,%f32 ! (0_0) div = x * y; srl %o7,28,%o1 ! (3_1) ux >>= 28; lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0]; faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0; fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%o7 ! (3_1) ax = ux & 0x7fffffff; st %f12,[%g1] ! (1_1) py[0] = ftmp0; fsubd %f22,%f26,%f22 ! (0_0) xx = x - y; fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1; add %o7,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000; and %o1,-8,%o1 ! (3_1) ux &= -8; fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0); fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0; and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff; lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px; fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1); fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx; cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55 bl,pn %icc,.update18 ! (2_0) if ( ax < 0x39b89c55 ) faddd DONE,%f32,%f32 ! (0_0) div += done; .cont18: fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0; cmp %o7,%o5 ! (2_0) ax ? 0x4c700518 bg,pn %icc,.update19 ! (2_0) if ( ax > 0x4c700518 ) faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1; .cont19: fmuld %f14,%f26,%f14 ! (4_1) xx *= y0; srl %g1,18,%o7 ! (3_1) ax >>= 18; std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0]; fstod %f0,%f18 ! (2_0) x = (double)ftmp0; fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0; and %o7,-8,%o7 ! (3_1) ax &= -8; ldx [%fp+tmp1],%g1 ! 
(7_1) i = ((unsigned long long*)&div)[0]; fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2); faddd %f48,%f44,%f12 ! (2_1) res += dtmp0; mov %l6,%l4 ldd [%l0+%o1],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux); bn,pn %icc,.exit fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2; ldd [%o7+%l7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax) nop fand %f34,DC3,%f24 ! (7_1) y0 = vis_fand(div,dconst3); fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx; srlx %g1,43,%g1 ! (7_1) i >>= 43; mov %i0,%o7 fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0; and %g1,508,%l6 ! (7_1) i &= 508; add %l3,stridex,%i0 ! px += stridex; bn,pn %icc,.exit fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0; add %o0,stridey,%g1 ! py += stridey; ld [%i4+%l6],%f0 ! (7_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res; fmuld %f20,%f26,%f30 ! (1_0) div = x * y; srl %o7,28,%o1 ! (4_1) ux >>= 28; lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0]; faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0; fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%o7 ! (4_1) ax = ux & 0x7fffffff; st %f12,[%o0] ! (2_1) py[0] = ftmp0; fsubd %f20,%f26,%f20 ! (1_0) xx = x - y; fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1; add %o7,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000; and %o1,-8,%o1 ! (4_1) ux &= -8; fpsub32 %f0,%f24,%f38 ! (7_1) y0 = vis_fpsub32(dtmp0, y0); fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0; and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff; lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px; fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1); fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx; cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55 bl,pn %icc,.update20 ! (3_0) if ( ax < 0x39b89c55 ) faddd DONE,%f30,%f30 ! (1_0) div += done; .cont20: fmuld %f34,%f38,%f42 ! (7_1) dtmp0 = div0 * y0; cmp %o7,%o5 ! (3_0) ax ? 0x4c700518 bg,pn %icc,.update21 ! (3_0) if ( ax > 0x4c700518 ) faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1; .cont21: fmuld %f36,%f26,%f36 ! (5_1) xx *= y0; srl %o0,18,%o7 ! (4_1) ax >>= 18; std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0]; fstod %f0,%f16 ! (3_0) x = (double)ftmp0; fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0; and %o7,-8,%o7 ! (4_1) ax &= -8; ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0]; fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2); faddd %f48,%f44,%f12 ! (3_1) res += dtmp0; nop ldd [%l0+%o1],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux); bn,pn %icc,.exit ldd [%o7+%l7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax); mov %l6,%l3 fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2; fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3); fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx; srlx %o0,43,%o0 ! (0_0) i >>= 43; mov %i2,%o7 fsubd DTWO,%f42,%f44 ! (7_1) dtmp0 = dtwo - dtmp0; and %o0,508,%l6 ! (0_0) i &= 508; add %i0,stridex,%i2 ! px += stridex; bn,pn %icc,.exit fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0; add %g1,stridey,%o0 ! py += stridey; ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res; fmuld %f18,%f26,%f28 ! (2_0) div = x * y; srl %o7,28,%o1 ! (5_1) ux >>= 28; lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0]; faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0; fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%o7 ! (5_1) ax = ux & 0x7fffffff; st %f12,[%g1] ! (3_1) py[0] = ftmp0; fsubd %f18,%f26,%f18 ! (2_0) xx = x - y; fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1; add %o7,MASK_0x100000,%g1 ! 
(5_1) ax += 0x00100000; and %o1,-8,%o1 ! (5_1) ux &= -8; fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0); fmuld %f38,%f44,%f38 ! (7_1) y0 *= dtmp0; and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff; lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px; fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1); fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx; cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55 bl,pn %icc,.update22 ! (4_0) if ( ax < 0x39b89c55 ) faddd DONE,%f28,%f28 ! (2_0) div += done; .cont22: fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0; cmp %o7,%o5 ! (4_0) ax ? 0x4c700518 bg,pn %icc,.update23 ! (4_0) if ( ax > 0x4c700518 ) faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1; .cont23: fmuld %f10,%f26,%f10 ! (6_1) xx *= y0; srl %g1,18,%o7 ! (5_1) ax >>= 18; std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0]; fstod %f0,%f14 ! (4_0) x = (double)ftmp0; fmuld %f34,%f38,%f34 ! (7_1) dtmp1 = div0 * y0; and %o7,-8,%o7 ! (5_1) ax &= -8; ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0]; fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2); faddd %f48,%f44,%f12 ! (4_1) res += dtmp0; mov %l6,%i0 ldd [%l0+%o1],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux); bn,pn %icc,.exit ldd [%o7+%l7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax); nop fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2; fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3); fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx; srlx %g1,43,%g1 ! (1_0) i >>= 43; mov %l2,%o7 fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0; and %g1,508,%l6 ! (1_0) i &= 508; add %i2,stridex,%l2 ! px += stridex; bn,pn %icc,.exit fsubd DTWO,%f34,%f46 ! (7_1) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0; add %o0,stridey,%g1 ! py += stridey; ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res; fmuld %f16,%f26,%f34 ! (3_0) div = x * y; srl %o7,28,%o1 ! (6_1) ux >>= 28; lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0]; faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0; fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%o7 ! (6_1) ax = ux & 0x7fffffff; st %f12,[%o0] ! (4_1) py[0] = ftmp0; fsubd %f16,%f26,%f16 ! (3_0) xx = x - y; fmuld %f38,%f46,%f26 ! (7_1) y0 *= dtmp1; add %o7,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000; and %o1,-8,%o1 ! (6_1) ux &= -8; fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0); fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0; and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff; lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px; fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1); fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx; cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55 bl,pn %icc,.update24 ! (5_0) if ( ax < 0x39b89c55 ) faddd DONE,%f34,%f34 ! (3_0) div += done; .cont24: fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0; cmp %o7,%o5 ! (5_0) ax ? 0x4c700518 bg,pn %icc,.update25 ! (5_0) if ( ax > 0x4c700518 ) faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1; .cont25: fmuld %f8,%f26,%f8 ! (7_1) xx *= y0; srl %o0,18,%o7 ! (6_1) ax >>= 18; std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0]; fstod %f0,%f36 ! (5_0) x = (double)ftmp0; fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0; and %o7,-8,%o7 ! (6_1) ax &= -8; ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0]; fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2); faddd %f48,%f44,%f12 ! (5_1) res += dtmp0; mov %l6,%i2 ldd [%l0+%o1],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux); bn,pn %icc,.exit ldd [%o7+%l7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax); nop fmuld %f4,%f50,%f4 ! 
(6_1) dtmp0 *= x2; fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3); fmuld %f8,%f8,%f50 ! (7_1) x2 = xx * xx; srlx %o0,43,%o0 ! (2_0) i >>= 43; mov %g5,%o7 fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0; and %o0,508,%l6 ! (2_0) i &= 508; add %l2,stridex,%g5 ! px += stridex; bn,pn %icc,.exit fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0; add %g1,stridey,%o0 ! py += stridey; ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res; fmuld %f14,%f26,%f32 ! (4_0) div = x * y; srl %o7,28,%o1 ! (7_1) ux >>= 28; lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0]; faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0; fmuld K2,%f50,%f4 ! (7_1) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%o7 ! (7_1) ax = ux & 0x7fffffff; st %f12,[%g1] ! (5_1) py[0] = ftmp0; fsubd %f14,%f26,%f14 ! (4_0) xx = x - y; fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1; add %o7,MASK_0x100000,%g1 ! (7_1) ax += 0x00100000; and %o1,-8,%o1 ! (7_1) ux &= -8; fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0); fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0; and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff; lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px; fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1); fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx; cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55 bl,pn %icc,.update26 ! (6_0) if ( ax < 0x39b89c55 ) faddd DONE,%f32,%f32 ! (4_0) div += done; .cont26: fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0; cmp %o7,%o5 ! (6_0) ax ? 0x4c700518 bg,pn %icc,.update27 ! (6_0) if ( ax > 0x4c700518 ) faddd %f4,K1,%f4 ! (7_1) dtmp0 += K1; .cont27: fmuld %f22,%f26,%f22 ! (0_0) xx *= y0; srl %g1,18,%o7 ! (7_1) ax >>= 18; std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0]; fstod %f0,%f10 ! (6_0) x = (double)ftmp0; fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0; and %o7,-8,%o7 ! (7_1) ax &= -8; ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0]; fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2); faddd %f48,%f44,%f12 ! (6_1) res += dtmp0; mov %l6,%l2 ldd [%l0+%o1],%f48 ! (7_1) dtmp0 = *(double*)((char*)sign_arr + ux); bn,pn %icc,.exit ldd [%o7+%l7],%f0 ! (7_1) res = *(double*)((char*)parr1 + ax); nop fmuld %f4,%f50,%f4 ! (7_1) dtmp0 *= x2; fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3); fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx; srlx %g1,43,%g1 ! (3_0) i >>= 43; mov %i3,%o7 fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0; and %g1,508,%l6 ! (3_0) i &= 508; add %g5,stridex,%i3 ! px += stridex; bn,pn %icc,.exit fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (7_1) res *= dtmp0; add %o0,stridey,%g1 ! py += stridey; ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res; fmuld %f36,%f26,%f30 ! (5_0) div = x * y; srl %o7,28,%o1 ! (0_0) ux >>= 28; lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0]; faddd %f4,K0,%f42 ! (7_1) dtmp0 += K0; fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%o7 ! (0_0) ax = ux & 0x7fffffff; st %f12,[%o0] ! (6_1) py[0] = ftmp0; fsubd %f36,%f26,%f36 ! (5_0) xx = x - y; fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1; add %o7,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000; and %o1,-8,%o1 ! (0_0) ux &= -8; fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0); fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0; and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff; lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px; fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1); fmuld %f42,%f8,%f44 ! 
(7_1) dtmp0 *= xx; cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55 bl,pn %icc,.update28 ! (7_0) if ( ax < 0x39b89c55 ) faddd DONE,%f30,%f30 ! (5_0) div += done; .cont28: fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0; cmp %o7,%o5 ! (7_0) ax ? 0x4c700518 bg,pn %icc,.update29 ! (7_0) if ( ax > 0x4c700518 ) faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1; .cont29: fmuld %f20,%f26,%f20 ! (1_0) xx *= y0; srl %o0,18,%o7 ! (0_0) ax >>= 18; std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0]; fstod %f0,%f8 ! (7_0) x = (double)ftmp0; fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0; and %o7,-8,%o7 ! (0_0) ux &= -8; ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0]; fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2); faddd %f48,%f44,%f12 ! (7_1) res += dtmp0; subcc counter,8,counter ldd [%l0+%o1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux); bn,pn %icc,.exit fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2; mov %l6,%g5 ldd [%o7+%l7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax); fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3); fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx; srlx %o0,43,%l6 ! (4_0) i >>= 43; mov %l5,%o7 fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0; add %g1,stridey,%o0 ! py += stridey; and %l6,508,%l6 ! (4_0) i &= 508; bn,pn %icc,.exit fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0; ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); add %i3,stridex,%l5 ! px += stridex; fdtos %f12,%f12 ! (7_1) ftmp0 = (float)res; lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; fmuld %f10,%f26,%f28 ! (6_0) div = x * y; bpos,pt %icc,.main_loop faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0; srl %o7,28,%l5 ! (1_0) ux >>= 28; st %f12,[%g1] ! (7_1) py[0] = ftmp0; .tail: addcc counter,7,counter bneg,pn %icc,.begin or %g0,%o0,%o1 fsubd %f10,%f26,%f10 ! (6_1) xx = x - y; and %o7,MASK_0x7fffffff,%g1 ! (1_1) ax = ux & 0x7fffffff; fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2; fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1; add %g1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000; and %l5,-8,%l5 ! (1_1) ux &= -8; fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0); fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0; fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx; faddd DONE,%f28,%f28 ! (6_1) div += done; fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0; faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1; fmuld %f18,%f26,%f18 ! (2_1) xx *= y0; srl %g1,18,%o7 ! (1_1) ax >>= 18; std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0]; fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0; and %o7,-8,%o7 ! (1_1) ax &= -8; ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0]; faddd %f48,%f44,%f12 ! (0_1) res += dtmp0; add %o7,%l7,%o7 ! (1_1) (char*)parr1 + ax; ldd [%l0+%l5],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux); fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2; fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3); ldd [%o7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax); fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx; fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0; srlx %g1,43,%g1 ! (5_1) i >>= 43; and %g1,508,%l6 ! (5_1) i &= 508; mov %l4,%o7 fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0; add %o0,stridey,%g1 ! py += stridey; ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res; srl %o7,28,%l4 ! (2_1) ux >>= 28; st %f12,[%o0] ! (0_1) py[0] = ftmp0; faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0; subcc counter,1,counter bneg,pn %icc,.begin or %g0,%g1,%o1 fmuld K2,%f50,%f4 ! 
(2_1) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%o0 ! (2_1) ax = ux & 0x7fffffff; fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1; add %o0,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000; and %l4,-8,%l4 ! (2_1) ux &= -8; fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0); fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0; fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx; fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0; faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1; fmuld %f16,%f26,%f16 ! (3_1) xx *= y0; srl %o0,18,%o7 ! (2_1) ax >>= 18; fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0; and %o7,-8,%o7 ! (2_1) ax &= -8; ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0]; faddd %f48,%f44,%f12 ! (1_1) res += dtmp0; add %o7,%l7,%o7 ! (2_1) (char*)parr1 + ax; ldd [%l0+%l4],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux); fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2; fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3); ldd [%o7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax); fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx; fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0; srlx %o0,43,%o0 ! (6_1) i >>= 43; and %o0,508,%l6 ! (6_1) i &= 508; mov %l3,%o7 fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0; add %g1,stridey,%o0 ! py += stridey; ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res; srl %o7,28,%l3 ! (3_1) ux >>= 28; st %f12,[%g1] ! (1_1) py[0] = ftmp0; faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0; subcc counter,1,counter bneg,pn %icc,.begin or %g0,%o0,%o1 fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%g1 ! (3_1) ax = ux & 0x7fffffff; fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1; add %g1,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000; and %l3,-8,%l3 ! (3_1) ux &= -8; fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0); fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0; fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx; fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0; faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1; fmuld %f14,%f26,%f14 ! (4_1) xx *= y0; srl %g1,18,%o7 ! (3_1) ax >>= 18; fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0; and %o7,-8,%o7 ! (3_1) ax &= -8; faddd %f48,%f44,%f12 ! (2_1) res += dtmp0; add %o7,%l7,%o7 ! (3_1) (char*)parr1 + ax; ldd [%l0+%l3],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux); fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2; ldd [%o7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax) fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx; fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0; mov %i0,%o7 fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0; add %o0,stridey,%g1 ! py += stridey; fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res; srl %o7,28,%i0 ! (4_1) ux >>= 28; st %f12,[%o0] ! (2_1) py[0] = ftmp0; faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0; subcc counter,1,counter bneg,pn %icc,.begin or %g0,%g1,%o1 fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%o0 ! (4_1) ax = ux & 0x7fffffff; fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1; add %o0,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000; and %i0,-8,%i0 ! (4_1) ux &= -8; fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0; fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx; faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1; fmuld %f36,%f26,%f36 ! (5_1) xx *= y0; srl %o0,18,%o7 ! (4_1) ax >>= 18; fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0; and %o7,-8,%o7 ! (4_1) ax &= -8; faddd %f48,%f44,%f12 ! (3_1) res += dtmp0; add %o7,%l7,%o7 ! (4_1) (char*)parr1 + ax; ldd [%l0+%i0],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux); fmuld %f4,%f50,%f4 ! 
(4_1) dtmp0 *= x2; ldd [%o7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax); fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx; mov %i2,%o7 fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1; fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0; add %g1,stridey,%o0 ! py += stridey; fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res; srl %o7,28,%i2 ! (5_1) ux >>= 28; st %f12,[%g1] ! (3_1) py[0] = ftmp0; faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0; subcc counter,1,counter bneg,pn %icc,.begin or %g0,%o0,%o1 fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%g1 ! (5_1) ax = ux & 0x7fffffff; fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1; add %g1,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000; and %i2,-8,%i2 ! (5_1) ux &= -8; fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx; faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1; fmuld %f10,%f26,%f10 ! (6_1) xx *= y0; srl %g1,18,%o7 ! (5_1) ax >>= 18; and %o7,-8,%o7 ! (5_1) ax &= -8; faddd %f48,%f44,%f12 ! (4_1) res += dtmp0; add %o7,%l7,%o7 ! (5_1) (char*)parr1 + ax; ldd [%l0+%i2],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux); fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2; ldd [%o7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax); fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx; mov %l2,%o7 fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0; add %o0,stridey,%g1 ! py += stridey; fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res; srl %o7,28,%l2 ! (6_1) ux >>= 28; st %f12,[%o0] ! (4_1) py[0] = ftmp0; faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0; subcc counter,1,counter bneg,pn %icc,.begin or %g0,%g1,%o1 fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2; and %o7,MASK_0x7fffffff,%o0 ! (6_1) ax = ux & 0x7fffffff; add %o0,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000; and %l2,-8,%l2 ! (6_1) ux &= -8; fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx; faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1; srl %o0,18,%o7 ! (6_1) ax >>= 18; and %o7,-8,%o7 ! (6_1) ax &= -8; faddd %f48,%f44,%f12 ! (5_1) res += dtmp0; add %o7,%l7,%o7 ! (6_1) (char*)parr1 + ax; ldd [%l0+%l2],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux); fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2; ldd [%o7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax); fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0; add %g1,stridey,%o0 ! py += stridey; fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res; st %f12,[%g1] ! (5_1) py[0] = ftmp0; faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0; subcc counter,1,counter bneg,pn %icc,.begin or %g0,%o0,%o1 fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx; faddd %f48,%f44,%f12 ! (6_1) res += dtmp0; add %o0,stridey,%g1 ! py += stridey; fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res; st %f12,[%o0] ! (6_1) py[0] = ftmp0; ba .begin or %g0,%g1,%o1 ! py += stridey; .exit: ret restore %g0,%g0,%g0 .align 16 .spec0: add %i3,stridex,%i3 ! px += stridex; sub counter,1,counter st %l6,[%o1] ! *(int*)py = ux; ba .begin1 add %o1,stridey,%o1 ! py += stridey; .align 16 .spec1: sethi %hi(0x7f800000),%l3 sethi %hi(0x3fc90c00),%l4 ! pi_2 sethi %hi(0x80000000),%o0 add %l4,0x3db,%l4 ! pi_2 cmp %l5,%l3 ! if ( ax > 0x7f800000 ) bg,a,pn %icc,1f fabss %f0,%f0 ! fpx = fabsf(*px); and %l6,%o0,%l6 ! sign = ux & 0x80000000; or %l6,%l4,%l6 ! sign |= pi_2; add %i3,stridex,%i3 ! px += stridex; sub counter,1,counter st %l6,[%o1] ! *(int*)py = sign; ba .begin1 add %o1,stridey,%o1 ! py += stridey; 1: fmuls %f0,%f0,%f0 ! fpx *= fpx; add %i3,stridex,%i3 ! px += stridex sub counter,1,counter st %f0,[%o1] ! *py = fpx; ba .begin1 add %o1,stridey,%o1 ! 
py += stridey; .align 16 .update0: cmp counter,1 fzeros %f0 ble,a .cont0 sethi %hi(0x3fffffff),%l6 sub counter,1,counter st counter,[%fp+tmp_counter] stx %l5,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont0 or %g0,1,counter .align 16 .update1: cmp counter,1 fzeros %f0 ble,a .cont1 sethi %hi(0x3fffffff),%l6 sub counter,1,counter st counter,[%fp+tmp_counter] stx %l5,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont1 or %g0,1,counter .align 16 .update2: cmp counter,2 fzeros %f0 ble,a .cont2 sethi %hi(0x3fffffff),%l6 sub counter,2,counter st counter,[%fp+tmp_counter] stx %l4,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont2 or %g0,2,counter .align 16 .update3: cmp counter,2 fzeros %f0 ble,a .cont3 sethi %hi(0x3fffffff),%l6 sub counter,2,counter st counter,[%fp+tmp_counter] stx %l4,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont3 or %g0,2,counter .align 16 .update4: cmp counter,3 fzeros %f0 ble,a .cont4 sethi %hi(0x3fffffff),%l6 sub counter,3,counter st counter,[%fp+tmp_counter] stx %l3,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont4 or %g0,3,counter .align 16 .update5: cmp counter,3 fzeros %f0 ble,a .cont5 sethi %hi(0x3fffffff),%l6 sub counter,3,counter st counter,[%fp+tmp_counter] stx %l3,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont5 or %g0,3,counter .align 16 .update6: cmp counter,4 fzeros %f0 ble,a .cont6 sethi %hi(0x3fffffff),%l6 sub counter,4,counter st counter,[%fp+tmp_counter] stx %i0,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont6 or %g0,4,counter .align 16 .update7: cmp counter,4 fzeros %f0 ble,a .cont7 sethi %hi(0x3fffffff),%l6 sub counter,4,counter st counter,[%fp+tmp_counter] stx %i0,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont7 or %g0,4,counter .align 16 .update8: cmp counter,5 fzeros %f0 ble,a .cont8 sethi %hi(0x3fffffff),%l6 sub counter,5,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont8 or %g0,5,counter .align 16 .update9: cmp counter,5 fzeros %f0 ble,a .cont9 sethi %hi(0x3fffffff),%l6 sub counter,5,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont9 or %g0,5,counter .align 16 .update10: cmp counter,6 fzeros %f0 ble,a .cont10 sethi %hi(0x3fffffff),%l6 sub counter,6,counter st counter,[%fp+tmp_counter] stx %l2,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont10 or %g0,6,counter .align 16 .update11: cmp counter,6 fzeros %f0 ble,a .cont11 sethi %hi(0x3fffffff),%l6 sub counter,6,counter st counter,[%fp+tmp_counter] stx %l2,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont11 or %g0,6,counter .align 16 .update12: cmp counter,7 fzeros %f0 ble,a .cont12 sethi %hi(0x3fffffff),%l6 sub counter,7,counter st counter,[%fp+tmp_counter] stx %g5,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont12 or %g0,7,counter .align 16 .update13: cmp counter,7 fzeros %f0 ble,a .cont13 sethi %hi(0x3fffffff),%l6 sub counter,7,counter st counter,[%fp+tmp_counter] stx %g5,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont13 or %g0,7,counter .align 16 .update14: cmp counter,0 fzeros %f0 ble,a .cont14 sethi %hi(0x3fffffff),%l6 sub counter,0,counter st counter,[%fp+tmp_counter] stx %i3,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont14 or %g0,0,counter .align 16 .update15: cmp counter,0 fzeros %f0 ble,a .cont15 sethi %hi(0x3fffffff),%l6 sub counter,0,counter st counter,[%fp+tmp_counter] stx %i3,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont15 or %g0,0,counter .align 16 .update16: cmp counter,1 fzeros %f0 ble,a .cont16 sethi %hi(0x3fffffff),%l6 sub counter,1,counter st counter,[%fp+tmp_counter] stx %l5,[%fp+tmp_px] sethi 
%hi(0x3fffffff),%l6 ba .cont16 or %g0,1,counter .align 16 .update17: cmp counter,1 fzeros %f0 ble,a .cont17 sethi %hi(0x3fffffff),%l6 sub counter,1,counter st counter,[%fp+tmp_counter] stx %l5,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont17 or %g0,1,counter .align 16 .update18: cmp counter,2 fzeros %f0 ble,a .cont18 sethi %hi(0x3fffffff),%l6 sub counter,2,counter st counter,[%fp+tmp_counter] stx %l4,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont18 or %g0,2,counter .align 16 .update19: cmp counter,2 fzeros %f0 ble,a .cont19 sethi %hi(0x3fffffff),%l6 sub counter,2,counter st counter,[%fp+tmp_counter] stx %l4,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont19 or %g0,2,counter .align 16 .update20: cmp counter,3 fzeros %f0 ble,a .cont20 sethi %hi(0x3fffffff),%l6 sub counter,3,counter st counter,[%fp+tmp_counter] stx %l3,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont20 or %g0,3,counter .align 16 .update21: cmp counter,3 fzeros %f0 ble,a .cont21 sethi %hi(0x3fffffff),%l6 sub counter,3,counter st counter,[%fp+tmp_counter] stx %l3,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont21 or %g0,3,counter .align 16 .update22: cmp counter,4 fzeros %f0 ble,a .cont22 sethi %hi(0x3fffffff),%l6 sub counter,4,counter st counter,[%fp+tmp_counter] stx %i0,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont22 or %g0,4,counter .align 16 .update23: cmp counter,4 fzeros %f0 ble,a .cont23 sethi %hi(0x3fffffff),%l6 sub counter,4,counter st counter,[%fp+tmp_counter] stx %i0,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont23 or %g0,4,counter .align 16 .update24: cmp counter,5 fzeros %f0 ble,a .cont24 sethi %hi(0x3fffffff),%l6 sub counter,5,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont24 or %g0,5,counter .align 16 .update25: cmp counter,5 fzeros %f0 ble,a .cont25 sethi %hi(0x3fffffff),%l6 sub counter,5,counter st counter,[%fp+tmp_counter] stx %i2,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont25 or %g0,5,counter .align 16 .update26: cmp counter,6 fzeros %f0 ble,a .cont26 sethi %hi(0x3fffffff),%l6 sub counter,6,counter st counter,[%fp+tmp_counter] stx %l2,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont26 or %g0,6,counter .align 16 .update27: cmp counter,6 fzeros %f0 ble,a .cont27 sethi %hi(0x3fffffff),%l6 sub counter,6,counter st counter,[%fp+tmp_counter] stx %l2,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont27 or %g0,6,counter .align 16 .update28: cmp counter,7 fzeros %f0 ble,a .cont28 sethi %hi(0x3fffffff),%l6 sub counter,7,counter st counter,[%fp+tmp_counter] stx %g5,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont28 or %g0,7,counter .align 16 .update29: cmp counter,7 fzeros %f0 ble,a .cont29 sethi %hi(0x3fffffff),%l6 sub counter,7,counter st counter,[%fp+tmp_counter] stx %g5,[%fp+tmp_px] sethi %hi(0x3fffffff),%l6 ba .cont29 or %g0,7,counter SET_SIZE(__vatanf)
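
! The C sketch below restates, per element, what the pipelined code above
! computes (the entry point itself walks n strided elements, per the
! prologue). It is illustrative only, not this file's method verbatim:
! atan(y) stands in for the parr1 table lookup with sign_arr applied, and
! a true divide stands in for the parr0 reciprocal seed refined by two
! Newton steps (y0 = y0*(2 - div*y0)). The name vatanf_scalar is
! hypothetical and not part of this file's interface.
!
!	#include <math.h>
!	#include <stdint.h>
!	#include <string.h>
!
!	static const double K0 =  9.99999999976686608841e-01;
!	static const double K1 = -3.33333091601972730504e-01;
!	static const double K2 =  1.99628540499523379702e-01;
!
!	static float
!	vatanf_scalar(float fx)
!	{
!		uint32_t ux, ax;
!
!		memcpy(&ux, &fx, sizeof (ux));
!		ax = ux & 0x7fffffff;
!
!		if (ax < 0x39b89c55)		/* tiny: atan(x) ~= x */
!			return (fx);
!		if (ax > 0x4c700518) {
!			if (ax > 0x7f800000)	/* NaN: fpx = |x|, fpx *= fpx */
!				return (fabsf(fx) * fabsf(fx));
!			/* huge (incl. Inf): sign | pi_2 */
!			return (copysignf(1.57079632679489661923f, fx));
!		}
!
!		double x = fx, y;
!		uint64_t ly;
!
!		/* y = x rounded to two significand bits; this is what */
!		/* the fpadd32(DC1) / fand(DC2) pair above computes */
!		memcpy(&ly, &x, sizeof (ly));
!		ly += 0x0002000000000000ULL;
!		ly &= 0xfffc000000000000ULL;
!		memcpy(&y, &ly, sizeof (y));
!
!		/* atan(x) = atan(y) + atan(xx), xx = (x - y)/(1 + x*y) */
!		double div = 1.0 + x * y;	/* >= 1: x, y share a sign */
!		double xx = (x - y) / div;
!		double x2 = xx * xx;
!
!		/* odd cubic approximation of atan(xx) on the small tail */
!		double poly = ((K2 * x2 + K1) * x2 + K0) * xx;
!
!		return ((float)(atan(y) + poly));  /* atan(y): parr1 lookup */
!	}
!
! Under the same reading of the parr1 table comment (entries
! atan((double)*(float*)&((i + 460) << 21)) for i = [0, 155]), the table
! could be regenerated as below; parr1 is a hypothetical buffer here:
!
!	static double parr1[156];
!
!	static void
!	gen_parr1(void)
!	{
!		for (int i = 0; i < 156; i++) {
!			uint32_t fb = (uint32_t)(i + 460) << 21;
!			float f;
!
!			memcpy(&f, &fb, sizeof (f));
!			parr1[i] = atan((double)f);
!		}
!	}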