/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ .file "__vsin.S" #include "libm.h" RO_DATA .align 64 constants: .word 0x3ec718e3,0xa6972785 .word 0x3ef9fd39,0x94293940 .word 0xbf2a019f,0x75ee4be1 .word 0xbf56c16b,0xba552569 .word 0x3f811111,0x1108c703 .word 0x3fa55555,0x554f5b35 .word 0xbfc55555,0x555554d0 .word 0xbfdfffff,0xffffff85 .word 0x3ff00000,0x00000000 .word 0xbfc55555,0x5551fc28 .word 0x3f811107,0x62eacc9d .word 0xbfdfffff,0xffff6328 .word 0x3fa55551,0x5f7acf0c .word 0x3fe45f30,0x6dc9c883 .word 0x43380000,0x00000000 .word 0x3ff921fb,0x54400000 .word 0x3dd0b461,0x1a600000 .word 0x3ba3198a,0x2e000000 .word 0x397b839a,0x252049c1 .word 0x80000000,0x00004000 .word 0xffff8000,0x00000000 ! N.B.: low-order words used .word 0x3fc90000,0x80000000 ! for sign bit hacking; see .word 0x3fc40000,0x00000000 ! 
references to "thresh" below #define p4 0x0 #define q4 0x08 #define p3 0x10 #define q3 0x18 #define p2 0x20 #define q2 0x28 #define p1 0x30 #define q1 0x38 #define one 0x40 #define pp1 0x48 #define pp2 0x50 #define qq1 0x58 #define qq2 0x60 #define invpio2 0x68 #define round 0x70 #define pio2_1 0x78 #define pio2_2 0x80 #define pio2_3 0x88 #define pio2_3t 0x90 #define f30val 0x98 #define mask 0xa0 #define thresh 0xa8 ! local storage indices #define xsave STACK_BIAS-0x8 #define ysave STACK_BIAS-0x10 #define nsave STACK_BIAS-0x14 #define sxsave STACK_BIAS-0x18 #define sysave STACK_BIAS-0x1c #define biguns STACK_BIAS-0x20 #define n2 STACK_BIAS-0x24 #define n1 STACK_BIAS-0x28 #define n0 STACK_BIAS-0x2c #define x2_1 STACK_BIAS-0x40 #define x1_1 STACK_BIAS-0x50 #define x0_1 STACK_BIAS-0x60 #define y2_0 STACK_BIAS-0x70 #define y1_0 STACK_BIAS-0x80 #define y0_0 STACK_BIAS-0x90 ! sizeof temp storage - must be a multiple of 16 for V9 #define tmps 0x90 !-------------------------------------------------------------- ! Some defines to keep code more readable #define LIM_l6 %l6 ! in primary range, contains |x| upper limit when sin(x)=x. ! in transferring to medium range, denotes what loop was active. !-------------------------------------------------------------- ENTRY(__vsin) save %sp,-SA(MINFRAME)-tmps,%sp PIC_SETUP(g5) PIC_SET(g5,__vlibm_TBL_sincos_hi,l3) PIC_SET(g5,__vlibm_TBL_sincos_lo,l4) PIC_SET(g5,constants,l5) mov %l5,%g1 wr %g0,0x82,%asi ! set %asi for non-faulting loads ! ========== primary range ========== ! register use ! i0 n ! i1 x ! i2 stridex ! i3 y ! i4 stridey ! i5 0x80000000 ! l0 hx0 ! l1 hx1 ! l2 hx2 ! l3 __vlibm_TBL_sincos_hi ! l4 __vlibm_TBL_sincos_lo ! l5 0x3fc90000 ! l6 0x3e400000 ! l7 0x3fe921fb ! the following are 64-bit registers in both V8+ and V9 ! g1 scratch ! g5 ! o0 py0 ! o1 py1 ! o2 py2 ! o3 oy0 ! o4 oy1 ! o5 oy2 ! o7 scratch ! f0 x0 ! f2 ! f4 ! f6 ! f8 scratch for table base ! f9 signbit0 ! f10 x1 ! f12 ! f14 ! f16 ! f18 scratch for table base !
f19 signbit1 ! f20 x2 ! f22 ! f24 ! f26 ! f28 scratch for table base ! f29 signbit2 ! f30 0x80000000 ! f31 0x4000 ! f32 ! f34 ! f36 ! f38 ! f40 ! f42 ! f44 0xffff800000000000 ! f46 p1 ! f48 p2 ! f50 p3 ! f52 p4 ! f54 one ! f56 pp1 ! f58 pp2 ! f60 qq1 ! f62 qq2 #ifdef __sparcv9 stx %i1,[%fp+xsave] ! save arguments stx %i3,[%fp+ysave] #else st %i1,[%fp+xsave] ! save arguments st %i3,[%fp+ysave] #endif st %i0,[%fp+nsave] st %i2,[%fp+sxsave] st %i4,[%fp+sysave] sethi %hi(0x80000000),%i5 ! load/set up constants sethi %hi(0x3fc90000),%l5 sethi %hi(0x3e400000),LIM_l6 sethi %hi(0x3fe921fb),%l7 or %l7,%lo(0x3fe921fb),%l7 ldd [%g1+f30val],%f30 ldd [%g1+mask],%f44 ldd [%g1+p1],%f46 ldd [%g1+p2],%f48 ldd [%g1+p3],%f50 ldd [%g1+p4],%f52 ldd [%g1+one],%f54 ldd [%g1+pp1],%f56 ldd [%g1+pp2],%f58 ldd [%g1+qq1],%f60 ldd [%g1+qq2],%f62 sll %i2,3,%i2 ! scale strides sll %i4,3,%i4 add %fp,x0_1,%o3 ! precondition loop add %fp,x0_1,%o4 add %fp,x0_1,%o5 ld [%i1],%l0 ! hx = *x ld [%i1],%f0 ld [%i1+4],%f1 andn %l0,%i5,%l0 ! hx &= ~0x80000000 add %i1,%i2,%i1 ! x += stridex ba,pt %icc,.loop0 ! delay slot nop .align 32 .loop0: lda [%i1]%asi,%l1 ! preload next argument sub %l0,LIM_l6,%g1 sub %l7,%l0,%o7 fands %f0,%f30,%f9 ! save signbit lda [%i1]%asi,%f10 orcc %o7,%g1,%g0 mov %i3,%o0 ! py0 = y bl,pn %icc,.range0 ! if hx < 0x3e400000 or > 0x3fe921fb ! delay slot lda [%i1+4]%asi,%f11 addcc %i0,-1,%i0 add %i3,%i4,%i3 ! y += stridey ble,pn %icc,.endloop1 ! delay slot andn %l1,%i5,%l1 add %i1,%i2,%i1 ! x += stridex fabsd %f0,%f0 fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only .loop1: lda [%i1]%asi,%l2 ! preload next argument sub %l1,LIM_l6,%g1 sub %l7,%l1,%o7 fands %f10,%f30,%f19 ! save signbit lda [%i1]%asi,%f20 orcc %o7,%g1,%g0 mov %i3,%o1 ! py1 = y bl,pn %icc,.range1 ! if hx < 0x3e400000 or > 0x3fe921fb ! delay slot lda [%i1+4]%asi,%f21 addcc %i0,-1,%i0 add %i3,%i4,%i3 ! y += stridey ble,pn %icc,.endloop2 ! delay slot andn %l2,%i5,%l2 add %i1,%i2,%i1 ! 
x += stridex fabsd %f10,%f10 fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only .loop2: st %f6,[%o3] sub %l2,LIM_l6,%g1 sub %l7,%l2,%o7 fands %f20,%f30,%f29 ! save signbit st %f7,[%o3+4] orcc %g1,%o7,%g0 mov %i3,%o2 ! py2 = y bl,pn %icc,.range2 ! if hx < 0x3e400000 or > 0x3fe921fb ! delay slot add %i3,%i4,%i3 ! y += stridey cmp %l0,%l5 fabsd %f20,%f20 bl,pn %icc,.case4 ! delay slot st %f16,[%o4] cmp %l1,%l5 fpadd32s %f0,%f31,%f8 bl,pn %icc,.case2 ! delay slot st %f17,[%o4+4] cmp %l2,%l5 fpadd32s %f10,%f31,%f18 bl,pn %icc,.case1 ! delay slot st %f26,[%o5] mov %o0,%o3 sethi %hi(0x3fc3c000),%o7 fpadd32s %f20,%f31,%f28 st %f27,[%o5+4] fand %f8,%f44,%f2 mov %o1,%o4 fand %f18,%f44,%f12 mov %o2,%o5 sub %l0,%o7,%l0 fand %f28,%f44,%f22 sub %l1,%o7,%l1 sub %l2,%o7,%l2 fsubd %f0,%f2,%f0 srl %l0,10,%l0 add %l3,8,%g1 fsubd %f10,%f12,%f10 srl %l1,10,%l1 fsubd %f20,%f22,%f20 srl %l2,10,%l2 fmuld %f0,%f0,%f2 andn %l0,0x1f,%l0 fmuld %f10,%f10,%f12 andn %l1,0x1f,%l1 fmuld %f20,%f20,%f22 andn %l2,0x1f,%l2 fmuld %f2,%f58,%f6 ldd [%l3+%l0],%f32 fmuld %f12,%f58,%f16 ldd [%l3+%l1],%f36 fmuld %f22,%f58,%f26 ldd [%l3+%l2],%f40 faddd %f6,%f56,%f6 fmuld %f2,%f62,%f4 ldd [%g1+%l0],%f34 faddd %f16,%f56,%f16 fmuld %f12,%f62,%f14 ldd [%g1+%l1],%f38 faddd %f26,%f56,%f26 fmuld %f22,%f62,%f24 ldd [%g1+%l2],%f42 fmuld %f2,%f6,%f6 faddd %f4,%f60,%f4 fmuld %f12,%f16,%f16 faddd %f14,%f60,%f14 fmuld %f22,%f26,%f26 faddd %f24,%f60,%f24 faddd %f6,%f54,%f6 fmuld %f2,%f4,%f4 faddd %f16,%f54,%f16 fmuld %f12,%f14,%f14 faddd %f26,%f54,%f26 fmuld %f22,%f24,%f24 fmuld %f0,%f6,%f6 ldd [%l4+%l0],%f2 fmuld %f10,%f16,%f16 ldd [%l4+%l1],%f12 fmuld %f20,%f26,%f26 ldd [%l4+%l2],%f22 fmuld %f4,%f32,%f4 lda [%i1]%asi,%l0 ! preload next argument fmuld %f14,%f36,%f14 lda [%i1]%asi,%f0 fmuld %f24,%f40,%f24 lda [%i1+4]%asi,%f1 fmuld %f6,%f34,%f6 add %i1,%i2,%i1 ! 
x += stridex fmuld %f16,%f38,%f16 fmuld %f26,%f42,%f26 faddd %f6,%f4,%f6 faddd %f16,%f14,%f16 faddd %f26,%f24,%f26 faddd %f6,%f2,%f6 faddd %f16,%f12,%f16 faddd %f26,%f22,%f26 faddd %f6,%f32,%f6 faddd %f16,%f36,%f16 faddd %f26,%f40,%f26 andn %l0,%i5,%l0 ! hx &= ~0x80000000 fors %f6,%f9,%f6 addcc %i0,-1,%i0 fors %f16,%f19,%f16 bg,pt %icc,.loop0 ! delay slot fors %f26,%f29,%f26 ba,pt %icc,.endloop0 ! delay slot nop .align 32 .case1: st %f27,[%o5+4] sethi %hi(0x3fc3c000),%o7 add %l3,8,%g1 fand %f8,%f44,%f2 sub %l0,%o7,%l0 sub %l1,%o7,%l1 fand %f18,%f44,%f12 fmuld %f20,%f20,%f22 fsubd %f0,%f2,%f0 srl %l0,10,%l0 mov %o0,%o3 fsubd %f10,%f12,%f10 srl %l1,10,%l1 mov %o1,%o4 fmuld %f22,%f52,%f24 mov %o2,%o5 fmuld %f0,%f0,%f2 andn %l0,0x1f,%l0 fmuld %f10,%f10,%f12 andn %l1,0x1f,%l1 faddd %f24,%f50,%f24 fmuld %f2,%f58,%f6 ldd [%l3+%l0],%f32 fmuld %f12,%f58,%f16 ldd [%l3+%l1],%f36 fmuld %f22,%f24,%f24 faddd %f6,%f56,%f6 fmuld %f2,%f62,%f4 ldd [%g1+%l0],%f34 faddd %f16,%f56,%f16 fmuld %f12,%f62,%f14 ldd [%g1+%l1],%f38 faddd %f24,%f48,%f24 fmuld %f2,%f6,%f6 faddd %f4,%f60,%f4 fmuld %f12,%f16,%f16 faddd %f14,%f60,%f14 fmuld %f22,%f24,%f24 faddd %f6,%f54,%f6 fmuld %f2,%f4,%f4 faddd %f16,%f54,%f16 fmuld %f12,%f14,%f14 faddd %f24,%f46,%f24 fmuld %f0,%f6,%f6 ldd [%l4+%l0],%f2 fmuld %f10,%f16,%f16 ldd [%l4+%l1],%f12 fmuld %f4,%f32,%f4 lda [%i1]%asi,%l0 ! preload next argument fmuld %f14,%f36,%f14 lda [%i1]%asi,%f0 fmuld %f6,%f34,%f6 lda [%i1+4]%asi,%f1 fmuld %f16,%f38,%f16 add %i1,%i2,%i1 ! x += stridex fmuld %f22,%f24,%f24 faddd %f6,%f4,%f6 faddd %f16,%f14,%f16 fmuld %f20,%f24,%f24 faddd %f6,%f2,%f6 faddd %f16,%f12,%f16 faddd %f20,%f24,%f26 faddd %f6,%f32,%f6 faddd %f16,%f36,%f16 andn %l0,%i5,%l0 ! hx &= ~0x80000000 fors %f26,%f29,%f26 addcc %i0,-1,%i0 fors %f6,%f9,%f6 bg,pt %icc,.loop0 ! delay slot fors %f16,%f19,%f16 ba,pt %icc,.endloop0 ! delay slot nop .align 32 .case2: st %f26,[%o5] cmp %l2,%l5 fpadd32s %f20,%f31,%f28 bl,pn %icc,.case3 ! 
delay slot st %f27,[%o5+4] sethi %hi(0x3fc3c000),%o7 add %l3,8,%g1 fand %f8,%f44,%f2 sub %l0,%o7,%l0 sub %l2,%o7,%l2 fand %f28,%f44,%f22 fmuld %f10,%f10,%f12 fsubd %f0,%f2,%f0 srl %l0,10,%l0 mov %o0,%o3 fsubd %f20,%f22,%f20 srl %l2,10,%l2 mov %o2,%o5 fmuld %f12,%f52,%f14 mov %o1,%o4 fmuld %f0,%f0,%f2 andn %l0,0x1f,%l0 fmuld %f20,%f20,%f22 andn %l2,0x1f,%l2 faddd %f14,%f50,%f14 fmuld %f2,%f58,%f6 ldd [%l3+%l0],%f32 fmuld %f22,%f58,%f26 ldd [%l3+%l2],%f40 fmuld %f12,%f14,%f14 faddd %f6,%f56,%f6 fmuld %f2,%f62,%f4 ldd [%g1+%l0],%f34 faddd %f26,%f56,%f26 fmuld %f22,%f62,%f24 ldd [%g1+%l2],%f42 faddd %f14,%f48,%f14 fmuld %f2,%f6,%f6 faddd %f4,%f60,%f4 fmuld %f22,%f26,%f26 faddd %f24,%f60,%f24 fmuld %f12,%f14,%f14 faddd %f6,%f54,%f6 fmuld %f2,%f4,%f4 faddd %f26,%f54,%f26 fmuld %f22,%f24,%f24 faddd %f14,%f46,%f14 fmuld %f0,%f6,%f6 ldd [%l4+%l0],%f2 fmuld %f20,%f26,%f26 ldd [%l4+%l2],%f22 fmuld %f4,%f32,%f4 lda [%i1]%asi,%l0 ! preload next argument fmuld %f24,%f40,%f24 lda [%i1]%asi,%f0 fmuld %f6,%f34,%f6 lda [%i1+4]%asi,%f1 fmuld %f26,%f42,%f26 add %i1,%i2,%i1 ! x += stridex fmuld %f12,%f14,%f14 faddd %f6,%f4,%f6 faddd %f26,%f24,%f26 fmuld %f10,%f14,%f14 faddd %f6,%f2,%f6 faddd %f26,%f22,%f26 faddd %f10,%f14,%f16 faddd %f6,%f32,%f6 faddd %f26,%f40,%f26 andn %l0,%i5,%l0 ! hx &= ~0x80000000 fors %f16,%f19,%f16 addcc %i0,-1,%i0 fors %f6,%f9,%f6 bg,pt %icc,.loop0 ! delay slot fors %f26,%f29,%f26 ba,pt %icc,.endloop0 ! 
delay slot nop .align 32 .case3: sethi %hi(0x3fc3c000),%o7 add %l3,8,%g1 fand %f8,%f44,%f2 fmuld %f10,%f10,%f12 sub %l0,%o7,%l0 fmuld %f20,%f20,%f22 fsubd %f0,%f2,%f0 srl %l0,10,%l0 mov %o0,%o3 fmuld %f12,%f52,%f14 mov %o1,%o4 fmuld %f22,%f52,%f24 mov %o2,%o5 fmuld %f0,%f0,%f2 andn %l0,0x1f,%l0 faddd %f14,%f50,%f14 faddd %f24,%f50,%f24 fmuld %f2,%f58,%f6 ldd [%l3+%l0],%f32 fmuld %f12,%f14,%f14 fmuld %f22,%f24,%f24 faddd %f6,%f56,%f6 fmuld %f2,%f62,%f4 ldd [%g1+%l0],%f34 faddd %f14,%f48,%f14 faddd %f24,%f48,%f24 fmuld %f2,%f6,%f6 faddd %f4,%f60,%f4 fmuld %f12,%f14,%f14 fmuld %f22,%f24,%f24 faddd %f6,%f54,%f6 fmuld %f2,%f4,%f4 faddd %f14,%f46,%f14 faddd %f24,%f46,%f24 fmuld %f0,%f6,%f6 ldd [%l4+%l0],%f2 fmuld %f4,%f32,%f4 lda [%i1]%asi,%l0 ! preload next argument fmuld %f12,%f14,%f14 lda [%i1]%asi,%f0 fmuld %f6,%f34,%f6 lda [%i1+4]%asi,%f1 fmuld %f22,%f24,%f24 add %i1,%i2,%i1 ! x += stridex fmuld %f10,%f14,%f14 faddd %f6,%f4,%f6 fmuld %f20,%f24,%f24 faddd %f10,%f14,%f16 faddd %f6,%f2,%f6 faddd %f20,%f24,%f26 fors %f16,%f19,%f16 andn %l0,%i5,%l0 ! hx &= ~0x80000000 faddd %f6,%f32,%f6 addcc %i0,-1,%i0 fors %f26,%f29,%f26 bg,pt %icc,.loop0 ! delay slot fors %f6,%f9,%f6 ba,pt %icc,.endloop0 ! delay slot nop .align 32 .case4: st %f17,[%o4+4] cmp %l1,%l5 fpadd32s %f10,%f31,%f18 bl,pn %icc,.case6 ! delay slot st %f26,[%o5] cmp %l2,%l5 fpadd32s %f20,%f31,%f28 bl,pn %icc,.case5 ! 
delay slot st %f27,[%o5+4] sethi %hi(0x3fc3c000),%o7 add %l3,8,%g1 fand %f18,%f44,%f12 sub %l1,%o7,%l1 sub %l2,%o7,%l2 fand %f28,%f44,%f22 fmuld %f0,%f0,%f2 fsubd %f10,%f12,%f10 srl %l1,10,%l1 mov %o1,%o4 fsubd %f20,%f22,%f20 srl %l2,10,%l2 mov %o2,%o5 fmovd %f0,%f6 fmuld %f2,%f52,%f4 mov %o0,%o3 fmuld %f10,%f10,%f12 andn %l1,0x1f,%l1 fmuld %f20,%f20,%f22 andn %l2,0x1f,%l2 faddd %f4,%f50,%f4 fmuld %f12,%f58,%f16 ldd [%l3+%l1],%f36 fmuld %f22,%f58,%f26 ldd [%l3+%l2],%f40 fmuld %f2,%f4,%f4 faddd %f16,%f56,%f16 fmuld %f12,%f62,%f14 ldd [%g1+%l1],%f38 faddd %f26,%f56,%f26 fmuld %f22,%f62,%f24 ldd [%g1+%l2],%f42 faddd %f4,%f48,%f4 fmuld %f12,%f16,%f16 faddd %f14,%f60,%f14 fmuld %f22,%f26,%f26 faddd %f24,%f60,%f24 fmuld %f2,%f4,%f4 faddd %f16,%f54,%f16 fmuld %f12,%f14,%f14 faddd %f26,%f54,%f26 fmuld %f22,%f24,%f24 faddd %f4,%f46,%f4 fmuld %f10,%f16,%f16 ldd [%l4+%l1],%f12 fmuld %f20,%f26,%f26 ldd [%l4+%l2],%f22 fmuld %f14,%f36,%f14 lda [%i1]%asi,%l0 ! preload next argument fmuld %f24,%f40,%f24 lda [%i1]%asi,%f0 fmuld %f16,%f38,%f16 lda [%i1+4]%asi,%f1 fmuld %f26,%f42,%f26 add %i1,%i2,%i1 ! x += stridex fmuld %f2,%f4,%f4 faddd %f16,%f14,%f16 faddd %f26,%f24,%f26 fmuld %f6,%f4,%f4 faddd %f16,%f12,%f16 faddd %f26,%f22,%f26 faddd %f6,%f4,%f6 faddd %f16,%f36,%f16 faddd %f26,%f40,%f26 andn %l0,%i5,%l0 ! hx &= ~0x80000000 fors %f6,%f9,%f6 addcc %i0,-1,%i0 fors %f16,%f19,%f16 bg,pt %icc,.loop0 ! delay slot fors %f26,%f29,%f26 ba,pt %icc,.endloop0 ! 
delay slot nop .align 32 .case5: sethi %hi(0x3fc3c000),%o7 add %l3,8,%g1 fand %f18,%f44,%f12 fmuld %f0,%f0,%f2 sub %l1,%o7,%l1 fmuld %f20,%f20,%f22 fsubd %f10,%f12,%f10 srl %l1,10,%l1 mov %o1,%o4 fmovd %f0,%f6 fmuld %f2,%f52,%f4 mov %o0,%o3 fmuld %f22,%f52,%f24 mov %o2,%o5 fmuld %f10,%f10,%f12 andn %l1,0x1f,%l1 faddd %f4,%f50,%f4 faddd %f24,%f50,%f24 fmuld %f12,%f58,%f16 ldd [%l3+%l1],%f36 fmuld %f2,%f4,%f4 fmuld %f22,%f24,%f24 faddd %f16,%f56,%f16 fmuld %f12,%f62,%f14 ldd [%g1+%l1],%f38 faddd %f4,%f48,%f4 faddd %f24,%f48,%f24 fmuld %f12,%f16,%f16 faddd %f14,%f60,%f14 fmuld %f2,%f4,%f4 fmuld %f22,%f24,%f24 faddd %f16,%f54,%f16 fmuld %f12,%f14,%f14 faddd %f4,%f46,%f4 faddd %f24,%f46,%f24 fmuld %f10,%f16,%f16 ldd [%l4+%l1],%f12 fmuld %f14,%f36,%f14 lda [%i1]%asi,%l0 ! preload next argument fmuld %f2,%f4,%f4 lda [%i1]%asi,%f0 fmuld %f16,%f38,%f16 lda [%i1+4]%asi,%f1 fmuld %f22,%f24,%f24 add %i1,%i2,%i1 ! x += stridex fmuld %f6,%f4,%f4 faddd %f16,%f14,%f16 fmuld %f20,%f24,%f24 faddd %f6,%f4,%f6 faddd %f16,%f12,%f16 faddd %f20,%f24,%f26 fors %f6,%f9,%f6 andn %l0,%i5,%l0 ! hx &= ~0x80000000 faddd %f16,%f36,%f16 addcc %i0,-1,%i0 fors %f26,%f29,%f26 bg,pt %icc,.loop0 ! delay slot fors %f16,%f19,%f16 ba,pt %icc,.endloop0 ! delay slot nop .align 32 .case6: st %f27,[%o5+4] cmp %l2,%l5 fpadd32s %f20,%f31,%f28 bl,pn %icc,.case7 ! 
delay slot sethi %hi(0x3fc3c000),%o7 add %l3,8,%g1 fand %f28,%f44,%f22 fmuld %f0,%f0,%f2 sub %l2,%o7,%l2 fmuld %f10,%f10,%f12 fsubd %f20,%f22,%f20 srl %l2,10,%l2 mov %o2,%o5 fmovd %f0,%f6 fmuld %f2,%f52,%f4 mov %o0,%o3 fmuld %f12,%f52,%f14 mov %o1,%o4 fmuld %f20,%f20,%f22 andn %l2,0x1f,%l2 faddd %f4,%f50,%f4 faddd %f14,%f50,%f14 fmuld %f22,%f58,%f26 ldd [%l3+%l2],%f40 fmuld %f2,%f4,%f4 fmuld %f12,%f14,%f14 faddd %f26,%f56,%f26 fmuld %f22,%f62,%f24 ldd [%g1+%l2],%f42 faddd %f4,%f48,%f4 faddd %f14,%f48,%f14 fmuld %f22,%f26,%f26 faddd %f24,%f60,%f24 fmuld %f2,%f4,%f4 fmuld %f12,%f14,%f14 faddd %f26,%f54,%f26 fmuld %f22,%f24,%f24 faddd %f4,%f46,%f4 faddd %f14,%f46,%f14 fmuld %f20,%f26,%f26 ldd [%l4+%l2],%f22 fmuld %f24,%f40,%f24 lda [%i1]%asi,%l0 ! preload next argument fmuld %f2,%f4,%f4 lda [%i1]%asi,%f0 fmuld %f26,%f42,%f26 lda [%i1+4]%asi,%f1 fmuld %f12,%f14,%f14 add %i1,%i2,%i1 ! x += stridex fmuld %f6,%f4,%f4 faddd %f26,%f24,%f26 fmuld %f10,%f14,%f14 faddd %f6,%f4,%f6 faddd %f26,%f22,%f26 faddd %f10,%f14,%f16 fors %f6,%f9,%f6 andn %l0,%i5,%l0 ! hx &= ~0x80000000 faddd %f26,%f40,%f26 addcc %i0,-1,%i0 fors %f16,%f19,%f16 bg,pt %icc,.loop0 ! delay slot fors %f26,%f29,%f26 ba,pt %icc,.endloop0 ! delay slot nop .align 32 .case7: fmuld %f0,%f0,%f2 fmovd %f0,%f6 mov %o0,%o3 fmuld %f10,%f10,%f12 mov %o1,%o4 fmuld %f20,%f20,%f22 mov %o2,%o5 fmuld %f2,%f52,%f4 lda [%i1]%asi,%l0 ! preload next argument fmuld %f12,%f52,%f14 lda [%i1]%asi,%f0 fmuld %f22,%f52,%f24 lda [%i1+4]%asi,%f1 faddd %f4,%f50,%f4 add %i1,%i2,%i1 ! 
x += stridex faddd %f14,%f50,%f14 faddd %f24,%f50,%f24 fmuld %f2,%f4,%f4 fmuld %f12,%f14,%f14 fmuld %f22,%f24,%f24 faddd %f4,%f48,%f4 faddd %f14,%f48,%f14 faddd %f24,%f48,%f24 fmuld %f2,%f4,%f4 fmuld %f12,%f14,%f14 fmuld %f22,%f24,%f24 faddd %f4,%f46,%f4 faddd %f14,%f46,%f14 faddd %f24,%f46,%f24 fmuld %f2,%f4,%f4 fmuld %f12,%f14,%f14 fmuld %f22,%f24,%f24 fmuld %f6,%f4,%f4 fmuld %f10,%f14,%f14 fmuld %f20,%f24,%f24 faddd %f6,%f4,%f6 faddd %f10,%f14,%f16 faddd %f20,%f24,%f26 andn %l0,%i5,%l0 ! hx &= ~0x80000000 fors %f6,%f9,%f6 addcc %i0,-1,%i0 fors %f16,%f19,%f16 bg,pt %icc,.loop0 ! delay slot fors %f26,%f29,%f26 ba,pt %icc,.endloop0 ! delay slot nop .align 32 .endloop2: cmp %l1,%l5 bl,pn %icc,1f ! delay slot fabsd %f10,%f10 sethi %hi(0x3fc3c000),%o7 fpadd32s %f10,%f31,%f18 add %l3,8,%g1 fand %f18,%f44,%f12 sub %l1,%o7,%l1 fsubd %f10,%f12,%f10 srl %l1,10,%l1 fmuld %f10,%f10,%f12 andn %l1,0x1f,%l1 fmuld %f12,%f58,%f20 ldd [%l3+%l1],%f36 faddd %f20,%f56,%f20 fmuld %f12,%f62,%f14 ldd [%g1+%l1],%f38 fmuld %f12,%f20,%f20 faddd %f14,%f60,%f14 faddd %f20,%f54,%f20 fmuld %f12,%f14,%f14 fmuld %f10,%f20,%f20 ldd [%l4+%l1],%f12 fmuld %f14,%f36,%f14 fmuld %f20,%f38,%f20 faddd %f20,%f14,%f20 faddd %f20,%f12,%f20 ba,pt %icc,2f ! delay slot faddd %f20,%f36,%f20 1: fmuld %f10,%f10,%f12 fmuld %f12,%f52,%f14 faddd %f14,%f50,%f14 fmuld %f12,%f14,%f14 faddd %f14,%f48,%f14 fmuld %f12,%f14,%f14 faddd %f14,%f46,%f14 fmuld %f12,%f14,%f14 fmuld %f10,%f14,%f14 faddd %f10,%f14,%f20 2: fors %f20,%f19,%f20 st %f20,[%o1] st %f21,[%o1+4] .endloop1: cmp %l0,%l5 bl,pn %icc,1f ! 
delay slot fabsd %f0,%f0 sethi %hi(0x3fc3c000),%o7 fpadd32s %f0,%f31,%f8 add %l3,8,%g1 fand %f8,%f44,%f2 sub %l0,%o7,%l0 fsubd %f0,%f2,%f0 srl %l0,10,%l0 fmuld %f0,%f0,%f2 andn %l0,0x1f,%l0 fmuld %f2,%f58,%f20 ldd [%l3+%l0],%f32 faddd %f20,%f56,%f20 fmuld %f2,%f62,%f4 ldd [%g1+%l0],%f34 fmuld %f2,%f20,%f20 faddd %f4,%f60,%f4 faddd %f20,%f54,%f20 fmuld %f2,%f4,%f4 fmuld %f0,%f20,%f20 ldd [%l4+%l0],%f2 fmuld %f4,%f32,%f4 fmuld %f20,%f34,%f20 faddd %f20,%f4,%f20 faddd %f20,%f2,%f20 ba,pt %icc,2f ! delay slot faddd %f20,%f32,%f20 1: fmuld %f0,%f0,%f2 fmuld %f2,%f52,%f4 faddd %f4,%f50,%f4 fmuld %f2,%f4,%f4 faddd %f4,%f48,%f4 fmuld %f2,%f4,%f4 faddd %f4,%f46,%f4 fmuld %f2,%f4,%f4 fmuld %f0,%f4,%f4 faddd %f0,%f4,%f20 2: fors %f20,%f9,%f20 st %f20,[%o0] st %f21,[%o0+4] .endloop0: st %f6,[%o3] st %f7,[%o3+4] st %f16,[%o4] st %f17,[%o4+4] st %f26,[%o5] st %f27,[%o5+4] ! return. finished off with only primary range arguments. ret restore .align 32 .range0: cmp %l0,LIM_l6 bg,a,pt %icc,.MEDIUM ! branch if x is not tiny ! delay slot, annulled if branch not taken mov 0x1,LIM_l6 ! set "processing loop0" st %f0,[%o0] ! *y = *x with inexact if x nonzero st %f1,[%o0+4] fdtoi %f0,%f2 addcc %i0,-1,%i0 ble,pn %icc,.endloop0 ! delay slot, harmless if branch taken add %i3,%i4,%i3 ! y += stridey andn %l1,%i5,%l0 ! hx &= ~0x80000000 fmovd %f10,%f0 ba,pt %icc,.loop0 ! delay slot add %i1,%i2,%i1 ! x += stridex .align 32 .range1: cmp %l1,LIM_l6 bg,a,pt %icc,.MEDIUM ! branch if x is not tiny ! delay slot, annulled if branch not taken mov 0x2,LIM_l6 ! set "processing loop1" st %f10,[%o1] ! *y = *x with inexact if x nonzero st %f11,[%o1+4] fdtoi %f10,%f12 addcc %i0,-1,%i0 ble,pn %icc,.endloop1 ! delay slot, harmless if branch taken add %i3,%i4,%i3 ! y += stridey andn %l2,%i5,%l1 ! hx &= ~0x80000000 fmovd %f20,%f10 ba,pt %icc,.loop1 ! delay slot add %i1,%i2,%i1 ! x += stridex .align 32 .range2: cmp %l2,LIM_l6 bg,a,pt %icc,.MEDIUM ! branch if x is not tiny ! 
delay slot, annulled if branch not taken mov 0x3,LIM_l6 ! set "processing loop2" st %f20,[%o2] ! *y = *x with inexact if x nonzero st %f21,[%o2+4] fdtoi %f20,%f22 1: addcc %i0,-1,%i0 ble,pn %icc,.endloop2 ! delay slot nop ld [%i1],%l2 ld [%i1],%f20 ld [%i1+4],%f21 andn %l2,%i5,%l2 ! hx &= ~0x80000000 ba,pt %icc,.loop2 ! delay slot add %i1,%i2,%i1 ! x += stridex .align 32 .MEDIUM: ! ========== medium range ========== ! register use ! i0 n ! i1 x ! i2 stridex ! i3 y ! i4 stridey ! i5 0x80000000 ! l0 hx0 ! l1 hx1 ! l2 hx2 ! l3 __vlibm_TBL_sincos_hi ! l4 __vlibm_TBL_sincos_lo ! l5 constants ! l6 in transition from pri-range and here, use for biguns ! l7 0x413921fb ! the following are 64-bit registers in both V8+ and V9 ! g1 scratch ! g5 ! o0 py0 ! o1 py1 ! o2 py2 ! o3 n0 ! o4 n1 ! o5 n2 ! o7 scratch ! f0 x0 ! f2 n0,y0 ! f4 ! f6 ! f8 scratch for table base ! f9 signbit0 ! f10 x1 ! f12 n1,y1 ! f14 ! f16 ! f18 scratch for table base ! f19 signbit1 ! f20 x2 ! f22 n2,y2 ! f24 ! f26 ! f28 scratch for table base ! f29 signbit2 ! f30 0x80000000 ! f31 0x4000 ! f32 ! f34 ! f36 ! f38 ! f40 invpio2 ! f42 round ! f44 0xffff800000000000 ! f46 pio2_1 ! f48 pio2_2 ! f50 pio2_3 ! f52 pio2_3t ! f54 one ! f56 pp1 ! f58 pp2 ! f60 qq1 ! f62 qq2 PIC_SET(g5,constants,l5) ! %o3,%o4,%o5 need to be stored st %f6,[%o3] sethi %hi(0x413921fb),%l7 st %f7,[%o3+4] or %l7,%lo(0x413921fb),%l7 st %f16,[%o4] st %f17,[%o4+4] st %f26,[%o5] st %f27,[%o5+4] ldd [%l5+invpio2],%f40 ldd [%l5+round],%f42 ldd [%l5+pio2_1],%f46 ldd [%l5+pio2_2],%f48 ldd [%l5+pio2_3],%f50 ldd [%l5+pio2_3t],%f52 std %f54,[%fp+x0_1+8] ! set up stack data std %f54,[%fp+x1_1+8] std %f54,[%fp+x2_1+8] stx %g0,[%fp+y0_0+8] stx %g0,[%fp+y1_0+8] stx %g0,[%fp+y2_0+8] ! branched here in the middle of the array. Need to adjust ! for the members of the triple that were selected in the primary ! loop. ! no adjustment since all three selected here subcc LIM_l6,0x1,%g0 ! continue in LOOP0? bz,a %icc,.LOOP0 mov 0x0,LIM_l6 ! 
delay slot set biguns=0 ! adjust 1st triple since 2nd and 3rd done here subcc LIM_l6,0x2,%g0 ! continue in LOOP1? fors %f0,%f9,%f0 ! restore sign bit fmuld %f0,%f40,%f2 ! adj LOOP0 bz,a %icc,.LOOP1 mov 0x0,LIM_l6 ! delay slot set biguns=0 ! adjust 1st and 2nd triple since 3rd done here subcc LIM_l6,0x3,%g0 ! continue in LOOP2? !done fmuld %f0,%f40,%f2 ! adj LOOP0 sub %i3,%i4,%i3 ! adjust to not double increment fors %f10,%f19,%f10 ! restore sign bit fmuld %f10,%f40,%f12 ! adj LOOP1 faddd %f2,%f42,%f2 ! adj LOOP1 bz,a %icc,.LOOP2 mov 0x0,LIM_l6 ! delay slot set biguns=0 .align 32 .LOOP0: lda [%i1]%asi,%l1 ! preload next argument mov %i3,%o0 ! py0 = y lda [%i1]%asi,%f10 cmp %l0,%l7 add %i3,%i4,%i3 ! y += stridey bg,pn %icc,.BIG0 ! if hx > 0x413921fb ! delay slot lda [%i1+4]%asi,%f11 addcc %i0,-1,%i0 add %i1,%i2,%i1 ! x += stridex ble,pn %icc,.ENDLOOP1 ! delay slot andn %l1,%i5,%l1 nop fmuld %f0,%f40,%f2 fabsd %f54,%f54 ! a nop for alignment only .LOOP1: lda [%i1]%asi,%l2 ! preload next argument mov %i3,%o1 ! py1 = y lda [%i1]%asi,%f20 cmp %l1,%l7 add %i3,%i4,%i3 ! y += stridey bg,pn %icc,.BIG1 ! if hx > 0x413921fb ! delay slot lda [%i1+4]%asi,%f21 addcc %i0,-1,%i0 add %i1,%i2,%i1 ! x += stridex ble,pn %icc,.ENDLOOP2 ! delay slot andn %l2,%i5,%l2 nop fmuld %f10,%f40,%f12 faddd %f2,%f42,%f2 .LOOP2: st %f3,[%fp+n0] mov %i3,%o2 ! py2 = y cmp %l2,%l7 add %i3,%i4,%i3 ! y += stridey fmuld %f20,%f40,%f22 bg,pn %icc,.BIG2 ! if hx > 0x413921fb ! delay slot add %l5,thresh+4,%o7 faddd %f12,%f42,%f12 st %f13,[%fp+n1] ! - add %l5,thresh,%g1 faddd %f22,%f42,%f22 st %f23,[%fp+n2] fsubd %f2,%f42,%f2 ! n fsubd %f12,%f42,%f12 ! n fsubd %f22,%f42,%f22 !
n fmuld %f2,%f46,%f4 fmuld %f12,%f46,%f14 fmuld %f22,%f46,%f24 fsubd %f0,%f4,%f4 fmuld %f2,%f48,%f6 fsubd %f10,%f14,%f14 fmuld %f12,%f48,%f16 fsubd %f20,%f24,%f24 fmuld %f22,%f48,%f26 fsubd %f4,%f6,%f0 ld [%fp+n0],%o3 fsubd %f14,%f16,%f10 ld [%fp+n1],%o4 fsubd %f24,%f26,%f20 ld [%fp+n2],%o5 fsubd %f4,%f0,%f32 and %o3,1,%o3 fsubd %f14,%f10,%f34 and %o4,1,%o4 fsubd %f24,%f20,%f36 and %o5,1,%o5 fsubd %f32,%f6,%f32 fmuld %f2,%f50,%f8 sll %o3,3,%o3 fsubd %f34,%f16,%f34 fmuld %f12,%f50,%f18 sll %o4,3,%o4 fsubd %f36,%f26,%f36 fmuld %f22,%f50,%f28 sll %o5,3,%o5 fsubd %f8,%f32,%f8 ld [%g1+%o3],%f6 fsubd %f18,%f34,%f18 ld [%g1+%o4],%f16 fsubd %f28,%f36,%f28 ld [%g1+%o5],%f26 fsubd %f0,%f8,%f4 fsubd %f10,%f18,%f14 fsubd %f20,%f28,%f24 fsubd %f0,%f4,%f32 fsubd %f10,%f14,%f34 fsubd %f20,%f24,%f36 fsubd %f32,%f8,%f32 fmuld %f2,%f52,%f2 fsubd %f34,%f18,%f34 fmuld %f12,%f52,%f12 fsubd %f36,%f28,%f36 fmuld %f22,%f52,%f22 fsubd %f2,%f32,%f2 ld [%o7+%o3],%f8 fsubd %f12,%f34,%f12 ld [%o7+%o4],%f18 fsubd %f22,%f36,%f22 ld [%o7+%o5],%f28 fsubd %f4,%f2,%f0 ! x fsubd %f14,%f12,%f10 ! x fsubd %f24,%f22,%f20 ! x fsubd %f4,%f0,%f4 fsubd %f14,%f10,%f14 fsubd %f24,%f20,%f24 fands %f0,%f30,%f9 ! save signbit fands %f10,%f30,%f19 ! save signbit fands %f20,%f30,%f29 ! save signbit fabsd %f0,%f0 std %f0,[%fp+x0_1] fabsd %f10,%f10 std %f10,[%fp+x1_1] fabsd %f20,%f20 std %f20,[%fp+x2_1] fsubd %f4,%f2,%f2 ! y fsubd %f14,%f12,%f12 ! y fsubd %f24,%f22,%f22 ! y fcmpgt32 %f6,%f0,%l0 fcmpgt32 %f16,%f10,%l1 fcmpgt32 %f26,%f20,%l2 ! -- 16 byte aligned fxors %f2,%f9,%f2 fxors %f12,%f19,%f12 fxors %f22,%f29,%f22 fands %f9,%f8,%f9 ! if (n & 1) clear sign bit andcc %l0,2,%g0 bne,pn %icc,.CASE4 ! delay slot fands %f19,%f18,%f19 ! if (n & 1) clear sign bit andcc %l1,2,%g0 bne,pn %icc,.CASE2 ! delay slot fands %f29,%f28,%f29 ! if (n & 1) clear sign bit andcc %l2,2,%g0 bne,pn %icc,.CASE1 ! 
delay slot fpadd32s %f0,%f31,%f8 sethi %hi(0x3fc3c000),%o7 ld [%fp+x0_1],%l0 fpadd32s %f10,%f31,%f18 add %l3,8,%g1 ld [%fp+x1_1],%l1 fpadd32s %f20,%f31,%f28 ld [%fp+x2_1],%l2 fand %f8,%f44,%f4 sub %l0,%o7,%l0 fand %f18,%f44,%f14 sub %l1,%o7,%l1 fand %f28,%f44,%f24 sub %l2,%o7,%l2 fsubd %f0,%f4,%f0 srl %l0,10,%l0 fsubd %f10,%f14,%f10 srl %l1,10,%l1 fsubd %f20,%f24,%f20 srl %l2,10,%l2 faddd %f0,%f2,%f0 andn %l0,0x1f,%l0 faddd %f10,%f12,%f10 andn %l1,0x1f,%l1 faddd %f20,%f22,%f20 andn %l2,0x1f,%l2 fmuld %f0,%f0,%f2 add %l0,%o3,%l0 fmuld %f10,%f10,%f12 add %l1,%o4,%l1 fmuld %f20,%f20,%f22 add %l2,%o5,%l2 fmuld %f2,%f58,%f6 ldd [%l3+%l0],%f32 fmuld %f12,%f58,%f16 ldd [%l3+%l1],%f34 fmuld %f22,%f58,%f26 ldd [%l3+%l2],%f36 faddd %f6,%f56,%f6 fmuld %f2,%f62,%f4 faddd %f16,%f56,%f16 fmuld %f12,%f62,%f14 faddd %f26,%f56,%f26 fmuld %f22,%f62,%f24 fmuld %f2,%f6,%f6 faddd %f4,%f60,%f4 fmuld %f12,%f16,%f16 faddd %f14,%f60,%f14 fmuld %f22,%f26,%f26 faddd %f24,%f60,%f24 faddd %f6,%f54,%f6 fmuld %f2,%f4,%f4 faddd %f16,%f54,%f16 fmuld %f12,%f14,%f14 faddd %f26,%f54,%f26 fmuld %f22,%f24,%f24 fmuld %f0,%f6,%f6 ldd [%g1+%l0],%f2 fmuld %f10,%f16,%f16 ldd [%g1+%l1],%f12 fmuld %f20,%f26,%f26 ldd [%g1+%l2],%f22 fmuld %f4,%f32,%f4 ldd [%l4+%l0],%f0 fmuld %f14,%f34,%f14 ldd [%l4+%l1],%f10 fmuld %f24,%f36,%f24 ldd [%l4+%l2],%f20 fmuld %f6,%f2,%f6 fmuld %f16,%f12,%f16 fmuld %f26,%f22,%f26 faddd %f6,%f4,%f6 faddd %f16,%f14,%f16 faddd %f26,%f24,%f26 faddd %f6,%f0,%f6 faddd %f16,%f10,%f16 faddd %f26,%f20,%f26 faddd %f6,%f32,%f6 faddd %f16,%f34,%f16 faddd %f26,%f36,%f26 .FIXSIGN: ld [%fp+n0],%o3 add %l5,thresh-4,%g1 ld [%fp+n1],%o4 ld [%fp+n2],%o5 and %o3,2,%o3 sll %o3,2,%o3 and %o4,2,%o4 lda [%i1]%asi,%l0 ! preload next argument sll %o4,2,%o4 and %o5,2,%o5 ld [%g1+%o3],%f8 sll %o5,2,%o5 ld [%g1+%o4],%f18 ld [%g1+%o5],%f28 fxors %f9,%f8,%f9 lda [%i1]%asi,%f0 fxors %f29,%f28,%f29 lda [%i1+4]%asi,%f1 fxors %f19,%f18,%f19 fors %f6,%f9,%f6 ! tack on sign add %i1,%i2,%i1 ! 
x += stridex st %f6,[%o0] fors %f26,%f29,%f26 ! tack on sign st %f7,[%o0+4] fors %f16,%f19,%f16 ! tack on sign st %f26,[%o2] st %f27,[%o2+4] addcc %i0,-1,%i0 st %f16,[%o1] andn %l0,%i5,%l0 ! hx &= ~0x80000000 bg,pt %icc,.LOOP0 ! delay slot st %f17,[%o1+4] ba,pt %icc,.ENDLOOP0 ! delay slot nop .align 32 .CASE1: fpadd32s %f10,%f31,%f18 sethi %hi(0x3fc3c000),%o7 ld [%fp+x0_1],%l0 fand %f8,%f44,%f4 add %l3,8,%g1 ld [%fp+x1_1],%l1 fand %f18,%f44,%f14 sub %l0,%o7,%l0 fsubd %f0,%f4,%f0 srl %l0,10,%l0 sub %l1,%o7,%l1 fsubd %f10,%f14,%f10 srl %l1,10,%l1 fmuld %f20,%f20,%f20 ldd [%l5+%o5],%f36 add %l5,%o5,%l2 faddd %f0,%f2,%f0 andn %l0,0x1f,%l0 faddd %f10,%f12,%f10 andn %l1,0x1f,%l1 fmuld %f20,%f36,%f24 ldd [%l2+0x10],%f26 add %fp,%o5,%o5 fmuld %f0,%f0,%f2 add %l0,%o3,%l0 fmuld %f10,%f10,%f12 add %l1,%o4,%l1 faddd %f24,%f26,%f24 ldd [%l2+0x20],%f36 fmuld %f2,%f58,%f6 ldd [%l3+%l0],%f32 fmuld %f12,%f58,%f16 ldd [%l3+%l1],%f34 fmuld %f20,%f24,%f24 ldd [%l2+0x30],%f26 faddd %f6,%f56,%f6 fmuld %f2,%f62,%f4 faddd %f16,%f56,%f16 fmuld %f12,%f62,%f14 faddd %f24,%f36,%f24 ldd [%o5+x2_1],%f36 fmuld %f2,%f6,%f6 faddd %f4,%f60,%f4 fmuld %f12,%f16,%f16 faddd %f14,%f60,%f14 fmuld %f20,%f24,%f24 faddd %f6,%f54,%f6 fmuld %f2,%f4,%f4 ldd [%g1+%l0],%f2 faddd %f16,%f54,%f16 fmuld %f12,%f14,%f14 ldd [%g1+%l1],%f12 faddd %f24,%f26,%f24 fmuld %f0,%f6,%f6 ldd [%l4+%l0],%f0 fmuld %f10,%f16,%f16 ldd [%l4+%l1],%f10 fmuld %f4,%f32,%f4 std %f22,[%fp+y2_0] fmuld %f14,%f34,%f14 fmuld %f6,%f2,%f6 fmuld %f16,%f12,%f16 fmuld %f20,%f24,%f24 faddd %f6,%f4,%f6 faddd %f16,%f14,%f16 fmuld %f36,%f24,%f24 ldd [%o5+y2_0],%f22 faddd %f6,%f0,%f6 faddd %f16,%f10,%f16 faddd %f24,%f22,%f24 faddd %f6,%f32,%f6 faddd %f16,%f34,%f16 ba,pt %icc,.FIXSIGN ! delay slot faddd %f36,%f24,%f26 .align 32 .CASE2: fpadd32s %f0,%f31,%f8 ld [%fp+x0_1],%l0 andcc %l2,2,%g0 bne,pn %icc,.CASE3 ! 
delay slot sethi %hi(0x3fc3c000),%o7 fpadd32s %f20,%f31,%f28 ld [%fp+x2_1],%l2 fand %f8,%f44,%f4 sub %l0,%o7,%l0 add %l3,8,%g1 fand %f28,%f44,%f24 sub %l2,%o7,%l2 fsubd %f0,%f4,%f0 srl %l0,10,%l0 fsubd %f20,%f24,%f20 srl %l2,10,%l2 fmuld %f10,%f10,%f10 ldd [%l5+%o4],%f34 add %l5,%o4,%l1 faddd %f0,%f2,%f0 andn %l0,0x1f,%l0 faddd %f20,%f22,%f20 andn %l2,0x1f,%l2 fmuld %f10,%f34,%f14 ldd [%l1+0x10],%f16 add %fp,%o4,%o4 fmuld %f0,%f0,%f2 add %l0,%o3,%l0 fmuld %f20,%f20,%f22 add %l2,%o5,%l2 faddd %f14,%f16,%f14 ldd [%l1+0x20],%f34 fmuld %f2,%f58,%f6 ldd [%l3+%l0],%f32 fmuld %f22,%f58,%f26 ldd [%l3+%l2],%f36 fmuld %f10,%f14,%f14 ldd [%l1+0x30],%f16 faddd %f6,%f56,%f6 fmuld %f2,%f62,%f4 faddd %f26,%f56,%f26 fmuld %f22,%f62,%f24 faddd %f14,%f34,%f14 ldd [%o4+x1_1],%f34 fmuld %f2,%f6,%f6 faddd %f4,%f60,%f4 fmuld %f22,%f26,%f26 faddd %f24,%f60,%f24 fmuld %f10,%f14,%f14 faddd %f6,%f54,%f6 fmuld %f2,%f4,%f4 ldd [%g1+%l0],%f2 faddd %f26,%f54,%f26 fmuld %f22,%f24,%f24 ldd [%g1+%l2],%f22 faddd %f14,%f16,%f14 fmuld %f0,%f6,%f6 ldd [%l4+%l0],%f0 fmuld %f20,%f26,%f26 ldd [%l4+%l2],%f20 fmuld %f4,%f32,%f4 std %f12,[%fp+y1_0] fmuld %f24,%f36,%f24 fmuld %f6,%f2,%f6 fmuld %f26,%f22,%f26 fmuld %f10,%f14,%f14 faddd %f6,%f4,%f6 faddd %f26,%f24,%f26 fmuld %f34,%f14,%f14 ldd [%o4+y1_0],%f12 faddd %f6,%f0,%f6 faddd %f26,%f20,%f26 faddd %f14,%f12,%f14 faddd %f6,%f32,%f6 faddd %f26,%f36,%f26 ba,pt %icc,.FIXSIGN ! 
delay slot faddd %f34,%f14,%f16 .align 32 .CASE3: fand %f8,%f44,%f4 add %l3,8,%g1 sub %l0,%o7,%l0 fmuld %f10,%f10,%f10 ldd [%l5+%o4],%f34 add %l5,%o4,%l1 fsubd %f0,%f4,%f0 srl %l0,10,%l0 fmuld %f20,%f20,%f20 ldd [%l5+%o5],%f36 add %l5,%o5,%l2 fmuld %f10,%f34,%f14 ldd [%l1+0x10],%f16 add %fp,%o4,%o4 faddd %f0,%f2,%f0 andn %l0,0x1f,%l0 fmuld %f20,%f36,%f24 ldd [%l2+0x10],%f26 add %fp,%o5,%o5 faddd %f14,%f16,%f14 ldd [%l1+0x20],%f34 fmuld %f0,%f0,%f2 add %l0,%o3,%l0 faddd %f24,%f26,%f24 ldd [%l2+0x20],%f36 fmuld %f10,%f14,%f14 ldd [%l1+0x30],%f16 fmuld %f2,%f58,%f6 ldd [%l3+%l0],%f32 fmuld %f20,%f24,%f24 ldd [%l2+0x30],%f26 faddd %f14,%f34,%f14 ldd [%o4+x1_1],%f34 faddd %f6,%f56,%f6 fmuld %f2,%f62,%f4 faddd %f24,%f36,%f24 ldd [%o5+x2_1],%f36 fmuld %f10,%f14,%f14 std %f12,[%fp+y1_0] fmuld %f2,%f6,%f6 faddd %f4,%f60,%f4 fmuld %f20,%f24,%f24 std %f22,[%fp+y2_0] faddd %f14,%f16,%f14 faddd %f6,%f54,%f6 fmuld %f2,%f4,%f4 ldd [%g1+%l0],%f2 faddd %f24,%f26,%f24 fmuld %f10,%f14,%f14 fmuld %f0,%f6,%f6 ldd [%l4+%l0],%f0 fmuld %f4,%f32,%f4 fmuld %f20,%f24,%f24 fmuld %f6,%f2,%f6 fmuld %f34,%f14,%f14 ldd [%o4+y1_0],%f12 fmuld %f36,%f24,%f24 ldd [%o5+y2_0],%f22 faddd %f6,%f4,%f6 faddd %f14,%f12,%f14 faddd %f24,%f22,%f24 faddd %f6,%f0,%f6 faddd %f34,%f14,%f16 faddd %f36,%f24,%f26 ba,pt %icc,.FIXSIGN ! delay slot faddd %f6,%f32,%f6 .align 32 .CASE4: fands %f29,%f28,%f29 ! if (n & 1) clear sign bit sethi %hi(0x3fc3c000),%o7 andcc %l1,2,%g0 bne,pn %icc,.CASE6 ! delay slot andcc %l2,2,%g0 fpadd32s %f10,%f31,%f18 ld [%fp+x1_1],%l1 bne,pn %icc,.CASE5 ! 
! delay slot
	add	%l3,8,%g1		! %g1 = table base + 8 (executed whether or not the branch is taken)

! Fall-through body of .CASE4: element 0 takes the polynomial path;
! elements 1 and 2 take the table-lookup path.
!
!   polynomial path (x0 via %o3):
!	z = x*x;  t = (((c0*z + c1)*z + c2)*z + c3)*z;  result = s + (s*t + u)
!     c0..c3 come from offsets 0, 0x10, 0x20, 0x30 off %l5+%o3; s and u are
!     reloaded from the x0_1 / y0_0 stack slots indexed by the old %o3.
!     (Assumes %l5 is the `constants` base and %o3 is 0 or 8 - TODO confirm.)
!
!   table path (x1 via %l1/%o4, x2 via %l2/%o5):
!	r = (x - (biased x & %f44)) + y;  z = r*r
!	index = (((hx - %o7) >> 10) & ~0x1f) + offset register
!	result = r*((z*%f58 + %f56)*z + %f54)*T1 + z*(z*%f62 + %f60)*T0 + T2 + T0
!     with T0 = [%l3+index], T1 = [%g1+index], T2 = [%l4+index].
!
! Results are left in %f6, %f16, %f26 for .FIXSIGN.
	ld	[%fp+x2_1],%l2		! high word of the saved x2
	fpadd32s %f20,%f31,%f28

	fand	%f18,%f44,%f14		! x1: masked head
	sub	%l1,%o7,%l1

	fand	%f28,%f44,%f24		! x2: masked head
	sub	%l2,%o7,%l2

	fsubd	%f10,%f14,%f10
	srl	%l1,10,%l1

	fsubd	%f20,%f24,%f20
	srl	%l2,10,%l2

	fmuld	%f0,%f0,%f0		! x0: z = x*x
	ldd	[%l5+%o3],%f32		! x0: c0
	add	%l5,%o3,%l0		! x0: coefficient base

	faddd	%f10,%f12,%f10		! x1: r
	andn	%l1,0x1f,%l1

	faddd	%f20,%f22,%f20		! x2: r
	andn	%l2,0x1f,%l2

	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6		! x0: c1
	add	%fp,%o3,%o3		! %o3 becomes a frame-relative index

	fmuld	%f10,%f10,%f12		! x1: z = r*r
	add	%l1,%o4,%l1		! x1: final table index

	fmuld	%f20,%f20,%f22		! x2: z = r*r
	add	%l2,%o5,%l2		! x2: final table index

	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32		! x0: c2

	fmuld	%f12,%f58,%f16
	ldd	[%l3+%l1],%f34		! x1: T0

	fmuld	%f22,%f58,%f26
	ldd	[%l3+%l2],%f36		! x2: T0

	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6		! x0: c3

	faddd	%f16,%f56,%f16
	fmuld	%f12,%f62,%f14

	faddd	%f26,%f56,%f26
	fmuld	%f22,%f62,%f24

	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32		! x0: s

	fmuld	%f12,%f16,%f16
	faddd	%f14,%f60,%f14

	fmuld	%f22,%f26,%f26
	faddd	%f24,%f60,%f24

	fmuld	%f0,%f4,%f4

	faddd	%f16,%f54,%f16
	fmuld	%f12,%f14,%f14
	ldd	[%g1+%l1],%f12		! x1: T1

	faddd	%f26,%f54,%f26
	fmuld	%f22,%f24,%f24
	ldd	[%g1+%l2],%f22		! x2: T1

	faddd	%f4,%f6,%f4

	fmuld	%f10,%f16,%f16
	ldd	[%l4+%l1],%f10		! x1: T2

	fmuld	%f20,%f26,%f26
	ldd	[%l4+%l2],%f20		! x2: T2

	fmuld	%f14,%f34,%f14
	std	%f2,[%fp+y0_0]		! spill y0 so it can be reloaded through %o3

	fmuld	%f24,%f36,%f24

	fmuld	%f0,%f4,%f4

	fmuld	%f16,%f12,%f16

	fmuld	%f26,%f22,%f26

	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2		! x0: u

	faddd	%f16,%f14,%f16

	faddd	%f26,%f24,%f26

	faddd	%f4,%f2,%f4

	faddd	%f16,%f10,%f16

	faddd	%f26,%f20,%f26

	faddd	%f32,%f4,%f6		! x0 result

	faddd	%f16,%f34,%f16		! x1 result

	ba,pt	%icc,.FIXSIGN		!
! delay slot
	faddd	%f26,%f36,%f26		! x2 result of the preceding block, issued in the branch delay slot

! .CASE5: elements 0 and 2 take the polynomial path; element 1 takes the
! table-lookup path.  On entry %f18 (biased x1), %l1 (high word of x1),
! %o7 and %g1 (= %l3+8) have already been set by the .CASE4 dispatch code.
!
!   polynomial path (x0 via %o3, x2 via %o5):
!	z = x*x;  t = (((c0*z + c1)*z + c2)*z + c3)*z;  result = s + (s*t + u)
!     c0..c3 at offsets 0, 0x10, 0x20, 0x30 from %l5 + offset register;
!     s, u reloaded from the x?_1 / y?_0 stack slots indexed by that register.
!     (Assumes %l5 is the `constants` base - TODO confirm.)
!
!   table path (x1):
!	r = (x1 - (%f18 & %f44)) + y1;  z = r*r
!	index = (((%l1 - %o7) >> 10) & ~0x1f) + %o4
!	result = r*((z*%f58 + %f56)*z + %f54)*T1 + z*(z*%f62 + %f60)*T0 + T2 + T0
!
! Results are left in %f6, %f16, %f26 for .FIXSIGN.
	.align	32
.CASE5:
	fand	%f18,%f44,%f14		! x1: masked head
	sub	%l1,%o7,%l1

	fmuld	%f0,%f0,%f0		! x0: z = x*x
	ldd	[%l5+%o3],%f32		! x0: c0
	add	%l5,%o3,%l0		! x0: coefficient base

	fsubd	%f10,%f14,%f10
	srl	%l1,10,%l1

	fmuld	%f20,%f20,%f20		! x2: z = x*x
	ldd	[%l5+%o5],%f36		! x2: c0
	add	%l5,%o5,%l2		! x2: coefficient base

	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6		! x0: c1
	add	%fp,%o3,%o3		! %o3 becomes a frame-relative index

	faddd	%f10,%f12,%f10		! x1: r
	andn	%l1,0x1f,%l1

	fmuld	%f20,%f36,%f24
	ldd	[%l2+0x10],%f26		! x2: c1
	add	%fp,%o5,%o5		! %o5 becomes a frame-relative index

	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32		! x0: c2

	fmuld	%f10,%f10,%f12		! x1: z = r*r
	add	%l1,%o4,%l1		! x1: final table index

	faddd	%f24,%f26,%f24
	ldd	[%l2+0x20],%f36		! x2: c2

	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6		! x0: c3

	fmuld	%f12,%f58,%f16
	ldd	[%l3+%l1],%f34		! x1: T0

	fmuld	%f20,%f24,%f24
	ldd	[%l2+0x30],%f26		! x2: c3

	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32		! x0: s

	faddd	%f16,%f56,%f16
	fmuld	%f12,%f62,%f14

	faddd	%f24,%f36,%f24
	ldd	[%o5+x2_1],%f36		! x2: s

	fmuld	%f0,%f4,%f4
	std	%f2,[%fp+y0_0]		! spill y0 so it can be reloaded through %o3

	fmuld	%f12,%f16,%f16
	faddd	%f14,%f60,%f14

	fmuld	%f20,%f24,%f24
	std	%f22,[%fp+y2_0]		! spill y2 so it can be reloaded through %o5

	faddd	%f4,%f6,%f4

	faddd	%f16,%f54,%f16
	fmuld	%f12,%f14,%f14
	ldd	[%g1+%l1],%f12		! x1: T1

	faddd	%f24,%f26,%f24

	fmuld	%f0,%f4,%f4

	fmuld	%f10,%f16,%f16
	ldd	[%l4+%l1],%f10		! x1: T2

	fmuld	%f14,%f34,%f14

	fmuld	%f20,%f24,%f24

	fmuld	%f16,%f12,%f16

	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2		! x0: u

	fmuld	%f36,%f24,%f24
	ldd	[%o5+y2_0],%f22		! x2: u

	faddd	%f16,%f14,%f16

	faddd	%f4,%f2,%f4

	faddd	%f24,%f22,%f24

	faddd	%f16,%f10,%f16

	faddd	%f32,%f4,%f6		! x0 result

	faddd	%f36,%f24,%f26		! x2 result

	ba,pt	%icc,.FIXSIGN
! delay slot
	faddd	%f16,%f34,%f16		! x1 result

! .CASE6: reached from .CASE4 with the condition codes of
! "andcc %l2,2,%g0" (executed in that branch's delay slot) still live;
! neither ld nor add modifies them, so the bne below tests bit 1 of %l2.
	.align	32
.CASE6:
	ld	[%fp+x2_1],%l2		! high word of the saved x2
	add	%l3,8,%g1		! %g1 = table base + 8
	bne,pn	%icc,.CASE7		!
! delay slot
	fpadd32s %f20,%f31,%f28		! biased copy of x2's high word (executed whether or not the branch is taken)

! Fall-through body of .CASE6: elements 0 and 1 take the polynomial path;
! element 2 takes the table-lookup path.
!
!   polynomial path (x0 via %o3, x1 via %o4):
!	z = x*x;  t = (((c0*z + c1)*z + c2)*z + c3)*z;  result = s + (s*t + u)
!     c0..c3 at offsets 0, 0x10, 0x20, 0x30 from %l5 + offset register;
!     s, u reloaded from the x?_1 / y?_0 stack slots indexed by that register.
!     (Assumes %l5 is the `constants` base - TODO confirm.)
!
!   table path (x2):
!	r = (x2 - (%f28 & %f44)) + y2;  z = r*r
!	index = (((%l2 - %o7) >> 10) & ~0x1f) + %o5
!	result = r*((z*%f58 + %f56)*z + %f54)*T1 + z*(z*%f62 + %f60)*T0 + T2 + T0
!     with T0 = [%l3+index], T1 = [%g1+index], T2 = [%l4+index].
!
! Results are left in %f6, %f16, %f26 for .FIXSIGN.
	fand	%f28,%f44,%f24		! x2: masked head
	ldd	[%l5+%o3],%f32		! x0: c0
	add	%l5,%o3,%l0		! x0: coefficient base

	fmuld	%f0,%f0,%f0		! x0: z = x*x
	sub	%l2,%o7,%l2

	fsubd	%f20,%f24,%f20
	srl	%l2,10,%l2

	fmuld	%f10,%f10,%f10		! x1: z = x*x
	ldd	[%l5+%o4],%f34		! x1: c0
	add	%l5,%o4,%l1		! x1: coefficient base

	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6		! x0: c1
	add	%fp,%o3,%o3		! %o3 becomes a frame-relative index

	faddd	%f20,%f22,%f20		! x2: r
	andn	%l2,0x1f,%l2

	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16		! x1: c1
	add	%fp,%o4,%o4		! %o4 becomes a frame-relative index

	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32		! x0: c2

	fmuld	%f20,%f20,%f22		! x2: z = r*r
	add	%l2,%o5,%l2		! x2: final table index

	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34		! x1: c2

	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6		! x0: c3

	fmuld	%f22,%f58,%f26
	ldd	[%l3+%l2],%f36		! x2: T0

	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16		! x1: c3

	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32		! x0: s

	faddd	%f26,%f56,%f26
	fmuld	%f22,%f62,%f24

	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34		! x1: s

	fmuld	%f0,%f4,%f4
	std	%f2,[%fp+y0_0]		! spill y0 so it can be reloaded through %o3

	fmuld	%f22,%f26,%f26
	faddd	%f24,%f60,%f24

	fmuld	%f10,%f14,%f14
	std	%f12,[%fp+y1_0]		! spill y1 so it can be reloaded through %o4

	faddd	%f4,%f6,%f4

	faddd	%f26,%f54,%f26
	fmuld	%f22,%f24,%f24
	ldd	[%g1+%l2],%f22		! x2: T1

	faddd	%f14,%f16,%f14

	fmuld	%f0,%f4,%f4

	fmuld	%f20,%f26,%f26
	ldd	[%l4+%l2],%f20		! x2: T2

	fmuld	%f24,%f36,%f24

	fmuld	%f10,%f14,%f14

	fmuld	%f26,%f22,%f26

	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2		! x0: u

	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12		! x1: u

	faddd	%f26,%f24,%f26

	faddd	%f4,%f2,%f4

	faddd	%f14,%f12,%f14

	faddd	%f26,%f20,%f26

	faddd	%f32,%f4,%f6		! x0 result

	faddd	%f34,%f14,%f16		! x1 result

	ba,pt	%icc,.FIXSIGN		!
! delay slot
	faddd	%f26,%f36,%f26		! x2 result of the preceding block, issued in the branch delay slot

! .CASE7: all three elements take the polynomial path.
!
!	z = x*x;  t = (((c0*z + c1)*z + c2)*z + c3)*z;  result = s + (s*t + u)
!
! For element i the offset register (%o3, %o4, %o5) selects the coefficient
! column: c0..c3 are read at offsets 0, 0x10, 0x20, 0x30 from %l5 + register,
! i.e. p4..p1 when the register is 0 and q4..q1 when it is 8 (assuming %l5
! is the `constants` base here - TODO confirm).  The same register then
! indexes the x?_1 / y?_0 stack slots to fetch s and u; the +8 halves of
! those slots are written outside this view.
!
! Results are left in %f6, %f16, %f26 for .FIXSIGN.
	.align	32
.CASE7:
	fmuld	%f0,%f0,%f0		! x0: z = x*x
	ldd	[%l5+%o3],%f32		! x0: c0
	add	%l5,%o3,%l0		! x0: coefficient base

	fmuld	%f10,%f10,%f10		! x1: z = x*x
	ldd	[%l5+%o4],%f34		! x1: c0
	add	%l5,%o4,%l1		! x1: coefficient base

	fmuld	%f20,%f20,%f20		! x2: z = x*x
	ldd	[%l5+%o5],%f36		! x2: c0
	add	%l5,%o5,%l2		! x2: coefficient base

	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6		! x0: c1
	add	%fp,%o3,%o3		! %o3 becomes a frame-relative index

	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16		! x1: c1
	add	%fp,%o4,%o4		! %o4 becomes a frame-relative index

	fmuld	%f20,%f36,%f24
	ldd	[%l2+0x10],%f26		! x2: c1
	add	%fp,%o5,%o5		! %o5 becomes a frame-relative index

	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32		! x0: c2

	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34		! x1: c2

	faddd	%f24,%f26,%f24
	ldd	[%l2+0x20],%f36		! x2: c2

	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6		! x0: c3

	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16		! x1: c3

	fmuld	%f20,%f24,%f24
	ldd	[%l2+0x30],%f26		! x2: c3

	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32		! x0: s

	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34		! x1: s

	faddd	%f24,%f36,%f24
	ldd	[%o5+x2_1],%f36		! x2: s

	fmuld	%f0,%f4,%f4
	std	%f2,[%fp+y0_0]		! spill y0 so it can be reloaded through %o3

	fmuld	%f10,%f14,%f14
	std	%f12,[%fp+y1_0]		! spill y1 so it can be reloaded through %o4

	fmuld	%f20,%f24,%f24
	std	%f22,[%fp+y2_0]		! spill y2 so it can be reloaded through %o5

	faddd	%f4,%f6,%f4

	faddd	%f14,%f16,%f14

	faddd	%f24,%f26,%f24

	fmuld	%f0,%f4,%f4

	fmuld	%f10,%f14,%f14

	fmuld	%f20,%f24,%f24

	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2		! x0: u

	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12		! x1: u

	fmuld	%f36,%f24,%f24
	ldd	[%o5+y2_0],%f22		! x2: u

	faddd	%f4,%f2,%f4

	faddd	%f14,%f12,%f14

	faddd	%f24,%f22,%f24

	faddd	%f32,%f4,%f6		! x0 result

	faddd	%f34,%f14,%f16		! x1 result

	ba,pt	%icc,.FIXSIGN
! delay slot
	faddd	%f36,%f24,%f26		! x2 result

! .ENDLOOP2: entered from .SKIP2 / .BIG2 when the element count runs out
! before a third element is available.  Processes the pending element 1
! (registers %f10-%f19, slot n1, output pointer %o1) on its own.
!
! Argument reduction, as written below:
!	n = (x*%f40 + %f42) - %f42	low word of the biased sum is stored
!					to n1 as the integer n
!	x is then reduced by n*%f46, n*%f48, n*%f50 and n*%f52 in turn, each
!	step recovering the rounding error of the previous subtraction, giving
!	a head (%f10, "x") and a tail (%f12, "y").
! NOTE(review): %f40/%f42/%f46/%f48/%f50/%f52 presumably hold invpio2, round,
! pio2_1, pio2_2, pio2_3 and pio2_3t from `constants` - they are loaded
! outside this view, confirm.
!
! %o4 = (n & 1) * 8 selects one of the two 8-byte `thresh` entries:
! word 0 (into %f16) is compared against the reduced |x|, word 1 (into %f18)
! is the mask applied to the saved sign bit.
	.align	32
.ENDLOOP2:
	fmuld	%f10,%f40,%f12
	add	%l5,thresh,%g1		! %g1 -> thresh table
	faddd	%f12,%f42,%f12
	st	%f13,[%fp+n1]		! low word of the biased product = integer n
	fsubd	%f12,%f42,%f12		! n
	fmuld	%f12,%f46,%f14
	fsubd	%f10,%f14,%f14
	fmuld	%f12,%f48,%f16
	fsubd	%f14,%f16,%f10
	ld	[%fp+n1],%o4
	fsubd	%f14,%f10,%f34
	and	%o4,1,%o4		! n & 1
	fsubd	%f34,%f16,%f34
	fmuld	%f12,%f50,%f18
	sll	%o4,3,%o4		! (n & 1) * 8
	fsubd	%f18,%f34,%f18
	ld	[%g1+%o4],%f16		! thresh entry, word 0
	fsubd	%f10,%f18,%f14
	fsubd	%f10,%f14,%f34
	add	%l5,thresh+4,%o7
	fsubd	%f34,%f18,%f34
	fmuld	%f12,%f52,%f12
	fsubd	%f12,%f34,%f12
	ld	[%o7+%o4],%f18		! thresh entry, word 1 (0x80000000 or 0)
	fsubd	%f14,%f12,%f10		! x
	fsubd	%f14,%f10,%f14
	fands	%f10,%f30,%f19		! save signbit
	fabsd	%f10,%f10
	std	%f10,[%fp+x1_1]		! save |x| so its high word can be reloaded as an integer
	fsubd	%f14,%f12,%f12		! y
	fcmpgt32 %f16,%f10,%l1		! per VIS, bit 1 of %l1 reflects the high-word comparison
	fxors	%f12,%f19,%f12		! give y the same sign treatment as |x|
	fands	%f19,%f18,%f19		!
! if (n & 1) clear sign bit

! Remainder of the single-element tail for element 1.  Bit 1 of %l1 (the
! fcmpgt32 result for the high words) chooses between the table-lookup
! path (bit clear, falls through) and the polynomial path at local
! label 1 (bit set).
	andcc	%l1,2,%g0
	bne,pn	%icc,1f
! delay slot
	nop

! table path:
!	r = (x - ((x biased by %f31) & %f44)) + y;  z = r*r
!	index = (((hx - 0x3fc3c000) >> 10) & ~0x1f) + %o4
!	result = r*((z*%f58 + %f56)*z + %f54)*T1 + z*(z*%f62 + %f60)*T0 + T2 + T0
!   with T0 = [%l3+index], T1 = [%l3+8+index], T2 = [%l4+index].
	fpadd32s %f10,%f31,%f18
	ld	[%fp+x1_1],%l1		! high word of |x|
	fand	%f18,%f44,%f14
	sethi	%hi(0x3fc3c000),%o7
	add	%l3,8,%g1
	fsubd	%f10,%f14,%f10
	sub	%l1,%o7,%l1
	srl	%l1,10,%l1
	faddd	%f10,%f12,%f10		! r
	andn	%l1,0x1f,%l1
	fmuld	%f10,%f10,%f12		! z = r*r
	add	%l1,%o4,%l1		! final table index
	fmuld	%f12,%f58,%f16
	ldd	[%l3+%l1],%f34		! T0
	faddd	%f16,%f56,%f16
	fmuld	%f12,%f62,%f14
	fmuld	%f12,%f16,%f16
	faddd	%f14,%f60,%f14
	faddd	%f16,%f54,%f16
	fmuld	%f12,%f14,%f14
	ldd	[%g1+%l1],%f12		! T1
	fmuld	%f10,%f16,%f16
	ldd	[%l4+%l1],%f10		! T2
	fmuld	%f14,%f34,%f14
	fmuld	%f16,%f12,%f16
	faddd	%f16,%f14,%f16
	faddd	%f16,%f10,%f16
	ba,pt	%icc,2f
	faddd	%f16,%f34,%f16		! delay slot: final add of the table path

! polynomial path:
!	z = x*x;  t = (((c0*z + c1)*z + c2)*z + c3)*z;  result = s + (s*t + u)
!   c0..c3 at offsets 0, 0x10, 0x20, 0x30 from %l5+%o4; s and u are reloaded
!   from the x1_1 / y1_0 stack slots indexed by the old %o4 (0 or 8).
1:
	fmuld	%f10,%f10,%f10		! z = x*x
	ldd	[%l5+%o4],%f34		! c0
	add	%l5,%o4,%l1		! coefficient base
	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16		! c1
	add	%fp,%o4,%o4		! %o4 becomes a frame-relative index
	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34		! c2
	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16		! c3
	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34		! s
	fmuld	%f10,%f14,%f14
	std	%f12,[%fp+y1_0]		! spill y so it can be reloaded through %o4
	faddd	%f14,%f16,%f14
	fmuld	%f10,%f14,%f14
	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12		! u
	faddd	%f14,%f12,%f14
	faddd	%f34,%f14,%f16

! Sign fix-up and store.  (n & 2) << 2 is 0 or 8; indexing from thresh-4
! therefore fetches either the low word of `mask` (0x00000000) or the low
! word of the first `thresh` entry (0x80000000), so the saved sign is
! flipped exactly when bit 1 of n is set.
2:
	add	%l5,thresh-4,%g1
	ld	[%fp+n1],%o4
	and	%o4,2,%o4
	sll	%o4,2,%o4
	ld	[%g1+%o4],%f18
	fxors	%f19,%f18,%f19
	fors	%f16,%f19,%f16		! tack on sign
	st	%f16,[%o1]
	st	%f17,[%o1+4]

! .ENDLOOP1: entered from .SKIP1 / .BIG1 when the count runs out, or by
! falling through from the code above.  Processes the pending element 0
! (registers %f0-%f9, slot n0, output pointer %o0) with the same sequence
! as used for element 1 above:
!	n = (x*%f40 + %f42) - %f42, integer n stored to n0;
!	x reduced by n*%f46, n*%f48, n*%f50, n*%f52 with error recovery,
!	giving head %f0 ("x") and tail %f2 ("y").
! NOTE(review): %f40-%f52 presumably hold invpio2, round and the pio2_*
! pieces from `constants` - loaded outside this view, confirm.
.ENDLOOP1:
	fmuld	%f0,%f40,%f2
	add	%l5,thresh,%g1		! %g1 -> thresh table
	faddd	%f2,%f42,%f2
	st	%f3,[%fp+n0]		! low word of the biased product = integer n
	fsubd	%f2,%f42,%f2		! n
	fmuld	%f2,%f46,%f4
	fsubd	%f0,%f4,%f4
	fmuld	%f2,%f48,%f6
	fsubd	%f4,%f6,%f0
	ld	[%fp+n0],%o3
	fsubd	%f4,%f0,%f32
	and	%o3,1,%o3		! n & 1
	fsubd	%f32,%f6,%f32
	fmuld	%f2,%f50,%f8
	sll	%o3,3,%o3		! (n & 1) * 8
	fsubd	%f8,%f32,%f8
	ld	[%g1+%o3],%f6		! thresh entry, word 0
	fsubd	%f0,%f8,%f4
	fsubd	%f0,%f4,%f32
	add	%l5,thresh+4,%o7
	fsubd	%f32,%f8,%f32
	fmuld	%f2,%f52,%f2
	fsubd	%f2,%f32,%f2
	ld	[%o7+%o3],%f8		! thresh entry, word 1 (0x80000000 or 0)
	fsubd	%f4,%f2,%f0		! x
	fsubd	%f4,%f0,%f4
	fands	%f0,%f30,%f9		! save signbit
	fabsd	%f0,%f0
	std	%f0,[%fp+x0_1]		! save |x| so its high word can be reloaded as an integer
	fsubd	%f4,%f2,%f2		! y
	fcmpgt32 %f6,%f0,%l0		! per VIS, bit 1 of %l0 reflects the high-word comparison
	fxors	%f2,%f9,%f2		! give y the same sign treatment as |x|
	fands	%f9,%f8,%f9		! if (n & 1) clear sign bit
	andcc	%l0,2,%g0
	bne,pn	%icc,1f			!
! delay slot
	nop

! Single-element tail for element 0, table path (reached when the branch
! to local label 1 above is not taken):
!	r = (x - ((x biased by %f31) & %f44)) + y;  z = r*r
!	index = (((hx - 0x3fc3c000) >> 10) & ~0x1f) + %o3
!	result = r*((z*%f58 + %f56)*z + %f54)*T1 + z*(z*%f62 + %f60)*T0 + T2 + T0
!   with T0 = [%l3+index], T1 = [%l3+8+index], T2 = [%l4+index].
	fpadd32s %f0,%f31,%f8
	ld	[%fp+x0_1],%l0		! high word of |x|
	fand	%f8,%f44,%f4
	sethi	%hi(0x3fc3c000),%o7
	add	%l3,8,%g1
	fsubd	%f0,%f4,%f0
	sub	%l0,%o7,%l0
	srl	%l0,10,%l0
	faddd	%f0,%f2,%f0		! r
	andn	%l0,0x1f,%l0
	fmuld	%f0,%f0,%f2		! z = r*r
	add	%l0,%o3,%l0		! final table index
	fmuld	%f2,%f58,%f6
	ldd	[%l3+%l0],%f32		! T0
	faddd	%f6,%f56,%f6
	fmuld	%f2,%f62,%f4
	fmuld	%f2,%f6,%f6
	faddd	%f4,%f60,%f4
	faddd	%f6,%f54,%f6
	fmuld	%f2,%f4,%f4
	ldd	[%g1+%l0],%f2		! T1
	fmuld	%f0,%f6,%f6
	ldd	[%l4+%l0],%f0		! T2
	fmuld	%f4,%f32,%f4
	fmuld	%f6,%f2,%f6
	faddd	%f6,%f4,%f6
	faddd	%f6,%f0,%f6
	ba,pt	%icc,2f
	faddd	%f6,%f32,%f6		! delay slot: final add of the table path

! polynomial path:
!	z = x*x;  t = (((c0*z + c1)*z + c2)*z + c3)*z;  result = s + (s*t + u)
!   c0..c3 at offsets 0, 0x10, 0x20, 0x30 from %l5+%o3; s and u are reloaded
!   from the x0_1 / y0_0 stack slots indexed by the old %o3 (0 or 8).
1:
	fmuld	%f0,%f0,%f0		! z = x*x
	ldd	[%l5+%o3],%f32		! c0
	add	%l5,%o3,%l0		! coefficient base
	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6		! c1
	add	%fp,%o3,%o3		! %o3 becomes a frame-relative index
	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32		! c2
	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6		! c3
	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32		! s
	fmuld	%f0,%f4,%f4
	std	%f2,[%fp+y0_0]		! spill y so it can be reloaded through %o3
	faddd	%f4,%f6,%f4
	fmuld	%f0,%f4,%f4
	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2		! u
	faddd	%f4,%f2,%f4
	faddd	%f32,%f4,%f6

! Sign fix-up and store.  (n & 2) << 2 is 0 or 8; indexing from thresh-4
! fetches either the low word of `mask` (0x00000000) or the low word of the
! first `thresh` entry (0x80000000), so the saved sign is flipped exactly
! when bit 1 of n is set.
2:
	add	%l5,thresh-4,%g1
	ld	[%fp+n0],%o3
	and	%o3,2,%o3
	sll	%o3,2,%o3
	ld	[%g1+%o3],%f8
	fxors	%f9,%f8,%f9
	fors	%f6,%f9,%f6		! tack on sign
	st	%f6,[%o0]
	st	%f7,[%o0+4]

.ENDLOOP0:

! check for huge arguments remaining
	tst	LIM_l6			! zero means no .BIGn block flagged an argument
	be,pt	%icc,.exit
! delay slot
	nop

! ========== huge range (use C code) ==========
! Reload the values saved in the frame slots and hand the whole vector to
! __vlibm_vsin_big.  Arguments as passed here: %o0 = nsave, %o1 = xsave,
! %o2 = sxsave, %o3 = ysave, %o4 = sysave, %o5 = %l7.  The pointers are
! 64-bit loads under __sparcv9 and 32-bit loads otherwise.

#ifdef __sparcv9
	ldx	[%fp+xsave],%o1
	ldx	[%fp+ysave],%o3
#else
	ld	[%fp+xsave],%o1
	ld	[%fp+ysave],%o3
#endif
	ld	[%fp+nsave],%o0
	ld	[%fp+sxsave],%o2
	ld	[%fp+sysave],%o4
	sra	%o2,0,%o2		! sign-extend for V9
	sra	%o4,0,%o4
	call	__vlibm_vsin_big
	mov	%l7,%o5			! delay slot

.exit:
	ret
	restore				! executes in the delay slot of ret


! .SKIP0: drop the element in lane 0.  The count is decremented (leaving
! through .ENDLOOP0 when it reaches zero or below); otherwise the lane-1
! values are moved into lane 0: hx from %l1 with the sign bit cleared, the
! high word via fmovs, and the low word reloaded from [x+4].
	.align	32
.SKIP0:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.ENDLOOP0
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	andn	%l1,%i5,%l0		! hx &= ~0x80000000
	fmovs	%f10,%f0
	ld	[%i1+4],%f1
	ba,pt	%icc,.LOOP0
! delay slot
	add	%i1,%i2,%i1		! x += stridex


! .SKIP1: same as .SKIP0 for lane 1 - lane-2 values (%l2, %f20) move into
! lane 1, leaving through .ENDLOOP1 when the count is exhausted.
	.align	32
.SKIP1:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.ENDLOOP1
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	andn	%l2,%i5,%l1		! hx &= ~0x80000000
	fmovs	%f20,%f10
	ld	[%i1+4],%f11
	ba,pt	%icc,.LOOP1		!
! delay slot
	add	%i1,%i2,%i1		! x += stridex


! .SKIP2: drop the element in lane 2.  The count is decremented (leaving
! through .ENDLOOP2 when it reaches zero or below); otherwise a fresh
! element is loaded from [x] into %l2 (high word, sign bit cleared) and
! %f20/%f21, and the loop is re-entered at .LOOP2.
	.align	32
.SKIP2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.ENDLOOP2
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	ld	[%i1],%l2
	ld	[%i1],%f20
	ld	[%i1+4],%f21
	andn	%l2,%i5,%l2		! hx &= ~0x80000000
	ba,pt	%icc,.LOOP2
! delay slot
	add	%i1,%i2,%i1		! x += stridex


! .BIG0: lane 0 holds an argument that is not handled inline.
!   hx <  0x7ff00000 (finite): set LIM_l6 from %l7 so that .ENDLOOP0 later
!	calls the huge-range C routine; nothing is stored here.  The mov sits
!	in an annulled delay slot, so it runs only when the branch is taken.
!   hx >= 0x7ff00000 (Inf/NaN): store x - x at *%o0.
! Either way the count is then decremented and lane 1 is shifted into
! lane 0 before re-entering .LOOP0 (or leaving through .ENDLOOP0).
	.align	32
.BIG0:
	sethi	%hi(0x7ff00000),%o7
	cmp	%l0,%o7
	bl,a,pt	%icc,1f			! if hx < 0x7ff00000
! delay slot, annulled if branch not taken
	mov	%l7,LIM_l6		! set biguns flag or
	fsubd	%f0,%f0,%f0		! y = x - x
	st	%f0,[%o0]
	st	%f1,[%o0+4]
1:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.ENDLOOP0
! delay slot, harmless if branch taken
	andn	%l1,%i5,%l0		! hx &= ~0x80000000
	fmovd	%f10,%f0
	ba,pt	%icc,.LOOP0
! delay slot
	add	%i1,%i2,%i1		! x += stridex


! .BIG1: same as .BIG0 for lane 1 - tests %l1, stores x - x at *%o1 for
! Inf/NaN, then shifts lane 2 into lane 1 and re-enters .LOOP1 (or leaves
! through .ENDLOOP1).
	.align	32
.BIG1:
	sethi	%hi(0x7ff00000),%o7
	cmp	%l1,%o7
	bl,a,pt	%icc,1f			! if hx < 0x7ff00000
! delay slot, annulled if branch not taken
	mov	%l7,LIM_l6		! set biguns flag or
	fsubd	%f10,%f10,%f10		! y = x - x
	st	%f10,[%o1]
	st	%f11,[%o1+4]
1:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.ENDLOOP1
! delay slot, harmless if branch taken
	andn	%l2,%i5,%l1		! hx &= ~0x80000000
	fmovd	%f20,%f10
	ba,pt	%icc,.LOOP1
! delay slot
	add	%i1,%i2,%i1		! x += stridex


! .BIG2: same as .BIG0 for lane 2 - tests %l2, stores x - x at *%o2 for
! Inf/NaN.  There is no later lane to shift in, so the next element is
! loaded from [x] before re-entering .LOOP2 (or leaving through .ENDLOOP2).
	.align	32
.BIG2:
	sethi	%hi(0x7ff00000),%o7
	cmp	%l2,%o7
	bl,a,pt	%icc,1f			! if hx < 0x7ff00000
! delay slot, annulled if branch not taken
	mov	%l7,LIM_l6		! set biguns flag or
	fsubd	%f20,%f20,%f20		! y = x - x
	st	%f20,[%o2]
	st	%f21,[%o2+4]
1:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.ENDLOOP2
! delay slot
	nop
	ld	[%i1],%l2
	ld	[%i1],%f20
	ld	[%i1+4],%f21
	andn	%l2,%i5,%l2		! hx &= ~0x80000000
	ba,pt	%icc,.LOOP2
! delay slot
	add	%i1,%i2,%i1		! x += stridex

	SET_SIZE(__vsin)