1*25c28e83SPiotr Jasiukajtis/*
2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis *
4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis *
8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis *
13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis *
19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis */
21*25c28e83SPiotr Jasiukajtis/*
22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23*25c28e83SPiotr Jasiukajtis */
24*25c28e83SPiotr Jasiukajtis/*
25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms.
27*25c28e83SPiotr Jasiukajtis */
28*25c28e83SPiotr Jasiukajtis
29*25c28e83SPiotr Jasiukajtis	.file	"__vcos.S"
30*25c28e83SPiotr Jasiukajtis
31*25c28e83SPiotr Jasiukajtis#include "libm.h"
32*25c28e83SPiotr Jasiukajtis
/*
 * Read-only table of 8-byte constants used by __vcos.
 *
 * Each entry is written as two 32-bit .word values, most-significant
 * word first (for example the entry named `one' is 0x3ff00000,0x00000000).
 * Entries are fetched with ldd [base+offset], where base is the address
 * of `constants' and offset is one of the macros p4 ... thresh defined
 * after the table.  Entry k of the table lives at byte offset 8*k:
 *
 *	0x00 p4		0x08 q4		0x10 p3		0x18 q3
 *	0x20 p2		0x28 q2		0x30 p1		0x38 q1
 *	0x40 one	0x48 pp1	0x50 pp2	0x58 qq1
 *	0x60 qq2	0x68 invpio2	0x70 round	0x78 pio2_1
 *	0x80 pio2_2	0x88 pio2_3	0x90 pio2_3t	0x98 f30val
 *	0xa0 mask	0xa8 thresh
 *
 * The final entry (offset 0xb0) has no offset macro among those defined
 * here.  The f30val entry is loaded into the register pair
 * MSK_BIT31/MSK_BIT13 (%f30/%f31), so its high word 0x80000000 and its
 * low word 0x00004000 are used as two separate 32-bit masks.
 */
33*25c28e83SPiotr Jasiukajtis	RO_DATA
34*25c28e83SPiotr Jasiukajtis	.align	64
35*25c28e83SPiotr Jasiukajtisconstants:
36*25c28e83SPiotr Jasiukajtis	.word	0x3ec718e3,0xa6972785
37*25c28e83SPiotr Jasiukajtis	.word	0x3ef9fd39,0x94293940
38*25c28e83SPiotr Jasiukajtis	.word	0xbf2a019f,0x75ee4be1
39*25c28e83SPiotr Jasiukajtis	.word	0xbf56c16b,0xba552569
40*25c28e83SPiotr Jasiukajtis	.word	0x3f811111,0x1108c703
41*25c28e83SPiotr Jasiukajtis	.word	0x3fa55555,0x554f5b35
42*25c28e83SPiotr Jasiukajtis	.word	0xbfc55555,0x555554d0
43*25c28e83SPiotr Jasiukajtis	.word	0xbfdfffff,0xffffff85
44*25c28e83SPiotr Jasiukajtis	.word	0x3ff00000,0x00000000
45*25c28e83SPiotr Jasiukajtis	.word	0xbfc55555,0x5551fc28
46*25c28e83SPiotr Jasiukajtis	.word	0x3f811107,0x62eacc9d
47*25c28e83SPiotr Jasiukajtis	.word	0xbfdfffff,0xffff6328
48*25c28e83SPiotr Jasiukajtis	.word	0x3fa55551,0x5f7acf0c
49*25c28e83SPiotr Jasiukajtis	.word	0x3fe45f30,0x6dc9c883
50*25c28e83SPiotr Jasiukajtis	.word	0x43380000,0x00000000
51*25c28e83SPiotr Jasiukajtis	.word	0x3ff921fb,0x54400000
52*25c28e83SPiotr Jasiukajtis	.word	0x3dd0b461,0x1a600000
53*25c28e83SPiotr Jasiukajtis	.word	0x3ba3198a,0x2e000000
54*25c28e83SPiotr Jasiukajtis	.word	0x397b839a,0x252049c1
55*25c28e83SPiotr Jasiukajtis	.word	0x80000000,0x00004000
56*25c28e83SPiotr Jasiukajtis	.word	0xffff8000,0x00000000	! N.B.: low-order words used
57*25c28e83SPiotr Jasiukajtis	.word	0x3fc90000,0x80000000	! for sign bit hacking; see
58*25c28e83SPiotr Jasiukajtis	.word	0x3fc40000,0x00000000	! references to "thresh" below
59*25c28e83SPiotr Jasiukajtis
/* Byte offsets of the table entries, relative to the `constants' label. */
60*25c28e83SPiotr Jasiukajtis#define p4		0x0
61*25c28e83SPiotr Jasiukajtis#define q4		0x08
62*25c28e83SPiotr Jasiukajtis#define p3		0x10
63*25c28e83SPiotr Jasiukajtis#define q3		0x18
64*25c28e83SPiotr Jasiukajtis#define p2		0x20
65*25c28e83SPiotr Jasiukajtis#define q2		0x28
66*25c28e83SPiotr Jasiukajtis#define p1		0x30
67*25c28e83SPiotr Jasiukajtis#define q1		0x38
68*25c28e83SPiotr Jasiukajtis#define one		0x40
69*25c28e83SPiotr Jasiukajtis#define pp1		0x48
70*25c28e83SPiotr Jasiukajtis#define pp2		0x50
71*25c28e83SPiotr Jasiukajtis#define qq1		0x58
72*25c28e83SPiotr Jasiukajtis#define qq2		0x60
73*25c28e83SPiotr Jasiukajtis#define invpio2		0x68
74*25c28e83SPiotr Jasiukajtis#define round		0x70
75*25c28e83SPiotr Jasiukajtis#define pio2_1		0x78
76*25c28e83SPiotr Jasiukajtis#define pio2_2		0x80
77*25c28e83SPiotr Jasiukajtis#define pio2_3		0x88
78*25c28e83SPiotr Jasiukajtis#define pio2_3t		0x90
79*25c28e83SPiotr Jasiukajtis#define f30val		0x98
80*25c28e83SPiotr Jasiukajtis#define mask		0xa0
81*25c28e83SPiotr Jasiukajtis#define thresh		0xa8
82*25c28e83SPiotr Jasiukajtis
/*
 * Offsets, relative to %fp, of the temporaries kept in this routine's
 * stack frame.  They are used either as [%fp+name] or as
 * add %fp,name,reg to form a pointer to the slot.
 *
 *   xsave, ysave		saved x and y pointer arguments (8 bytes apart;
 *				stored with stx under __sparcv9, st otherwise)
 *   nsave, sxsave, sysave	saved n, stridex and stridey (4-byte stores)
 *   biguns, n2, n1, n0		4-byte slots
 *   x2_1 ... y0_0		slots spaced 0x10 bytes apart
 *
 * tmps is the number of bytes of frame space reserved for these slots
 * by the `save' at function entry; the lowest slot, y0_0, sits at
 * STACK_BIAS-0x90, which matches tmps.
 */
83*25c28e83SPiotr Jasiukajtis! local storage indices
84*25c28e83SPiotr Jasiukajtis
85*25c28e83SPiotr Jasiukajtis#define xsave		STACK_BIAS-0x8
86*25c28e83SPiotr Jasiukajtis#define ysave		STACK_BIAS-0x10
87*25c28e83SPiotr Jasiukajtis#define nsave		STACK_BIAS-0x14
88*25c28e83SPiotr Jasiukajtis#define sxsave		STACK_BIAS-0x18
89*25c28e83SPiotr Jasiukajtis#define sysave		STACK_BIAS-0x1c
90*25c28e83SPiotr Jasiukajtis#define biguns		STACK_BIAS-0x20
91*25c28e83SPiotr Jasiukajtis#define n2		STACK_BIAS-0x24
92*25c28e83SPiotr Jasiukajtis#define n1		STACK_BIAS-0x28
93*25c28e83SPiotr Jasiukajtis#define n0		STACK_BIAS-0x2c
94*25c28e83SPiotr Jasiukajtis#define x2_1		STACK_BIAS-0x40
95*25c28e83SPiotr Jasiukajtis#define x1_1		STACK_BIAS-0x50
96*25c28e83SPiotr Jasiukajtis#define x0_1		STACK_BIAS-0x60
97*25c28e83SPiotr Jasiukajtis#define y2_0		STACK_BIAS-0x70
98*25c28e83SPiotr Jasiukajtis#define y1_0		STACK_BIAS-0x80
99*25c28e83SPiotr Jasiukajtis#define y0_0		STACK_BIAS-0x90
100*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9
101*25c28e83SPiotr Jasiukajtis#define tmps		0x90
102*25c28e83SPiotr Jasiukajtis
/*
 * Register aliases used by __vcos.
 *
 * P0_*, P1_* and P2_* name the floating-point registers %f0-%f9,
 * %f10-%f19 and %f20-%f29.  The main loop works on three arguments per
 * pass, one per group: x0 is loaded into P0_f0, x1 into P1_f10 and x2
 * into P2_f20.
 *
 * SC_HI and SC_LO hold the addresses of __vlibm_TBL_sincos_hi and
 * __vlibm_TBL_sincos_lo.
 *
 * C_q1..C_q4, C_ONE, C_pp1/C_pp2, C_qq1/C_qq2, MSK_BIT31/MSK_BIT13 and
 * MSK_BITSHI17 are loaded at function entry from the constants table
 * entries q1..q4, one, pp1/pp2, qq1/qq2, f30val and mask respectively.
 * C_ONE_LO is the odd half of the C_ONE double-register pair.
 *
 * MSK_SIGN and C_signM are two names for the same register, %i5, which
 * is set to 0x80000000 at entry.  LIM_l5, LIM_l6 and LIM_l7 are set at
 * entry to 0x3fc40000, 0x3e400000 and 0x3fe921fb; they are compared
 * against the high word of |x|.
 */
103*25c28e83SPiotr Jasiukajtis!--------------------------------------------------------------------
104*25c28e83SPiotr Jasiukajtis! define pipes for easier reading
105*25c28e83SPiotr Jasiukajtis
106*25c28e83SPiotr Jasiukajtis#define P0_f0		%f0
107*25c28e83SPiotr Jasiukajtis#define P0_f1		%f1
108*25c28e83SPiotr Jasiukajtis#define P0_f2		%f2
109*25c28e83SPiotr Jasiukajtis#define P0_f3		%f3
110*25c28e83SPiotr Jasiukajtis#define P0_f4		%f4
111*25c28e83SPiotr Jasiukajtis#define P0_f5		%f5
112*25c28e83SPiotr Jasiukajtis#define P0_f6		%f6
113*25c28e83SPiotr Jasiukajtis#define P0_f7		%f7
114*25c28e83SPiotr Jasiukajtis#define P0_f8		%f8
115*25c28e83SPiotr Jasiukajtis#define P0_f9		%f9
116*25c28e83SPiotr Jasiukajtis
117*25c28e83SPiotr Jasiukajtis#define P1_f10		%f10
118*25c28e83SPiotr Jasiukajtis#define P1_f11		%f11
119*25c28e83SPiotr Jasiukajtis#define P1_f12		%f12
120*25c28e83SPiotr Jasiukajtis#define P1_f13		%f13
121*25c28e83SPiotr Jasiukajtis#define P1_f14		%f14
122*25c28e83SPiotr Jasiukajtis#define P1_f15		%f15
123*25c28e83SPiotr Jasiukajtis#define P1_f16		%f16
124*25c28e83SPiotr Jasiukajtis#define P1_f17		%f17
125*25c28e83SPiotr Jasiukajtis#define P1_f18		%f18
126*25c28e83SPiotr Jasiukajtis#define P1_f19		%f19
127*25c28e83SPiotr Jasiukajtis
128*25c28e83SPiotr Jasiukajtis#define P2_f20		%f20
129*25c28e83SPiotr Jasiukajtis#define P2_f21		%f21
130*25c28e83SPiotr Jasiukajtis#define P2_f22		%f22
131*25c28e83SPiotr Jasiukajtis#define P2_f23		%f23
132*25c28e83SPiotr Jasiukajtis#define P2_f24		%f24
133*25c28e83SPiotr Jasiukajtis#define P2_f25		%f25
134*25c28e83SPiotr Jasiukajtis#define P2_f26		%f26
135*25c28e83SPiotr Jasiukajtis#define P2_f27		%f27
136*25c28e83SPiotr Jasiukajtis#define P2_f28		%f28
137*25c28e83SPiotr Jasiukajtis#define P2_f29		%f29
138*25c28e83SPiotr Jasiukajtis
139*25c28e83SPiotr Jasiukajtis! define __vlibm_TBL_sincos_hi & lo for easy reading
140*25c28e83SPiotr Jasiukajtis
141*25c28e83SPiotr Jasiukajtis#define SC_HI		%l3
142*25c28e83SPiotr Jasiukajtis#define SC_LO		%l4
143*25c28e83SPiotr Jasiukajtis
144*25c28e83SPiotr Jasiukajtis! define constants for easy reading
145*25c28e83SPiotr Jasiukajtis
146*25c28e83SPiotr Jasiukajtis#define C_q1 %f46
147*25c28e83SPiotr Jasiukajtis#define C_q2 %f48
148*25c28e83SPiotr Jasiukajtis#define C_q3 %f50
149*25c28e83SPiotr Jasiukajtis#define C_q4 %f52
150*25c28e83SPiotr Jasiukajtis
151*25c28e83SPiotr Jasiukajtis! one ( 1 ) uno eins echi un
152*25c28e83SPiotr Jasiukajtis#define C_ONE		%f54
153*25c28e83SPiotr Jasiukajtis#define C_ONE_LO	%f55
154*25c28e83SPiotr Jasiukajtis
155*25c28e83SPiotr Jasiukajtis! masks
156*25c28e83SPiotr Jasiukajtis#define MSK_SIGN	%i5
157*25c28e83SPiotr Jasiukajtis#define MSK_BIT31	%f30
158*25c28e83SPiotr Jasiukajtis#define MSK_BIT13	%f31
159*25c28e83SPiotr Jasiukajtis#define MSK_BITSHI17	%f44
160*25c28e83SPiotr Jasiukajtis
161*25c28e83SPiotr Jasiukajtis
162*25c28e83SPiotr Jasiukajtis! constants for pp and qq
163*25c28e83SPiotr Jasiukajtis#define C_pp1 %f56
164*25c28e83SPiotr Jasiukajtis#define C_pp2 %f58
165*25c28e83SPiotr Jasiukajtis#define C_qq1 %f60
166*25c28e83SPiotr Jasiukajtis#define C_qq2 %f62
167*25c28e83SPiotr Jasiukajtis
168*25c28e83SPiotr Jasiukajtis! sign mask
169*25c28e83SPiotr Jasiukajtis#define C_signM		%i5
170*25c28e83SPiotr Jasiukajtis
171*25c28e83SPiotr Jasiukajtis#define LIM_l5		%l5
172*25c28e83SPiotr Jasiukajtis#define LIM_l6		%l6
173*25c28e83SPiotr Jasiukajtis! when in pri range, using value as transition from poly to table.
174*25c28e83SPiotr Jasiukajtis! for Medium range,change use of %l6 and use to keep track of biguns.
175*25c28e83SPiotr Jasiukajtis#define LIM_l7		%l7
176*25c28e83SPiotr Jasiukajtis
177*25c28e83SPiotr Jasiukajtis!--------------------------------------------------------------------
178*25c28e83SPiotr Jasiukajtis
179*25c28e83SPiotr Jasiukajtis
180*25c28e83SPiotr Jasiukajtis	ENTRY(__vcos)
181*25c28e83SPiotr Jasiukajtis	save	%sp,-SA(MINFRAME)-tmps,%sp
182*25c28e83SPiotr Jasiukajtis	PIC_SETUP(g5)
183*25c28e83SPiotr Jasiukajtis	PIC_SET(g5,__vlibm_TBL_sincos_hi,l3)
184*25c28e83SPiotr Jasiukajtis	PIC_SET(g5,__vlibm_TBL_sincos_lo,l4)
185*25c28e83SPiotr Jasiukajtis	PIC_SET(g5,constants,o0)
186*25c28e83SPiotr Jasiukajtis	mov	%o0,%g1
187*25c28e83SPiotr Jasiukajtis	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
188*25c28e83SPiotr Jasiukajtis
189*25c28e83SPiotr Jasiukajtis! ========== primary range ==========
190*25c28e83SPiotr Jasiukajtis
191*25c28e83SPiotr Jasiukajtis! register use
192*25c28e83SPiotr Jasiukajtis
193*25c28e83SPiotr Jasiukajtis! i0  n
194*25c28e83SPiotr Jasiukajtis! i1  x
195*25c28e83SPiotr Jasiukajtis! i2  stridex
196*25c28e83SPiotr Jasiukajtis! i3  y
197*25c28e83SPiotr Jasiukajtis! i4  stridey
198*25c28e83SPiotr Jasiukajtis! i5  0x80000000
199*25c28e83SPiotr Jasiukajtis
200*25c28e83SPiotr Jasiukajtis! l0  hx0
201*25c28e83SPiotr Jasiukajtis! l1  hx1
202*25c28e83SPiotr Jasiukajtis! l2  hx2
203*25c28e83SPiotr Jasiukajtis! l3  __vlibm_TBL_sincos_hi
204*25c28e83SPiotr Jasiukajtis! l4  __vlibm_TBL_sincos_lo
205*25c28e83SPiotr Jasiukajtis! l5  0x3fc40000
206*25c28e83SPiotr Jasiukajtis! l6  0x3e400000
207*25c28e83SPiotr Jasiukajtis! l7  0x3fe921fb
208*25c28e83SPiotr Jasiukajtis
209*25c28e83SPiotr Jasiukajtis! the following are 64-bit registers in both V8+ and V9
210*25c28e83SPiotr Jasiukajtis
211*25c28e83SPiotr Jasiukajtis! g1  scratch
212*25c28e83SPiotr Jasiukajtis! g5
213*25c28e83SPiotr Jasiukajtis
214*25c28e83SPiotr Jasiukajtis! o0  py0
215*25c28e83SPiotr Jasiukajtis! o1  py1
216*25c28e83SPiotr Jasiukajtis! o2  py2
217*25c28e83SPiotr Jasiukajtis! o3  oy0
218*25c28e83SPiotr Jasiukajtis! o4  oy1
219*25c28e83SPiotr Jasiukajtis! o5  oy2
220*25c28e83SPiotr Jasiukajtis! o7  scratch
221*25c28e83SPiotr Jasiukajtis
222*25c28e83SPiotr Jasiukajtis! f0  x0
223*25c28e83SPiotr Jasiukajtis! f2
224*25c28e83SPiotr Jasiukajtis! f4
225*25c28e83SPiotr Jasiukajtis! f6
226*25c28e83SPiotr Jasiukajtis! f8  scratch for table base
227*25c28e83SPiotr Jasiukajtis! f9  signbit0
228*25c28e83SPiotr Jasiukajtis! f10 x1
229*25c28e83SPiotr Jasiukajtis! f12
230*25c28e83SPiotr Jasiukajtis! f14
231*25c28e83SPiotr Jasiukajtis! f16
232*25c28e83SPiotr Jasiukajtis! f18 scratch for table base
233*25c28e83SPiotr Jasiukajtis! f19 signbit1
234*25c28e83SPiotr Jasiukajtis! f20 x2
235*25c28e83SPiotr Jasiukajtis! f22
236*25c28e83SPiotr Jasiukajtis! f24
237*25c28e83SPiotr Jasiukajtis! f26
238*25c28e83SPiotr Jasiukajtis! f28 scratch for table base
239*25c28e83SPiotr Jasiukajtis! f29 signbit2
240*25c28e83SPiotr Jasiukajtis! f30 0x80000000
241*25c28e83SPiotr Jasiukajtis! f31 0x4000
242*25c28e83SPiotr Jasiukajtis! f32
243*25c28e83SPiotr Jasiukajtis! f34
244*25c28e83SPiotr Jasiukajtis! f36
245*25c28e83SPiotr Jasiukajtis! f38
246*25c28e83SPiotr Jasiukajtis! f40
247*25c28e83SPiotr Jasiukajtis! f42
248*25c28e83SPiotr Jasiukajtis! f44 0xffff800000000000
249*25c28e83SPiotr Jasiukajtis! f46 q1
250*25c28e83SPiotr Jasiukajtis! f48 q2
251*25c28e83SPiotr Jasiukajtis! f50 q3
252*25c28e83SPiotr Jasiukajtis! f52 q4
253*25c28e83SPiotr Jasiukajtis! f54 one
254*25c28e83SPiotr Jasiukajtis! f56 pp1
255*25c28e83SPiotr Jasiukajtis! f58 pp2
256*25c28e83SPiotr Jasiukajtis! f60 qq1
257*25c28e83SPiotr Jasiukajtis! f62 qq2
258*25c28e83SPiotr Jasiukajtis
259*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9
260*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+xsave]		! save arguments
261*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+ysave]
262*25c28e83SPiotr Jasiukajtis#else
263*25c28e83SPiotr Jasiukajtis	st	%i1,[%fp+xsave]		! save arguments
264*25c28e83SPiotr Jasiukajtis	st	%i3,[%fp+ysave]
265*25c28e83SPiotr Jasiukajtis#endif
266*25c28e83SPiotr Jasiukajtis
267*25c28e83SPiotr Jasiukajtis	st	%i0,[%fp+nsave]
268*25c28e83SPiotr Jasiukajtis	st	%i2,[%fp+sxsave]
269*25c28e83SPiotr Jasiukajtis	st	%i4,[%fp+sysave]
270*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x80000000),MSK_SIGN	! load/set up constants
271*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc40000),LIM_l5
272*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3e400000),LIM_l6
273*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fe921fb),LIM_l7
274*25c28e83SPiotr Jasiukajtis	or	LIM_l7,%lo(0x3fe921fb),LIM_l7
275*25c28e83SPiotr Jasiukajtis	ldd	[%g1+f30val],MSK_BIT31
276*25c28e83SPiotr Jasiukajtis	ldd	[%g1+mask],MSK_BITSHI17
277*25c28e83SPiotr Jasiukajtis	ldd	[%g1+q1],C_q1
278*25c28e83SPiotr Jasiukajtis	ldd	[%g1+q2],C_q2
279*25c28e83SPiotr Jasiukajtis	ldd	[%g1+q3],C_q3
280*25c28e83SPiotr Jasiukajtis	ldd	[%g1+q4],C_q4
281*25c28e83SPiotr Jasiukajtis	ldd	[%g1+one],C_ONE
282*25c28e83SPiotr Jasiukajtis	ldd	[%g1+pp1],C_pp1
283*25c28e83SPiotr Jasiukajtis	ldd	[%g1+pp2],C_pp2
284*25c28e83SPiotr Jasiukajtis	ldd	[%g1+qq1],C_qq1
285*25c28e83SPiotr Jasiukajtis	ldd	[%g1+qq2],C_qq2
286*25c28e83SPiotr Jasiukajtis	sll	%i2,3,%i2		! scale strides
287*25c28e83SPiotr Jasiukajtis	sll	%i4,3,%i4
288*25c28e83SPiotr Jasiukajtis	add	%fp,x0_1,%o3		! precondition loop
289*25c28e83SPiotr Jasiukajtis	add	%fp,x0_1,%o4
290*25c28e83SPiotr Jasiukajtis	add	%fp,x0_1,%o5
291*25c28e83SPiotr Jasiukajtis	ld	[%i1],%l0		! hx = *x
292*25c28e83SPiotr Jasiukajtis	ld	[%i1],P0_f0
293*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],P0_f1
294*25c28e83SPiotr Jasiukajtis	andn	%l0,MSK_SIGN,%l0		! hx &= ~0x80000000
295*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
296*25c28e83SPiotr Jasiukajtis
297*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.loop0
298*25c28e83SPiotr Jasiukajtis!delay slot
299*25c28e83SPiotr Jasiukajtis	nop
300*25c28e83SPiotr Jasiukajtis
301*25c28e83SPiotr Jasiukajtis	.align 32
302*25c28e83SPiotr Jasiukajtis.loop0:
303*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l1		! preload next argument
304*25c28e83SPiotr Jasiukajtis	sub	%l0,LIM_l6,%g1
305*25c28e83SPiotr Jasiukajtis	sub	LIM_l7,%l0,%o7
306*25c28e83SPiotr Jasiukajtis	fands	P0_f0,MSK_BIT31,P0_f9		! save signbit
307*25c28e83SPiotr Jasiukajtis
308*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P1_f10
309*25c28e83SPiotr Jasiukajtis	orcc	%o7,%g1,%g0
310*25c28e83SPiotr Jasiukajtis	mov	%i3,%o0			! py0 = y
311*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.range0		! if hx < 0x3e400000 or > 0x3fe921fb
312*25c28e83SPiotr Jasiukajtis
313*25c28e83SPiotr Jasiukajtis! delay slot
314*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P1_f11
315*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
316*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
317*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.endloop1
318*25c28e83SPiotr Jasiukajtis
319*25c28e83SPiotr Jasiukajtis! delay slot
320*25c28e83SPiotr Jasiukajtis	andn	%l1,MSK_SIGN,%l1
321*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
322*25c28e83SPiotr Jasiukajtis	fabsd	P0_f0,P0_f0
323*25c28e83SPiotr Jasiukajtis	fmuld	C_ONE,C_ONE,C_ONE		! one*one; a nop for alignment only
324*25c28e83SPiotr Jasiukajtis
325*25c28e83SPiotr Jasiukajtis.loop1:
326*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l2		! preload next argument
327*25c28e83SPiotr Jasiukajtis	sub	%l1,LIM_l6,%g1
328*25c28e83SPiotr Jasiukajtis	sub	LIM_l7,%l1,%o7
329*25c28e83SPiotr Jasiukajtis	fands	P1_f10,MSK_BIT31,P1_f19		! save signbit
330*25c28e83SPiotr Jasiukajtis
331*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P2_f20
332*25c28e83SPiotr Jasiukajtis	orcc	%o7,%g1,%g0
333*25c28e83SPiotr Jasiukajtis	mov	%i3,%o1			! py1 = y
334*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.range1		! if hx < 0x3e400000 or > 0x3fe921fb
335*25c28e83SPiotr Jasiukajtis
336*25c28e83SPiotr Jasiukajtis! delay slot
337*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P2_f21
338*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
339*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
340*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.endloop2
341*25c28e83SPiotr Jasiukajtis
342*25c28e83SPiotr Jasiukajtis! delay slot
343*25c28e83SPiotr Jasiukajtis	andn	%l2,MSK_SIGN,%l2
344*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
345*25c28e83SPiotr Jasiukajtis	fabsd	P1_f10,P1_f10
346*25c28e83SPiotr Jasiukajtis	fmuld	C_ONE,C_ONE,C_ONE		! one*one; a nop for alignment only
347*25c28e83SPiotr Jasiukajtis
348*25c28e83SPiotr Jasiukajtis.loop2:
349*25c28e83SPiotr Jasiukajtis	st	P0_f6,[%o3]
350*25c28e83SPiotr Jasiukajtis	sub	%l2,LIM_l6,%g1
351*25c28e83SPiotr Jasiukajtis	sub	LIM_l7,%l2,%o7
352*25c28e83SPiotr Jasiukajtis	fands	P2_f20,MSK_BIT31,P2_f29		! save signbit
353*25c28e83SPiotr Jasiukajtis
354*25c28e83SPiotr Jasiukajtis	st	P0_f7,[%o3+4]
355*25c28e83SPiotr Jasiukajtis	orcc	%g1,%o7,%g0
356*25c28e83SPiotr Jasiukajtis	mov	%i3,%o2			! py2 = y
357*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.range2		! if hx < 0x3e400000 or > 0x3fe921fb
358*25c28e83SPiotr Jasiukajtis
359*25c28e83SPiotr Jasiukajtis! delay slot
360*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
361*25c28e83SPiotr Jasiukajtis	cmp	%l0,LIM_l5
362*25c28e83SPiotr Jasiukajtis	fabsd	P2_f20,P2_f20
363*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.case4
364*25c28e83SPiotr Jasiukajtis
365*25c28e83SPiotr Jasiukajtis! delay slot
366*25c28e83SPiotr Jasiukajtis	st	P1_f16,[%o4]
367*25c28e83SPiotr Jasiukajtis	cmp	%l1,LIM_l5
368*25c28e83SPiotr Jasiukajtis	fpadd32s P0_f0,MSK_BIT13,P0_f8
369*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.case2
370*25c28e83SPiotr Jasiukajtis
371*25c28e83SPiotr Jasiukajtis! delay slot
372*25c28e83SPiotr Jasiukajtis	st	P1_f17,[%o4+4]
373*25c28e83SPiotr Jasiukajtis	cmp	%l2,LIM_l5
374*25c28e83SPiotr Jasiukajtis	fpadd32s P1_f10,MSK_BIT13,P1_f18
375*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.case1
376*25c28e83SPiotr Jasiukajtis
377*25c28e83SPiotr Jasiukajtis! delay slot
378*25c28e83SPiotr Jasiukajtis	st	P2_f26,[%o5]
379*25c28e83SPiotr Jasiukajtis	mov	%o0,%o3
380*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
381*25c28e83SPiotr Jasiukajtis	fpadd32s P2_f20,MSK_BIT13,P2_f28
382*25c28e83SPiotr Jasiukajtis
383*25c28e83SPiotr Jasiukajtis	st	P2_f27,[%o5+4]
384*25c28e83SPiotr Jasiukajtis	fand	P0_f8,MSK_BITSHI17,P0_f2
385*25c28e83SPiotr Jasiukajtis	mov	%o1,%o4
386*25c28e83SPiotr Jasiukajtis
387*25c28e83SPiotr Jasiukajtis	fand	P1_f18,MSK_BITSHI17,P1_f12
388*25c28e83SPiotr Jasiukajtis	mov	%o2,%o5
389*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0
390*25c28e83SPiotr Jasiukajtis
391*25c28e83SPiotr Jasiukajtis	fand	P2_f28,MSK_BITSHI17,P2_f22
392*25c28e83SPiotr Jasiukajtis	sub	%l1,%o7,%l1
393*25c28e83SPiotr Jasiukajtis	sub	%l2,%o7,%l2
394*25c28e83SPiotr Jasiukajtis
395*25c28e83SPiotr Jasiukajtis	fsubd	P0_f0,P0_f2,P0_f0
396*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
397*25c28e83SPiotr Jasiukajtis	add	SC_HI,8,%g1;add	SC_LO,8,%o7
398*25c28e83SPiotr Jasiukajtis
399*25c28e83SPiotr Jasiukajtis	fsubd	P1_f10,P1_f12,P1_f10
400*25c28e83SPiotr Jasiukajtis	srl	%l1,10,%l1
401*25c28e83SPiotr Jasiukajtis
402*25c28e83SPiotr Jasiukajtis	fsubd	P2_f20,P2_f22,P2_f20
403*25c28e83SPiotr Jasiukajtis	srl	%l2,10,%l2
404*25c28e83SPiotr Jasiukajtis
405*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
406*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0
407*25c28e83SPiotr Jasiukajtis
408*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
409*25c28e83SPiotr Jasiukajtis	andn	%l1,0x1f,%l1
410*25c28e83SPiotr Jasiukajtis
411*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f20,P2_f22
412*25c28e83SPiotr Jasiukajtis	andn	%l2,0x1f,%l2
413*25c28e83SPiotr Jasiukajtis
414*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_pp2,P0_f6
415*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f32
416*25c28e83SPiotr Jasiukajtis
417*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_pp2,P1_f16
418*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f36
419*25c28e83SPiotr Jasiukajtis
420*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_pp2,P2_f26
421*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l2],%f40
422*25c28e83SPiotr Jasiukajtis
423*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_pp1,P0_f6
424*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_qq2,P0_f4
425*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l0],%f34
426*25c28e83SPiotr Jasiukajtis
427*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,C_pp1,P1_f16
428*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_qq2,P1_f14
429*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l1],%f38
430*25c28e83SPiotr Jasiukajtis
431*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,C_pp1,P2_f26
432*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_qq2,P2_f24
433*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l2],%f42
434*25c28e83SPiotr Jasiukajtis
435*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f6,P0_f6
436*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_qq1,P0_f4
437*25c28e83SPiotr Jasiukajtis
438*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f16,P1_f16
439*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_qq1,P1_f14
440*25c28e83SPiotr Jasiukajtis
441*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f26,P2_f26
442*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_qq1,P2_f24
443*25c28e83SPiotr Jasiukajtis
444*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_ONE,P0_f6
445*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
446*25c28e83SPiotr Jasiukajtis
447*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,C_ONE,P1_f16
448*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
449*25c28e83SPiotr Jasiukajtis
450*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,C_ONE,P2_f26
451*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
452*25c28e83SPiotr Jasiukajtis
453*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f6,P0_f6
454*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l0],P0_f2
455*25c28e83SPiotr Jasiukajtis
456*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f16,P1_f16
457*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l1],P1_f12
458*25c28e83SPiotr Jasiukajtis
459*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f26,P2_f26
460*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l2],P2_f22
461*25c28e83SPiotr Jasiukajtis
462*25c28e83SPiotr Jasiukajtis	fmuld	P0_f4,%f32,P0_f4
463*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
464*25c28e83SPiotr Jasiukajtis
465*25c28e83SPiotr Jasiukajtis	fmuld	P1_f14,%f36,P1_f14
466*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P0_f0
467*25c28e83SPiotr Jasiukajtis
468*25c28e83SPiotr Jasiukajtis	fmuld	P2_f24,%f40,P2_f24
469*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P0_f1
470*25c28e83SPiotr Jasiukajtis
471*25c28e83SPiotr Jasiukajtis	fmuld	P0_f6,%f34,P0_f6
472*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
473*25c28e83SPiotr Jasiukajtis
474*25c28e83SPiotr Jasiukajtis	fmuld	P1_f16,%f38,P1_f16
475*25c28e83SPiotr Jasiukajtis
476*25c28e83SPiotr Jasiukajtis	fmuld	P2_f26,%f42,P2_f26
477*25c28e83SPiotr Jasiukajtis
478*25c28e83SPiotr Jasiukajtis	fsubd	P0_f6,P0_f4,P0_f6
479*25c28e83SPiotr Jasiukajtis
480*25c28e83SPiotr Jasiukajtis	fsubd	P1_f16,P1_f14,P1_f16
481*25c28e83SPiotr Jasiukajtis
482*25c28e83SPiotr Jasiukajtis	fsubd	P2_f26,P2_f24,P2_f26
483*25c28e83SPiotr Jasiukajtis
484*25c28e83SPiotr Jasiukajtis	fsubd	P0_f2,P0_f6,P0_f6
485*25c28e83SPiotr Jasiukajtis
486*25c28e83SPiotr Jasiukajtis	fsubd	P1_f12,P1_f16,P1_f16
487*25c28e83SPiotr Jasiukajtis
488*25c28e83SPiotr Jasiukajtis	fsubd	P2_f22,P2_f26,P2_f26
489*25c28e83SPiotr Jasiukajtis
490*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,%f32,P0_f6
491*25c28e83SPiotr Jasiukajtis
492*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,%f36,P1_f16
493*25c28e83SPiotr Jasiukajtis
494*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,%f40,P2_f26
495*25c28e83SPiotr Jasiukajtis	andn	%l0,MSK_SIGN,%l0		! hx &= ~0x80000000
496*25c28e83SPiotr Jasiukajtis
497*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P0_f6,P0_f9,P0_f6
498*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
499*25c28e83SPiotr Jasiukajtis
500*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P1_f16,P1_f19,P1_f16
501*25c28e83SPiotr Jasiukajtis	bg,pt	%icc,.loop0
502*25c28e83SPiotr Jasiukajtis
503*25c28e83SPiotr Jasiukajtis! delay slot
504*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P2_f26,P2_f29,P2_f26
505*25c28e83SPiotr Jasiukajtis
506*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.endloop0
507*25c28e83SPiotr Jasiukajtis! delay slot
508*25c28e83SPiotr Jasiukajtis	nop
509*25c28e83SPiotr Jasiukajtis
510*25c28e83SPiotr Jasiukajtis	.align	32
511*25c28e83SPiotr Jasiukajtis.case1:
512*25c28e83SPiotr Jasiukajtis	st	P2_f27,[%o5+4]
513*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
514*25c28e83SPiotr Jasiukajtis	fand	P0_f8,MSK_BITSHI17,P0_f2
515*25c28e83SPiotr Jasiukajtis
516*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0
517*25c28e83SPiotr Jasiukajtis	sub	%l1,%o7,%l1
518*25c28e83SPiotr Jasiukajtis	add	SC_HI,8,%g1;add	SC_LO,8,%o7
519*25c28e83SPiotr Jasiukajtis	fand	P1_f18,MSK_BITSHI17,P1_f12
520*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f20,P2_f22
521*25c28e83SPiotr Jasiukajtis
522*25c28e83SPiotr Jasiukajtis	fsubd	P0_f0,P0_f2,P0_f0
523*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
524*25c28e83SPiotr Jasiukajtis	mov	%o0,%o3
525*25c28e83SPiotr Jasiukajtis
526*25c28e83SPiotr Jasiukajtis	fsubd	P1_f10,P1_f12,P1_f10
527*25c28e83SPiotr Jasiukajtis	srl	%l1,10,%l1
528*25c28e83SPiotr Jasiukajtis	mov	%o1,%o4
529*25c28e83SPiotr Jasiukajtis
530*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_q4,P2_f24
531*25c28e83SPiotr Jasiukajtis	mov	%o2,%o5
532*25c28e83SPiotr Jasiukajtis
533*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
534*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0
535*25c28e83SPiotr Jasiukajtis
536*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
537*25c28e83SPiotr Jasiukajtis	andn	%l1,0x1f,%l1
538*25c28e83SPiotr Jasiukajtis
539*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q3,P2_f24
540*25c28e83SPiotr Jasiukajtis
541*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_pp2,P0_f6
542*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f32
543*25c28e83SPiotr Jasiukajtis
544*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_pp2,P1_f16
545*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f36
546*25c28e83SPiotr Jasiukajtis
547*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
548*25c28e83SPiotr Jasiukajtis
549*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_pp1,P0_f6
550*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_qq2,P0_f4
551*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l0],%f34
552*25c28e83SPiotr Jasiukajtis
553*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,C_pp1,P1_f16
554*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_qq2,P1_f14
555*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l1],%f38
556*25c28e83SPiotr Jasiukajtis
557*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q2,P2_f24
558*25c28e83SPiotr Jasiukajtis
559*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f6,P0_f6
560*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_qq1,P0_f4
561*25c28e83SPiotr Jasiukajtis
562*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f16,P1_f16
563*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_qq1,P1_f14
564*25c28e83SPiotr Jasiukajtis
565*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
566*25c28e83SPiotr Jasiukajtis
567*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_ONE,P0_f6
568*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
569*25c28e83SPiotr Jasiukajtis
570*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,C_ONE,P1_f16
571*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
572*25c28e83SPiotr Jasiukajtis
573*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q1,P2_f24
574*25c28e83SPiotr Jasiukajtis
575*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f6,P0_f6
576*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l0],P0_f2
577*25c28e83SPiotr Jasiukajtis
578*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f16,P1_f16
579*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l1],P1_f12
580*25c28e83SPiotr Jasiukajtis
581*25c28e83SPiotr Jasiukajtis	fmuld	P0_f4,%f32,P0_f4
582*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
583*25c28e83SPiotr Jasiukajtis
584*25c28e83SPiotr Jasiukajtis	fmuld	P1_f14,%f36,P1_f14
585*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P0_f0
586*25c28e83SPiotr Jasiukajtis
587*25c28e83SPiotr Jasiukajtis	fmuld	P0_f6,%f34,P0_f6
588*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P0_f1
589*25c28e83SPiotr Jasiukajtis
590*25c28e83SPiotr Jasiukajtis	fmuld	P1_f16,%f38,P1_f16
591*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
592*25c28e83SPiotr Jasiukajtis
593*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
594*25c28e83SPiotr Jasiukajtis
595*25c28e83SPiotr Jasiukajtis	fsubd	P0_f6,P0_f4,P0_f6
596*25c28e83SPiotr Jasiukajtis
597*25c28e83SPiotr Jasiukajtis	fsubd	P1_f16,P1_f14,P1_f16
598*25c28e83SPiotr Jasiukajtis
599*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P2_f20,P2_f24,P2_f24
600*25c28e83SPiotr Jasiukajtis
601*25c28e83SPiotr Jasiukajtis	fsubd	P0_f2,P0_f6,P0_f6
602*25c28e83SPiotr Jasiukajtis
603*25c28e83SPiotr Jasiukajtis	fsubd	P1_f12,P1_f16,P1_f16
604*25c28e83SPiotr Jasiukajtis
605*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P2_f24,P2_f26 !!(vsin)faddd	P2_f20,P2_f24,P2_f26
606*25c28e83SPiotr Jasiukajtis
607*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,%f32,P0_f6
608*25c28e83SPiotr Jasiukajtis
609*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,%f36,P1_f16
610*25c28e83SPiotr Jasiukajtis	andn	%l0,MSK_SIGN,%l0		! hx &= ~0x80000000
611*25c28e83SPiotr Jasiukajtis
612*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P2_f26,P2_f29,P2_f26
613*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
614*25c28e83SPiotr Jasiukajtis
615*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P0_f6,P0_f9,P0_f6
616*25c28e83SPiotr Jasiukajtis	bg,pt	%icc,.loop0
617*25c28e83SPiotr Jasiukajtis
618*25c28e83SPiotr Jasiukajtis! delay slot
619*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P1_f16,P1_f19,P1_f16
620*25c28e83SPiotr Jasiukajtis
621*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.endloop0
622*25c28e83SPiotr Jasiukajtis! delay slot
623*25c28e83SPiotr Jasiukajtis	nop
624*25c28e83SPiotr Jasiukajtis
625*25c28e83SPiotr Jasiukajtis	.align	32
626*25c28e83SPiotr Jasiukajtis.case2:
627*25c28e83SPiotr Jasiukajtis	st	P2_f26,[%o5]
628*25c28e83SPiotr Jasiukajtis	cmp	%l2,LIM_l5
629*25c28e83SPiotr Jasiukajtis	fpadd32s P2_f20,MSK_BIT13,P2_f28
630*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.case3
631*25c28e83SPiotr Jasiukajtis
632*25c28e83SPiotr Jasiukajtis! delay slot
633*25c28e83SPiotr Jasiukajtis	st	P2_f27,[%o5+4]
634*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
635*25c28e83SPiotr Jasiukajtis	fand	P0_f8,MSK_BITSHI17,P0_f2
636*25c28e83SPiotr Jasiukajtis
637*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0
638*25c28e83SPiotr Jasiukajtis	sub	%l2,%o7,%l2
639*25c28e83SPiotr Jasiukajtis	add	SC_HI,8,%g1;add	SC_LO,8,%o7
640*25c28e83SPiotr Jasiukajtis	fand	P2_f28,MSK_BITSHI17,P2_f22
641*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
642*25c28e83SPiotr Jasiukajtis
643*25c28e83SPiotr Jasiukajtis	fsubd	P0_f0,P0_f2,P0_f0
644*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
645*25c28e83SPiotr Jasiukajtis	mov	%o0,%o3
646*25c28e83SPiotr Jasiukajtis
647*25c28e83SPiotr Jasiukajtis	fsubd	P2_f20,P2_f22,P2_f20
648*25c28e83SPiotr Jasiukajtis	srl	%l2,10,%l2
649*25c28e83SPiotr Jasiukajtis	mov	%o2,%o5
650*25c28e83SPiotr Jasiukajtis
651*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_q4,P1_f14
652*25c28e83SPiotr Jasiukajtis	mov	%o1,%o4
653*25c28e83SPiotr Jasiukajtis
654*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
655*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0
656*25c28e83SPiotr Jasiukajtis
657*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f20,P2_f22
658*25c28e83SPiotr Jasiukajtis	andn	%l2,0x1f,%l2
659*25c28e83SPiotr Jasiukajtis
660*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q3,P1_f14
661*25c28e83SPiotr Jasiukajtis
662*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_pp2,P0_f6
663*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f32
664*25c28e83SPiotr Jasiukajtis
665*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_pp2,P2_f26
666*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l2],%f40
667*25c28e83SPiotr Jasiukajtis
668*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
669*25c28e83SPiotr Jasiukajtis
670*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_pp1,P0_f6
671*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_qq2,P0_f4
672*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l0],%f34
673*25c28e83SPiotr Jasiukajtis
674*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,C_pp1,P2_f26
675*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_qq2,P2_f24
676*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l2],%f42
677*25c28e83SPiotr Jasiukajtis
678*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q2,P1_f14
679*25c28e83SPiotr Jasiukajtis
680*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f6,P0_f6
681*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_qq1,P0_f4
682*25c28e83SPiotr Jasiukajtis
683*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f26,P2_f26
684*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_qq1,P2_f24
685*25c28e83SPiotr Jasiukajtis
686*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
687*25c28e83SPiotr Jasiukajtis
688*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_ONE,P0_f6
689*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
690*25c28e83SPiotr Jasiukajtis
691*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,C_ONE,P2_f26
692*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
693*25c28e83SPiotr Jasiukajtis
694*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q1,P1_f14
695*25c28e83SPiotr Jasiukajtis
696*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f6,P0_f6
697*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l0],P0_f2
698*25c28e83SPiotr Jasiukajtis
699*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f26,P2_f26
700*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l2],P2_f22
701*25c28e83SPiotr Jasiukajtis
702*25c28e83SPiotr Jasiukajtis	fmuld	P0_f4,%f32,P0_f4
703*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
704*25c28e83SPiotr Jasiukajtis
705