1*25c28e83SPiotr Jasiukajtis/*
2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis *
4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis *
8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis *
13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis *
19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis */
21*25c28e83SPiotr Jasiukajtis/*
22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23*25c28e83SPiotr Jasiukajtis */
24*25c28e83SPiotr Jasiukajtis/*
25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms.
27*25c28e83SPiotr Jasiukajtis */
28*25c28e83SPiotr Jasiukajtis
29*25c28e83SPiotr Jasiukajtis	.file	"__vcos.S"
30*25c28e83SPiotr Jasiukajtis
31*25c28e83SPiotr Jasiukajtis#include "libm.h"
32*25c28e83SPiotr Jasiukajtis
33*25c28e83SPiotr Jasiukajtis	RO_DATA
34*25c28e83SPiotr Jasiukajtis	.align	64
! Table of 64-bit constants, each written as two 32-bit words with the
! high-order word first.  The visible code fetches entries with
! ldd [%g1+offset], using the byte offsets named by the defines that
! follow the table (p4 at 0x0 up to thresh at 0xa8; mask is at 0xa0).
! The final entry at 0xb0 has no offset name among those defines.
35*25c28e83SPiotr Jasiukajtisconstants:
36*25c28e83SPiotr Jasiukajtis	.word	0x3ec718e3,0xa6972785	! 0x00 p4
37*25c28e83SPiotr Jasiukajtis	.word	0x3ef9fd39,0x94293940	! 0x08 q4
38*25c28e83SPiotr Jasiukajtis	.word	0xbf2a019f,0x75ee4be1	! 0x10 p3
39*25c28e83SPiotr Jasiukajtis	.word	0xbf56c16b,0xba552569	! 0x18 q3
40*25c28e83SPiotr Jasiukajtis	.word	0x3f811111,0x1108c703	! 0x20 p2
41*25c28e83SPiotr Jasiukajtis	.word	0x3fa55555,0x554f5b35	! 0x28 q2
42*25c28e83SPiotr Jasiukajtis	.word	0xbfc55555,0x555554d0	! 0x30 p1
43*25c28e83SPiotr Jasiukajtis	.word	0xbfdfffff,0xffffff85	! 0x38 q1
44*25c28e83SPiotr Jasiukajtis	.word	0x3ff00000,0x00000000	! 0x40 one (IEEE double 1.0)
45*25c28e83SPiotr Jasiukajtis	.word	0xbfc55555,0x5551fc28	! 0x48 pp1
46*25c28e83SPiotr Jasiukajtis	.word	0x3f811107,0x62eacc9d	! 0x50 pp2
47*25c28e83SPiotr Jasiukajtis	.word	0xbfdfffff,0xffff6328	! 0x58 qq1
48*25c28e83SPiotr Jasiukajtis	.word	0x3fa55551,0x5f7acf0c	! 0x60 qq2
49*25c28e83SPiotr Jasiukajtis	.word	0x3fe45f30,0x6dc9c883	! 0x68 invpio2
50*25c28e83SPiotr Jasiukajtis	.word	0x43380000,0x00000000	! 0x70 round
51*25c28e83SPiotr Jasiukajtis	.word	0x3ff921fb,0x54400000	! 0x78 pio2_1
52*25c28e83SPiotr Jasiukajtis	.word	0x3dd0b461,0x1a600000	! 0x80 pio2_2
53*25c28e83SPiotr Jasiukajtis	.word	0x3ba3198a,0x2e000000	! 0x88 pio2_3
54*25c28e83SPiotr Jasiukajtis	.word	0x397b839a,0x252049c1	! 0x90 pio2_3t
55*25c28e83SPiotr Jasiukajtis	.word	0x80000000,0x00004000	! 0x98 f30val (loaded into %f30/%f31)
56*25c28e83SPiotr Jasiukajtis	.word	0xffff8000,0x00000000	! N.B.: low-order words used
57*25c28e83SPiotr Jasiukajtis	.word	0x3fc90000,0x80000000	! for sign bit hacking; see
58*25c28e83SPiotr Jasiukajtis	.word	0x3fc40000,0x00000000	! references to "thresh" below
59*25c28e83SPiotr Jasiukajtis
! Byte offsets of the entries in the constants table.  Each entry is
! 8 bytes wide, so consecutive names are 8 apart.  The visible code uses
! them as [%g1+name] in ldd instructions while %g1 holds the table address.
60*25c28e83SPiotr Jasiukajtis#define p4		0x0
61*25c28e83SPiotr Jasiukajtis#define q4		0x08
62*25c28e83SPiotr Jasiukajtis#define p3		0x10
63*25c28e83SPiotr Jasiukajtis#define q3		0x18
64*25c28e83SPiotr Jasiukajtis#define p2		0x20
65*25c28e83SPiotr Jasiukajtis#define q2		0x28
66*25c28e83SPiotr Jasiukajtis#define p1		0x30
67*25c28e83SPiotr Jasiukajtis#define q1		0x38
68*25c28e83SPiotr Jasiukajtis#define one		0x40
69*25c28e83SPiotr Jasiukajtis#define pp1		0x48
70*25c28e83SPiotr Jasiukajtis#define pp2		0x50
71*25c28e83SPiotr Jasiukajtis#define qq1		0x58
72*25c28e83SPiotr Jasiukajtis#define qq2		0x60
73*25c28e83SPiotr Jasiukajtis#define invpio2		0x68
74*25c28e83SPiotr Jasiukajtis#define round		0x70
75*25c28e83SPiotr Jasiukajtis#define pio2_1		0x78
76*25c28e83SPiotr Jasiukajtis#define pio2_2		0x80
77*25c28e83SPiotr Jasiukajtis#define pio2_3		0x88
78*25c28e83SPiotr Jasiukajtis#define pio2_3t		0x90
79*25c28e83SPiotr Jasiukajtis#define f30val		0x98
80*25c28e83SPiotr Jasiukajtis#define mask		0xa0
81*25c28e83SPiotr Jasiukajtis#define thresh		0xa8
82*25c28e83SPiotr Jasiukajtis
83*25c28e83SPiotr Jasiukajtis! local storage indices
! Offsets, relative to %fp, of the temporaries kept in this frame.
! At function entry the code stores the x pointer (%i1) at the first
! slot, the y pointer (%i3) at the second, then n (%i0), stridex (%i2)
! and stridey (%i4) in the three 4-byte slots that follow, and points
! %o3, %o4 and %o5 at the x0_1 slot before entering the loop.
! NOTE(review): biguns, n0-n2, x1_1, x2_1 and y0_0-y2_0 are not
! referenced in the part of the file reviewed here; presumably they
! serve the medium-range path -- confirm against the rest of the file.
84*25c28e83SPiotr Jasiukajtis
85*25c28e83SPiotr Jasiukajtis#define xsave		STACK_BIAS-0x8
86*25c28e83SPiotr Jasiukajtis#define ysave		STACK_BIAS-0x10
87*25c28e83SPiotr Jasiukajtis#define nsave		STACK_BIAS-0x14
88*25c28e83SPiotr Jasiukajtis#define sxsave		STACK_BIAS-0x18
89*25c28e83SPiotr Jasiukajtis#define sysave		STACK_BIAS-0x1c
90*25c28e83SPiotr Jasiukajtis#define biguns		STACK_BIAS-0x20
91*25c28e83SPiotr Jasiukajtis#define n2		STACK_BIAS-0x24
92*25c28e83SPiotr Jasiukajtis#define n1		STACK_BIAS-0x28
93*25c28e83SPiotr Jasiukajtis#define n0		STACK_BIAS-0x2c
94*25c28e83SPiotr Jasiukajtis#define x2_1		STACK_BIAS-0x40
95*25c28e83SPiotr Jasiukajtis#define x1_1		STACK_BIAS-0x50
96*25c28e83SPiotr Jasiukajtis#define x0_1		STACK_BIAS-0x60
97*25c28e83SPiotr Jasiukajtis#define y2_0		STACK_BIAS-0x70
98*25c28e83SPiotr Jasiukajtis#define y1_0		STACK_BIAS-0x80
99*25c28e83SPiotr Jasiukajtis#define y0_0		STACK_BIAS-0x90
100*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9
! (this size is subtracted from %sp by the save instruction at function
! entry; it equals the magnitude of the lowest slot offset, 0x90)
101*25c28e83SPiotr Jasiukajtis#define tmps		0x90
102*25c28e83SPiotr Jasiukajtis
103*25c28e83SPiotr Jasiukajtis!--------------------------------------------------------------------
104*25c28e83SPiotr Jasiukajtis! define pipes for easier reading
! The P0_, P1_ and P2_ names alias floating-point registers %f0-%f9,
! %f10-%f19 and %f20-%f29.  Per the register-use notes in the function,
! each group carries one of the three arguments (x0, x1, x2) that the
! loop processes together.
105*25c28e83SPiotr Jasiukajtis
106*25c28e83SPiotr Jasiukajtis#define P0_f0		%f0
107*25c28e83SPiotr Jasiukajtis#define P0_f1		%f1
108*25c28e83SPiotr Jasiukajtis#define P0_f2		%f2
109*25c28e83SPiotr Jasiukajtis#define P0_f3		%f3
110*25c28e83SPiotr Jasiukajtis#define P0_f4		%f4
111*25c28e83SPiotr Jasiukajtis#define P0_f5		%f5
112*25c28e83SPiotr Jasiukajtis#define P0_f6		%f6
113*25c28e83SPiotr Jasiukajtis#define P0_f7		%f7
114*25c28e83SPiotr Jasiukajtis#define P0_f8		%f8
115*25c28e83SPiotr Jasiukajtis#define P0_f9		%f9
116*25c28e83SPiotr Jasiukajtis
117*25c28e83SPiotr Jasiukajtis#define P1_f10		%f10
118*25c28e83SPiotr Jasiukajtis#define P1_f11		%f11
119*25c28e83SPiotr Jasiukajtis#define P1_f12		%f12
120*25c28e83SPiotr Jasiukajtis#define P1_f13		%f13
121*25c28e83SPiotr Jasiukajtis#define P1_f14		%f14
122*25c28e83SPiotr Jasiukajtis#define P1_f15		%f15
123*25c28e83SPiotr Jasiukajtis#define P1_f16		%f16
124*25c28e83SPiotr Jasiukajtis#define P1_f17		%f17
125*25c28e83SPiotr Jasiukajtis#define P1_f18		%f18
126*25c28e83SPiotr Jasiukajtis#define P1_f19		%f19
127*25c28e83SPiotr Jasiukajtis
128*25c28e83SPiotr Jasiukajtis#define P2_f20		%f20
129*25c28e83SPiotr Jasiukajtis#define P2_f21		%f21
130*25c28e83SPiotr Jasiukajtis#define P2_f22		%f22
131*25c28e83SPiotr Jasiukajtis#define P2_f23		%f23
132*25c28e83SPiotr Jasiukajtis#define P2_f24		%f24
133*25c28e83SPiotr Jasiukajtis#define P2_f25		%f25
134*25c28e83SPiotr Jasiukajtis#define P2_f26		%f26
135*25c28e83SPiotr Jasiukajtis#define P2_f27		%f27
136*25c28e83SPiotr Jasiukajtis#define P2_f28		%f28
137*25c28e83SPiotr Jasiukajtis#define P2_f29		%f29
138*25c28e83SPiotr Jasiukajtis
139*25c28e83SPiotr Jasiukajtis! define __vlibm_TBL_sincos_hi & lo for easy reading
! (%l3 and %l4 are the registers named in the PIC_SET lines for the two
! tables at function entry)
140*25c28e83SPiotr Jasiukajtis
141*25c28e83SPiotr Jasiukajtis#define SC_HI		%l3
142*25c28e83SPiotr Jasiukajtis#define SC_LO		%l4
143*25c28e83SPiotr Jasiukajtis
144*25c28e83SPiotr Jasiukajtis! define constants for easy reading
! (double-precision registers loaded at function entry from the
! constants-table entries at offsets q1, q2, q3 and q4)
145*25c28e83SPiotr Jasiukajtis
146*25c28e83SPiotr Jasiukajtis#define C_q1 %f46
147*25c28e83SPiotr Jasiukajtis#define C_q2 %f48
148*25c28e83SPiotr Jasiukajtis#define C_q3 %f50
149*25c28e83SPiotr Jasiukajtis#define C_q4 %f52
150*25c28e83SPiotr Jasiukajtis
151*25c28e83SPiotr Jasiukajtis! the constant 1.0; C_ONE_LO names the low-order half of the register pair
152*25c28e83SPiotr Jasiukajtis#define C_ONE		%f54
153*25c28e83SPiotr Jasiukajtis#define C_ONE_LO	%f55
154*25c28e83SPiotr Jasiukajtis
155*25c28e83SPiotr Jasiukajtis! masks
! MSK_SIGN is set to 0x80000000 with sethi at function entry.
! MSK_BIT31 and MSK_BIT13 are the two halves of the 8-byte f30val table
! entry (0x80000000 and 0x00004000), loaded together by a single ldd.
! MSK_BITSHI17 is loaded from the mask table entry (0xffff8000 00000000).
156*25c28e83SPiotr Jasiukajtis#define MSK_SIGN	%i5
157*25c28e83SPiotr Jasiukajtis#define MSK_BIT31	%f30
158*25c28e83SPiotr Jasiukajtis#define MSK_BIT13	%f31
159*25c28e83SPiotr Jasiukajtis#define MSK_BITSHI17	%f44
160*25c28e83SPiotr Jasiukajtis
161*25c28e83SPiotr Jasiukajtis
162*25c28e83SPiotr Jasiukajtis! constants for pp and qq
! (loaded at function entry from table offsets pp1, pp2, qq1 and qq2)
163*25c28e83SPiotr Jasiukajtis#define C_pp1 %f56
164*25c28e83SPiotr Jasiukajtis#define C_pp2 %f58
165*25c28e83SPiotr Jasiukajtis#define C_qq1 %f60
166*25c28e83SPiotr Jasiukajtis#define C_qq2 %f62
167*25c28e83SPiotr Jasiukajtis
168*25c28e83SPiotr Jasiukajtis! sign mask (a second name for the same register as MSK_SIGN, %i5)
169*25c28e83SPiotr Jasiukajtis#define C_signM		%i5
170*25c28e83SPiotr Jasiukajtis
! Range limits compared against the high word of |x|.  At function entry
! %l5 is set to 0x3fc40000, %l6 to 0x3e400000 and %l7 to 0x3fe921fb.
171*25c28e83SPiotr Jasiukajtis#define LIM_l5		%l5
172*25c28e83SPiotr Jasiukajtis#define LIM_l6		%l6
173*25c28e83SPiotr Jasiukajtis! When in the primary range, the value is used as the transition from poly
174*25c28e83SPiotr Jasiukajtis! to table.  For the medium range, %l6 is reused to keep track of biguns.
175*25c28e83SPiotr Jasiukajtis#define LIM_l7		%l7
176*25c28e83SPiotr Jasiukajtis
177*25c28e83SPiotr Jasiukajtis!--------------------------------------------------------------------
178*25c28e83SPiotr Jasiukajtis
179*25c28e83SPiotr Jasiukajtis
180*25c28e83SPiotr Jasiukajtis	ENTRY(__vcos)
181*25c28e83SPiotr Jasiukajtis	save	%sp,-SA(MINFRAME)-tmps,%sp
182*25c28e83SPiotr Jasiukajtis	PIC_SETUP(g5)
183*25c28e83SPiotr Jasiukajtis	PIC_SET(g5,__vlibm_TBL_sincos_hi,l3)
184*25c28e83SPiotr Jasiukajtis	PIC_SET(g5,__vlibm_TBL_sincos_lo,l4)
185*25c28e83SPiotr Jasiukajtis	PIC_SET(g5,constants,o0)
186*25c28e83SPiotr Jasiukajtis	mov	%o0,%g1
187*25c28e83SPiotr Jasiukajtis	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
188*25c28e83SPiotr Jasiukajtis
189*25c28e83SPiotr Jasiukajtis! ========== primary range ==========
190*25c28e83SPiotr Jasiukajtis
191*25c28e83SPiotr Jasiukajtis! register use
192*25c28e83SPiotr Jasiukajtis
193*25c28e83SPiotr Jasiukajtis! i0  n
194*25c28e83SPiotr Jasiukajtis! i1  x
195*25c28e83SPiotr Jasiukajtis! i2  stridex
196*25c28e83SPiotr Jasiukajtis! i3  y
197*25c28e83SPiotr Jasiukajtis! i4  stridey
198*25c28e83SPiotr Jasiukajtis! i5  0x80000000
199*25c28e83SPiotr Jasiukajtis
200*25c28e83SPiotr Jasiukajtis! l0  hx0
201*25c28e83SPiotr Jasiukajtis! l1  hx1
202*25c28e83SPiotr Jasiukajtis! l2  hx2
203*25c28e83SPiotr Jasiukajtis! l3  __vlibm_TBL_sincos_hi
204*25c28e83SPiotr Jasiukajtis! l4  __vlibm_TBL_sincos_lo
205*25c28e83SPiotr Jasiukajtis! l5  0x3fc40000
206*25c28e83SPiotr Jasiukajtis! l6  0x3e400000
207*25c28e83SPiotr Jasiukajtis! l7  0x3fe921fb
208*25c28e83SPiotr Jasiukajtis
209*25c28e83SPiotr Jasiukajtis! the following are 64-bit registers in both V8+ and V9
210*25c28e83SPiotr Jasiukajtis
211*25c28e83SPiotr Jasiukajtis! g1  scratch
212*25c28e83SPiotr Jasiukajtis! g5
213*25c28e83SPiotr Jasiukajtis
214*25c28e83SPiotr Jasiukajtis! o0  py0
215*25c28e83SPiotr Jasiukajtis! o1  py1
216*25c28e83SPiotr Jasiukajtis! o2  py2
217*25c28e83SPiotr Jasiukajtis! o3  oy0
218*25c28e83SPiotr Jasiukajtis! o4  oy1
219*25c28e83SPiotr Jasiukajtis! o5  oy2
220*25c28e83SPiotr Jasiukajtis! o7  scratch
221*25c28e83SPiotr Jasiukajtis
222*25c28e83SPiotr Jasiukajtis! f0  x0
223*25c28e83SPiotr Jasiukajtis! f2
224*25c28e83SPiotr Jasiukajtis! f4
225*25c28e83SPiotr Jasiukajtis! f6
226*25c28e83SPiotr Jasiukajtis! f8  scratch for table base
227*25c28e83SPiotr Jasiukajtis! f9  signbit0
228*25c28e83SPiotr Jasiukajtis! f10 x1
229*25c28e83SPiotr Jasiukajtis! f12
230*25c28e83SPiotr Jasiukajtis! f14
231*25c28e83SPiotr Jasiukajtis! f16
232*25c28e83SPiotr Jasiukajtis! f18 scratch for table base
233*25c28e83SPiotr Jasiukajtis! f19 signbit1
234*25c28e83SPiotr Jasiukajtis! f20 x2
235*25c28e83SPiotr Jasiukajtis! f22
236*25c28e83SPiotr Jasiukajtis! f24
237*25c28e83SPiotr Jasiukajtis! f26
238*25c28e83SPiotr Jasiukajtis! f28 scratch for table base
239*25c28e83SPiotr Jasiukajtis! f29 signbit2
240*25c28e83SPiotr Jasiukajtis! f30 0x80000000
241*25c28e83SPiotr Jasiukajtis! f31 0x4000
242*25c28e83SPiotr Jasiukajtis! f32
243*25c28e83SPiotr Jasiukajtis! f34
244*25c28e83SPiotr Jasiukajtis! f36
245*25c28e83SPiotr Jasiukajtis! f38
246*25c28e83SPiotr Jasiukajtis! f40
247*25c28e83SPiotr Jasiukajtis! f42
248*25c28e83SPiotr Jasiukajtis! f44 0xffff800000000000
249*25c28e83SPiotr Jasiukajtis! f46 q1
250*25c28e83SPiotr Jasiukajtis! f48 q2
251*25c28e83SPiotr Jasiukajtis! f50 q3
252*25c28e83SPiotr Jasiukajtis! f52 q4
253*25c28e83SPiotr Jasiukajtis! f54 one
254*25c28e83SPiotr Jasiukajtis! f56 pp1
255*25c28e83SPiotr Jasiukajtis! f58 pp2
256*25c28e83SPiotr Jasiukajtis! f60 qq1
257*25c28e83SPiotr Jasiukajtis! f62 qq2
258*25c28e83SPiotr Jasiukajtis
259*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9
260*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+xsave]		! save arguments
261*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+ysave]
262*25c28e83SPiotr Jasiukajtis#else
263*25c28e83SPiotr Jasiukajtis	st	%i1,[%fp+xsave]		! save arguments
264*25c28e83SPiotr Jasiukajtis	st	%i3,[%fp+ysave]
265*25c28e83SPiotr Jasiukajtis#endif
266*25c28e83SPiotr Jasiukajtis
267*25c28e83SPiotr Jasiukajtis	st	%i0,[%fp+nsave]
268*25c28e83SPiotr Jasiukajtis	st	%i2,[%fp+sxsave]
269*25c28e83SPiotr Jasiukajtis	st	%i4,[%fp+sysave]
270*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x80000000),MSK_SIGN	! load/set up constants
271*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc40000),LIM_l5
272*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3e400000),LIM_l6
273*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fe921fb),LIM_l7
274*25c28e83SPiotr Jasiukajtis	or	LIM_l7,%lo(0x3fe921fb),LIM_l7
275*25c28e83SPiotr Jasiukajtis	ldd	[%g1+f30val],MSK_BIT31
276*25c28e83SPiotr Jasiukajtis	ldd	[%g1+mask],MSK_BITSHI17
277*25c28e83SPiotr Jasiukajtis	ldd	[%g1+q1],C_q1
278*25c28e83SPiotr Jasiukajtis	ldd	[%g1+q2],C_q2
279*25c28e83SPiotr Jasiukajtis	ldd	[%g1+q3],C_q3
280*25c28e83SPiotr Jasiukajtis	ldd	[%g1+q4],C_q4
281*25c28e83SPiotr Jasiukajtis	ldd	[%g1+one],C_ONE
282*25c28e83SPiotr Jasiukajtis	ldd	[%g1+pp1],C_pp1
283*25c28e83SPiotr Jasiukajtis	ldd	[%g1+pp2],C_pp2
284*25c28e83SPiotr Jasiukajtis	ldd	[%g1+qq1],C_qq1
285*25c28e83SPiotr Jasiukajtis	ldd	[%g1+qq2],C_qq2
286*25c28e83SPiotr Jasiukajtis	sll	%i2,3,%i2		! scale strides
287*25c28e83SPiotr Jasiukajtis	sll	%i4,3,%i4
288*25c28e83SPiotr Jasiukajtis	add	%fp,x0_1,%o3		! precondition loop
289*25c28e83SPiotr Jasiukajtis	add	%fp,x0_1,%o4
290*25c28e83SPiotr Jasiukajtis	add	%fp,x0_1,%o5
291*25c28e83SPiotr Jasiukajtis	ld	[%i1],%l0		! hx = *x
292*25c28e83SPiotr Jasiukajtis	ld	[%i1],P0_f0
293*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],P0_f1
294*25c28e83SPiotr Jasiukajtis	andn	%l0,MSK_SIGN,%l0		! hx &= ~0x80000000
295*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
296*25c28e83SPiotr Jasiukajtis
297*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.loop0
298*25c28e83SPiotr Jasiukajtis!delay slot
299*25c28e83SPiotr Jasiukajtis	nop
300*25c28e83SPiotr Jasiukajtis
301*25c28e83SPiotr Jasiukajtis	.align 32
302*25c28e83SPiotr Jasiukajtis.loop0:
303*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l1		! preload next argument
304*25c28e83SPiotr Jasiukajtis	sub	%l0,LIM_l6,%g1
305*25c28e83SPiotr Jasiukajtis	sub	LIM_l7,%l0,%o7
306*25c28e83SPiotr Jasiukajtis	fands	P0_f0,MSK_BIT31,P0_f9		! save signbit
307*25c28e83SPiotr Jasiukajtis
308*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P1_f10
309*25c28e83SPiotr Jasiukajtis	orcc	%o7,%g1,%g0
310*25c28e83SPiotr Jasiukajtis	mov	%i3,%o0			! py0 = y
311*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.range0		! if hx < 0x3e400000 or > 0x3fe921fb
312*25c28e83SPiotr Jasiukajtis
313*25c28e83SPiotr Jasiukajtis! delay slot
314*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P1_f11
315*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
316*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
317*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.endloop1
318*25c28e83SPiotr Jasiukajtis
319*25c28e83SPiotr Jasiukajtis! delay slot
320*25c28e83SPiotr Jasiukajtis	andn	%l1,MSK_SIGN,%l1
321*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
322*25c28e83SPiotr Jasiukajtis	fabsd	P0_f0,P0_f0
323*25c28e83SPiotr Jasiukajtis	fmuld	C_ONE,C_ONE,C_ONE		! one*one; a nop for alignment only
324*25c28e83SPiotr Jasiukajtis
325*25c28e83SPiotr Jasiukajtis.loop1:
326*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l2		! preload next argument
327*25c28e83SPiotr Jasiukajtis	sub	%l1,LIM_l6,%g1
328*25c28e83SPiotr Jasiukajtis	sub	LIM_l7,%l1,%o7
329*25c28e83SPiotr Jasiukajtis	fands	P1_f10,MSK_BIT31,P1_f19		! save signbit
330*25c28e83SPiotr Jasiukajtis
331*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P2_f20
332*25c28e83SPiotr Jasiukajtis	orcc	%o7,%g1,%g0
333*25c28e83SPiotr Jasiukajtis	mov	%i3,%o1			! py1 = y
334*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.range1		! if hx < 0x3e400000 or > 0x3fe921fb
335*25c28e83SPiotr Jasiukajtis
336*25c28e83SPiotr Jasiukajtis! delay slot
337*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P2_f21
338*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
339*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
340*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.endloop2
341*25c28e83SPiotr Jasiukajtis
342*25c28e83SPiotr Jasiukajtis! delay slot
343*25c28e83SPiotr Jasiukajtis	andn	%l2,MSK_SIGN,%l2
344*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
345*25c28e83SPiotr Jasiukajtis	fabsd	P1_f10,P1_f10
346*25c28e83SPiotr Jasiukajtis	fmuld	C_ONE,C_ONE,C_ONE		! one*one; a nop for alignment only
347*25c28e83SPiotr Jasiukajtis
348*25c28e83SPiotr Jasiukajtis.loop2:
349*25c28e83SPiotr Jasiukajtis	st	P0_f6,[%o3]
350*25c28e83SPiotr Jasiukajtis	sub	%l2,LIM_l6,%g1
351*25c28e83SPiotr Jasiukajtis	sub	LIM_l7,%l2,%o7
352*25c28e83SPiotr Jasiukajtis	fands	P2_f20,MSK_BIT31,P2_f29		! save signbit
353*25c28e83SPiotr Jasiukajtis
354*25c28e83SPiotr Jasiukajtis	st	P0_f7,[%o3+4]
355*25c28e83SPiotr Jasiukajtis	orcc	%g1,%o7,%g0
356*25c28e83SPiotr Jasiukajtis	mov	%i3,%o2			! py2 = y
357*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.range2		! if hx < 0x3e400000 or > 0x3fe921fb
358*25c28e83SPiotr Jasiukajtis
359*25c28e83SPiotr Jasiukajtis! delay slot
360*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
361*25c28e83SPiotr Jasiukajtis	cmp	%l0,LIM_l5
362*25c28e83SPiotr Jasiukajtis	fabsd	P2_f20,P2_f20
363*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.case4
364*25c28e83SPiotr Jasiukajtis
365*25c28e83SPiotr Jasiukajtis! delay slot
366*25c28e83SPiotr Jasiukajtis	st	P1_f16,[%o4]
367*25c28e83SPiotr Jasiukajtis	cmp	%l1,LIM_l5
368*25c28e83SPiotr Jasiukajtis	fpadd32s P0_f0,MSK_BIT13,P0_f8
369*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.case2
370*25c28e83SPiotr Jasiukajtis
371*25c28e83SPiotr Jasiukajtis! delay slot
372*25c28e83SPiotr Jasiukajtis	st	P1_f17,[%o4+4]
373*25c28e83SPiotr Jasiukajtis	cmp	%l2,LIM_l5
374*25c28e83SPiotr Jasiukajtis	fpadd32s P1_f10,MSK_BIT13,P1_f18
375*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.case1
376*25c28e83SPiotr Jasiukajtis
377*25c28e83SPiotr Jasiukajtis! delay slot
378*25c28e83SPiotr Jasiukajtis	st	P2_f26,[%o5]
379*25c28e83SPiotr Jasiukajtis	mov	%o0,%o3
380*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
381*25c28e83SPiotr Jasiukajtis	fpadd32s P2_f20,MSK_BIT13,P2_f28
382*25c28e83SPiotr Jasiukajtis
383*25c28e83SPiotr Jasiukajtis	st	P2_f27,[%o5+4]
384*25c28e83SPiotr Jasiukajtis	fand	P0_f8,MSK_BITSHI17,P0_f2
385*25c28e83SPiotr Jasiukajtis	mov	%o1,%o4
386*25c28e83SPiotr Jasiukajtis
387*25c28e83SPiotr Jasiukajtis	fand	P1_f18,MSK_BITSHI17,P1_f12
388*25c28e83SPiotr Jasiukajtis	mov	%o2,%o5
389*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0
390*25c28e83SPiotr Jasiukajtis
391*25c28e83SPiotr Jasiukajtis	fand	P2_f28,MSK_BITSHI17,P2_f22
392*25c28e83SPiotr Jasiukajtis	sub	%l1,%o7,%l1
393*25c28e83SPiotr Jasiukajtis	sub	%l2,%o7,%l2
394*25c28e83SPiotr Jasiukajtis
395*25c28e83SPiotr Jasiukajtis	fsubd	P0_f0,P0_f2,P0_f0
396*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
397*25c28e83SPiotr Jasiukajtis	add	SC_HI,8,%g1;add	SC_LO,8,%o7
398*25c28e83SPiotr Jasiukajtis
399*25c28e83SPiotr Jasiukajtis	fsubd	P1_f10,P1_f12,P1_f10
400*25c28e83SPiotr Jasiukajtis	srl	%l1,10,%l1
401*25c28e83SPiotr Jasiukajtis
402*25c28e83SPiotr Jasiukajtis	fsubd	P2_f20,P2_f22,P2_f20
403*25c28e83SPiotr Jasiukajtis	srl	%l2,10,%l2
404*25c28e83SPiotr Jasiukajtis
405*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
406*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0
407*25c28e83SPiotr Jasiukajtis
408*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
409*25c28e83SPiotr Jasiukajtis	andn	%l1,0x1f,%l1
410*25c28e83SPiotr Jasiukajtis
411*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f20,P2_f22
412*25c28e83SPiotr Jasiukajtis	andn	%l2,0x1f,%l2
413*25c28e83SPiotr Jasiukajtis
414*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_pp2,P0_f6
415*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f32
416*25c28e83SPiotr Jasiukajtis
417*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_pp2,P1_f16
418*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f36
419*25c28e83SPiotr Jasiukajtis
420*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_pp2,P2_f26
421*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l2],%f40
422*25c28e83SPiotr Jasiukajtis
423*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_pp1,P0_f6
424*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_qq2,P0_f4
425*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l0],%f34
426*25c28e83SPiotr Jasiukajtis
427*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,C_pp1,P1_f16
428*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_qq2,P1_f14
429*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l1],%f38
430*25c28e83SPiotr Jasiukajtis
431*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,C_pp1,P2_f26
432*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_qq2,P2_f24
433*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l2],%f42
434*25c28e83SPiotr Jasiukajtis
435*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f6,P0_f6
436*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_qq1,P0_f4
437*25c28e83SPiotr Jasiukajtis
438*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f16,P1_f16
439*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_qq1,P1_f14
440*25c28e83SPiotr Jasiukajtis
441*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f26,P2_f26
442*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_qq1,P2_f24
443*25c28e83SPiotr Jasiukajtis
444*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_ONE,P0_f6
445*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
446*25c28e83SPiotr Jasiukajtis
447*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,C_ONE,P1_f16
448*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
449*25c28e83SPiotr Jasiukajtis
450*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,C_ONE,P2_f26
451*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
452*25c28e83SPiotr Jasiukajtis
453*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f6,P0_f6
454*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l0],P0_f2
455*25c28e83SPiotr Jasiukajtis
456*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f16,P1_f16
457*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l1],P1_f12
458*25c28e83SPiotr Jasiukajtis
459*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f26,P2_f26
460*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l2],P2_f22
461*25c28e83SPiotr Jasiukajtis
462*25c28e83SPiotr Jasiukajtis	fmuld	P0_f4,%f32,P0_f4
463*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
464*25c28e83SPiotr Jasiukajtis
465*25c28e83SPiotr Jasiukajtis	fmuld	P1_f14,%f36,P1_f14
466*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P0_f0
467*25c28e83SPiotr Jasiukajtis
468*25c28e83SPiotr Jasiukajtis	fmuld	P2_f24,%f40,P2_f24
469*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P0_f1
470*25c28e83SPiotr Jasiukajtis
471*25c28e83SPiotr Jasiukajtis	fmuld	P0_f6,%f34,P0_f6
472*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
473*25c28e83SPiotr Jasiukajtis
474*25c28e83SPiotr Jasiukajtis	fmuld	P1_f16,%f38,P1_f16
475*25c28e83SPiotr Jasiukajtis
476*25c28e83SPiotr Jasiukajtis	fmuld	P2_f26,%f42,P2_f26
477*25c28e83SPiotr Jasiukajtis
478*25c28e83SPiotr Jasiukajtis	fsubd	P0_f6,P0_f4,P0_f6
479*25c28e83SPiotr Jasiukajtis
480*25c28e83SPiotr Jasiukajtis	fsubd	P1_f16,P1_f14,P1_f16
481*25c28e83SPiotr Jasiukajtis
482*25c28e83SPiotr Jasiukajtis	fsubd	P2_f26,P2_f24,P2_f26
483*25c28e83SPiotr Jasiukajtis
484*25c28e83SPiotr Jasiukajtis	fsubd	P0_f2,P0_f6,P0_f6
485*25c28e83SPiotr Jasiukajtis
486*25c28e83SPiotr Jasiukajtis	fsubd	P1_f12,P1_f16,P1_f16
487*25c28e83SPiotr Jasiukajtis
488*25c28e83SPiotr Jasiukajtis	fsubd	P2_f22,P2_f26,P2_f26
489*25c28e83SPiotr Jasiukajtis
490*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,%f32,P0_f6
491*25c28e83SPiotr Jasiukajtis
492*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,%f36,P1_f16
493*25c28e83SPiotr Jasiukajtis
494*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,%f40,P2_f26
495*25c28e83SPiotr Jasiukajtis	andn	%l0,MSK_SIGN,%l0		! hx &= ~0x80000000
496*25c28e83SPiotr Jasiukajtis
497*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P0_f6,P0_f9,P0_f6
498*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
499*25c28e83SPiotr Jasiukajtis
500*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P1_f16,P1_f19,P1_f16
501*25c28e83SPiotr Jasiukajtis	bg,pt	%icc,.loop0
502*25c28e83SPiotr Jasiukajtis
503*25c28e83SPiotr Jasiukajtis! delay slot
504*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P2_f26,P2_f29,P2_f26
505*25c28e83SPiotr Jasiukajtis
506*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.endloop0
507*25c28e83SPiotr Jasiukajtis! delay slot
508*25c28e83SPiotr Jasiukajtis	nop
509*25c28e83SPiotr Jasiukajtis
510*25c28e83SPiotr Jasiukajtis	.align	32
511*25c28e83SPiotr Jasiukajtis.case1:
512*25c28e83SPiotr Jasiukajtis	st	P2_f27,[%o5+4]
513*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
514*25c28e83SPiotr Jasiukajtis	fand	P0_f8,MSK_BITSHI17,P0_f2
515*25c28e83SPiotr Jasiukajtis
516*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0
517*25c28e83SPiotr Jasiukajtis	sub	%l1,%o7,%l1
518*25c28e83SPiotr Jasiukajtis	add	SC_HI,8,%g1;add	SC_LO,8,%o7
519*25c28e83SPiotr Jasiukajtis	fand	P1_f18,MSK_BITSHI17,P1_f12
520*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f20,P2_f22
521*25c28e83SPiotr Jasiukajtis
522*25c28e83SPiotr Jasiukajtis	fsubd	P0_f0,P0_f2,P0_f0
523*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
524*25c28e83SPiotr Jasiukajtis	mov	%o0,%o3
525*25c28e83SPiotr Jasiukajtis
526*25c28e83SPiotr Jasiukajtis	fsubd	P1_f10,P1_f12,P1_f10
527*25c28e83SPiotr Jasiukajtis	srl	%l1,10,%l1
528*25c28e83SPiotr Jasiukajtis	mov	%o1,%o4
529*25c28e83SPiotr Jasiukajtis
530*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_q4,P2_f24
531*25c28e83SPiotr Jasiukajtis	mov	%o2,%o5
532*25c28e83SPiotr Jasiukajtis
533*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
534*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0
535*25c28e83SPiotr Jasiukajtis
536*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
537*25c28e83SPiotr Jasiukajtis	andn	%l1,0x1f,%l1
538*25c28e83SPiotr Jasiukajtis
539*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q3,P2_f24
540*25c28e83SPiotr Jasiukajtis
541*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_pp2,P0_f6
542*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f32
543*25c28e83SPiotr Jasiukajtis
544*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_pp2,P1_f16
545*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f36
546*25c28e83SPiotr Jasiukajtis
547*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
548*25c28e83SPiotr Jasiukajtis
549*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_pp1,P0_f6
550*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_qq2,P0_f4
551*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l0],%f34
552*25c28e83SPiotr Jasiukajtis
553*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,C_pp1,P1_f16
554*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_qq2,P1_f14
555*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l1],%f38
556*25c28e83SPiotr Jasiukajtis
557*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q2,P2_f24
558*25c28e83SPiotr Jasiukajtis
559*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f6,P0_f6
560*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_qq1,P0_f4
561*25c28e83SPiotr Jasiukajtis
562*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f16,P1_f16
563*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_qq1,P1_f14
564*25c28e83SPiotr Jasiukajtis
565*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
566*25c28e83SPiotr Jasiukajtis
567*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_ONE,P0_f6
568*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
569*25c28e83SPiotr Jasiukajtis
570*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,C_ONE,P1_f16
571*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
572*25c28e83SPiotr Jasiukajtis
573*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q1,P2_f24
574*25c28e83SPiotr Jasiukajtis
575*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f6,P0_f6
576*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l0],P0_f2
577*25c28e83SPiotr Jasiukajtis
578*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f16,P1_f16
579*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l1],P1_f12
580*25c28e83SPiotr Jasiukajtis
581*25c28e83SPiotr Jasiukajtis	fmuld	P0_f4,%f32,P0_f4
582*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
583*25c28e83SPiotr Jasiukajtis
584*25c28e83SPiotr Jasiukajtis	fmuld	P1_f14,%f36,P1_f14
585*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P0_f0
586*25c28e83SPiotr Jasiukajtis
587*25c28e83SPiotr Jasiukajtis	fmuld	P0_f6,%f34,P0_f6
588*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P0_f1
589*25c28e83SPiotr Jasiukajtis
590*25c28e83SPiotr Jasiukajtis	fmuld	P1_f16,%f38,P1_f16
591*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
592*25c28e83SPiotr Jasiukajtis
593*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
594*25c28e83SPiotr Jasiukajtis
595*25c28e83SPiotr Jasiukajtis	fsubd	P0_f6,P0_f4,P0_f6
596*25c28e83SPiotr Jasiukajtis
597*25c28e83SPiotr Jasiukajtis	fsubd	P1_f16,P1_f14,P1_f16
598*25c28e83SPiotr Jasiukajtis
599*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P2_f20,P2_f24,P2_f24
600*25c28e83SPiotr Jasiukajtis
601*25c28e83SPiotr Jasiukajtis	fsubd	P0_f2,P0_f6,P0_f6
602*25c28e83SPiotr Jasiukajtis
603*25c28e83SPiotr Jasiukajtis	fsubd	P1_f12,P1_f16,P1_f16
604*25c28e83SPiotr Jasiukajtis
605*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P2_f24,P2_f26 !!(vsin)faddd	P2_f20,P2_f24,P2_f26
606*25c28e83SPiotr Jasiukajtis
607*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,%f32,P0_f6
608*25c28e83SPiotr Jasiukajtis
609*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,%f36,P1_f16
610*25c28e83SPiotr Jasiukajtis	andn	%l0,MSK_SIGN,%l0		! hx &= ~0x80000000
611*25c28e83SPiotr Jasiukajtis
612*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P2_f26,P2_f29,P2_f26
613*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
614*25c28e83SPiotr Jasiukajtis
615*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P0_f6,P0_f9,P0_f6
616*25c28e83SPiotr Jasiukajtis	bg,pt	%icc,.loop0
617*25c28e83SPiotr Jasiukajtis
618*25c28e83SPiotr Jasiukajtis! delay slot
619*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P1_f16,P1_f19,P1_f16
620*25c28e83SPiotr Jasiukajtis
621*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.endloop0
622*25c28e83SPiotr Jasiukajtis! delay slot
623*25c28e83SPiotr Jasiukajtis	nop
624*25c28e83SPiotr Jasiukajtis
625*25c28e83SPiotr Jasiukajtis	.align	32
! .case2 -- loop-body variant: the P0 and P2 register groups use the
! table-driven evaluation, the P1 group uses the polynomial-only evaluation.
!
! Entry: stores P2_f26/P2_f27 through %o5 (NOTE(review): presumably the P2
! result still pending from the previous pass -- confirm against .loop0) and
! diverts to .case3 when %l2 < LIM_l5 (signed compare on %icc).
!
! Table-driven evaluation (written for P0; P2 is identical with P2_f2x,
! %l2 and %f40/%f42):
!   xh  = P0_f8 & MSK_BITSHI17      (P0_f8 is prepared before this block)
!   d   = x - xh,  z = d*d
!   idx = ((%l0 - 0x3fc3c000) >> 10) & ~0x1f
!   s   = d * (C_ONE + z*(C_pp1 + z*C_pp2))
!   c   = z * (C_qq1 + z*C_qq2)
!   res = [SC_HI+8+idx] + ([SC_LO+8+idx] - (s*[SC_HI+idx] - c*[SC_HI+8+idx]))
! Polynomial-only evaluation (P1):
!   z   = x*x
!   res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
!
! Interleaved with the arithmetic: the next argument is preloaded into %l0
! and P0_f0/P0_f1, %i1 is advanced by %i2, and %o0-%o2 are copied to %o3-%o5.
! Exit: %i0 is decremented; branch back to .loop0 while it is still positive,
! otherwise leave through .endloop0.
! Text after a !!(vsin) tag appears to record the instruction the sine
! variant has in that slot (assumption based on the tag only).
626*25c28e83SPiotr Jasiukajtis.case2:
627*25c28e83SPiotr Jasiukajtis	st	P2_f26,[%o5]
628*25c28e83SPiotr Jasiukajtis	cmp	%l2,LIM_l5
629*25c28e83SPiotr Jasiukajtis	fpadd32s P2_f20,MSK_BIT13,P2_f28
630*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.case3
631*25c28e83SPiotr Jasiukajtis
632*25c28e83SPiotr Jasiukajtis! delay slot
633*25c28e83SPiotr Jasiukajtis	st	P2_f27,[%o5+4]
! %o7 first holds the bias 0x3fc3c000 that is subtracted from %l0 and %l2;
! it is then reused (with %g1) as a table base pointer.
634*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
635*25c28e83SPiotr Jasiukajtis	fand	P0_f8,MSK_BITSHI17,P0_f2
636*25c28e83SPiotr Jasiukajtis
637*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0
638*25c28e83SPiotr Jasiukajtis	sub	%l2,%o7,%l2
639*25c28e83SPiotr Jasiukajtis	add	SC_HI,8,%g1;add	SC_LO,8,%o7
640*25c28e83SPiotr Jasiukajtis	fand	P2_f28,MSK_BITSHI17,P2_f22
641*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
642*25c28e83SPiotr Jasiukajtis
643*25c28e83SPiotr Jasiukajtis	fsubd	P0_f0,P0_f2,P0_f0
644*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
645*25c28e83SPiotr Jasiukajtis	mov	%o0,%o3
646*25c28e83SPiotr Jasiukajtis
647*25c28e83SPiotr Jasiukajtis	fsubd	P2_f20,P2_f22,P2_f20
648*25c28e83SPiotr Jasiukajtis	srl	%l2,10,%l2
649*25c28e83SPiotr Jasiukajtis	mov	%o2,%o5
650*25c28e83SPiotr Jasiukajtis
651*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_q4,P1_f14
652*25c28e83SPiotr Jasiukajtis	mov	%o1,%o4
653*25c28e83SPiotr Jasiukajtis
654*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
! Clearing the low five bits makes %l0 / %l2 byte offsets that are
! multiples of 32.
655*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0
656*25c28e83SPiotr Jasiukajtis
657*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f20,P2_f22
658*25c28e83SPiotr Jasiukajtis	andn	%l2,0x1f,%l2
659*25c28e83SPiotr Jasiukajtis
660*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q3,P1_f14
661*25c28e83SPiotr Jasiukajtis
662*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_pp2,P0_f6
! %f32 / %f40 = table doubles at SC_HI+8+idx for P0 / P2.
663*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f32
664*25c28e83SPiotr Jasiukajtis
665*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_pp2,P2_f26
666*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l2],%f40
667*25c28e83SPiotr Jasiukajtis
668*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
669*25c28e83SPiotr Jasiukajtis
670*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_pp1,P0_f6
671*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_qq2,P0_f4
! %f34 / %f42 = table doubles at SC_HI+idx for P0 / P2.
672*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l0],%f34
673*25c28e83SPiotr Jasiukajtis
674*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,C_pp1,P2_f26
675*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_qq2,P2_f24
676*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l2],%f42
677*25c28e83SPiotr Jasiukajtis
678*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q2,P1_f14
679*25c28e83SPiotr Jasiukajtis
680*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f6,P0_f6
681*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_qq1,P0_f4
682*25c28e83SPiotr Jasiukajtis
683*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f26,P2_f26
684*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_qq1,P2_f24
685*25c28e83SPiotr Jasiukajtis
686*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
687*25c28e83SPiotr Jasiukajtis
688*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_ONE,P0_f6
689*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
690*25c28e83SPiotr Jasiukajtis
691*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,C_ONE,P2_f26
692*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
693*25c28e83SPiotr Jasiukajtis
694*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q1,P1_f14
695*25c28e83SPiotr Jasiukajtis
696*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f6,P0_f6
! P0_f2 / P2_f22 (z is no longer needed) now receive the doubles at
! SC_LO+8+idx.
697*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l0],P0_f2
698*25c28e83SPiotr Jasiukajtis
699*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f26,P2_f26
700*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l2],P2_f22
701*25c28e83SPiotr Jasiukajtis
! From here %l0 and P0_f0/P0_f1 belong to the next argument.
702*25c28e83SPiotr Jasiukajtis	fmuld	P0_f4,%f32,P0_f4
703*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
704*25c28e83SPiotr Jasiukajtis
705*25c28e83SPiotr Jasiukajtis	fmuld	P2_f24,%f40,P2_f24
706*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P0_f0
707*25c28e83SPiotr Jasiukajtis
708*25c28e83SPiotr Jasiukajtis	fmuld	P0_f6,%f34,P0_f6
709*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P0_f1
710*25c28e83SPiotr Jasiukajtis
711*25c28e83SPiotr Jasiukajtis	fmuld	P2_f26,%f42,P2_f26
712*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
713*25c28e83SPiotr Jasiukajtis
714*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
715*25c28e83SPiotr Jasiukajtis
716*25c28e83SPiotr Jasiukajtis	fsubd	P0_f6,P0_f4,P0_f6
717*25c28e83SPiotr Jasiukajtis
718*25c28e83SPiotr Jasiukajtis	fsubd	P2_f26,P2_f24,P2_f26
719*25c28e83SPiotr Jasiukajtis
720*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P1_f10,P1_f14,P1_f14
721*25c28e83SPiotr Jasiukajtis
722*25c28e83SPiotr Jasiukajtis	fsubd	P0_f2,P0_f6,P0_f6
723*25c28e83SPiotr Jasiukajtis
724*25c28e83SPiotr Jasiukajtis	fsubd	P2_f22,P2_f26,P2_f26
725*25c28e83SPiotr Jasiukajtis
726*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P1_f14,P1_f16 !!(vsin)faddd	P1_f10,P1_f14,P1_f16
727*25c28e83SPiotr Jasiukajtis
728*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,%f32,P0_f6
729*25c28e83SPiotr Jasiukajtis
730*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,%f40,P2_f26
731*25c28e83SPiotr Jasiukajtis	andn	%l0,MSK_SIGN,%l0		! hx &= ~0x80000000
732*25c28e83SPiotr Jasiukajtis
! Loop control: addcc sets %icc for the bg below; the nop lines fill the
! slots tagged !!(vsin).
733*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P1_f16,P1_f19,P1_f16
734*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
735*25c28e83SPiotr Jasiukajtis
736*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P0_f6,P0_f9,P0_f6
737*25c28e83SPiotr Jasiukajtis	bg,pt	%icc,.loop0
738*25c28e83SPiotr Jasiukajtis
739*25c28e83SPiotr Jasiukajtis! delay slot
740*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P2_f26,P2_f29,P2_f26
741*25c28e83SPiotr Jasiukajtis
742*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.endloop0
743*25c28e83SPiotr Jasiukajtis! delay slot
744*25c28e83SPiotr Jasiukajtis	nop
745*25c28e83SPiotr Jasiukajtis
746*25c28e83SPiotr Jasiukajtis	.align	32
! .case3 -- loop-body variant reached from .case2 when %l2 < LIM_l5: only the
! P0 register group uses the table-driven evaluation; P1 and P2 use the
! polynomial-only evaluation.  The stores through %o5 were already issued by
! .case2 (the second one in its branch delay slot).
!
! P0 (table-driven):
!   xh  = P0_f8 & MSK_BITSHI17      (P0_f8 is prepared before .case2)
!   d   = x - xh,  z = d*d
!   idx = ((%l0 - 0x3fc3c000) >> 10) & ~0x1f
!   s   = d * (C_ONE + z*(C_pp1 + z*C_pp2))
!   c   = z * (C_qq1 + z*C_qq2)
!   res = [SC_HI+8+idx] + ([SC_LO+8+idx] - (s*[SC_HI+idx] - c*[SC_HI+8+idx]))
! P1 and P2 (polynomial-only):
!   z   = x*x
!   res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
!
! The next argument is preloaded into %l0 and P0_f0/P0_f1, %i1 is advanced
! by %i2, %o0-%o2 are copied to %o3-%o5, and the loop continues at .loop0
! while the decremented %i0 is positive (otherwise .endloop0).
747*25c28e83SPiotr Jasiukajtis.case3:
! %o7 holds the bias 0x3fc3c000 only until the sub below, then becomes SC_LO+8.
748*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
749*25c28e83SPiotr Jasiukajtis	fand	P0_f8,MSK_BITSHI17,P0_f2
750*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
751*25c28e83SPiotr Jasiukajtis
752*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0
753*25c28e83SPiotr Jasiukajtis	add	SC_HI,8,%g1;add	SC_LO,8,%o7
754*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f20,P2_f22
755*25c28e83SPiotr Jasiukajtis
756*25c28e83SPiotr Jasiukajtis	fsubd	P0_f0,P0_f2,P0_f0
757*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
758*25c28e83SPiotr Jasiukajtis	mov	%o0,%o3
759*25c28e83SPiotr Jasiukajtis
760*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_q4,P1_f14
761*25c28e83SPiotr Jasiukajtis	mov	%o1,%o4
762*25c28e83SPiotr Jasiukajtis
763*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_q4,P2_f24
764*25c28e83SPiotr Jasiukajtis	mov	%o2,%o5
765*25c28e83SPiotr Jasiukajtis
766*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
! %l0 becomes a byte offset that is a multiple of 32.
767*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0
768*25c28e83SPiotr Jasiukajtis
769*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q3,P1_f14
770*25c28e83SPiotr Jasiukajtis
771*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q3,P2_f24
772*25c28e83SPiotr Jasiukajtis
773*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_pp2,P0_f6
! %f32 = table double at SC_HI+8+idx.
774*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f32
775*25c28e83SPiotr Jasiukajtis
776*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
777*25c28e83SPiotr Jasiukajtis
778*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
779*25c28e83SPiotr Jasiukajtis
780*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_pp1,P0_f6
781*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_qq2,P0_f4
! %f34 = table double at SC_HI+idx.
782*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l0],%f34
783*25c28e83SPiotr Jasiukajtis
784*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q2,P1_f14
785*25c28e83SPiotr Jasiukajtis
786*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q2,P2_f24
787*25c28e83SPiotr Jasiukajtis
788*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f6,P0_f6
789*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_qq1,P0_f4
790*25c28e83SPiotr Jasiukajtis
791*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
792*25c28e83SPiotr Jasiukajtis
793*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
794*25c28e83SPiotr Jasiukajtis
795*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,C_ONE,P0_f6
796*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
797*25c28e83SPiotr Jasiukajtis
798*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q1,P1_f14
799*25c28e83SPiotr Jasiukajtis
800*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q1,P2_f24
801*25c28e83SPiotr Jasiukajtis
802*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f6,P0_f6
! P0_f2 (z is no longer needed) now receives the double at SC_LO+8+idx.
803*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l0],P0_f2
804*25c28e83SPiotr Jasiukajtis
! From here %l0 and P0_f0/P0_f1 belong to the next argument.
805*25c28e83SPiotr Jasiukajtis	fmuld	P0_f4,%f32,P0_f4
806*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
807*25c28e83SPiotr Jasiukajtis
808*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
809*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P0_f0
810*25c28e83SPiotr Jasiukajtis
811*25c28e83SPiotr Jasiukajtis	fmuld	P0_f6,%f34,P0_f6
812*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P0_f1
813*25c28e83SPiotr Jasiukajtis
814*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
815*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
816*25c28e83SPiotr Jasiukajtis
817*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P1_f10,P1_f14,P1_f14
818*25c28e83SPiotr Jasiukajtis
819*25c28e83SPiotr Jasiukajtis	fsubd	P0_f6,P0_f4,P0_f6
820*25c28e83SPiotr Jasiukajtis
821*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P2_f20,P2_f24,P2_f24
822*25c28e83SPiotr Jasiukajtis
823*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P1_f14,P1_f16 !!(vsin)faddd	P1_f10,P1_f14,P1_f16
824*25c28e83SPiotr Jasiukajtis
825*25c28e83SPiotr Jasiukajtis	fsubd	P0_f2,P0_f6,P0_f6
826*25c28e83SPiotr Jasiukajtis
827*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P2_f24,P2_f26 !!(vsin)faddd	P2_f20,P2_f24,P2_f26
828*25c28e83SPiotr Jasiukajtis
829*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P1_f16,P1_f19,P1_f16
830*25c28e83SPiotr Jasiukajtis	andn	%l0,MSK_SIGN,%l0		! hx &= ~0x80000000
831*25c28e83SPiotr Jasiukajtis
! Final add of the P0 result; addcc sets %icc for the bg below.
832*25c28e83SPiotr Jasiukajtis	faddd	P0_f6,%f32,P0_f6
833*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
834*25c28e83SPiotr Jasiukajtis
835*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P2_f26,P2_f29,P2_f26
836*25c28e83SPiotr Jasiukajtis	bg,pt	%icc,.loop0
837*25c28e83SPiotr Jasiukajtis
838*25c28e83SPiotr Jasiukajtis! delay slot
839*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P0_f6,P0_f9,P0_f6
840*25c28e83SPiotr Jasiukajtis
841*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.endloop0
842*25c28e83SPiotr Jasiukajtis! delay slot
843*25c28e83SPiotr Jasiukajtis	nop
844*25c28e83SPiotr Jasiukajtis
845*25c28e83SPiotr Jasiukajtis	.align	32
! .case4 -- loop-body variant: the P0 register group uses the polynomial-only
! evaluation; P1 and P2 use the table-driven evaluation.
!
! Entry: stores P1_f17 to [%o4+4] and P2_f26/P2_f27 through %o5 (the P2
! stores sit in branch delay slots, so they execute on every path).
! NOTE(review): presumably results pending from the previous pass -- confirm
! against .loop0.
! Dispatch (signed compares on %icc):
!   %l1 < LIM_l5  ->  .case6
!   %l2 < LIM_l5  ->  .case5
!
! P0 (polynomial-only):
!   z   = x*x
!   res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
! P1 / P2 (table-driven; P1 uses %l1, %f36/%f38, P2 uses %l2, %f40/%f42):
!   xh  = (high word of x + MSK_BIT13, via fpadd32s) & MSK_BITSHI17
!   d   = x - xh,  z = d*d
!   idx = ((%lN - 0x3fc3c000) >> 10) & ~0x1f
!   s   = d * (C_ONE + z*(C_pp1 + z*C_pp2))
!   c   = z * (C_qq1 + z*C_qq2)
!   res = [SC_HI+8+idx] + ([SC_LO+8+idx] - (s*[SC_HI+idx] - c*[SC_HI+8+idx]))
!
! The next argument is preloaded into %l0 and P0_f0/P0_f1, %i1 is advanced
! by %i2, %o0-%o2 are copied to %o3-%o5, and the loop continues at .loop0
! while the decremented %i0 is positive (otherwise .endloop0).
846*25c28e83SPiotr Jasiukajtis.case4:
847*25c28e83SPiotr Jasiukajtis	st	P1_f17,[%o4+4]
848*25c28e83SPiotr Jasiukajtis	cmp	%l1,LIM_l5
849*25c28e83SPiotr Jasiukajtis	fpadd32s P1_f10,MSK_BIT13,P1_f18
850*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.case6
851*25c28e83SPiotr Jasiukajtis
852*25c28e83SPiotr Jasiukajtis! delay slot
853*25c28e83SPiotr Jasiukajtis	st	P2_f26,[%o5]
854*25c28e83SPiotr Jasiukajtis	cmp	%l2,LIM_l5
855*25c28e83SPiotr Jasiukajtis	fpadd32s P2_f20,MSK_BIT13,P2_f28
856*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.case5
857*25c28e83SPiotr Jasiukajtis
858*25c28e83SPiotr Jasiukajtis! delay slot
859*25c28e83SPiotr Jasiukajtis	st	P2_f27,[%o5+4]
! %o7 holds the bias 0x3fc3c000 only until the subs below, then becomes SC_LO+8.
860*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
861*25c28e83SPiotr Jasiukajtis	fand	P1_f18,MSK_BITSHI17,P1_f12
862*25c28e83SPiotr Jasiukajtis
863*25c28e83SPiotr Jasiukajtis	sub	%l1,%o7,%l1
864*25c28e83SPiotr Jasiukajtis	sub	%l2,%o7,%l2
865*25c28e83SPiotr Jasiukajtis	add	SC_HI,8,%g1;add	SC_LO,8,%o7
866*25c28e83SPiotr Jasiukajtis	fand	P2_f28,MSK_BITSHI17,P2_f22
867*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
868*25c28e83SPiotr Jasiukajtis
869*25c28e83SPiotr Jasiukajtis	fsubd	P1_f10,P1_f12,P1_f10
870*25c28e83SPiotr Jasiukajtis	srl	%l1,10,%l1
871*25c28e83SPiotr Jasiukajtis	mov	%o1,%o4
872*25c28e83SPiotr Jasiukajtis
873*25c28e83SPiotr Jasiukajtis	fsubd	P2_f20,P2_f22,P2_f20
874*25c28e83SPiotr Jasiukajtis	srl	%l2,10,%l2
875*25c28e83SPiotr Jasiukajtis	mov	%o2,%o5
876*25c28e83SPiotr Jasiukajtis
! NOTE(review): P0_f6 is overwritten by the final faddd C_ONE,... without
! being read in this block, so this copy looks like a leftover from the sine
! variant (see the commented-out !!(vsin)fmuld P0_f6 below).
877*25c28e83SPiotr Jasiukajtis	fmovd	P0_f0,P0_f6		!ID for processing
878*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_q4,P0_f4
879*25c28e83SPiotr Jasiukajtis	mov	%o0,%o3
880*25c28e83SPiotr Jasiukajtis
881*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
! %l1 / %l2 become byte offsets that are multiples of 32.
882*25c28e83SPiotr Jasiukajtis	andn	%l1,0x1f,%l1
883*25c28e83SPiotr Jasiukajtis
884*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f20,P2_f22
885*25c28e83SPiotr Jasiukajtis	andn	%l2,0x1f,%l2
886*25c28e83SPiotr Jasiukajtis
887*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q3,P0_f4
888*25c28e83SPiotr Jasiukajtis
889*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_pp2,P1_f16
! %f36 / %f40 = table doubles at SC_HI+8+idx for P1 / P2.
890*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f36
891*25c28e83SPiotr Jasiukajtis
892*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_pp2,P2_f26
893*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l2],%f40
894*25c28e83SPiotr Jasiukajtis
895*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
896*25c28e83SPiotr Jasiukajtis
897*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,C_pp1,P1_f16
898*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_qq2,P1_f14
! %f38 / %f42 = table doubles at SC_HI+idx for P1 / P2.
899*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l1],%f38
900*25c28e83SPiotr Jasiukajtis
901*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,C_pp1,P2_f26
902*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_qq2,P2_f24
903*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l2],%f42
904*25c28e83SPiotr Jasiukajtis
905*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q2,P0_f4
906*25c28e83SPiotr Jasiukajtis
907*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f16,P1_f16
908*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_qq1,P1_f14
909*25c28e83SPiotr Jasiukajtis
910*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f26,P2_f26
911*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_qq1,P2_f24
912*25c28e83SPiotr Jasiukajtis
913*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
914*25c28e83SPiotr Jasiukajtis
915*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,C_ONE,P1_f16
916*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
917*25c28e83SPiotr Jasiukajtis
918*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,C_ONE,P2_f26
919*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
920*25c28e83SPiotr Jasiukajtis
921*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q1,P0_f4
922*25c28e83SPiotr Jasiukajtis
923*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f16,P1_f16
! P1_f12 / P2_f22 (z is no longer needed) now receive the doubles at
! SC_LO+8+idx.
924*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l1],P1_f12
925*25c28e83SPiotr Jasiukajtis
926*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f26,P2_f26
927*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l2],P2_f22
928*25c28e83SPiotr Jasiukajtis
! From here %l0 and P0_f0/P0_f1 belong to the next argument; the P0
! polynomial only needs z (P0_f2) from now on.
929*25c28e83SPiotr Jasiukajtis	fmuld	P1_f14,%f36,P1_f14
930*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
931*25c28e83SPiotr Jasiukajtis
932*25c28e83SPiotr Jasiukajtis	fmuld	P2_f24,%f40,P2_f24
933*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P0_f0
934*25c28e83SPiotr Jasiukajtis
935*25c28e83SPiotr Jasiukajtis	fmuld	P1_f16,%f38,P1_f16
936*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P0_f1
937*25c28e83SPiotr Jasiukajtis
938*25c28e83SPiotr Jasiukajtis	fmuld	P2_f26,%f42,P2_f26
939*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
940*25c28e83SPiotr Jasiukajtis
941*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
942*25c28e83SPiotr Jasiukajtis
943*25c28e83SPiotr Jasiukajtis	fsubd	P1_f16,P1_f14,P1_f16
944*25c28e83SPiotr Jasiukajtis
945*25c28e83SPiotr Jasiukajtis	fsubd	P2_f26,P2_f24,P2_f26
946*25c28e83SPiotr Jasiukajtis
947*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P0_f6,P0_f4,P0_f4
948*25c28e83SPiotr Jasiukajtis
949*25c28e83SPiotr Jasiukajtis	fsubd	P1_f12,P1_f16,P1_f16
950*25c28e83SPiotr Jasiukajtis
951*25c28e83SPiotr Jasiukajtis	fsubd	P2_f22,P2_f26,P2_f26
952*25c28e83SPiotr Jasiukajtis
953*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P0_f4,P0_f6 !!(vsin)faddd   P0_f6,P0_f4,P0_f6	! faddd then spaces for processing
954*25c28e83SPiotr Jasiukajtis
955*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,%f36,P1_f16
956*25c28e83SPiotr Jasiukajtis
957*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,%f40,P2_f26
958*25c28e83SPiotr Jasiukajtis	andn	%l0,MSK_SIGN,%l0		! hx &= ~0x80000000
959*25c28e83SPiotr Jasiukajtis
! Loop control: addcc sets %icc for the bg below.
960*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P0_f6,P0_f9,P0_f6
961*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
962*25c28e83SPiotr Jasiukajtis
963*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P1_f16,P1_f19,P1_f16
964*25c28e83SPiotr Jasiukajtis	bg,pt	%icc,.loop0
965*25c28e83SPiotr Jasiukajtis
966*25c28e83SPiotr Jasiukajtis! delay slot
967*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P2_f26,P2_f29,P2_f26
968*25c28e83SPiotr Jasiukajtis
969*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.endloop0
970*25c28e83SPiotr Jasiukajtis! delay slot
971*25c28e83SPiotr Jasiukajtis	nop
972*25c28e83SPiotr Jasiukajtis
973*25c28e83SPiotr Jasiukajtis	.align	32
! .case5 -- loop-body variant reached from .case4 when %l2 < LIM_l5: only the
! P1 register group uses the table-driven evaluation; P0 and P2 use the
! polynomial-only evaluation.  P1_f18 was formed by the fpadd32s in .case4,
! and the pending stores were issued there as well.
!
! P1 (table-driven):
!   xh  = P1_f18 & MSK_BITSHI17
!   d   = x - xh,  z = d*d
!   idx = ((%l1 - 0x3fc3c000) >> 10) & ~0x1f
!   s   = d * (C_ONE + z*(C_pp1 + z*C_pp2))
!   c   = z * (C_qq1 + z*C_qq2)
!   res = [SC_HI+8+idx] + ([SC_LO+8+idx] - (s*[SC_HI+idx] - c*[SC_HI+8+idx]))
! P0 and P2 (polynomial-only):
!   z   = x*x
!   res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
!
! The next argument is preloaded into %l0 and P0_f0/P0_f1, %i1 is advanced
! by %i2, %o0-%o2 are copied to %o3-%o5, and the loop continues at .loop0
! while the decremented %i0 is positive (otherwise .endloop0).
974*25c28e83SPiotr Jasiukajtis.case5:
! %o7 holds the bias 0x3fc3c000 only until the sub below, then becomes SC_LO+8.
975*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
976*25c28e83SPiotr Jasiukajtis	fand	P1_f18,MSK_BITSHI17,P1_f12
977*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
978*25c28e83SPiotr Jasiukajtis
979*25c28e83SPiotr Jasiukajtis	sub	%l1,%o7,%l1
980*25c28e83SPiotr Jasiukajtis	add	SC_HI,8,%g1;add	SC_LO,8,%o7
981*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f20,P2_f22
982*25c28e83SPiotr Jasiukajtis
983*25c28e83SPiotr Jasiukajtis	fsubd	P1_f10,P1_f12,P1_f10
984*25c28e83SPiotr Jasiukajtis	srl	%l1,10,%l1
985*25c28e83SPiotr Jasiukajtis	mov	%o1,%o4
986*25c28e83SPiotr Jasiukajtis
! NOTE(review): P0_f6 is overwritten by the final faddd C_ONE,... without
! being read in this block, so this copy looks like a leftover from the sine
! variant.
987*25c28e83SPiotr Jasiukajtis	fmovd	P0_f0,P0_f6		!ID for processing
988*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_q4,P0_f4
989*25c28e83SPiotr Jasiukajtis	mov	%o0,%o3
990*25c28e83SPiotr Jasiukajtis
991*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_q4,P2_f24
992*25c28e83SPiotr Jasiukajtis	mov	%o2,%o5
993*25c28e83SPiotr Jasiukajtis
994*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
! %l1 becomes a byte offset that is a multiple of 32.
995*25c28e83SPiotr Jasiukajtis	andn	%l1,0x1f,%l1
996*25c28e83SPiotr Jasiukajtis
997*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q3,P0_f4
998*25c28e83SPiotr Jasiukajtis
999*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q3,P2_f24
1000*25c28e83SPiotr Jasiukajtis
1001*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_pp2,P1_f16
! %f36 = table double at SC_HI+8+idx.
1002*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f36
1003*25c28e83SPiotr Jasiukajtis
1004*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1005*25c28e83SPiotr Jasiukajtis
1006*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
1007*25c28e83SPiotr Jasiukajtis
1008*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,C_pp1,P1_f16
1009*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_qq2,P1_f14
! %f38 = table double at SC_HI+idx.
1010*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l1],%f38
1011*25c28e83SPiotr Jasiukajtis
1012*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q2,P0_f4
1013*25c28e83SPiotr Jasiukajtis
1014*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q2,P2_f24
1015*25c28e83SPiotr Jasiukajtis
1016*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f16,P1_f16
1017*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_qq1,P1_f14
1018*25c28e83SPiotr Jasiukajtis
1019*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1020*25c28e83SPiotr Jasiukajtis
1021*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
1022*25c28e83SPiotr Jasiukajtis
1023*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,C_ONE,P1_f16
1024*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
1025*25c28e83SPiotr Jasiukajtis
1026*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q1,P0_f4
1027*25c28e83SPiotr Jasiukajtis
1028*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q1,P2_f24
1029*25c28e83SPiotr Jasiukajtis
1030*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f16,P1_f16
! P1_f12 (z is no longer needed) now receives the double at SC_LO+8+idx.
1031*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l1],P1_f12
1032*25c28e83SPiotr Jasiukajtis
! From here %l0 and P0_f0/P0_f1 belong to the next argument.
1033*25c28e83SPiotr Jasiukajtis	fmuld	P1_f14,%f36,P1_f14
1034*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
1035*25c28e83SPiotr Jasiukajtis
1036*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1037*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P0_f0
1038*25c28e83SPiotr Jasiukajtis
1039*25c28e83SPiotr Jasiukajtis	fmuld	P1_f16,%f38,P1_f16
1040*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P0_f1
1041*25c28e83SPiotr Jasiukajtis
1042*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
1043*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
1044*25c28e83SPiotr Jasiukajtis
1045*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P0_f6,P0_f4,P0_f4
1046*25c28e83SPiotr Jasiukajtis
1047*25c28e83SPiotr Jasiukajtis	fsubd	P1_f16,P1_f14,P1_f16
1048*25c28e83SPiotr Jasiukajtis
1049*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P2_f20,P2_f24,P2_f24
1050*25c28e83SPiotr Jasiukajtis
1051*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P0_f4,P0_f6 !!(vsin)faddd   P0_f6,P0_f4,P0_f6	! faddd then spaces for processing
1052*25c28e83SPiotr Jasiukajtis
1053*25c28e83SPiotr Jasiukajtis	fsubd	P1_f12,P1_f16,P1_f16
1054*25c28e83SPiotr Jasiukajtis
1055*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P2_f24,P2_f26 !!(vsin)faddd	P2_f20,P2_f24,P2_f26
1056*25c28e83SPiotr Jasiukajtis
1057*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P0_f6,P0_f9,P0_f6
1058*25c28e83SPiotr Jasiukajtis	andn	%l0,MSK_SIGN,%l0		! hx &= ~0x80000000
1059*25c28e83SPiotr Jasiukajtis
! Final add of the P1 result; addcc sets %icc for the bg below.
1060*25c28e83SPiotr Jasiukajtis	faddd	P1_f16,%f36,P1_f16
1061*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
1062*25c28e83SPiotr Jasiukajtis
1063*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P2_f26,P2_f29,P2_f26
1064*25c28e83SPiotr Jasiukajtis	bg,pt	%icc,.loop0
1065*25c28e83SPiotr Jasiukajtis
1066*25c28e83SPiotr Jasiukajtis! delay slot
1067*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P1_f16,P1_f19,P1_f16
1068*25c28e83SPiotr Jasiukajtis
1069*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.endloop0
1070*25c28e83SPiotr Jasiukajtis! delay slot
1071*25c28e83SPiotr Jasiukajtis	nop
1072*25c28e83SPiotr Jasiukajtis
1073*25c28e83SPiotr Jasiukajtis	.align	32
! .case6 -- loop-body variant reached from .case4 when %l1 < LIM_l5: the P0
! and P1 register groups use the polynomial-only evaluation; P2 uses the
! table-driven evaluation unless %l2 < LIM_l5, in which case control moves on
! to .case7.
!
! Entry: completes the pending store pair with st P2_f27,[%o5+4] (the first
! half was stored in the delay slot of the branch in .case4).  The sethi in
! the delay slot of the branch to .case7 executes on both paths; .case7 does
! not use %o7.
!
! P2 (table-driven):
!   xh  = (high word of x + MSK_BIT13, via fpadd32s) & MSK_BITSHI17
!   d   = x - xh,  z = d*d
!   idx = ((%l2 - 0x3fc3c000) >> 10) & ~0x1f
!   s   = d * (C_ONE + z*(C_pp1 + z*C_pp2))
!   c   = z * (C_qq1 + z*C_qq2)
!   res = [SC_HI+8+idx] + ([SC_LO+8+idx] - (s*[SC_HI+idx] - c*[SC_HI+8+idx]))
! P0 and P1 (polynomial-only):
!   z   = x*x
!   res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
!
! The next argument is preloaded into %l0 and P0_f0/P0_f1, %i1 is advanced
! by %i2, %o0-%o2 are copied to %o3-%o5, and the loop continues at .loop0
! while the decremented %i0 is positive (otherwise .endloop0).
1074*25c28e83SPiotr Jasiukajtis.case6:
1075*25c28e83SPiotr Jasiukajtis	st	P2_f27,[%o5+4]
1076*25c28e83SPiotr Jasiukajtis	cmp	%l2,LIM_l5
1077*25c28e83SPiotr Jasiukajtis	fpadd32s P2_f20,MSK_BIT13,P2_f28
1078*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.case7
1079*25c28e83SPiotr Jasiukajtis
1080*25c28e83SPiotr Jasiukajtis! delay slot
1081*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
1082*25c28e83SPiotr Jasiukajtis	fand	P2_f28,MSK_BITSHI17,P2_f22
1083*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
1084*25c28e83SPiotr Jasiukajtis
! After the bias subtraction %o7 is reused as SC_LO+8.
1085*25c28e83SPiotr Jasiukajtis	sub	%l2,%o7,%l2
1086*25c28e83SPiotr Jasiukajtis	add	SC_HI,8,%g1;add	SC_LO,8,%o7
1087*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
1088*25c28e83SPiotr Jasiukajtis
1089*25c28e83SPiotr Jasiukajtis	fsubd	P2_f20,P2_f22,P2_f20
1090*25c28e83SPiotr Jasiukajtis	srl	%l2,10,%l2
1091*25c28e83SPiotr Jasiukajtis	mov	%o2,%o5
1092*25c28e83SPiotr Jasiukajtis
! NOTE(review): P0_f6 is overwritten by the final faddd C_ONE,... without
! being read in this block, so this copy looks like a leftover from the sine
! variant.
1093*25c28e83SPiotr Jasiukajtis	fmovd	P0_f0,P0_f6		!ID for processing
1094*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_q4,P0_f4
1095*25c28e83SPiotr Jasiukajtis	mov	%o0,%o3
1096*25c28e83SPiotr Jasiukajtis
1097*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_q4,P1_f14
1098*25c28e83SPiotr Jasiukajtis	mov	%o1,%o4
1099*25c28e83SPiotr Jasiukajtis
1100*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f20,P2_f22
! %l2 becomes a byte offset that is a multiple of 32.
1101*25c28e83SPiotr Jasiukajtis	andn	%l2,0x1f,%l2
1102*25c28e83SPiotr Jasiukajtis
1103*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q3,P0_f4
1104*25c28e83SPiotr Jasiukajtis
1105*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q3,P1_f14
1106*25c28e83SPiotr Jasiukajtis
1107*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_pp2,P2_f26
! %f40 = table double at SC_HI+8+idx.
1108*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l2],%f40
1109*25c28e83SPiotr Jasiukajtis
1110*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1111*25c28e83SPiotr Jasiukajtis
1112*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
1113*25c28e83SPiotr Jasiukajtis
1114*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,C_pp1,P2_f26
1115*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_qq2,P2_f24
! %f42 = table double at SC_HI+idx.
1116*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l2],%f42
1117*25c28e83SPiotr Jasiukajtis
1118*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q2,P0_f4
1119*25c28e83SPiotr Jasiukajtis
1120*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q2,P1_f14
1121*25c28e83SPiotr Jasiukajtis
1122*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f26,P2_f26
1123*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_qq1,P2_f24
1124*25c28e83SPiotr Jasiukajtis
1125*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1126*25c28e83SPiotr Jasiukajtis
1127*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
1128*25c28e83SPiotr Jasiukajtis
1129*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,C_ONE,P2_f26
1130*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
1131*25c28e83SPiotr Jasiukajtis
1132*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q1,P0_f4
1133*25c28e83SPiotr Jasiukajtis
1134*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q1,P1_f14
1135*25c28e83SPiotr Jasiukajtis
1136*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f26,P2_f26
! P2_f22 (z is no longer needed) now receives the double at SC_LO+8+idx.
1137*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l2],P2_f22
1138*25c28e83SPiotr Jasiukajtis
! From here %l0 and P0_f0/P0_f1 belong to the next argument.
1139*25c28e83SPiotr Jasiukajtis	fmuld	P2_f24,%f40,P2_f24
1140*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
1141*25c28e83SPiotr Jasiukajtis
1142*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1143*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P0_f0
1144*25c28e83SPiotr Jasiukajtis
1145*25c28e83SPiotr Jasiukajtis	fmuld	P2_f26,%f42,P2_f26
1146*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P0_f1
1147*25c28e83SPiotr Jasiukajtis
1148*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
1149*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
1150*25c28e83SPiotr Jasiukajtis
1151*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P0_f6,P0_f4,P0_f4
1152*25c28e83SPiotr Jasiukajtis
1153*25c28e83SPiotr Jasiukajtis	fsubd	P2_f26,P2_f24,P2_f26
1154*25c28e83SPiotr Jasiukajtis
1155*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P1_f10,P1_f14,P1_f14
1156*25c28e83SPiotr Jasiukajtis
1157*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P0_f4,P0_f6 !!(vsin)faddd   P0_f6,P0_f4,P0_f6	! faddd then spaces for processing
1158*25c28e83SPiotr Jasiukajtis
1159*25c28e83SPiotr Jasiukajtis	fsubd	P2_f22,P2_f26,P2_f26
1160*25c28e83SPiotr Jasiukajtis
1161*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P1_f14,P1_f16 !!(vsin)faddd	P1_f10,P1_f14,P1_f16
1162*25c28e83SPiotr Jasiukajtis
1163*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P0_f6,P0_f9,P0_f6
1164*25c28e83SPiotr Jasiukajtis	andn	%l0,MSK_SIGN,%l0		! hx &= ~0x80000000
1165*25c28e83SPiotr Jasiukajtis
! Final add of the P2 result; addcc sets %icc for the bg below.
1166*25c28e83SPiotr Jasiukajtis	faddd	P2_f26,%f40,P2_f26
1167*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
1168*25c28e83SPiotr Jasiukajtis
1169*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P1_f16,P1_f19,P1_f16
1170*25c28e83SPiotr Jasiukajtis	bg,pt	%icc,.loop0
1171*25c28e83SPiotr Jasiukajtis
1172*25c28e83SPiotr Jasiukajtis! delay slot
1173*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P2_f26,P2_f29,P2_f26
1174*25c28e83SPiotr Jasiukajtis
1175*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.endloop0
1176*25c28e83SPiotr Jasiukajtis! delay slot
1177*25c28e83SPiotr Jasiukajtis	nop
1178*25c28e83SPiotr Jasiukajtis
1179*25c28e83SPiotr Jasiukajtis	.align	32
! .case7 -- loop-body variant reached from .case6 when %l2 < LIM_l5: all
! three register groups (P0, P1, P2) use the polynomial-only evaluation and
! no table is consulted:
!   z   = x*x
!   res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
! with x in P0_f0 / P1_f10 / P2_f20 and the results left in
! P0_f6 / P1_f16 / P2_f26.
!
! The next argument is preloaded early here (right after the squares are
! issued) into %l0 and P0_f0/P0_f1; %i1 is advanced by %i2 and %o0-%o2 are
! copied to %o3-%o5.  The loop continues at .loop0 while the decremented %i0
! is positive (otherwise .endloop0).
1180*25c28e83SPiotr Jasiukajtis.case7:
1181*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
! NOTE(review): P0_f6 is overwritten by the final faddd C_ONE,... without
! being read in this block, so this copy looks like a leftover from the sine
! variant.
1182*25c28e83SPiotr Jasiukajtis	fmovd	P0_f0,P0_f6		!ID for processing
1183*25c28e83SPiotr Jasiukajtis	mov	%o0,%o3
1184*25c28e83SPiotr Jasiukajtis
1185*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
1186*25c28e83SPiotr Jasiukajtis	mov	%o1,%o4
1187*25c28e83SPiotr Jasiukajtis
1188*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,P2_f20,P2_f22
1189*25c28e83SPiotr Jasiukajtis	mov	%o2,%o5
1190*25c28e83SPiotr Jasiukajtis
! P0_f0 has already been consumed by the square above, so it can be reloaded.
1191*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_q4,P0_f4
1192*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
1193*25c28e83SPiotr Jasiukajtis
1194*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_q4,P1_f14
1195*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,P0_f0
1196*25c28e83SPiotr Jasiukajtis
1197*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,C_q4,P2_f24
1198*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,P0_f1
1199*25c28e83SPiotr Jasiukajtis
1200*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q3,P0_f4
1201*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
1202*25c28e83SPiotr Jasiukajtis
1203*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q3,P1_f14
1204*25c28e83SPiotr Jasiukajtis
1205*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q3,P2_f24
1206*25c28e83SPiotr Jasiukajtis
1207*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1208*25c28e83SPiotr Jasiukajtis
1209*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
1210*25c28e83SPiotr Jasiukajtis
1211*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
1212*25c28e83SPiotr Jasiukajtis
1213*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q2,P0_f4
1214*25c28e83SPiotr Jasiukajtis
1215*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q2,P1_f14
1216*25c28e83SPiotr Jasiukajtis
1217*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q2,P2_f24
1218*25c28e83SPiotr Jasiukajtis
1219*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1220*25c28e83SPiotr Jasiukajtis
1221*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
1222*25c28e83SPiotr Jasiukajtis
1223*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
1224*25c28e83SPiotr Jasiukajtis
1225*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q1,P0_f4
1226*25c28e83SPiotr Jasiukajtis
1227*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q1,P1_f14
1228*25c28e83SPiotr Jasiukajtis
1229*25c28e83SPiotr Jasiukajtis	faddd	P2_f24,C_q1,P2_f24
1230*25c28e83SPiotr Jasiukajtis
1231*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1232*25c28e83SPiotr Jasiukajtis
1233*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
1234*25c28e83SPiotr Jasiukajtis
1235*25c28e83SPiotr Jasiukajtis	fmuld	P2_f22,P2_f24,P2_f24
1236*25c28e83SPiotr Jasiukajtis
1237*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P0_f6,P0_f4,P0_f4
1238*25c28e83SPiotr Jasiukajtis
1239*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P1_f10,P1_f14,P1_f14
1240*25c28e83SPiotr Jasiukajtis
1241*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P2_f20,P2_f24,P2_f24
1242*25c28e83SPiotr Jasiukajtis
! Results: C_ONE plus the accumulated polynomial term for each group.
1243*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P0_f4,P0_f6 !!(vsin)faddd   P0_f6,P0_f4,P0_f6	! faddd then spaces for processing
1244*25c28e83SPiotr Jasiukajtis
1245*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P1_f14,P1_f16 !!(vsin)faddd	P1_f10,P1_f14,P1_f16
1246*25c28e83SPiotr Jasiukajtis
1247*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P2_f24,P2_f26 !!(vsin)faddd	P2_f20,P2_f24,P2_f26
1248*25c28e83SPiotr Jasiukajtis	andn	%l0,MSK_SIGN,%l0		! hx &= ~0x80000000
1249*25c28e83SPiotr Jasiukajtis
! Loop control: addcc sets %icc for the bg below.
1250*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P0_f6,P0_f9,P0_f6
1251*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
1252*25c28e83SPiotr Jasiukajtis
1253*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P1_f16,P1_f19,P1_f16
1254*25c28e83SPiotr Jasiukajtis	bg,pt	%icc,.loop0
1255*25c28e83SPiotr Jasiukajtis
1256*25c28e83SPiotr Jasiukajtis! delay slot
1257*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P2_f26,P2_f29,P2_f26
1258*25c28e83SPiotr Jasiukajtis
1259*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.endloop0
1260*25c28e83SPiotr Jasiukajtis! delay slot
1261*25c28e83SPiotr Jasiukajtis	nop
1262*25c28e83SPiotr Jasiukajtis
1263*25c28e83SPiotr Jasiukajtis
1264*25c28e83SPiotr Jasiukajtis	.align	32
! .endloop2 -- straight-line (unpipelined) evaluation of the single value
! held in P1_f10 (its magnitude word in %l1); the result is built in
! P2_f20/P2_f21 and stored through %o1.  Execution then falls through into
! .endloop1.
! NOTE(review): presumably the loop-exit path taken when two elements are
! still in flight -- confirm against the branches that target this label.
!
!   %l1 >= LIM_l5 : table-driven evaluation
!       x   = |x|                         (fabsd in the branch delay slot,
!                                          executed on both paths)
!       xh  = (high word of x + MSK_BIT13) & MSK_BITSHI17
!       d   = x - xh,  z = d*d
!       idx = ((%l1 - 0x3fc3c000) >> 10) & ~0x1f
!       s   = d * (C_ONE + z*(C_pp1 + z*C_pp2))
!       c   = z * (C_qq1 + z*C_qq2)
!       res = [SC_HI+8+idx] + ([SC_LO+8+idx] - (s*[SC_HI+idx] - c*[SC_HI+8+idx]))
!   %l1 <  LIM_l5 : polynomial-only evaluation (local label 1)
!       z   = x*x
!       res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
! Both paths join at local label 2, which performs the store.
1265*25c28e83SPiotr Jasiukajtis.endloop2:
1266*25c28e83SPiotr Jasiukajtis	cmp	%l1,LIM_l5
1267*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,1f
1268*25c28e83SPiotr Jasiukajtis! delay slot
1269*25c28e83SPiotr Jasiukajtis	fabsd	P1_f10,P1_f10
! %o7 holds the bias 0x3fc3c000 only until the sub below, then becomes SC_LO+8.
1270*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
1271*25c28e83SPiotr Jasiukajtis	fpadd32s P1_f10,MSK_BIT13,P1_f18
1272*25c28e83SPiotr Jasiukajtis	fand	P1_f18,MSK_BITSHI17,P1_f12
1273*25c28e83SPiotr Jasiukajtis	sub	%l1,%o7,%l1
1274*25c28e83SPiotr Jasiukajtis	add	SC_HI,8,%g1;add	SC_LO,8,%o7
1275*25c28e83SPiotr Jasiukajtis	fsubd	P1_f10,P1_f12,P1_f10
1276*25c28e83SPiotr Jasiukajtis	srl	%l1,10,%l1
1277*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
1278*25c28e83SPiotr Jasiukajtis	andn	%l1,0x1f,%l1
! P2_f20 accumulates s here; P1_f14 accumulates c.
1279*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_pp2,P2_f20
1280*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f36
1281*25c28e83SPiotr Jasiukajtis	faddd	P2_f20,C_pp1,P2_f20
1282*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_qq2,P1_f14
1283*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l1],%f38
1284*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P2_f20,P2_f20
1285*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_qq1,P1_f14
1286*25c28e83SPiotr Jasiukajtis	faddd	P2_f20,C_ONE,P2_f20
1287*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
1288*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P2_f20,P2_f20
! P1_f12 (z is no longer needed) now receives the double at SC_LO+8+idx.
1289*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l1],P1_f12
1290*25c28e83SPiotr Jasiukajtis	fmuld	P1_f14,%f36,P1_f14
1291*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,%f38,P2_f20
1292*25c28e83SPiotr Jasiukajtis	fsubd	P2_f20,P1_f14,P2_f20
1293*25c28e83SPiotr Jasiukajtis	fsubd	P1_f12,P2_f20,P2_f20
1294*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,2f
1295*25c28e83SPiotr Jasiukajtis! delay slot
1296*25c28e83SPiotr Jasiukajtis	faddd	P2_f20,%f36,P2_f20
1297*25c28e83SPiotr Jasiukajtis1:
1298*25c28e83SPiotr Jasiukajtis	fmuld	P1_f10,P1_f10,P1_f12
1299*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,C_q4,P1_f14
1300*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q3,P1_f14
1301*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
1302*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q2,P1_f14
1303*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
1304*25c28e83SPiotr Jasiukajtis	faddd	P1_f14,C_q1,P1_f14
1305*25c28e83SPiotr Jasiukajtis	fmuld	P1_f12,P1_f14,P1_f14
1306*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P1_f10,P1_f14,P1_f14
1307*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P1_f14,P2_f20 !!(vsin)faddd	P1_f10,P1_f14,P2_f20
1308*25c28e83SPiotr Jasiukajtis2:
1309*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P2_f20,P1_f19,P2_f20
! Store the double result as two 32-bit words.
1310*25c28e83SPiotr Jasiukajtis	st	P2_f20,[%o1]
1311*25c28e83SPiotr Jasiukajtis	st	P2_f21,[%o1+4]
1312*25c28e83SPiotr Jasiukajtis
1313*25c28e83SPiotr Jasiukajtis.endloop1:
! Tail of the primary loop: entered when the element count is exhausted
! (for instance from .range1) while slot 0 still holds an unprocessed
! argument (hx0 in %l0, x0 in P0_f0, destination pointer in %o0).
! The single result is built in P2_f20, stored through %o0, and control
! then falls through to .endloop0.
!   hx0 <  LIM_l5 : 1 + z*(q1 + z*(q2 + z*(q3 + z*q4))) with z = x*x
!   hx0 >= LIM_l5 : table evaluation about a nearby breakpoint (see below)
1314*25c28e83SPiotr Jasiukajtis	cmp	%l0,LIM_l5
1315*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,1f
1316*25c28e83SPiotr Jasiukajtis! delay slot
1317*25c28e83SPiotr Jasiukajtis	fabsd	P0_f0,P0_f0
! Table path.  The breakpoint is formed from |x| by adding MSK_BIT13 to
! its high word and masking with MSK_BITSHI17; the table byte offset is
! ((hx0 - 0x3fc3c000) >> 10) & ~0x1f.  %o7 first holds 0x3fc3c000 and is
! then reused as SC_LO+8.
1318*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
1319*25c28e83SPiotr Jasiukajtis	fpadd32s P0_f0,MSK_BIT13,P0_f8
1320*25c28e83SPiotr Jasiukajtis	fand	P0_f8,MSK_BITSHI17,P0_f2
1321*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0
1322*25c28e83SPiotr Jasiukajtis	add	SC_HI,8,%g1;add	SC_LO,8,%o7
! d = |x| - breakpoint, z = d*d
! P = d*(1 + z*(pp1 + z*pp2)) is accumulated in P2_f20,
! Q = z*(qq1 + z*qq2) in P0_f4.
1323*25c28e83SPiotr Jasiukajtis	fsubd	P0_f0,P0_f2,P0_f0
1324*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
1325*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
1326*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0
1327*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_pp2,P2_f20
1328*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f32
1329*25c28e83SPiotr Jasiukajtis	faddd	P2_f20,C_pp1,P2_f20
1330*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_qq2,P0_f4
1331*25c28e83SPiotr Jasiukajtis	ldd	[SC_HI+%l0],%f34
1332*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P2_f20,P2_f20
1333*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_qq1,P0_f4
1334*25c28e83SPiotr Jasiukajtis	faddd	P2_f20,C_ONE,P2_f20
1335*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1336*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P2_f20,P2_f20
! P0_f2 (z) is dead from here on and is reused for the SC_LO+8 entry.
! result = T8 + (L8 - (P*T0 - Q*T8)) where T0 = [SC_HI+off],
! T8 = [SC_HI+8+off] (in %f32) and L8 = [SC_LO+8+off]; the final add
! sits in the delay slot of the branch to 2f.
1337*25c28e83SPiotr Jasiukajtis	ldd	[%o7+%l0],P0_f2
1338*25c28e83SPiotr Jasiukajtis	fmuld	P0_f4,%f32,P0_f4
1339*25c28e83SPiotr Jasiukajtis	fmuld	P2_f20,%f34,P2_f20
1340*25c28e83SPiotr Jasiukajtis	fsubd	P2_f20,P0_f4,P2_f20
1341*25c28e83SPiotr Jasiukajtis	fsubd	P0_f2,P2_f20,P2_f20
1342*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,2f
1343*25c28e83SPiotr Jasiukajtis! delay slot
1344*25c28e83SPiotr Jasiukajtis	faddd	P2_f20,%f32,P2_f20
1345*25c28e83SPiotr Jasiukajtis1:
! Polynomial path: Horner evaluation in z = x*x, then add C_ONE.
! The !!(vsin) remnants show the corresponding vsin instructions.
1346*25c28e83SPiotr Jasiukajtis	fmuld	P0_f0,P0_f0,P0_f2
1347*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,C_q4,P0_f4
1348*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q3,P0_f4
1349*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1350*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q2,P0_f4
1351*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1352*25c28e83SPiotr Jasiukajtis	faddd	P0_f4,C_q1,P0_f4
1353*25c28e83SPiotr Jasiukajtis	fmuld	P0_f2,P0_f4,P0_f4
1354*25c28e83SPiotr Jasiukajtis	!!(vsin)fmuld	P0_f0,P0_f4,P0_f4
1355*25c28e83SPiotr Jasiukajtis	faddd	C_ONE,P0_f4,P2_f20 !!(vsin)faddd	P0_f0,P0_f4,P2_f20
1356*25c28e83SPiotr Jasiukajtis2:
! Both paths join here with the result in P2_f20; no sign is applied in
! this cosine variant (the vsin fors is replaced by a nop).
1357*25c28e83SPiotr Jasiukajtis	nop	!!(vsin) 	fors	P2_f20,P0_f9,P2_f20
1358*25c28e83SPiotr Jasiukajtis	st	P2_f20,[%o0]
1359*25c28e83SPiotr Jasiukajtis	st	P2_f21,[%o0+4]
1360*25c28e83SPiotr Jasiukajtis
1361*25c28e83SPiotr Jasiukajtis.endloop0:
! Final exit of the primary loop: write out the three results still held
! in P0_f6, P1_f16 and P2_f26 through the pointers in %o3, %o4 and %o5
! (each as two 32-bit stores), then return.
1362*25c28e83SPiotr Jasiukajtis	st	P0_f6,[%o3]
1363*25c28e83SPiotr Jasiukajtis	st	P0_f7,[%o3+4]
1364*25c28e83SPiotr Jasiukajtis	st	P1_f16,[%o4]
1365*25c28e83SPiotr Jasiukajtis	st	P1_f17,[%o4+4]
1366*25c28e83SPiotr Jasiukajtis	st	P2_f26,[%o5]
1367*25c28e83SPiotr Jasiukajtis	st	P2_f27,[%o5+4]
1368*25c28e83SPiotr Jasiukajtis
1369*25c28e83SPiotr Jasiukajtis! return.  finished off with only primary range arguments
1370*25c28e83SPiotr Jasiukajtis
1371*25c28e83SPiotr Jasiukajtis	ret
1372*25c28e83SPiotr Jasiukajtis	restore				! delay slot of ret: pop the register window
1373*25c28e83SPiotr Jasiukajtis
1374*25c28e83SPiotr Jasiukajtis
1375*25c28e83SPiotr Jasiukajtis	.align	32
1376*25c28e83SPiotr Jasiukajtis.range0:
! Out-of-line handler for slot 0, entered from code outside this excerpt
! (presumably when hx0 is outside the range the primary loop handles --
! confirm at the branch site).
!   hx0 (%l0) > LIM_l6 : go to .MEDIUM with LIM_l6 = 1 as the resume code.
!   otherwise          : store C_ONE as the result (the vsin original
!                        stored x itself), shift slot 1 down into slot 0
!                        and re-enter the primary loop at .loop0.
1377*25c28e83SPiotr Jasiukajtis	cmp	%l0,LIM_l6
1378*25c28e83SPiotr Jasiukajtis	bg,a,pt	%icc,.MEDIUM		! branch to Medium range on big arg.
1379*25c28e83SPiotr Jasiukajtis! delay slot, annulled if branch not taken
1380*25c28e83SPiotr Jasiukajtis	mov	0x1,LIM_l6		! set biguns flag = 1 (.MEDIUM resumes in .LOOP0)
1381*25c28e83SPiotr Jasiukajtis	fdtoi	P0_f0,P0_f2; fmovd	C_ONE,P0_f0 ; st	P0_f0,[%o0]		! *y = C_ONE; fdtoi raises inexact if x nonzero
1382*25c28e83SPiotr Jasiukajtis	st	P0_f1,[%o0+4]
1383*25c28e83SPiotr Jasiukajtis	!nop		! (vsin) fdtoi	P0_f0,P0_f2
1384*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0		! one element done
1385*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.endloop0		! none left: flush pending results and return
1386*25c28e83SPiotr Jasiukajtis! delay slot, harmless if branch taken
1387*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
1388*25c28e83SPiotr Jasiukajtis	andn	%l1,MSK_SIGN,%l0		! hx &= ~0x80000000
1389*25c28e83SPiotr Jasiukajtis	fmovd	P1_f10,P0_f0		! the preloaded slot 1 argument becomes x0
1390*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.loop0
1391*25c28e83SPiotr Jasiukajtis! delay slot
1392*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
1393*25c28e83SPiotr Jasiukajtis
1394*25c28e83SPiotr Jasiukajtis
1395*25c28e83SPiotr Jasiukajtis	.align	32
1396*25c28e83SPiotr Jasiukajtis.range1:
! Out-of-line handler for slot 1, entered from code outside this excerpt.
!   hx1 (%l1) > LIM_l6 : go to .MEDIUM with LIM_l6 = 2 as the resume code.
!   otherwise          : store C_ONE as the result (the vsin original
!                        stored x itself), shift slot 2 down into slot 1
!                        and re-enter the primary loop at .loop1.
1397*25c28e83SPiotr Jasiukajtis	cmp	%l1,LIM_l6
1398*25c28e83SPiotr Jasiukajtis	bg,a,pt	%icc,.MEDIUM		! branch to Medium range on big arg.
1399*25c28e83SPiotr Jasiukajtis! delay slot, annulled if branch not taken
1400*25c28e83SPiotr Jasiukajtis	mov	0x2,LIM_l6		! set biguns flag = 2 (.MEDIUM resumes in .LOOP1)
1401*25c28e83SPiotr Jasiukajtis	fdtoi	P1_f10,P1_f12; fmovd	C_ONE,P1_f10 ; st	P1_f10,[%o1]		! *y = C_ONE; fdtoi raises inexact if x nonzero
1402*25c28e83SPiotr Jasiukajtis	st	P1_f11,[%o1+4]
1403*25c28e83SPiotr Jasiukajtis	!nop		! (vsin) fdtoi	P1_f10,P1_f12
1404*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0		! one element done
1405*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.endloop1		! none left: finish the pending slot 0 element
1406*25c28e83SPiotr Jasiukajtis! delay slot, harmless if branch taken
1407*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
1408*25c28e83SPiotr Jasiukajtis	andn	%l2,MSK_SIGN,%l1		! hx &= ~0x80000000
1409*25c28e83SPiotr Jasiukajtis	fmovd	P2_f20,P1_f10		! the preloaded slot 2 argument becomes x1
1410*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.loop1
1411*25c28e83SPiotr Jasiukajtis! delay slot
1412*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
1413*25c28e83SPiotr Jasiukajtis
1414*25c28e83SPiotr Jasiukajtis
1415*25c28e83SPiotr Jasiukajtis	.align	32
1416*25c28e83SPiotr Jasiukajtis.range2:
! Out-of-line handler for slot 2, entered from code outside this excerpt.
!   hx2 (%l2) > LIM_l6 : go to .MEDIUM with LIM_l6 = 3 as the resume code.
!   otherwise          : store C_ONE as the result (the vsin original
!                        stored x itself), load a fresh argument into
!                        slot 2 and re-enter the primary loop at .loop2.
1417*25c28e83SPiotr Jasiukajtis	cmp	%l2,LIM_l6
1418*25c28e83SPiotr Jasiukajtis	bg,a,pt	%icc,.MEDIUM		! branch to Medium range on big arg.
1419*25c28e83SPiotr Jasiukajtis! delay slot, annulled if branch not taken
1420*25c28e83SPiotr Jasiukajtis	mov	0x3,LIM_l6		! set biguns flag = 3 (.MEDIUM resumes in .LOOP2)
1421*25c28e83SPiotr Jasiukajtis	fdtoi	P2_f20,P2_f22; fmovd	C_ONE,P2_f20 ; st	P2_f20,[%o2]		! *y = C_ONE; fdtoi raises inexact if x nonzero
1422*25c28e83SPiotr Jasiukajtis	st	P2_f21,[%o2+4]
1423*25c28e83SPiotr Jasiukajtis	nop		! (vsin) fdtoi	P2_f20,P2_f22
1424*25c28e83SPiotr Jasiukajtis1:
1425*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0		! one element done
1426*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.endloop2		! none left
1427*25c28e83SPiotr Jasiukajtis! delay slot
1428*25c28e83SPiotr Jasiukajtis	nop
! Nothing is preloaded behind slot 2, so fetch the next argument here:
! high word into %l2 for range checks, both words into P2_f20/P2_f21.
1429*25c28e83SPiotr Jasiukajtis	ld	[%i1],%l2
1430*25c28e83SPiotr Jasiukajtis	ld	[%i1],P2_f20
1431*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],P2_f21
1432*25c28e83SPiotr Jasiukajtis	andn	%l2,MSK_SIGN,%l2		! hx &= ~0x80000000
1433*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.loop2
1434*25c28e83SPiotr Jasiukajtis! delay slot
1435*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
1436*25c28e83SPiotr Jasiukajtis
1437*25c28e83SPiotr Jasiukajtis
1438*25c28e83SPiotr Jasiukajtis	.align	32
1439*25c28e83SPiotr Jasiukajtis.MEDIUM:
! Entry to the medium-range code.  Reached from .range0/.range1/.range2
! with LIM_l6 = 1, 2 or 3 naming the slot whose argument was too big for
! the primary loop.  This block
!   1. writes out the three results the primary loop still holds,
!   2. loads the argument-reduction constants and sets up stack scratch,
!   3. enters the three-way unrolled loop at .LOOP0, .LOOP1 or .LOOP2,
!      first redoing the work the skipped loop sections would have done.
1440*25c28e83SPiotr Jasiukajtis
1441*25c28e83SPiotr Jasiukajtis! ========== medium range ==========
1442*25c28e83SPiotr Jasiukajtis
1443*25c28e83SPiotr Jasiukajtis! register use
1444*25c28e83SPiotr Jasiukajtis
1445*25c28e83SPiotr Jasiukajtis! i0  n
1446*25c28e83SPiotr Jasiukajtis! i1  x
1447*25c28e83SPiotr Jasiukajtis! i2  stridex
1448*25c28e83SPiotr Jasiukajtis! i3  y
1449*25c28e83SPiotr Jasiukajtis! i4  stridey
1450*25c28e83SPiotr Jasiukajtis! i5  0x80000000
1451*25c28e83SPiotr Jasiukajtis
1452*25c28e83SPiotr Jasiukajtis! l0  hx0
1453*25c28e83SPiotr Jasiukajtis! l1  hx1
1454*25c28e83SPiotr Jasiukajtis! l2  hx2
1455*25c28e83SPiotr Jasiukajtis! l3  __vlibm_TBL_sincos_hi
1456*25c28e83SPiotr Jasiukajtis! l4  __vlibm_TBL_sincos_lo
1457*25c28e83SPiotr Jasiukajtis! l5  constants
1458*25c28e83SPiotr Jasiukajtis! l6  biguns stored here : still called LIM_l6
1459*25c28e83SPiotr Jasiukajtis! l7  0x413921fb
1460*25c28e83SPiotr Jasiukajtis
1461*25c28e83SPiotr Jasiukajtis! the following are 64-bit registers in both V8+ and V9
1462*25c28e83SPiotr Jasiukajtis
1463*25c28e83SPiotr Jasiukajtis! g1  scratch
1464*25c28e83SPiotr Jasiukajtis! g5
1465*25c28e83SPiotr Jasiukajtis
1466*25c28e83SPiotr Jasiukajtis! o0  py0
1467*25c28e83SPiotr Jasiukajtis! o1  py1
1468*25c28e83SPiotr Jasiukajtis! o2  py2
1469*25c28e83SPiotr Jasiukajtis! o3  n0
1470*25c28e83SPiotr Jasiukajtis! o4  n1
1471*25c28e83SPiotr Jasiukajtis! o5  n2
1472*25c28e83SPiotr Jasiukajtis! o7  scratch
1473*25c28e83SPiotr Jasiukajtis
1474*25c28e83SPiotr Jasiukajtis! f0  x0
1475*25c28e83SPiotr Jasiukajtis! f2  n0,y0
1476*25c28e83SPiotr Jasiukajtis! f4
1477*25c28e83SPiotr Jasiukajtis! f6
1478*25c28e83SPiotr Jasiukajtis! f8  scratch for table base
1479*25c28e83SPiotr Jasiukajtis! f9  signbit0
1480*25c28e83SPiotr Jasiukajtis! f10 x1
1481*25c28e83SPiotr Jasiukajtis! f12 n1,y1
1482*25c28e83SPiotr Jasiukajtis! f14
1483*25c28e83SPiotr Jasiukajtis! f16
1484*25c28e83SPiotr Jasiukajtis! f18 scratch for table base
1485*25c28e83SPiotr Jasiukajtis! f19 signbit1
1486*25c28e83SPiotr Jasiukajtis! f20 x2
1487*25c28e83SPiotr Jasiukajtis! f22 n2,y2
1488*25c28e83SPiotr Jasiukajtis! f24
1489*25c28e83SPiotr Jasiukajtis! f26
1490*25c28e83SPiotr Jasiukajtis! f28 scratch for table base
1491*25c28e83SPiotr Jasiukajtis! f29 signbit2
1492*25c28e83SPiotr Jasiukajtis! f30 0x80000000
1493*25c28e83SPiotr Jasiukajtis! f31 0x4000
1494*25c28e83SPiotr Jasiukajtis! f32
1495*25c28e83SPiotr Jasiukajtis! f34
1496*25c28e83SPiotr Jasiukajtis! f36
1497*25c28e83SPiotr Jasiukajtis! f38
1498*25c28e83SPiotr Jasiukajtis! f40 invpio2
1499*25c28e83SPiotr Jasiukajtis! f42 round
1500*25c28e83SPiotr Jasiukajtis! f44 0xffff800000000000
1501*25c28e83SPiotr Jasiukajtis! f46 pio2_1
1502*25c28e83SPiotr Jasiukajtis! f48 pio2_2
1503*25c28e83SPiotr Jasiukajtis! f50 pio2_3
1504*25c28e83SPiotr Jasiukajtis! f52 pio2_3t
1505*25c28e83SPiotr Jasiukajtis! f54 one
1506*25c28e83SPiotr Jasiukajtis! f56 pp1
1507*25c28e83SPiotr Jasiukajtis! f58 pp2
1508*25c28e83SPiotr Jasiukajtis! f60 qq1
1509*25c28e83SPiotr Jasiukajtis! f62 qq2
1510*25c28e83SPiotr Jasiukajtis
1511*25c28e83SPiotr Jasiukajtis
! PIC_SET is defined outside this excerpt; the loads below use %l5 as
! the base address of the constants block afterwards.
1512*25c28e83SPiotr Jasiukajtis	PIC_SET(g5,constants,l5)
1513*25c28e83SPiotr Jasiukajtis
1514*25c28e83SPiotr Jasiukajtis	! %o3,%o4,%o5 need to be stored
! (the pending primary-loop results, interleaved with building
! %l7 = 0x413921fb, the hx limit tested in .LOOP0/.LOOP1/.LOOP2)
1515*25c28e83SPiotr Jasiukajtis	st      P0_f6,[%o3]
1516*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x413921fb),%l7
1517*25c28e83SPiotr Jasiukajtis	st      P0_f7,[%o3+4]
1518*25c28e83SPiotr Jasiukajtis	or	%l7,%lo(0x413921fb),%l7
1519*25c28e83SPiotr Jasiukajtis	st      P1_f16,[%o4]
1520*25c28e83SPiotr Jasiukajtis	st      P1_f17,[%o4+4]
1521*25c28e83SPiotr Jasiukajtis	st      P2_f26,[%o5]
1522*25c28e83SPiotr Jasiukajtis	st      P2_f27,[%o5+4]
! reduction constants, see the register list above
1523*25c28e83SPiotr Jasiukajtis	ldd	[%l5+invpio2],%f40
1524*25c28e83SPiotr Jasiukajtis	ldd	[%l5+round],%f42
1525*25c28e83SPiotr Jasiukajtis	ldd	[%l5+pio2_1],%f46
1526*25c28e83SPiotr Jasiukajtis	ldd	[%l5+pio2_2],%f48
1527*25c28e83SPiotr Jasiukajtis	ldd	[%l5+pio2_3],%f50
1528*25c28e83SPiotr Jasiukajtis	ldd	[%l5+pio2_3t],%f52
! Second doubleword of each x?_1 slot gets %f54 (one) and second
! doubleword of each y?_0 slot gets zero.  These are read by code
! outside this excerpt (the .CASE paths index the x?_1 slots).
1529*25c28e83SPiotr Jasiukajtis	std	%f54,[%fp+x0_1+8]	! set up stack data
1530*25c28e83SPiotr Jasiukajtis	std	%f54,[%fp+x1_1+8]
1531*25c28e83SPiotr Jasiukajtis	std	%f54,[%fp+x2_1+8]
1532*25c28e83SPiotr Jasiukajtis	stx	%g0,[%fp+y0_0+8]
1533*25c28e83SPiotr Jasiukajtis	stx	%g0,[%fp+y1_0+8]
1534*25c28e83SPiotr Jasiukajtis	stx	%g0,[%fp+y2_0+8]
1535*25c28e83SPiotr Jasiukajtis
1536*25c28e83SPiotr Jasiukajtis!	branched here in the middle of the array.  Need to adjust
1537*25c28e83SPiotr Jasiukajtis!	for the members of the triple that were selected in the primary
1538*25c28e83SPiotr Jasiukajtis!	loop.
1539*25c28e83SPiotr Jasiukajtis
1540*25c28e83SPiotr Jasiukajtis!	no adjustment since all three selected here
1541*25c28e83SPiotr Jasiukajtis	subcc	LIM_l6,0x1,%g0		! continue in LOOP0?
1542*25c28e83SPiotr Jasiukajtis	bz,a	%icc,.LOOP0
1543*25c28e83SPiotr Jasiukajtis	mov	0x0,LIM_l6		! delay slot set biguns=0
1544*25c28e83SPiotr Jasiukajtis
1545*25c28e83SPiotr Jasiukajtis!	adjust 1st of the triple since 2nd and 3rd done here
!	(entering at .LOOP1 skips the fmuld at the end of .LOOP0, so it
!	is performed here before the branch)
1546*25c28e83SPiotr Jasiukajtis	subcc	LIM_l6,0x2,%g0		! continue in LOOP1?
1547*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f40,%f2		! adj LOOP0
1548*25c28e83SPiotr Jasiukajtis	bz,a	%icc,.LOOP1
1549*25c28e83SPiotr Jasiukajtis	mov	0x0,LIM_l6		! delay slot set biguns=0
1550*25c28e83SPiotr Jasiukajtis
1551*25c28e83SPiotr Jasiukajtis!	adjust 1st and 2nd of the triple since 3rd done here
!	(entering at .LOOP2 also skips the fmuld/faddd pair at the end of
!	.LOOP1, so both are performed here before the branch)
1552*25c28e83SPiotr Jasiukajtis	subcc	LIM_l6,0x3,%g0		! continue in LOOP2?
1553*25c28e83SPiotr Jasiukajtis	!done fmuld	%f0,%f40,%f2		! adj LOOP0
1554*25c28e83SPiotr Jasiukajtis	sub	%i3,%i4,%i3		! adjust to not double increment
1555*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f40,%f12		! adj LOOP1
1556*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f42,%f2		! adj LOOP1
1557*25c28e83SPiotr Jasiukajtis	bz,a	%icc,.LOOP2
1558*25c28e83SPiotr Jasiukajtis	mov	0x0,LIM_l6		! delay slot set biguns=0
1559*25c28e83SPiotr Jasiukajtis
! Fallback when LIM_l6 is none of 1, 2, 3.  NOTE(review): the sub/fmuld/
! faddd adjustments above have already executed on this path -- confirm
! it is unreachable in practice.
1560*25c28e83SPiotr Jasiukajtis	ba	.LOOP0
1561*25c28e83SPiotr Jasiukajtis	nop
1562*25c28e83SPiotr Jasiukajtis
1563*25c28e83SPiotr Jasiukajtis! -- 16 byte aligned
1564*25c28e83SPiotr Jasiukajtis
1565*25c28e83SPiotr Jasiukajtis	.align	32
1566*25c28e83SPiotr Jasiukajtis.LOOP0:
! Slot 0 of the three-way unrolled medium-range loop.  On entry x0 is in
! %f0:%f1 and its high word hx0 in %l0.  Records py0, preloads x1 (high
! word into %l1, value into %f10:%f11), leaves for .BIG0 when hx0 exceeds
! %l7 (0x413921fb) and for .ENDLOOP1 when the decremented count is <= 0,
! otherwise starts the reduction of x0 with f2 = x0 * invpio2.
1567*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l1		! preload next argument
1568*25c28e83SPiotr Jasiukajtis	mov	%i3,%o0			! py0 = y
1569*25c28e83SPiotr Jasiukajtis
1570*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%f10
1571*25c28e83SPiotr Jasiukajtis	cmp	%l0,%l7
1572*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
1573*25c28e83SPiotr Jasiukajtis	bg,pn	%icc,.BIG0		! if hx > 0x413921fb
1574*25c28e83SPiotr Jasiukajtis
1575*25c28e83SPiotr Jasiukajtis! delay slot
1576*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%f11
1577*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
1578*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
1579*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.ENDLOOP1
1580*25c28e83SPiotr Jasiukajtis
1581*25c28e83SPiotr Jasiukajtis! delay slot
1582*25c28e83SPiotr Jasiukajtis	andn	%l1,%i5,%l1		! hx1 &= ~0x80000000
1583*25c28e83SPiotr Jasiukajtis	nop
1584*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f40,%f2		! f2 = x0 * invpio2
1585*25c28e83SPiotr Jasiukajtis	fabsd	%f54,%f54		! a nop for alignment only
1586*25c28e83SPiotr Jasiukajtis
1587*25c28e83SPiotr Jasiukajtis.LOOP1:
! Slot 1 of the unrolled medium-range loop.  On entry x1 is in %f10:%f11
! and hx1 in %l1.  Records py1, preloads x2 (high word into %l2, value
! into %f20:%f21), leaves for .BIG1 when hx1 exceeds %l7 and for
! .ENDLOOP2 when the decremented count is <= 0, otherwise starts the
! reduction of x1 and adds the rounding constant to the slot 0 product.
1588*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l2		! preload next argument
1589*25c28e83SPiotr Jasiukajtis	mov	%i3,%o1			! py1 = y
1590*25c28e83SPiotr Jasiukajtis
1591*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%f20
1592*25c28e83SPiotr Jasiukajtis	cmp	%l1,%l7
1593*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
1594*25c28e83SPiotr Jasiukajtis	bg,pn	%icc,.BIG1		! if hx > 0x413921fb
1595*25c28e83SPiotr Jasiukajtis
1596*25c28e83SPiotr Jasiukajtis! delay slot
1597*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%f21
1598*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
1599*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
1600*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.ENDLOOP2
1601*25c28e83SPiotr Jasiukajtis
1602*25c28e83SPiotr Jasiukajtis! delay slot
1603*25c28e83SPiotr Jasiukajtis	andn	%l2,%i5,%l2		! hx2 &= ~0x80000000
1604*25c28e83SPiotr Jasiukajtis	nop
1605*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f40,%f12		! f12 = x1 * invpio2
1606*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f42,%f2		! f2 = x0 * invpio2 + round
1607*25c28e83SPiotr Jasiukajtis
1608*25c28e83SPiotr Jasiukajtis.LOOP2:
! Slot 2 of the unrolled medium-range loop, followed by the body that
! processes all three slots in lock step (instructions for slots 0, 1, 2
! are interleaved; registers f0-f9, f10-f19, f20-f29 belong to slots
! 0, 1, 2 and f32, f34, f36 are their respective temporaries).
!
! Stages:
!   1. n = x*invpio2 + round; the low word of that sum is saved as the
!      integer n (presumably a round-to-integer trick that depends on
!      the value of the round constant -- confirm against its definition).
!   2. x is reduced by n*pio2_1, n*pio2_2, n*pio2_3 and n*pio2_3t in
!      turn, recovering the rounding error of each subtraction, giving
!      a reduced argument x (f0/f10/f20) and a tail y (f2/f12/f22).
!   3. |x| is compared against a threshold chosen by (n+1)&1; if any slot
!      is below its threshold control leaves for .CASE4/.CASE2/.CASE1.
!   4. Otherwise all three slots use the table evaluation and fall
!      through to .FIXSIGN.
1609*25c28e83SPiotr Jasiukajtis	st	%f3,[%fp+n0]
1610*25c28e83SPiotr Jasiukajtis	mov	%i3,%o2			! py2 = y
1611*25c28e83SPiotr Jasiukajtis
1612*25c28e83SPiotr Jasiukajtis	cmp	%l2,%l7
1613*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
1614*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f40,%f22
1615*25c28e83SPiotr Jasiukajtis	bg,pn	%icc,.BIG2		! if hx > 0x413921fb
1616*25c28e83SPiotr Jasiukajtis
1617*25c28e83SPiotr Jasiukajtis! delay slot
1618*25c28e83SPiotr Jasiukajtis	add	%l5,thresh+4,%o7
1619*25c28e83SPiotr Jasiukajtis	faddd	%f12,%f42,%f12
1620*25c28e83SPiotr Jasiukajtis	st	%f13,[%fp+n1]
1621*25c28e83SPiotr Jasiukajtis
1622*25c28e83SPiotr Jasiukajtis! -
1623*25c28e83SPiotr Jasiukajtis
1624*25c28e83SPiotr Jasiukajtis	add	%l5,thresh,%g1
1625*25c28e83SPiotr Jasiukajtis	faddd	%f22,%f42,%f22
1626*25c28e83SPiotr Jasiukajtis	st	%f23,[%fp+n2]
1627*25c28e83SPiotr Jasiukajtis
! subtract the rounding constant again to get n as a double
1628*25c28e83SPiotr Jasiukajtis	fsubd	%f2,%f42,%f2		! n
1629*25c28e83SPiotr Jasiukajtis
1630*25c28e83SPiotr Jasiukajtis	fsubd	%f12,%f42,%f12		! n
1631*25c28e83SPiotr Jasiukajtis
1632*25c28e83SPiotr Jasiukajtis	fsubd	%f22,%f42,%f22		! n
1633*25c28e83SPiotr Jasiukajtis
! t = x - n*pio2_1 (kept in f4/f14/f24), w = n*pio2_2 (f6/f16/f26)
1634*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f46,%f4
1635*25c28e83SPiotr Jasiukajtis
1636*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f46,%f14
1637*25c28e83SPiotr Jasiukajtis
1638*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f46,%f24
1639*25c28e83SPiotr Jasiukajtis
1640*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f4,%f4
1641*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f48,%f6
1642*25c28e83SPiotr Jasiukajtis
1643*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f14,%f14
1644*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f48,%f16
1645*25c28e83SPiotr Jasiukajtis
1646*25c28e83SPiotr Jasiukajtis	fsubd	%f20,%f24,%f24
1647*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f48,%f26
1648*25c28e83SPiotr Jasiukajtis
! x = t - w; meanwhile o3/o4/o5 = ((n + 1) & 1) << 3, a byte offset of
! 0 or 8 used for the thresh table and for the table entry selection
1649*25c28e83SPiotr Jasiukajtis	fsubd	%f4,%f6,%f0
1650*25c28e83SPiotr Jasiukajtis	ld	[%fp+n0],%o3 ; add	%o3,1,%o3
1651*25c28e83SPiotr Jasiukajtis
1652*25c28e83SPiotr Jasiukajtis	fsubd	%f14,%f16,%f10
1653*25c28e83SPiotr Jasiukajtis	ld	[%fp+n1],%o4 ; add	%o4,1,%o4
1654*25c28e83SPiotr Jasiukajtis
1655*25c28e83SPiotr Jasiukajtis	fsubd	%f24,%f26,%f20
1656*25c28e83SPiotr Jasiukajtis	ld	[%fp+n2],%o5 ; add	%o5,1,%o5
1657*25c28e83SPiotr Jasiukajtis
! rounding error of the subtraction above: (t - x) - w
1658*25c28e83SPiotr Jasiukajtis	fsubd	%f4,%f0,%f32
1659*25c28e83SPiotr Jasiukajtis	and	%o3,1,%o3
1660*25c28e83SPiotr Jasiukajtis
1661*25c28e83SPiotr Jasiukajtis	fsubd	%f14,%f10,%f34
1662*25c28e83SPiotr Jasiukajtis	and	%o4,1,%o4
1663*25c28e83SPiotr Jasiukajtis
1664*25c28e83SPiotr Jasiukajtis	fsubd	%f24,%f20,%f36
1665*25c28e83SPiotr Jasiukajtis	and	%o5,1,%o5
1666*25c28e83SPiotr Jasiukajtis
1667*25c28e83SPiotr Jasiukajtis	fsubd	%f32,%f6,%f32
1668*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f50,%f8
1669*25c28e83SPiotr Jasiukajtis	sll	%o3,3,%o3
1670*25c28e83SPiotr Jasiukajtis
1671*25c28e83SPiotr Jasiukajtis	fsubd	%f34,%f16,%f34
1672*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f50,%f18
1673*25c28e83SPiotr Jasiukajtis	sll	%o4,3,%o4
1674*25c28e83SPiotr Jasiukajtis
1675*25c28e83SPiotr Jasiukajtis	fsubd	%f36,%f26,%f36
1676*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f50,%f28
1677*25c28e83SPiotr Jasiukajtis	sll	%o5,3,%o5
1678*25c28e83SPiotr Jasiukajtis
! w = n*pio2_3 minus the recovered error; the threshold word for each
! slot is loaded from thresh + offset into f6/f16/f26
1679*25c28e83SPiotr Jasiukajtis	fsubd	%f8,%f32,%f8
1680*25c28e83SPiotr Jasiukajtis	ld	[%g1+%o3],%f6
1681*25c28e83SPiotr Jasiukajtis
1682*25c28e83SPiotr Jasiukajtis	fsubd	%f18,%f34,%f18
1683*25c28e83SPiotr Jasiukajtis	ld	[%g1+%o4],%f16
1684*25c28e83SPiotr Jasiukajtis
1685*25c28e83SPiotr Jasiukajtis	fsubd	%f28,%f36,%f28
1686*25c28e83SPiotr Jasiukajtis	ld	[%g1+%o5],%f26
1687*25c28e83SPiotr Jasiukajtis
! t = x - w, then the error of that subtraction: (x - t) - w
1688*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f8,%f4
1689*25c28e83SPiotr Jasiukajtis
1690*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f18,%f14
1691*25c28e83SPiotr Jasiukajtis
1692*25c28e83SPiotr Jasiukajtis	fsubd	%f20,%f28,%f24
1693*25c28e83SPiotr Jasiukajtis
1694*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f4,%f32
1695*25c28e83SPiotr Jasiukajtis
1696*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f14,%f34
1697*25c28e83SPiotr Jasiukajtis
1698*25c28e83SPiotr Jasiukajtis	fsubd	%f20,%f24,%f36
1699*25c28e83SPiotr Jasiukajtis
1700*25c28e83SPiotr Jasiukajtis	fsubd	%f32,%f8,%f32
1701*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f52,%f2
1702*25c28e83SPiotr Jasiukajtis
1703*25c28e83SPiotr Jasiukajtis	fsubd	%f34,%f18,%f34
1704*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f52,%f12
1705*25c28e83SPiotr Jasiukajtis
1706*25c28e83SPiotr Jasiukajtis	fsubd	%f36,%f28,%f36
1707*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f52,%f22
1708*25c28e83SPiotr Jasiukajtis
! w = n*pio2_3t minus the recovered error; the word at thresh+4+offset
! is loaded into f8/f18/f28 and later ANDed into the saved sign bit
1709*25c28e83SPiotr Jasiukajtis	fsubd	%f2,%f32,%f2
1710*25c28e83SPiotr Jasiukajtis	ld	[%o7+%o3],%f8
1711*25c28e83SPiotr Jasiukajtis
1712*25c28e83SPiotr Jasiukajtis	fsubd	%f12,%f34,%f12
1713*25c28e83SPiotr Jasiukajtis	ld	[%o7+%o4],%f18
1714*25c28e83SPiotr Jasiukajtis
1715*25c28e83SPiotr Jasiukajtis	fsubd	%f22,%f36,%f22
1716*25c28e83SPiotr Jasiukajtis	ld	[%o7+%o5],%f28
1717*25c28e83SPiotr Jasiukajtis
1718*25c28e83SPiotr Jasiukajtis	fsubd	%f4,%f2,%f0		! x
1719*25c28e83SPiotr Jasiukajtis
1720*25c28e83SPiotr Jasiukajtis	fsubd	%f14,%f12,%f10		! x
1721*25c28e83SPiotr Jasiukajtis
1722*25c28e83SPiotr Jasiukajtis	fsubd	%f24,%f22,%f20		! x
1723*25c28e83SPiotr Jasiukajtis
1724*25c28e83SPiotr Jasiukajtis	fsubd	%f4,%f0,%f4
1725*25c28e83SPiotr Jasiukajtis
1726*25c28e83SPiotr Jasiukajtis	fsubd	%f14,%f10,%f14
1727*25c28e83SPiotr Jasiukajtis
1728*25c28e83SPiotr Jasiukajtis	fsubd	%f24,%f20,%f24
1729*25c28e83SPiotr Jasiukajtis
1730*25c28e83SPiotr Jasiukajtis	fands	%f0,%f30,%f9		! save signbit
1731*25c28e83SPiotr Jasiukajtis
1732*25c28e83SPiotr Jasiukajtis	fands	%f10,%f30,%f19		! save signbit
1733*25c28e83SPiotr Jasiukajtis
1734*25c28e83SPiotr Jasiukajtis	fands	%f20,%f30,%f29		! save signbit
1735*25c28e83SPiotr Jasiukajtis
! |x| is spilled to the stack so its high word can be reloaded into an
! integer register for table indexing
1736*25c28e83SPiotr Jasiukajtis	fabsd	%f0,%f0
1737*25c28e83SPiotr Jasiukajtis	std	%f0,[%fp+x0_1]
1738*25c28e83SPiotr Jasiukajtis
1739*25c28e83SPiotr Jasiukajtis	fabsd	%f10,%f10
1740*25c28e83SPiotr Jasiukajtis	std	%f10,[%fp+x1_1]
1741*25c28e83SPiotr Jasiukajtis
1742*25c28e83SPiotr Jasiukajtis	fabsd	%f20,%f20
1743*25c28e83SPiotr Jasiukajtis	std	%f20,[%fp+x2_1]
1744*25c28e83SPiotr Jasiukajtis
1745*25c28e83SPiotr Jasiukajtis	fsubd	%f4,%f2,%f2		! y
1746*25c28e83SPiotr Jasiukajtis
1747*25c28e83SPiotr Jasiukajtis	fsubd	%f14,%f12,%f12		! y
1748*25c28e83SPiotr Jasiukajtis
1749*25c28e83SPiotr Jasiukajtis	fsubd	%f24,%f22,%f22		! y
1750*25c28e83SPiotr Jasiukajtis
! Partitioned compare of the threshold register pair against |x|; bit 1
! of the integer result reflects the upper 32-bit words and is the bit
! tested below.
1751*25c28e83SPiotr Jasiukajtis	fcmpgt32 %f6,%f0,%l0
1752*25c28e83SPiotr Jasiukajtis
1753*25c28e83SPiotr Jasiukajtis	fcmpgt32 %f16,%f10,%l1
1754*25c28e83SPiotr Jasiukajtis
1755*25c28e83SPiotr Jasiukajtis	fcmpgt32 %f26,%f20,%l2
1756*25c28e83SPiotr Jasiukajtis
1757*25c28e83SPiotr Jasiukajtis! -- 16 byte aligned
! xor the saved sign of x into the high word of the tail y, so that y
! goes with |x|
1758*25c28e83SPiotr Jasiukajtis	fxors	%f2,%f9,%f2
1759*25c28e83SPiotr Jasiukajtis
1760*25c28e83SPiotr Jasiukajtis	fxors	%f12,%f19,%f12
1761*25c28e83SPiotr Jasiukajtis
1762*25c28e83SPiotr Jasiukajtis	fxors	%f22,%f29,%f22
1763*25c28e83SPiotr Jasiukajtis
! Dispatch: a set bit 1 means the threshold high word exceeds that of
! |x| for that slot.  The delay-slot instructions are not annulled, so
! they execute whether or not the branch is taken.
1764*25c28e83SPiotr Jasiukajtis	fands	%f9,%f8,%f9		! if (n & 1) clear sign bit
1765*25c28e83SPiotr Jasiukajtis	andcc	%l0,2,%g0
1766*25c28e83SPiotr Jasiukajtis	bne,pn	%icc,.CASE4
1767*25c28e83SPiotr Jasiukajtis
1768*25c28e83SPiotr Jasiukajtis! delay slot
1769*25c28e83SPiotr Jasiukajtis	fands	%f19,%f18,%f19		! if (n & 1) clear sign bit
1770*25c28e83SPiotr Jasiukajtis	andcc	%l1,2,%g0
1771*25c28e83SPiotr Jasiukajtis	bne,pn	%icc,.CASE2
1772*25c28e83SPiotr Jasiukajtis
1773*25c28e83SPiotr Jasiukajtis! delay slot
1774*25c28e83SPiotr Jasiukajtis	fands	%f29,%f28,%f29		! if (n & 1) clear sign bit
1775*25c28e83SPiotr Jasiukajtis	andcc	%l2,2,%g0
1776*25c28e83SPiotr Jasiukajtis	bne,pn	%icc,.CASE1
1777*25c28e83SPiotr Jasiukajtis
1778*25c28e83SPiotr Jasiukajtis! delay slot
! All three slots use the table from here on.  Breakpoint = (high word
! of |x| + f31) masked with f44; table byte offset =
! (((hx - 0x3fc3c000) >> 10) & ~0x1f) + the 0/8 selector in o3/o4/o5.
1779*25c28e83SPiotr Jasiukajtis	fpadd32s %f0,%f31,%f8
1780*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
1781*25c28e83SPiotr Jasiukajtis	ld	[%fp+x0_1],%l0
1782*25c28e83SPiotr Jasiukajtis
1783*25c28e83SPiotr Jasiukajtis	fpadd32s %f10,%f31,%f18
1784*25c28e83SPiotr Jasiukajtis	add	%l3,8,%g1
1785*25c28e83SPiotr Jasiukajtis	ld	[%fp+x1_1],%l1
1786*25c28e83SPiotr Jasiukajtis
1787*25c28e83SPiotr Jasiukajtis	fpadd32s %f20,%f31,%f28
1788*25c28e83SPiotr Jasiukajtis	ld	[%fp+x2_1],%l2
1789*25c28e83SPiotr Jasiukajtis
1790*25c28e83SPiotr Jasiukajtis	fand	%f8,%f44,%f4
1791*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0
1792*25c28e83SPiotr Jasiukajtis
1793*25c28e83SPiotr Jasiukajtis	fand	%f18,%f44,%f14
1794*25c28e83SPiotr Jasiukajtis	sub	%l1,%o7,%l1
1795*25c28e83SPiotr Jasiukajtis
1796*25c28e83SPiotr Jasiukajtis	fand	%f28,%f44,%f24
1797*25c28e83SPiotr Jasiukajtis	sub	%l2,%o7,%l2
1798*25c28e83SPiotr Jasiukajtis
! d = (|x| - breakpoint) + y
1799*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f4,%f0
1800*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
1801*25c28e83SPiotr Jasiukajtis
1802*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f14,%f10
1803*25c28e83SPiotr Jasiukajtis	srl	%l1,10,%l1
1804*25c28e83SPiotr Jasiukajtis
1805*25c28e83SPiotr Jasiukajtis	fsubd	%f20,%f24,%f20
1806*25c28e83SPiotr Jasiukajtis	srl	%l2,10,%l2
1807*25c28e83SPiotr Jasiukajtis
1808*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f2,%f0
1809*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0
1810*25c28e83SPiotr Jasiukajtis
1811*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f12,%f10
1812*25c28e83SPiotr Jasiukajtis	andn	%l1,0x1f,%l1
1813*25c28e83SPiotr Jasiukajtis
1814*25c28e83SPiotr Jasiukajtis	faddd	%f20,%f22,%f20
1815*25c28e83SPiotr Jasiukajtis	andn	%l2,0x1f,%l2
1816*25c28e83SPiotr Jasiukajtis
! z = d*d
1817*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f0,%f2
1818*25c28e83SPiotr Jasiukajtis	add	%l0,%o3,%l0
1819*25c28e83SPiotr Jasiukajtis
1820*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f12
1821*25c28e83SPiotr Jasiukajtis	add	%l1,%o4,%l1
1822*25c28e83SPiotr Jasiukajtis
1823*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f22
1824*25c28e83SPiotr Jasiukajtis	add	%l2,%o5,%l2
1825*25c28e83SPiotr Jasiukajtis
! P = d*(1 + z*(pp1 + z*pp2)) builds up in f6/f16/f26 and
! Q = z*(qq1 + z*qq2) in f4/f14/f24; T0 = [l3 + off] goes to f32/f34/f36
1826*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f58,%f6
1827*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l0],%f32
1828*25c28e83SPiotr Jasiukajtis
1829*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f58,%f16
1830*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l1],%f34
1831*25c28e83SPiotr Jasiukajtis
1832*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f58,%f26
1833*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l2],%f36
1834*25c28e83SPiotr Jasiukajtis
1835*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f56,%f6
1836*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f62,%f4
1837*25c28e83SPiotr Jasiukajtis
1838*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f56,%f16
1839*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f14
1840*25c28e83SPiotr Jasiukajtis
1841*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f56,%f26
1842*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f62,%f24
1843*25c28e83SPiotr Jasiukajtis
1844*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f6,%f6
1845*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f60,%f4
1846*25c28e83SPiotr Jasiukajtis
1847*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f16,%f16
1848*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f60,%f14
1849*25c28e83SPiotr Jasiukajtis
1850*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f26,%f26
1851*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f60,%f24
1852*25c28e83SPiotr Jasiukajtis
1853*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f54,%f6
1854*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f4,%f4
1855*25c28e83SPiotr Jasiukajtis
1856*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f54,%f16
1857*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f14,%f14
1858*25c28e83SPiotr Jasiukajtis
1859*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f54,%f26
1860*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f24,%f24
1861*25c28e83SPiotr Jasiukajtis
! z is dead now: f2/f12/f22 are reloaded with T8 = [l3 + 8 + off] and
! f0/f10/f20 (after the last use of d) with L = [l4 + off]
1862*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f6,%f6
1863*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f2
1864*25c28e83SPiotr Jasiukajtis
1865*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f16,%f16
1866*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f12
1867*25c28e83SPiotr Jasiukajtis
1868*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f26,%f26
1869*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l2],%f22
1870*25c28e83SPiotr Jasiukajtis
1871*25c28e83SPiotr Jasiukajtis	fmuld	%f4,%f32,%f4
1872*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l0],%f0
1873*25c28e83SPiotr Jasiukajtis
1874*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f34,%f14
1875*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l1],%f10
1876*25c28e83SPiotr Jasiukajtis
1877*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f36,%f24
1878*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l2],%f20
1879*25c28e83SPiotr Jasiukajtis
! result = ((P*T8 + Q*T0) + L) + T0, left unsigned in f6/f16/f26
1880*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f2,%f6
1881*25c28e83SPiotr Jasiukajtis
1882*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f12,%f16
1883*25c28e83SPiotr Jasiukajtis
1884*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f22,%f26
1885*25c28e83SPiotr Jasiukajtis
1886*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f4,%f6
1887*25c28e83SPiotr Jasiukajtis
1888*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f14,%f16
1889*25c28e83SPiotr Jasiukajtis
1890*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f24,%f26
1891*25c28e83SPiotr Jasiukajtis
1892*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f0,%f6
1893*25c28e83SPiotr Jasiukajtis
1894*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f10,%f16
1895*25c28e83SPiotr Jasiukajtis
1896*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f20,%f26
1897*25c28e83SPiotr Jasiukajtis
1898*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f32,%f6
1899*25c28e83SPiotr Jasiukajtis
1900*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f34,%f16
1901*25c28e83SPiotr Jasiukajtis
1902*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f36,%f26
1903*25c28e83SPiotr Jasiukajtis
1904*25c28e83SPiotr Jasiukajtis.FIXSIGN:
! Common tail of one medium-range iteration.  The unsigned results are in
! f6, f16 and f26 and the pending sign words in f9, f19 and f29.
!   - For each slot a word is loaded from thresh - 4 + (((n + 1) & 2) << 2),
!     i.e. offset 0 or 8, and XORed into the sign word (the table
!     contents are defined outside this excerpt -- presumably zero or
!     the sign bit; confirm).
!   - The sign word is ORed into the high word of each result, and the
!     results are stored through py0, py1 and py2 (%o0, %o1, %o2).
!   - The next x0 is preloaded (high word into %l0, value into %f0:%f1)
!     and x is advanced.
!   - The count is decremented: loop to .LOOP0 while it stays positive,
!     otherwise leave through .ENDLOOP0.
1905*25c28e83SPiotr Jasiukajtis	ld	[%fp+n0],%o3 ; add	%o3,1,%o3
1906*25c28e83SPiotr Jasiukajtis	add	%l5,thresh-4,%g1
1907*25c28e83SPiotr Jasiukajtis
1908*25c28e83SPiotr Jasiukajtis	ld	[%fp+n1],%o4 ; add	%o4,1,%o4
1909*25c28e83SPiotr Jasiukajtis
1910*25c28e83SPiotr Jasiukajtis	ld	[%fp+n2],%o5 ; add	%o5,1,%o5
1911*25c28e83SPiotr Jasiukajtis	and	%o3,2,%o3
1912*25c28e83SPiotr Jasiukajtis
1913*25c28e83SPiotr Jasiukajtis	sll	%o3,2,%o3
1914*25c28e83SPiotr Jasiukajtis	and	%o4,2,%o4
1915*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
1916*25c28e83SPiotr Jasiukajtis
1917*25c28e83SPiotr Jasiukajtis	sll	%o4,2,%o4
1918*25c28e83SPiotr Jasiukajtis	and	%o5,2,%o5
1919*25c28e83SPiotr Jasiukajtis	ld	[%g1+%o3],%f8
1920*25c28e83SPiotr Jasiukajtis
1921*25c28e83SPiotr Jasiukajtis	sll	%o5,2,%o5
1922*25c28e83SPiotr Jasiukajtis	ld	[%g1+%o4],%f18
1923*25c28e83SPiotr Jasiukajtis
1924*25c28e83SPiotr Jasiukajtis	ld	[%g1+%o5],%f28
1925*25c28e83SPiotr Jasiukajtis	fxors	%f9,%f8,%f9
1926*25c28e83SPiotr Jasiukajtis
1927*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%f0
1928*25c28e83SPiotr Jasiukajtis	fxors	%f29,%f28,%f29
1929*25c28e83SPiotr Jasiukajtis
1930*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%f1
1931*25c28e83SPiotr Jasiukajtis	fxors	%f19,%f18,%f19
1932*25c28e83SPiotr Jasiukajtis
1933*25c28e83SPiotr Jasiukajtis	fors	%f6,%f9,%f6		! tack on sign
1934*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
1935*25c28e83SPiotr Jasiukajtis	st	%f6,[%o0]
1936*25c28e83SPiotr Jasiukajtis
1937*25c28e83SPiotr Jasiukajtis	fors	%f26,%f29,%f26		! tack on sign
1938*25c28e83SPiotr Jasiukajtis	st	%f7,[%o0+4]
1939*25c28e83SPiotr Jasiukajtis
1940*25c28e83SPiotr Jasiukajtis	fors	%f16,%f19,%f16		! tack on sign
1941*25c28e83SPiotr Jasiukajtis	st	%f26,[%o2]
1942*25c28e83SPiotr Jasiukajtis
1943*25c28e83SPiotr Jasiukajtis	st	%f27,[%o2+4]
1944*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
1945*25c28e83SPiotr Jasiukajtis
1946*25c28e83SPiotr Jasiukajtis	st	%f16,[%o1]
1947*25c28e83SPiotr Jasiukajtis	andn	%l0,%i5,%l0		! hx &= ~0x80000000
1948*25c28e83SPiotr Jasiukajtis	bg,pt	%icc,.LOOP0
1949*25c28e83SPiotr Jasiukajtis
1950*25c28e83SPiotr Jasiukajtis! delay slot
! (not annulled: the low word of result 1 is stored on both paths)
1951*25c28e83SPiotr Jasiukajtis	st	%f17,[%o1+4]
1952*25c28e83SPiotr Jasiukajtis
1953*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.ENDLOOP0
1954*25c28e83SPiotr Jasiukajtis! delay slot
1955*25c28e83SPiotr Jasiukajtis	nop
1956*25c28e83SPiotr Jasiukajtis
1957*25c28e83SPiotr Jasiukajtis	.align	32
1958*25c28e83SPiotr Jasiukajtis.CASE1:
! .CASE1: elements 0 and 1 take the table-lookup path, element 2 takes the
! polynomial path.  On exit %f6, %f16 and %f26 hold the three values handed
! to .FIXSIGN.  %f8 is read here before being written, so it must already be
! set by the code that branches to this label.
! NOTE(review): %f0/%f10/%f20 are taken to be the arguments of elements
! 0/1/2 and %f2/%f12/%f22 their low-order tails; this is inferred from the
! x0_1/x1_1/x2_1 and y2_0 slot names -- confirm against the loop setup.
!
! Table path (element 0 shown; element 1 uses %f10,%f12,%f14,%f16,%f18,%f34,
! %l1 and %o4 in the same roles):
!   %f4 = %f8 & %f44
!   %l0 = ((((word at x0_1) - 0x3fc3c000) >> 10) & ~0x1f) + %o3
!   d   = (%f0 - %f4) + %f2
!   %f6 = d * (%f54 + d*d*(%f56 + d*d*%f58))
!   %f4 = d*d * (%f60 + d*d*%f62)
!   %f6 = T0 + (T2 + (%f6*T1 + %f4*T0))
! with T0 = [%l3+%l0], T1 = [%l3+8+%l0], T2 = [%l4+%l0].
! NOTE(review): the tables and the constants in %f31, %f44, %f54-%f62 are
! set up outside this chunk -- confirm their meaning there.
!
! Polynomial path (element 2):
!   z    = %f20 * %f20
!   %f24 = z*(c3 + z*(c2 + z*(c1 + z*c0))), c0..c3 at [%l5+%o5 + 0x00..0x30]
!   %f26 = X + (X*%f24 + Y), X = [%fp+%o5+x2_1], Y = [%fp+%o5+y2_0]
1959*25c28e83SPiotr Jasiukajtis	fpadd32s %f10,%f31,%f18		! 32-bit integer add on the upper word of %f10
1960*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7		! bias subtracted before indexing
1961*25c28e83SPiotr Jasiukajtis	ld	[%fp+x0_1],%l0
1962*25c28e83SPiotr Jasiukajtis
1963*25c28e83SPiotr Jasiukajtis	fand	%f8,%f44,%f4
1964*25c28e83SPiotr Jasiukajtis	add	%l3,8,%g1		! %g1 = second column of the %l3 table
1965*25c28e83SPiotr Jasiukajtis	ld	[%fp+x1_1],%l1
1966*25c28e83SPiotr Jasiukajtis
1967*25c28e83SPiotr Jasiukajtis	fand	%f18,%f44,%f14
1968*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0
1969*25c28e83SPiotr Jasiukajtis
1970*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f4,%f0
1971*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
1972*25c28e83SPiotr Jasiukajtis	sub	%l1,%o7,%l1
1973*25c28e83SPiotr Jasiukajtis
1974*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f14,%f10
1975*25c28e83SPiotr Jasiukajtis	srl	%l1,10,%l1
1976*25c28e83SPiotr Jasiukajtis
1977*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f20		! z for the element 2 polynomial
1978*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o5],%f36		! c0 of the coefficient set chosen by %o5
1979*25c28e83SPiotr Jasiukajtis	add	%l5,%o5,%l2		! %l2 -> that coefficient set
1980*25c28e83SPiotr Jasiukajtis
1981*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f2,%f0		! fold in the tail %f2
1982*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0		! index becomes a multiple of 32 bytes
1983*25c28e83SPiotr Jasiukajtis
1984*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f12,%f10
1985*25c28e83SPiotr Jasiukajtis	andn	%l1,0x1f,%l1
1986*25c28e83SPiotr Jasiukajtis
1987*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f36,%f24
1988*25c28e83SPiotr Jasiukajtis	ldd	[%l2+0x10],%f26		! c1
1989*25c28e83SPiotr Jasiukajtis	add	%fp,%o5,%o5		! %o5 now addresses the stack slots at the same offset
1990*25c28e83SPiotr Jasiukajtis
1991*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f0,%f2		! d0*d0 (tail register reused)
1992*25c28e83SPiotr Jasiukajtis	add	%l0,%o3,%l0
1993*25c28e83SPiotr Jasiukajtis
1994*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f12		! d1*d1
1995*25c28e83SPiotr Jasiukajtis	add	%l1,%o4,%l1
1996*25c28e83SPiotr Jasiukajtis
1997*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f26,%f24
1998*25c28e83SPiotr Jasiukajtis	ldd	[%l2+0x20],%f36		! c2
1999*25c28e83SPiotr Jasiukajtis
2000*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f58,%f6
2001*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l0],%f32		! T0 for element 0
2002*25c28e83SPiotr Jasiukajtis
2003*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f58,%f16
2004*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l1],%f34		! T0 for element 1
2005*25c28e83SPiotr Jasiukajtis
2006*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f24
2007*25c28e83SPiotr Jasiukajtis	ldd	[%l2+0x30],%f26		! c3
2008*25c28e83SPiotr Jasiukajtis
2009*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f56,%f6
2010*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f62,%f4
2011*25c28e83SPiotr Jasiukajtis
2012*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f56,%f16
2013*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f14
2014*25c28e83SPiotr Jasiukajtis
2015*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f36,%f24
2016*25c28e83SPiotr Jasiukajtis	ldd	[%o5+x2_1],%f36		! X for element 2 (c2 is no longer needed)
2017*25c28e83SPiotr Jasiukajtis
2018*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f6,%f6
2019*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f60,%f4
2020*25c28e83SPiotr Jasiukajtis
2021*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f16,%f16
2022*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f60,%f14
2023*25c28e83SPiotr Jasiukajtis
2024*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f24
2025*25c28e83SPiotr Jasiukajtis
2026*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f54,%f6
2027*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f4,%f4
2028*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f2		! T1 for element 0 (d0*d0 is dead after the fmuld above)
2029*25c28e83SPiotr Jasiukajtis
2030*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f54,%f16
2031*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f14,%f14
2032*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f12		! T1 for element 1
2033*25c28e83SPiotr Jasiukajtis
2034*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f26,%f24
2035*25c28e83SPiotr Jasiukajtis
2036*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f6,%f6
2037*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l0],%f0		! T2 for element 0 (d0 is dead after the fmuld above)
2038*25c28e83SPiotr Jasiukajtis
2039*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f16,%f16
2040*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l1],%f10		! T2 for element 1
2041*25c28e83SPiotr Jasiukajtis
2042*25c28e83SPiotr Jasiukajtis	fmuld	%f4,%f32,%f4
2043*25c28e83SPiotr Jasiukajtis	std	%f22,[%fp+y2_0]		! spill %f22 so it can be re-read through %o5 below
2044*25c28e83SPiotr Jasiukajtis
2045*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f34,%f14
2046*25c28e83SPiotr Jasiukajtis
2047*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f2,%f6
2048*25c28e83SPiotr Jasiukajtis
2049*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f12,%f16
2050*25c28e83SPiotr Jasiukajtis
2051*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f24		! polynomial value for element 2
2052*25c28e83SPiotr Jasiukajtis
2053*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f4,%f6
2054*25c28e83SPiotr Jasiukajtis
2055*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f14,%f16
2056*25c28e83SPiotr Jasiukajtis
2057*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f24,%f24
2058*25c28e83SPiotr Jasiukajtis	ldd	[%o5+y2_0],%f22		! Y for element 2
2059*25c28e83SPiotr Jasiukajtis
2060*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f0,%f6
2061*25c28e83SPiotr Jasiukajtis
2062*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f10,%f16
2063*25c28e83SPiotr Jasiukajtis
2064*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f22,%f24
2065*25c28e83SPiotr Jasiukajtis
2066*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f32,%f6		! element 0 result
2067*25c28e83SPiotr Jasiukajtis
2068*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f34,%f16		! element 1 result
2069*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.FIXSIGN
2070*25c28e83SPiotr Jasiukajtis
2071*25c28e83SPiotr Jasiukajtis! delay slot
2072*25c28e83SPiotr Jasiukajtis	faddd	%f36,%f24,%f26		! element 2 result
2073*25c28e83SPiotr Jasiukajtis
2074*25c28e83SPiotr Jasiukajtis	.align	32
2075*25c28e83SPiotr Jasiukajtis.CASE2:
! .CASE2: element 1 takes the polynomial path.  If bit 1 of %l2 is set the
! code branches to .CASE3 (element 2 polynomial as well); otherwise elements
! 0 and 2 both take the table-lookup path here.  On exit %f6, %f16 and %f26
! hold the three values handed to .FIXSIGN.
!
! Table path (element 0 shown; element 2 uses %f20,%f22,%f24,%f26,%f28,%f36,
! %l2 and %o5 in the same roles):
!   %f4 = %f8 & %f44, with %f8 built by fpadd32s from %f0 and %f31
!   %l0 = ((((word at x0_1) - 0x3fc3c000) >> 10) & ~0x1f) + %o3
!   d   = (%f0 - %f4) + %f2
!   %f6 = d * (%f54 + d*d*(%f56 + d*d*%f58))
!   %f4 = d*d * (%f60 + d*d*%f62)
!   %f6 = T0 + (T2 + (%f6*T1 + %f4*T0))
! with T0 = [%l3+%l0], T1 = [%l3+8+%l0], T2 = [%l4+%l0].
! NOTE(review): the tables and the constants in %f31, %f44, %f54-%f62 are
! set up outside this chunk -- confirm their meaning there.
!
! Polynomial path (element 1):
!   z    = %f10 * %f10
!   %f14 = z*(c3 + z*(c2 + z*(c1 + z*c0))), c0..c3 at [%l5+%o4 + 0x00..0x30]
!   %f16 = X + (X*%f14 + Y), X = [%fp+%o4+x1_1], Y = [%fp+%o4+y1_0]
2076*25c28e83SPiotr Jasiukajtis	fpadd32s %f0,%f31,%f8		! also consumed by .CASE3
2077*25c28e83SPiotr Jasiukajtis	ld	[%fp+x0_1],%l0		! also consumed by .CASE3
2078*25c28e83SPiotr Jasiukajtis	andcc	%l2,2,%g0		! test bit 1 of the element 2 flag word
2079*25c28e83SPiotr Jasiukajtis	bne,pn	%icc,.CASE3
2080*25c28e83SPiotr Jasiukajtis
2081*25c28e83SPiotr Jasiukajtis! delay slot
2082*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7		! executed on both paths; .CASE3 relies on %o7
2083*25c28e83SPiotr Jasiukajtis	fpadd32s %f20,%f31,%f28
2084*25c28e83SPiotr Jasiukajtis	ld	[%fp+x2_1],%l2		! flag word no longer needed; reuse %l2 for the index
2085*25c28e83SPiotr Jasiukajtis
2086*25c28e83SPiotr Jasiukajtis	fand	%f8,%f44,%f4
2087*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0
2088*25c28e83SPiotr Jasiukajtis	add	%l3,8,%g1		! %g1 = second column of the %l3 table
2089*25c28e83SPiotr Jasiukajtis
2090*25c28e83SPiotr Jasiukajtis	fand	%f28,%f44,%f24
2091*25c28e83SPiotr Jasiukajtis	sub	%l2,%o7,%l2
2092*25c28e83SPiotr Jasiukajtis
2093*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f4,%f0
2094*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
2095*25c28e83SPiotr Jasiukajtis
2096*25c28e83SPiotr Jasiukajtis	fsubd	%f20,%f24,%f20
2097*25c28e83SPiotr Jasiukajtis	srl	%l2,10,%l2
2098*25c28e83SPiotr Jasiukajtis
2099*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f10		! z for the element 1 polynomial
2100*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o4],%f34		! c0 of the coefficient set chosen by %o4
2101*25c28e83SPiotr Jasiukajtis	add	%l5,%o4,%l1		! %l1 -> that coefficient set
2102*25c28e83SPiotr Jasiukajtis
2103*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f2,%f0		! fold in the tail %f2
2104*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0		! index becomes a multiple of 32 bytes
2105*25c28e83SPiotr Jasiukajtis
2106*25c28e83SPiotr Jasiukajtis	faddd	%f20,%f22,%f20
2107*25c28e83SPiotr Jasiukajtis	andn	%l2,0x1f,%l2
2108*25c28e83SPiotr Jasiukajtis
2109*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f34,%f14
2110*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x10],%f16		! c1
2111*25c28e83SPiotr Jasiukajtis	add	%fp,%o4,%o4		! %o4 now addresses the stack slots at the same offset
2112*25c28e83SPiotr Jasiukajtis
2113*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f0,%f2		! d0*d0
2114*25c28e83SPiotr Jasiukajtis	add	%l0,%o3,%l0
2115*25c28e83SPiotr Jasiukajtis
2116*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f22		! d2*d2
2117*25c28e83SPiotr Jasiukajtis	add	%l2,%o5,%l2
2118*25c28e83SPiotr Jasiukajtis
2119*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f16,%f14
2120*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x20],%f34		! c2
2121*25c28e83SPiotr Jasiukajtis
2122*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f58,%f6
2123*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l0],%f32		! T0 for element 0
2124*25c28e83SPiotr Jasiukajtis
2125*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f58,%f26
2126*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l2],%f36		! T0 for element 2
2127*25c28e83SPiotr Jasiukajtis
2128*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14
2129*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x30],%f16		! c3
2130*25c28e83SPiotr Jasiukajtis
2131*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f56,%f6
2132*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f62,%f4
2133*25c28e83SPiotr Jasiukajtis
2134*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f56,%f26
2135*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f62,%f24
2136*25c28e83SPiotr Jasiukajtis
2137*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f34,%f14
2138*25c28e83SPiotr Jasiukajtis	ldd	[%o4+x1_1],%f34		! X for element 1 (c2 is no longer needed)
2139*25c28e83SPiotr Jasiukajtis
2140*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f6,%f6
2141*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f60,%f4
2142*25c28e83SPiotr Jasiukajtis
2143*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f26,%f26
2144*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f60,%f24
2145*25c28e83SPiotr Jasiukajtis
2146*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14
2147*25c28e83SPiotr Jasiukajtis
2148*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f54,%f6
2149*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f4,%f4
2150*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f2		! T1 for element 0
2151*25c28e83SPiotr Jasiukajtis
2152*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f54,%f26
2153*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f24,%f24
2154*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l2],%f22		! T1 for element 2
2155*25c28e83SPiotr Jasiukajtis
2156*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f16,%f14
2157*25c28e83SPiotr Jasiukajtis
2158*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f6,%f6
2159*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l0],%f0		! T2 for element 0
2160*25c28e83SPiotr Jasiukajtis
2161*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f26,%f26
2162*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l2],%f20		! T2 for element 2
2163*25c28e83SPiotr Jasiukajtis
2164*25c28e83SPiotr Jasiukajtis	fmuld	%f4,%f32,%f4
2165*25c28e83SPiotr Jasiukajtis	std	%f12,[%fp+y1_0]		! spill %f12 so it can be re-read through %o4 below
2166*25c28e83SPiotr Jasiukajtis
2167*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f36,%f24
2168*25c28e83SPiotr Jasiukajtis
2169*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f2,%f6
2170*25c28e83SPiotr Jasiukajtis
2171*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f22,%f26
2172*25c28e83SPiotr Jasiukajtis
2173*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14		! polynomial value for element 1
2174*25c28e83SPiotr Jasiukajtis
2175*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f4,%f6
2176*25c28e83SPiotr Jasiukajtis
2177*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f24,%f26
2178*25c28e83SPiotr Jasiukajtis
2179*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f14,%f14
2180*25c28e83SPiotr Jasiukajtis	ldd	[%o4+y1_0],%f12		! Y for element 1
2181*25c28e83SPiotr Jasiukajtis
2182*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f0,%f6
2183*25c28e83SPiotr Jasiukajtis
2184*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f20,%f26
2185*25c28e83SPiotr Jasiukajtis
2186*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f12,%f14
2187*25c28e83SPiotr Jasiukajtis
2188*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f32,%f6		! element 0 result
2189*25c28e83SPiotr Jasiukajtis
2190*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f36,%f26		! element 2 result
2191*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.FIXSIGN
2192*25c28e83SPiotr Jasiukajtis
2193*25c28e83SPiotr Jasiukajtis! delay slot
2194*25c28e83SPiotr Jasiukajtis	faddd	%f34,%f14,%f16		! element 1 result
2195*25c28e83SPiotr Jasiukajtis
2196*25c28e83SPiotr Jasiukajtis	.align	32
2197*25c28e83SPiotr Jasiukajtis.CASE3:
! .CASE3: element 0 takes the table-lookup path, elements 1 and 2 take the
! polynomial path.  When reached from .CASE2, %f8, %l0 and %o7 were set
! there (%o7 in the branch delay slot).  On exit %f6, %f16 and %f26 hold the
! three values handed to .FIXSIGN.
!
! Table path (element 0):
!   %f4 = %f8 & %f44
!   %l0 = (((%l0 - %o7) >> 10) & ~0x1f) + %o3
!   d   = (%f0 - %f4) + %f2
!   %f6 = d * (%f54 + d*d*(%f56 + d*d*%f58))
!   %f4 = d*d * (%f60 + d*d*%f62)
!   %f6 = T0 + (T2 + (%f6*T1 + %f4*T0))
! with T0 = [%l3+%l0], T1 = [%l3+8+%l0], T2 = [%l4+%l0].
!
! Polynomial path (element 1 shown; element 2 uses %f20,%f22,%f24,%f26,%f36,
! %l2, %o5, x2_1 and y2_0 in the same roles):
!   z    = %f10 * %f10
!   %f14 = z*(c3 + z*(c2 + z*(c1 + z*c0))), c0..c3 at [%l5+%o4 + 0x00..0x30]
!   %f16 = X + (X*%f14 + Y), X = [%fp+%o4+x1_1], Y = [%fp+%o4+y1_0]
! NOTE(review): the tables, coefficient sets and the constants in %f44,
! %f54-%f62 are set up outside this chunk -- confirm their meaning there.
2198*25c28e83SPiotr Jasiukajtis	fand	%f8,%f44,%f4
2199*25c28e83SPiotr Jasiukajtis	add	%l3,8,%g1		! %g1 = second column of the %l3 table
2200*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0
2201*25c28e83SPiotr Jasiukajtis
2202*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f10		! z for the element 1 polynomial
2203*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o4],%f34		! c0 of the set chosen by %o4
2204*25c28e83SPiotr Jasiukajtis	add	%l5,%o4,%l1		! %l1 -> element 1 coefficient set
2205*25c28e83SPiotr Jasiukajtis
2206*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f4,%f0
2207*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
2208*25c28e83SPiotr Jasiukajtis
2209*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f20		! z for the element 2 polynomial
2210*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o5],%f36		! c0 of the set chosen by %o5
2211*25c28e83SPiotr Jasiukajtis	add	%l5,%o5,%l2		! %l2 -> element 2 coefficient set
2212*25c28e83SPiotr Jasiukajtis
2213*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f34,%f14
2214*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x10],%f16		! c1
2215*25c28e83SPiotr Jasiukajtis	add	%fp,%o4,%o4		! %o4 now addresses the stack slots at the same offset
2216*25c28e83SPiotr Jasiukajtis
2217*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f2,%f0		! fold in the tail %f2
2218*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0		! index becomes a multiple of 32 bytes
2219*25c28e83SPiotr Jasiukajtis
2220*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f36,%f24
2221*25c28e83SPiotr Jasiukajtis	ldd	[%l2+0x10],%f26		! c1
2222*25c28e83SPiotr Jasiukajtis	add	%fp,%o5,%o5		! %o5 now addresses the stack slots at the same offset
2223*25c28e83SPiotr Jasiukajtis
2224*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f16,%f14
2225*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x20],%f34		! c2
2226*25c28e83SPiotr Jasiukajtis
2227*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f0,%f2		! d0*d0
2228*25c28e83SPiotr Jasiukajtis	add	%l0,%o3,%l0
2229*25c28e83SPiotr Jasiukajtis
2230*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f26,%f24
2231*25c28e83SPiotr Jasiukajtis	ldd	[%l2+0x20],%f36		! c2
2232*25c28e83SPiotr Jasiukajtis
2233*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14
2234*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x30],%f16		! c3
2235*25c28e83SPiotr Jasiukajtis
2236*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f58,%f6
2237*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l0],%f32		! T0 for element 0
2238*25c28e83SPiotr Jasiukajtis
2239*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f24
2240*25c28e83SPiotr Jasiukajtis	ldd	[%l2+0x30],%f26		! c3
2241*25c28e83SPiotr Jasiukajtis
2242*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f34,%f14
2243*25c28e83SPiotr Jasiukajtis	ldd	[%o4+x1_1],%f34		! X for element 1
2244*25c28e83SPiotr Jasiukajtis
2245*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f56,%f6
2246*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f62,%f4
2247*25c28e83SPiotr Jasiukajtis
2248*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f36,%f24
2249*25c28e83SPiotr Jasiukajtis	ldd	[%o5+x2_1],%f36		! X for element 2
2250*25c28e83SPiotr Jasiukajtis
2251*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14
2252*25c28e83SPiotr Jasiukajtis	std	%f12,[%fp+y1_0]		! spill %f12 so it can be re-read through %o4 below
2253*25c28e83SPiotr Jasiukajtis
2254*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f6,%f6
2255*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f60,%f4
2256*25c28e83SPiotr Jasiukajtis
2257*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f24
2258*25c28e83SPiotr Jasiukajtis	std	%f22,[%fp+y2_0]		! spill %f22 so it can be re-read through %o5 below
2259*25c28e83SPiotr Jasiukajtis
2260*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f16,%f14
2261*25c28e83SPiotr Jasiukajtis
2262*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f54,%f6
2263*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f4,%f4
2264*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f2		! T1 for element 0
2265*25c28e83SPiotr Jasiukajtis
2266*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f26,%f24
2267*25c28e83SPiotr Jasiukajtis
2268*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14		! polynomial value for element 1
2269*25c28e83SPiotr Jasiukajtis
2270*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f6,%f6
2271*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l0],%f0		! T2 for element 0
2272*25c28e83SPiotr Jasiukajtis
2273*25c28e83SPiotr Jasiukajtis	fmuld	%f4,%f32,%f4
2274*25c28e83SPiotr Jasiukajtis
2275*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f24		! polynomial value for element 2
2276*25c28e83SPiotr Jasiukajtis
2277*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f2,%f6
2278*25c28e83SPiotr Jasiukajtis
2279*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f14,%f14
2280*25c28e83SPiotr Jasiukajtis	ldd	[%o4+y1_0],%f12		! Y for element 1
2281*25c28e83SPiotr Jasiukajtis
2282*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f24,%f24
2283*25c28e83SPiotr Jasiukajtis	ldd	[%o5+y2_0],%f22		! Y for element 2
2284*25c28e83SPiotr Jasiukajtis
2285*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f4,%f6
2286*25c28e83SPiotr Jasiukajtis
2287*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f12,%f14
2288*25c28e83SPiotr Jasiukajtis
2289*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f22,%f24
2290*25c28e83SPiotr Jasiukajtis
2291*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f0,%f6
2292*25c28e83SPiotr Jasiukajtis
2293*25c28e83SPiotr Jasiukajtis	faddd	%f34,%f14,%f16		! element 1 result
2294*25c28e83SPiotr Jasiukajtis
2295*25c28e83SPiotr Jasiukajtis	faddd	%f36,%f24,%f26		! element 2 result
2296*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.FIXSIGN
2297*25c28e83SPiotr Jasiukajtis
2298*25c28e83SPiotr Jasiukajtis! delay slot
2299*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f32,%f6		! element 0 result
2300*25c28e83SPiotr Jasiukajtis
2301*25c28e83SPiotr Jasiukajtis	.align	32
2302*25c28e83SPiotr Jasiukajtis.CASE4:
! .CASE4: element 0 takes the polynomial path.  Dispatch on the other two:
!   bit 1 of %l1 set			-> .CASE6 (element 1 polynomial too)
!   else bit 1 of %l2 set		-> .CASE5 (element 2 polynomial too)
!   else				-> fall through: elements 1 and 2 use
!					   the table-lookup path here.
! The andcc on %l2 sits in the delay slot of the first branch, so its
! condition codes are live on both the fall-through path and at .CASE6.
! On exit %f6, %f16 and %f26 hold the three values handed to .FIXSIGN.
!
! Polynomial path (element 0):
!   z   = %f0 * %f0
!   %f4 = z*(c3 + z*(c2 + z*(c1 + z*c0))), c0..c3 at [%l5+%o3 + 0x00..0x30]
!   %f6 = X + (X*%f4 + Y), X = [%fp+%o3+x0_1], Y = [%fp+%o3+y0_0]
!
! Table path (element 1 shown; element 2 uses %f20,%f22,%f24,%f26,%f28,%f36,
! %l2 and %o5 in the same roles):
!   %f14 = %f18 & %f44, with %f18 built by fpadd32s from %f10 and %f31
!   %l1  = ((((word at x1_1) - 0x3fc3c000) >> 10) & ~0x1f) + %o4
!   d    = (%f10 - %f14) + %f12
!   %f16 = d * (%f54 + d*d*(%f56 + d*d*%f58))
!   %f14 = d*d * (%f60 + d*d*%f62)
!   %f16 = T0 + (T2 + (%f16*T1 + %f14*T0))
! with T0 = [%l3+%l1], T1 = [%l3+8+%l1], T2 = [%l4+%l1].
! NOTE(review): the tables, coefficient sets and the constants in %f31,
! %f44, %f54-%f62 are set up outside this chunk -- confirm their meaning.
2303*25c28e83SPiotr Jasiukajtis	fands	%f29,%f28,%f29		! if (n & 1) clear sign bit
2304*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7		! bias subtracted before indexing; also used by .CASE5/.CASE6
2305*25c28e83SPiotr Jasiukajtis	andcc	%l1,2,%g0		! test bit 1 of the element 1 flag word
2306*25c28e83SPiotr Jasiukajtis	bne,pn	%icc,.CASE6
2307*25c28e83SPiotr Jasiukajtis
2308*25c28e83SPiotr Jasiukajtis! delay slot
2309*25c28e83SPiotr Jasiukajtis	andcc	%l2,2,%g0		! test bit 1 of the element 2 flag word
2310*25c28e83SPiotr Jasiukajtis	fpadd32s %f10,%f31,%f18		! also consumed by .CASE5
2311*25c28e83SPiotr Jasiukajtis	ld	[%fp+x1_1],%l1		! also consumed by .CASE5; does not touch %icc
2312*25c28e83SPiotr Jasiukajtis	bne,pn	%icc,.CASE5
2313*25c28e83SPiotr Jasiukajtis
2314*25c28e83SPiotr Jasiukajtis! delay slot
2315*25c28e83SPiotr Jasiukajtis	add	%l3,8,%g1		! %g1 = second column of the %l3 table (both paths)
2316*25c28e83SPiotr Jasiukajtis	ld	[%fp+x2_1],%l2
2317*25c28e83SPiotr Jasiukajtis	fpadd32s %f20,%f31,%f28
2318*25c28e83SPiotr Jasiukajtis
2319*25c28e83SPiotr Jasiukajtis	fand	%f18,%f44,%f14
2320*25c28e83SPiotr Jasiukajtis	sub	%l1,%o7,%l1
2321*25c28e83SPiotr Jasiukajtis
2322*25c28e83SPiotr Jasiukajtis	fand	%f28,%f44,%f24
2323*25c28e83SPiotr Jasiukajtis	sub	%l2,%o7,%l2
2324*25c28e83SPiotr Jasiukajtis
2325*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f14,%f10
2326*25c28e83SPiotr Jasiukajtis	srl	%l1,10,%l1
2327*25c28e83SPiotr Jasiukajtis
2328*25c28e83SPiotr Jasiukajtis	fsubd	%f20,%f24,%f20
2329*25c28e83SPiotr Jasiukajtis	srl	%l2,10,%l2
2330*25c28e83SPiotr Jasiukajtis
2331*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f0,%f0		! z for the element 0 polynomial
2332*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o3],%f32		! c0 of the coefficient set chosen by %o3
2333*25c28e83SPiotr Jasiukajtis	add	%l5,%o3,%l0		! %l0 -> that coefficient set
2334*25c28e83SPiotr Jasiukajtis
2335*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f12,%f10		! fold in the tail %f12
2336*25c28e83SPiotr Jasiukajtis	andn	%l1,0x1f,%l1		! index becomes a multiple of 32 bytes
2337*25c28e83SPiotr Jasiukajtis
2338*25c28e83SPiotr Jasiukajtis	faddd	%f20,%f22,%f20
2339*25c28e83SPiotr Jasiukajtis	andn	%l2,0x1f,%l2
2340*25c28e83SPiotr Jasiukajtis
2341*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f32,%f4
2342*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x10],%f6		! c1
2343*25c28e83SPiotr Jasiukajtis	add	%fp,%o3,%o3		! %o3 now addresses the stack slots at the same offset
2344*25c28e83SPiotr Jasiukajtis
2345*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f12		! d1*d1
2346*25c28e83SPiotr Jasiukajtis	add	%l1,%o4,%l1
2347*25c28e83SPiotr Jasiukajtis
2348*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f22		! d2*d2
2349*25c28e83SPiotr Jasiukajtis	add	%l2,%o5,%l2
2350*25c28e83SPiotr Jasiukajtis
2351*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f6,%f4
2352*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x20],%f32		! c2
2353*25c28e83SPiotr Jasiukajtis
2354*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f58,%f16
2355*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l1],%f34		! T0 for element 1
2356*25c28e83SPiotr Jasiukajtis
2357*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f58,%f26
2358*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l2],%f36		! T0 for element 2
2359*25c28e83SPiotr Jasiukajtis
2360*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4
2361*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x30],%f6		! c3
2362*25c28e83SPiotr Jasiukajtis
2363*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f56,%f16
2364*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f14
2365*25c28e83SPiotr Jasiukajtis
2366*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f56,%f26
2367*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f62,%f24
2368*25c28e83SPiotr Jasiukajtis
2369*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f32,%f4
2370*25c28e83SPiotr Jasiukajtis	ldd	[%o3+x0_1],%f32		! X for element 0 (c2 is no longer needed)
2371*25c28e83SPiotr Jasiukajtis
2372*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f16,%f16
2373*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f60,%f14
2374*25c28e83SPiotr Jasiukajtis
2375*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f26,%f26
2376*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f60,%f24
2377*25c28e83SPiotr Jasiukajtis
2378*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4
2379*25c28e83SPiotr Jasiukajtis
2380*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f54,%f16
2381*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f14,%f14
2382*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f12		! T1 for element 1
2383*25c28e83SPiotr Jasiukajtis
2384*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f54,%f26
2385*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f24,%f24
2386*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l2],%f22		! T1 for element 2
2387*25c28e83SPiotr Jasiukajtis
2388*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f6,%f4
2389*25c28e83SPiotr Jasiukajtis
2390*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f16,%f16
2391*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l1],%f10		! T2 for element 1
2392*25c28e83SPiotr Jasiukajtis
2393*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f26,%f26
2394*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l2],%f20		! T2 for element 2
2395*25c28e83SPiotr Jasiukajtis
2396*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f34,%f14
2397*25c28e83SPiotr Jasiukajtis	std	%f2,[%fp+y0_0]		! spill %f2 so it can be re-read through %o3 below
2398*25c28e83SPiotr Jasiukajtis
2399*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f36,%f24
2400*25c28e83SPiotr Jasiukajtis
2401*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4		! polynomial value for element 0
2402*25c28e83SPiotr Jasiukajtis
2403*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f12,%f16
2404*25c28e83SPiotr Jasiukajtis
2405*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f22,%f26
2406*25c28e83SPiotr Jasiukajtis
2407*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f4,%f4
2408*25c28e83SPiotr Jasiukajtis	ldd	[%o3+y0_0],%f2		! Y for element 0
2409*25c28e83SPiotr Jasiukajtis
2410*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f14,%f16
2411*25c28e83SPiotr Jasiukajtis
2412*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f24,%f26
2413*25c28e83SPiotr Jasiukajtis
2414*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f2,%f4
2415*25c28e83SPiotr Jasiukajtis
2416*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f10,%f16
2417*25c28e83SPiotr Jasiukajtis
2418*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f20,%f26
2419*25c28e83SPiotr Jasiukajtis
2420*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f4,%f6		! element 0 result
2421*25c28e83SPiotr Jasiukajtis
2422*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f34,%f16		! element 1 result
2423*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.FIXSIGN
2424*25c28e83SPiotr Jasiukajtis
2425*25c28e83SPiotr Jasiukajtis! delay slot
2426*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f36,%f26		! element 2 result
2427*25c28e83SPiotr Jasiukajtis
2428*25c28e83SPiotr Jasiukajtis	.align	32
2429*25c28e83SPiotr Jasiukajtis.CASE5:
! .CASE5: element 1 takes the table-lookup path, elements 0 and 2 take the
! polynomial path.  When reached from .CASE4, %f18, %l1 and %o7 were set
! there and %g1 = %l3+8 was set in the branch delay slot.  On exit %f6, %f16
! and %f26 hold the three values handed to .FIXSIGN.
!
! Table path (element 1):
!   %f14 = %f18 & %f44
!   %l1  = (((%l1 - %o7) >> 10) & ~0x1f) + %o4
!   d    = (%f10 - %f14) + %f12
!   %f16 = d * (%f54 + d*d*(%f56 + d*d*%f58))
!   %f14 = d*d * (%f60 + d*d*%f62)
!   %f16 = T0 + (T2 + (%f16*T1 + %f14*T0))
! with T0 = [%l3+%l1], T1 = [%g1+%l1], T2 = [%l4+%l1].
!
! Polynomial path (element 0 shown; element 2 uses %f20,%f22,%f24,%f26,%f36,
! %l2, %o5, x2_1 and y2_0 in the same roles):
!   z   = %f0 * %f0
!   %f4 = z*(c3 + z*(c2 + z*(c1 + z*c0))), c0..c3 at [%l5+%o3 + 0x00..0x30]
!   %f6 = X + (X*%f4 + Y), X = [%fp+%o3+x0_1], Y = [%fp+%o3+y0_0]
! NOTE(review): the tables, coefficient sets and the constants in %f44,
! %f54-%f62 are set up outside this chunk -- confirm their meaning there.
2430*25c28e83SPiotr Jasiukajtis	fand	%f18,%f44,%f14
2431*25c28e83SPiotr Jasiukajtis	sub	%l1,%o7,%l1
2432*25c28e83SPiotr Jasiukajtis
2433*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f0,%f0		! z for the element 0 polynomial
2434*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o3],%f32		! c0 of the set chosen by %o3
2435*25c28e83SPiotr Jasiukajtis	add	%l5,%o3,%l0		! %l0 -> element 0 coefficient set
2436*25c28e83SPiotr Jasiukajtis
2437*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f14,%f10
2438*25c28e83SPiotr Jasiukajtis	srl	%l1,10,%l1
2439*25c28e83SPiotr Jasiukajtis
2440*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f20		! z for the element 2 polynomial
2441*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o5],%f36		! c0 of the set chosen by %o5
2442*25c28e83SPiotr Jasiukajtis	add	%l5,%o5,%l2		! %l2 -> element 2 coefficient set
2443*25c28e83SPiotr Jasiukajtis
2444*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f32,%f4
2445*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x10],%f6		! c1
2446*25c28e83SPiotr Jasiukajtis	add	%fp,%o3,%o3		! %o3 now addresses the stack slots at the same offset
2447*25c28e83SPiotr Jasiukajtis
2448*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f12,%f10		! fold in the tail %f12
2449*25c28e83SPiotr Jasiukajtis	andn	%l1,0x1f,%l1		! index becomes a multiple of 32 bytes
2450*25c28e83SPiotr Jasiukajtis
2451*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f36,%f24
2452*25c28e83SPiotr Jasiukajtis	ldd	[%l2+0x10],%f26		! c1
2453*25c28e83SPiotr Jasiukajtis	add	%fp,%o5,%o5		! %o5 now addresses the stack slots at the same offset
2454*25c28e83SPiotr Jasiukajtis
2455*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f6,%f4
2456*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x20],%f32		! c2
2457*25c28e83SPiotr Jasiukajtis
2458*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f12		! d1*d1
2459*25c28e83SPiotr Jasiukajtis	add	%l1,%o4,%l1
2460*25c28e83SPiotr Jasiukajtis
2461*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f26,%f24
2462*25c28e83SPiotr Jasiukajtis	ldd	[%l2+0x20],%f36		! c2
2463*25c28e83SPiotr Jasiukajtis
2464*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4
2465*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x30],%f6		! c3
2466*25c28e83SPiotr Jasiukajtis
2467*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f58,%f16
2468*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l1],%f34		! T0 for element 1
2469*25c28e83SPiotr Jasiukajtis
2470*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f24
2471*25c28e83SPiotr Jasiukajtis	ldd	[%l2+0x30],%f26		! c3
2472*25c28e83SPiotr Jasiukajtis
2473*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f32,%f4
2474*25c28e83SPiotr Jasiukajtis	ldd	[%o3+x0_1],%f32		! X for element 0
2475*25c28e83SPiotr Jasiukajtis
2476*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f56,%f16
2477*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f14
2478*25c28e83SPiotr Jasiukajtis
2479*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f36,%f24
2480*25c28e83SPiotr Jasiukajtis	ldd	[%o5+x2_1],%f36		! X for element 2
2481*25c28e83SPiotr Jasiukajtis
2482*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4
2483*25c28e83SPiotr Jasiukajtis	std	%f2,[%fp+y0_0]		! spill %f2 so it can be re-read through %o3 below
2484*25c28e83SPiotr Jasiukajtis
2485*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f16,%f16
2486*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f60,%f14
2487*25c28e83SPiotr Jasiukajtis
2488*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f24
2489*25c28e83SPiotr Jasiukajtis	std	%f22,[%fp+y2_0]		! spill %f22 so it can be re-read through %o5 below
2490*25c28e83SPiotr Jasiukajtis
2491*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f6,%f4
2492*25c28e83SPiotr Jasiukajtis
2493*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f54,%f16
2494*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f14,%f14
2495*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f12		! T1 for element 1
2496*25c28e83SPiotr Jasiukajtis
2497*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f26,%f24
2498*25c28e83SPiotr Jasiukajtis
2499*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4		! polynomial value for element 0
2500*25c28e83SPiotr Jasiukajtis
2501*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f16,%f16
2502*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l1],%f10		! T2 for element 1
2503*25c28e83SPiotr Jasiukajtis
2504*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f34,%f14
2505*25c28e83SPiotr Jasiukajtis
2506*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f24		! polynomial value for element 2
2507*25c28e83SPiotr Jasiukajtis
2508*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f12,%f16
2509*25c28e83SPiotr Jasiukajtis
2510*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f4,%f4
2511*25c28e83SPiotr Jasiukajtis	ldd	[%o3+y0_0],%f2		! Y for element 0
2512*25c28e83SPiotr Jasiukajtis
2513*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f24,%f24
2514*25c28e83SPiotr Jasiukajtis	ldd	[%o5+y2_0],%f22		! Y for element 2
2515*25c28e83SPiotr Jasiukajtis
2516*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f14,%f16
2517*25c28e83SPiotr Jasiukajtis
2518*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f2,%f4
2519*25c28e83SPiotr Jasiukajtis
2520*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f22,%f24
2521*25c28e83SPiotr Jasiukajtis
2522*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f10,%f16
2523*25c28e83SPiotr Jasiukajtis
2524*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f4,%f6		! element 0 result
2525*25c28e83SPiotr Jasiukajtis
2526*25c28e83SPiotr Jasiukajtis	faddd	%f36,%f24,%f26		! element 2 result
2527*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.FIXSIGN
2528*25c28e83SPiotr Jasiukajtis
2529*25c28e83SPiotr Jasiukajtis! delay slot
2530*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f34,%f16		! element 1 result
2531*25c28e83SPiotr Jasiukajtis
2532*25c28e83SPiotr Jasiukajtis	.align	32
2533*25c28e83SPiotr Jasiukajtis.CASE6:
! .CASE6: elements 0 and 1 take the polynomial path.  The bne below consumes
! condition codes that are not set in this block; when reached from .CASE4
! they come from the andcc on bit 1 of %l2 in that branch delay slot (the ld
! and add here leave %icc alone).  If that bit is set, control goes to .CASE7
! (all three polynomial); otherwise element 2 takes the table-lookup path.
! %o7 is likewise expected to hold the index bias on entry.  On exit %f6,
! %f16 and %f26 hold the three values handed to .FIXSIGN.
!
! Table path (element 2):
!   %f24 = %f28 & %f44, with %f28 built by fpadd32s from %f20 and %f31
!   %l2  = ((((word at x2_1) - %o7) >> 10) & ~0x1f) + %o5
!   d    = (%f20 - %f24) + %f22
!   %f26 = d * (%f54 + d*d*(%f56 + d*d*%f58))
!   %f24 = d*d * (%f60 + d*d*%f62)
!   %f26 = T0 + (T2 + (%f26*T1 + %f24*T0))
! with T0 = [%l3+%l2], T1 = [%l3+8+%l2], T2 = [%l4+%l2].
!
! Polynomial path (element 0 shown; element 1 uses %f10,%f12,%f14,%f16,%f34,
! %l1, %o4, x1_1 and y1_0 in the same roles):
!   z   = %f0 * %f0
!   %f4 = z*(c3 + z*(c2 + z*(c1 + z*c0))), c0..c3 at [%l5+%o3 + 0x00..0x30]
!   %f6 = X + (X*%f4 + Y), X = [%fp+%o3+x0_1], Y = [%fp+%o3+y0_0]
! NOTE(review): the tables, coefficient sets and the constants in %f31,
! %f44, %f54-%f62 are set up outside this chunk -- confirm their meaning.
2534*25c28e83SPiotr Jasiukajtis	ld	[%fp+x2_1],%l2
2535*25c28e83SPiotr Jasiukajtis	add	%l3,8,%g1		! %g1 = second column of the %l3 table
2536*25c28e83SPiotr Jasiukajtis	bne,pn	%icc,.CASE7
2537*25c28e83SPiotr Jasiukajtis! delay slot
2538*25c28e83SPiotr Jasiukajtis	fpadd32s %f20,%f31,%f28
2539*25c28e83SPiotr Jasiukajtis
2540*25c28e83SPiotr Jasiukajtis	fand	%f28,%f44,%f24
2541*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o3],%f32		! c0 of the set chosen by %o3
2542*25c28e83SPiotr Jasiukajtis	add	%l5,%o3,%l0		! %l0 -> element 0 coefficient set
2543*25c28e83SPiotr Jasiukajtis
2544*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f0,%f0		! z for the element 0 polynomial
2545*25c28e83SPiotr Jasiukajtis	sub	%l2,%o7,%l2
2546*25c28e83SPiotr Jasiukajtis
2547*25c28e83SPiotr Jasiukajtis	fsubd	%f20,%f24,%f20
2548*25c28e83SPiotr Jasiukajtis	srl	%l2,10,%l2
2549*25c28e83SPiotr Jasiukajtis
2550*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f10		! z for the element 1 polynomial
2551*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o4],%f34		! c0 of the set chosen by %o4
2552*25c28e83SPiotr Jasiukajtis	add	%l5,%o4,%l1		! %l1 -> element 1 coefficient set
2553*25c28e83SPiotr Jasiukajtis
2554*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f32,%f4
2555*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x10],%f6		! c1
2556*25c28e83SPiotr Jasiukajtis	add	%fp,%o3,%o3		! %o3 now addresses the stack slots at the same offset
2557*25c28e83SPiotr Jasiukajtis
2558*25c28e83SPiotr Jasiukajtis	faddd	%f20,%f22,%f20		! fold in the tail %f22
2559*25c28e83SPiotr Jasiukajtis	andn	%l2,0x1f,%l2		! index becomes a multiple of 32 bytes
2560*25c28e83SPiotr Jasiukajtis
2561*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f34,%f14
2562*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x10],%f16		! c1
2563*25c28e83SPiotr Jasiukajtis	add	%fp,%o4,%o4		! %o4 now addresses the stack slots at the same offset
2564*25c28e83SPiotr Jasiukajtis
2565*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f6,%f4
2566*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x20],%f32		! c2
2567*25c28e83SPiotr Jasiukajtis
2568*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f22		! d2*d2
2569*25c28e83SPiotr Jasiukajtis	add	%l2,%o5,%l2
2570*25c28e83SPiotr Jasiukajtis
2571*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f16,%f14
2572*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x20],%f34		! c2
2573*25c28e83SPiotr Jasiukajtis
2574*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4
2575*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x30],%f6		! c3
2576*25c28e83SPiotr Jasiukajtis
2577*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f58,%f26
2578*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l2],%f36		! T0 for element 2
2579*25c28e83SPiotr Jasiukajtis
2580*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14
2581*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x30],%f16		! c3
2582*25c28e83SPiotr Jasiukajtis
2583*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f32,%f4
2584*25c28e83SPiotr Jasiukajtis	ldd	[%o3+x0_1],%f32		! X for element 0
2585*25c28e83SPiotr Jasiukajtis
2586*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f56,%f26
2587*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f62,%f24
2588*25c28e83SPiotr Jasiukajtis
2589*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f34,%f14
2590*25c28e83SPiotr Jasiukajtis	ldd	[%o4+x1_1],%f34		! X for element 1
2591*25c28e83SPiotr Jasiukajtis
2592*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4
2593*25c28e83SPiotr Jasiukajtis	std	%f2,[%fp+y0_0]		! spill %f2 so it can be re-read through %o3 below
2594*25c28e83SPiotr Jasiukajtis
2595*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f26,%f26
2596*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f60,%f24
2597*25c28e83SPiotr Jasiukajtis
2598*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14
2599*25c28e83SPiotr Jasiukajtis	std	%f12,[%fp+y1_0]		! spill %f12 so it can be re-read through %o4 below
2600*25c28e83SPiotr Jasiukajtis
2601*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f6,%f4
2602*25c28e83SPiotr Jasiukajtis
2603*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f54,%f26
2604*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f24,%f24
2605*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l2],%f22		! T1 for element 2
2606*25c28e83SPiotr Jasiukajtis
2607*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f16,%f14
2608*25c28e83SPiotr Jasiukajtis
2609*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4		! polynomial value for element 0
2610*25c28e83SPiotr Jasiukajtis
2611*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f26,%f26
2612*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l2],%f20		! T2 for element 2
2613*25c28e83SPiotr Jasiukajtis
2614*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f36,%f24
2615*25c28e83SPiotr Jasiukajtis
2616*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14		! polynomial value for element 1
2617*25c28e83SPiotr Jasiukajtis
2618*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f22,%f26
2619*25c28e83SPiotr Jasiukajtis
2620*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f4,%f4
2621*25c28e83SPiotr Jasiukajtis	ldd	[%o3+y0_0],%f2		! Y for element 0
2622*25c28e83SPiotr Jasiukajtis
2623*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f14,%f14
2624*25c28e83SPiotr Jasiukajtis	ldd	[%o4+y1_0],%f12		! Y for element 1
2625*25c28e83SPiotr Jasiukajtis
2626*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f24,%f26
2627*25c28e83SPiotr Jasiukajtis
2628*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f2,%f4
2629*25c28e83SPiotr Jasiukajtis
2630*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f12,%f14
2631*25c28e83SPiotr Jasiukajtis
2632*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f20,%f26
2633*25c28e83SPiotr Jasiukajtis
2634*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f4,%f6		! element 0 result
2635*25c28e83SPiotr Jasiukajtis
2636*25c28e83SPiotr Jasiukajtis	faddd	%f34,%f14,%f16		! element 1 result
2637*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.FIXSIGN
2638*25c28e83SPiotr Jasiukajtis
2639*25c28e83SPiotr Jasiukajtis! delay slot
2640*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f36,%f26		! element 2 result
2641*25c28e83SPiotr Jasiukajtis
2642*25c28e83SPiotr Jasiukajtis	.align	32
2643*25c28e83SPiotr Jasiukajtis.CASE7:
! .CASE7: all three elements take the polynomial path; no table lookup is
! done here.  On exit %f6, %f16 and %f26 hold the three values handed to
! .FIXSIGN.
!
! For element 0 (elements 1 and 2 use %f10-%f16/%f34/%l1/%o4/x1_1/y1_0 and
! %f20-%f26/%f36/%l2/%o5/x2_1/y2_0 in the same roles):
!   z   = %f0 * %f0
!   %f4 = z*(c3 + z*(c2 + z*(c1 + z*c0))), c0..c3 at [%l5+%o3 + 0x00..0x30]
!   %f6 = X + (X*%f4 + Y), X = [%fp+%o3+x0_1], Y = [%fp+%o3+y0_0]
! %f2/%f12/%f22 are spilled to y0_0/y1_0/y2_0 before Y is read back, so Y is
! either that spilled value or a neighbouring slot, depending on the offset.
! NOTE(review): what the %o3/%o4/%o5 offsets select, and the contents of the
! coefficient sets at %l5, are established outside this chunk -- confirm.
2644*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f0,%f0		! z0
2645*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o3],%f32		! c0 for element 0
2646*25c28e83SPiotr Jasiukajtis	add	%l5,%o3,%l0		! %l0 -> element 0 coefficient set
2647*25c28e83SPiotr Jasiukajtis
2648*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f10		! z1
2649*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o4],%f34		! c0 for element 1
2650*25c28e83SPiotr Jasiukajtis	add	%l5,%o4,%l1		! %l1 -> element 1 coefficient set
2651*25c28e83SPiotr Jasiukajtis
2652*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f20		! z2
2653*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o5],%f36		! c0 for element 2
2654*25c28e83SPiotr Jasiukajtis	add	%l5,%o5,%l2		! %l2 -> element 2 coefficient set
2655*25c28e83SPiotr Jasiukajtis
2656*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f32,%f4
2657*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x10],%f6		! c1
2658*25c28e83SPiotr Jasiukajtis	add	%fp,%o3,%o3		! %o3 now addresses the stack slots at the same offset
2659*25c28e83SPiotr Jasiukajtis
2660*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f34,%f14
2661*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x10],%f16		! c1
2662*25c28e83SPiotr Jasiukajtis	add	%fp,%o4,%o4		! likewise for element 1
2663*25c28e83SPiotr Jasiukajtis
2664*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f36,%f24
2665*25c28e83SPiotr Jasiukajtis	ldd	[%l2+0x10],%f26		! c1
2666*25c28e83SPiotr Jasiukajtis	add	%fp,%o5,%o5		! likewise for element 2
2667*25c28e83SPiotr Jasiukajtis
2668*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f6,%f4
2669*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x20],%f32		! c2
2670*25c28e83SPiotr Jasiukajtis
2671*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f16,%f14
2672*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x20],%f34		! c2
2673*25c28e83SPiotr Jasiukajtis
2674*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f26,%f24
2675*25c28e83SPiotr Jasiukajtis	ldd	[%l2+0x20],%f36		! c2
2676*25c28e83SPiotr Jasiukajtis
2677*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4
2678*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x30],%f6		! c3
2679*25c28e83SPiotr Jasiukajtis
2680*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14
2681*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x30],%f16		! c3
2682*25c28e83SPiotr Jasiukajtis
2683*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f24
2684*25c28e83SPiotr Jasiukajtis	ldd	[%l2+0x30],%f26		! c3
2685*25c28e83SPiotr Jasiukajtis
2686*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f32,%f4
2687*25c28e83SPiotr Jasiukajtis	ldd	[%o3+x0_1],%f32		! X for element 0
2688*25c28e83SPiotr Jasiukajtis
2689*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f34,%f14
2690*25c28e83SPiotr Jasiukajtis	ldd	[%o4+x1_1],%f34		! X for element 1
2691*25c28e83SPiotr Jasiukajtis
2692*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f36,%f24
2693*25c28e83SPiotr Jasiukajtis	ldd	[%o5+x2_1],%f36		! X for element 2
2694*25c28e83SPiotr Jasiukajtis
2695*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4
2696*25c28e83SPiotr Jasiukajtis	std	%f2,[%fp+y0_0]		! spill %f2 so it can be re-read through %o3 below
2697*25c28e83SPiotr Jasiukajtis
2698*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14
2699*25c28e83SPiotr Jasiukajtis	std	%f12,[%fp+y1_0]		! spill %f12 so it can be re-read through %o4 below
2700*25c28e83SPiotr Jasiukajtis
2701*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f24
2702*25c28e83SPiotr Jasiukajtis	std	%f22,[%fp+y2_0]		! spill %f22 so it can be re-read through %o5 below
2703*25c28e83SPiotr Jasiukajtis
2704*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f6,%f4
2705*25c28e83SPiotr Jasiukajtis
2706*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f16,%f14
2707*25c28e83SPiotr Jasiukajtis
2708*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f26,%f24
2709*25c28e83SPiotr Jasiukajtis
2710*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4		! polynomial value for element 0
2711*25c28e83SPiotr Jasiukajtis
2712*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14		! polynomial value for element 1
2713*25c28e83SPiotr Jasiukajtis
2714*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f24		! polynomial value for element 2
2715*25c28e83SPiotr Jasiukajtis
2716*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f4,%f4
2717*25c28e83SPiotr Jasiukajtis	ldd	[%o3+y0_0],%f2		! Y for element 0
2718*25c28e83SPiotr Jasiukajtis
2719*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f14,%f14
2720*25c28e83SPiotr Jasiukajtis	ldd	[%o4+y1_0],%f12		! Y for element 1
2721*25c28e83SPiotr Jasiukajtis
2722*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f24,%f24
2723*25c28e83SPiotr Jasiukajtis	ldd	[%o5+y2_0],%f22		! Y for element 2
2724*25c28e83SPiotr Jasiukajtis
2725*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f2,%f4
2726*25c28e83SPiotr Jasiukajtis
2727*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f12,%f14
2728*25c28e83SPiotr Jasiukajtis
2729*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f22,%f24
2730*25c28e83SPiotr Jasiukajtis
2731*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f4,%f6		! element 0 result
2732*25c28e83SPiotr Jasiukajtis
2733*25c28e83SPiotr Jasiukajtis	faddd	%f34,%f14,%f16		! element 1 result
2734*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.FIXSIGN
2735*25c28e83SPiotr Jasiukajtis
2736*25c28e83SPiotr Jasiukajtis! delay slot
2737*25c28e83SPiotr Jasiukajtis	faddd	%f36,%f24,%f26		! element 2 result
2738*25c28e83SPiotr Jasiukajtis
2739*25c28e83SPiotr Jasiukajtis
2740*25c28e83SPiotr Jasiukajtis	.align	32
! .ENDLOOP2: epilogue reached from .SKIP2/.BIG2 when the %i0 counter runs
! out while the value in %f10 has not been processed yet.  It reduces
! %f10, evaluates one of two polynomial paths, applies the sign and stores
! the double result through %o1.  Execution then continues with the code
! that follows this block.
! NOTE(review): %f30-%f62, %l3-%l5 and the thresh/n1/x1_1/y1_0 offsets are
! set up outside this view.  The roles given below are inferred from how
! they are used here and should be confirmed.
2741*25c28e83SPiotr Jasiukajtis.ENDLOOP2:
2742*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f40,%f12		! f12 = x * %f40 (presumably 2/pi, confirm)
2743*25c28e83SPiotr Jasiukajtis	add	%l5,thresh,%g1		! g1 = %l5 + thresh
2744*25c28e83SPiotr Jasiukajtis	faddd	%f12,%f42,%f12		! add %f42 (looks like a round-to-integer bias)
2745*25c28e83SPiotr Jasiukajtis	st	%f13,[%fp+n1]		! keep low word of the biased sum as n1
2746*25c28e83SPiotr Jasiukajtis	fsubd	%f12,%f42,%f12		! n
2747*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f46,%f14
2748*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f14,%f14		! x - n*%f46
2749*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f48,%f16
2750*25c28e83SPiotr Jasiukajtis	fsubd	%f14,%f16,%f10		! ... - n*%f48
2751*25c28e83SPiotr Jasiukajtis	ld	[%fp+n1],%o4 ; add	%o4,1,%o4	! o4 = n1 + 1
2752*25c28e83SPiotr Jasiukajtis	fsubd	%f14,%f10,%f34		! recover what the last subtract rounded off
2753*25c28e83SPiotr Jasiukajtis	and	%o4,1,%o4		! o4 = (n1 + 1) & 1
2754*25c28e83SPiotr Jasiukajtis	fsubd	%f34,%f16,%f34
2755*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f50,%f18
2756*25c28e83SPiotr Jasiukajtis	sll	%o4,3,%o4		! o4 = 0 or 8, byte offset for the loads below
2757*25c28e83SPiotr Jasiukajtis	fsubd	%f18,%f34,%f18
2758*25c28e83SPiotr Jasiukajtis	ld	[%g1+%o4],%f16		! word at thresh + o4, compared against |x| below
2759*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f18,%f14
2760*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f14,%f34
2761*25c28e83SPiotr Jasiukajtis	add	%l5,thresh+4,%o7
2762*25c28e83SPiotr Jasiukajtis	fsubd	%f34,%f18,%f34
2763*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f52,%f12
2764*25c28e83SPiotr Jasiukajtis	fsubd	%f12,%f34,%f12
2765*25c28e83SPiotr Jasiukajtis	ld	[%o7+%o4],%f18		! word at thresh + 4 + o4, used as a mask on the sign below
2766*25c28e83SPiotr Jasiukajtis	fsubd	%f14,%f12,%f10		! x
2767*25c28e83SPiotr Jasiukajtis	fsubd	%f14,%f10,%f14
2768*25c28e83SPiotr Jasiukajtis	fands	%f10,%f30,%f19		! save signbit
2769*25c28e83SPiotr Jasiukajtis	fabsd	%f10,%f10
2770*25c28e83SPiotr Jasiukajtis	std	%f10,[%fp+x1_1]		! spill |x|, it is read back from the frame below
2771*25c28e83SPiotr Jasiukajtis	fsubd	%f14,%f12,%f12		! y
2772*25c28e83SPiotr Jasiukajtis	fcmpgt32 %f16,%f10,%l1		! 32-bit compares of %f16 against |x|, mask in %l1
2773*25c28e83SPiotr Jasiukajtis	fxors	%f12,%f19,%f12		! xor the saved sign bit into the high word of y
2774*25c28e83SPiotr Jasiukajtis	fands	%f19,%f18,%f19		! if (n & 1) clear sign bit
2775*25c28e83SPiotr Jasiukajtis	andcc	%l1,2,%g0		! test bit 1 of the compare mask
2776*25c28e83SPiotr Jasiukajtis	bne,pn	%icc,1f			! bit set: use the polynomial at 1:
2777*25c28e83SPiotr Jasiukajtis! delay slot
2778*25c28e83SPiotr Jasiukajtis	nop
! Table-driven path: |x| is split into a masked part (%f14) and a remainder,
! and three doubles are fetched from the tables at %l3, %l3+8 and %l4.
2779*25c28e83SPiotr Jasiukajtis	fpadd32s %f10,%f31,%f18		! 32-bit add of %f31 to the high word of |x|
2780*25c28e83SPiotr Jasiukajtis	ld	[%fp+x1_1],%l1		! l1 = first word of the spilled |x|
2781*25c28e83SPiotr Jasiukajtis	fand	%f18,%f44,%f14		! f14 = masked copy (mask in %f44)
2782*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
2783*25c28e83SPiotr Jasiukajtis	add	%l3,8,%g1		! g1 = %l3 + 8
2784*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f14,%f10		! remainder = |x| - masked part
2785*25c28e83SPiotr Jasiukajtis	sub	%l1,%o7,%l1		! l1 -= 0x3fc3c000
2786*25c28e83SPiotr Jasiukajtis	srl	%l1,10,%l1
2787*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f12,%f10		! fold y into the remainder
2788*25c28e83SPiotr Jasiukajtis	andn	%l1,0x1f,%l1		! round the offset down to a multiple of 32
2789*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f12		! f12 = remainder squared
2790*25c28e83SPiotr Jasiukajtis	add	%l1,%o4,%l1		! add the 0/8 offset chosen by (n1 + 1) & 1
2791*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f58,%f16
2792*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l1],%f34
2793*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f56,%f16
2794*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f14
2795*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f16,%f16
2796*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f60,%f14
2797*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f54,%f16
2798*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f14,%f14
2799*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f12
2800*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f16,%f16
2801*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l1],%f10
2802*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f34,%f14
2803*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f12,%f16
2804*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f14,%f16
2805*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f10,%f16
2806*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,2f
2807*25c28e83SPiotr Jasiukajtis	faddd	%f16,%f34,%f16		! delay slot: final add, result in %f16
! Polynomial path: four coefficients at %l5 + o4 + {0,0x10,0x20,0x30} are
! applied to |x| squared.  %o4 then becomes %fp + 0/8, so the reads through
! x1_1 and y1_0 pick either that slot or the double 8 bytes after it.
! NOTE(review): the contents of those neighbouring frame slots are set
! outside this view.
2808*25c28e83SPiotr Jasiukajtis1:
2809*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f10		! f10 = |x| squared
2810*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o4],%f34
2811*25c28e83SPiotr Jasiukajtis	add	%l5,%o4,%l1
2812*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f34,%f14
2813*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x10],%f16
2814*25c28e83SPiotr Jasiukajtis	add	%fp,%o4,%o4		! o4 = %fp + (0 or 8)
2815*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f16,%f14
2816*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x20],%f34
2817*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14
2818*25c28e83SPiotr Jasiukajtis	ldd	[%l1+0x30],%f16
2819*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f34,%f14
2820*25c28e83SPiotr Jasiukajtis	ldd	[%o4+x1_1],%f34		! x1_1 slot or the double after it
2821*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14
2822*25c28e83SPiotr Jasiukajtis	std	%f12,[%fp+y1_0]		! spill y
2823*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f16,%f14
2824*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f14
2825*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f14,%f14
2826*25c28e83SPiotr Jasiukajtis	ldd	[%o4+y1_0],%f12		! y1_0 slot or the double after it
2827*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f12,%f14
2828*25c28e83SPiotr Jasiukajtis	faddd	%f34,%f14,%f16		! result in %f16
! Both paths join here with the unsigned result in %f16.
2829*25c28e83SPiotr Jasiukajtis2:
2830*25c28e83SPiotr Jasiukajtis	add	%l5,thresh-4,%g1
2831*25c28e83SPiotr Jasiukajtis	ld	[%fp+n1],%o4 ; add	%o4,1,%o4	! o4 = n1 + 1
2832*25c28e83SPiotr Jasiukajtis	and	%o4,2,%o4
2833*25c28e83SPiotr Jasiukajtis	sll	%o4,2,%o4		! o4 = 0 or 8, from bit 1 of (n1 + 1)
2834*25c28e83SPiotr Jasiukajtis	ld	[%g1+%o4],%f18		! word at thresh - 4 + o4
2835*25c28e83SPiotr Jasiukajtis	fxors	%f19,%f18,%f19		! xor it into the pending sign word
2836*25c28e83SPiotr Jasiukajtis	fors	%f16,%f19,%f16		! tack on sign
2837*25c28e83SPiotr Jasiukajtis	st	%f16,[%o1]		! store first word of the result
2838*25c28e83SPiotr Jasiukajtis	st	%f17,[%o1+4]		! store second word of the result
2839*25c28e83SPiotr Jasiukajtis
! .ENDLOOP1: epilogue reached from .SKIP1/.BIG1, or by falling in from the
! code directly above, when the value in %f0 has not been processed yet.
! It reduces %f0, evaluates one of two polynomial paths, applies the sign
! and stores the double result through %o0.  Execution then continues with
! the code that follows this block.
! NOTE(review): %f30-%f62, %l3-%l5 and the thresh/n0/x0_1/y0_0 offsets are
! set up outside this view.  The roles given below are inferred from how
! they are used here and should be confirmed.
2840*25c28e83SPiotr Jasiukajtis.ENDLOOP1:
2841*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f40,%f2		! f2 = x * %f40 (presumably 2/pi, confirm)
2842*25c28e83SPiotr Jasiukajtis	add	%l5,thresh,%g1		! g1 = %l5 + thresh
2843*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f42,%f2		! add %f42 (looks like a round-to-integer bias)
2844*25c28e83SPiotr Jasiukajtis	st	%f3,[%fp+n0]		! keep low word of the biased sum as n0
2845*25c28e83SPiotr Jasiukajtis	fsubd	%f2,%f42,%f2		! n
2846*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f46,%f4
2847*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f4,%f4		! x - n*%f46
2848*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f48,%f6
2849*25c28e83SPiotr Jasiukajtis	fsubd	%f4,%f6,%f0		! ... - n*%f48
2850*25c28e83SPiotr Jasiukajtis	ld	[%fp+n0],%o3 ; add	%o3,1,%o3	! o3 = n0 + 1
2851*25c28e83SPiotr Jasiukajtis	fsubd	%f4,%f0,%f32		! recover what the last subtract rounded off
2852*25c28e83SPiotr Jasiukajtis	and	%o3,1,%o3		! o3 = (n0 + 1) & 1
2853*25c28e83SPiotr Jasiukajtis	fsubd	%f32,%f6,%f32
2854*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f50,%f8
2855*25c28e83SPiotr Jasiukajtis	sll	%o3,3,%o3		! o3 = 0 or 8, byte offset for the loads below
2856*25c28e83SPiotr Jasiukajtis	fsubd	%f8,%f32,%f8
2857*25c28e83SPiotr Jasiukajtis	ld	[%g1+%o3],%f6		! word at thresh + o3, compared against |x| below
2858*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f8,%f4
2859*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f4,%f32
2860*25c28e83SPiotr Jasiukajtis	add	%l5,thresh+4,%o7
2861*25c28e83SPiotr Jasiukajtis	fsubd	%f32,%f8,%f32
2862*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f52,%f2
2863*25c28e83SPiotr Jasiukajtis	fsubd	%f2,%f32,%f2
2864*25c28e83SPiotr Jasiukajtis	ld	[%o7+%o3],%f8		! word at thresh + 4 + o3, used as a mask on the sign below
2865*25c28e83SPiotr Jasiukajtis	fsubd	%f4,%f2,%f0		! x
2866*25c28e83SPiotr Jasiukajtis	fsubd	%f4,%f0,%f4
2867*25c28e83SPiotr Jasiukajtis	fands	%f0,%f30,%f9		! save signbit
2868*25c28e83SPiotr Jasiukajtis	fabsd	%f0,%f0
2869*25c28e83SPiotr Jasiukajtis	std	%f0,[%fp+x0_1]		! spill |x|, it is read back from the frame below
2870*25c28e83SPiotr Jasiukajtis	fsubd	%f4,%f2,%f2		! y
2871*25c28e83SPiotr Jasiukajtis	fcmpgt32 %f6,%f0,%l0		! 32-bit compares of %f6 against |x|, mask in %l0
2872*25c28e83SPiotr Jasiukajtis	fxors	%f2,%f9,%f2		! xor the saved sign bit into the high word of y
2873*25c28e83SPiotr Jasiukajtis	fands	%f9,%f8,%f9		! if (n & 1) clear sign bit
2874*25c28e83SPiotr Jasiukajtis	andcc	%l0,2,%g0		! test bit 1 of the compare mask
2875*25c28e83SPiotr Jasiukajtis	bne,pn	%icc,1f			! bit set: use the polynomial at 1:
2876*25c28e83SPiotr Jasiukajtis! delay slot
2877*25c28e83SPiotr Jasiukajtis	nop
! Table-driven path: |x| is split into a masked part (%f4) and a remainder,
! and three doubles are fetched from the tables at %l3, %l3+8 and %l4.
2878*25c28e83SPiotr Jasiukajtis	fpadd32s %f0,%f31,%f8		! 32-bit add of %f31 to the high word of |x|
2879*25c28e83SPiotr Jasiukajtis	ld	[%fp+x0_1],%l0		! l0 = first word of the spilled |x|
2880*25c28e83SPiotr Jasiukajtis	fand	%f8,%f44,%f4		! f4 = masked copy (mask in %f44)
2881*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3fc3c000),%o7
2882*25c28e83SPiotr Jasiukajtis	add	%l3,8,%g1		! g1 = %l3 + 8
2883*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f4,%f0		! remainder = |x| - masked part
2884*25c28e83SPiotr Jasiukajtis	sub	%l0,%o7,%l0		! l0 -= 0x3fc3c000
2885*25c28e83SPiotr Jasiukajtis	srl	%l0,10,%l0
2886*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f2,%f0		! fold y into the remainder
2887*25c28e83SPiotr Jasiukajtis	andn	%l0,0x1f,%l0		! round the offset down to a multiple of 32
2888*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f0,%f2		! f2 = remainder squared
2889*25c28e83SPiotr Jasiukajtis	add	%l0,%o3,%l0		! add the 0/8 offset chosen by (n0 + 1) & 1
2890*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f58,%f6
2891*25c28e83SPiotr Jasiukajtis	ldd	[%l3+%l0],%f32
2892*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f56,%f6
2893*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f62,%f4
2894*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f6,%f6
2895*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f60,%f4
2896*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f54,%f6
2897*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f4,%f4
2898*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f2
2899*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f6,%f6
2900*25c28e83SPiotr Jasiukajtis	ldd	[%l4+%l0],%f0
2901*25c28e83SPiotr Jasiukajtis	fmuld	%f4,%f32,%f4
2902*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f2,%f6
2903*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f4,%f6
2904*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f0,%f6
2905*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,2f
2906*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f32,%f6		! delay slot: final add, result in %f6
! Polynomial path: four coefficients at %l5 + o3 + {0,0x10,0x20,0x30} are
! applied to |x| squared.  %o3 then becomes %fp + 0/8, so the reads through
! x0_1 and y0_0 pick either that slot or the double 8 bytes after it.
! NOTE(review): the contents of those neighbouring frame slots are set
! outside this view.
2907*25c28e83SPiotr Jasiukajtis1:
2908*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f0,%f0		! f0 = |x| squared
2909*25c28e83SPiotr Jasiukajtis	ldd	[%l5+%o3],%f32
2910*25c28e83SPiotr Jasiukajtis	add	%l5,%o3,%l0
2911*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f32,%f4
2912*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x10],%f6
2913*25c28e83SPiotr Jasiukajtis	add	%fp,%o3,%o3		! o3 = %fp + (0 or 8)
2914*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f6,%f4
2915*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x20],%f32
2916*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4
2917*25c28e83SPiotr Jasiukajtis	ldd	[%l0+0x30],%f6
2918*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f32,%f4
2919*25c28e83SPiotr Jasiukajtis	ldd	[%o3+x0_1],%f32		! x0_1 slot or the double after it
2920*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4
2921*25c28e83SPiotr Jasiukajtis	std	%f2,[%fp+y0_0]		! spill y
2922*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f6,%f4
2923*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f4
2924*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f4,%f4
2925*25c28e83SPiotr Jasiukajtis	ldd	[%o3+y0_0],%f2		! y0_0 slot or the double after it
2926*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f2,%f4
2927*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f4,%f6		! result in %f6
! Both paths join here with the unsigned result in %f6.
2928*25c28e83SPiotr Jasiukajtis2:
2929*25c28e83SPiotr Jasiukajtis	add	%l5,thresh-4,%g1
2930*25c28e83SPiotr Jasiukajtis	ld	[%fp+n0],%o3 ; add	%o3,1,%o3	! o3 = n0 + 1
2931*25c28e83SPiotr Jasiukajtis	and	%o3,2,%o3
2932*25c28e83SPiotr Jasiukajtis	sll	%o3,2,%o3		! o3 = 0 or 8, from bit 1 of (n0 + 1)
2933*25c28e83SPiotr Jasiukajtis	ld	[%g1+%o3],%f8		! word at thresh - 4 + o3
2934*25c28e83SPiotr Jasiukajtis	fxors	%f9,%f8,%f9		! xor it into the pending sign word
2935*25c28e83SPiotr Jasiukajtis	fors	%f6,%f9,%f6		! tack on sign
2936*25c28e83SPiotr Jasiukajtis	st	%f6,[%o0]		! store first word of the result
2937*25c28e83SPiotr Jasiukajtis	st	%f7,[%o0+4]		! store second word of the result
2938*25c28e83SPiotr Jasiukajtis
! .ENDLOOP0: common exit path.  If LIM_l6 is zero the function returns
! straight away.  Otherwise the values spilled to the frame are reloaded
! into %o0-%o4, %l7 is passed in %o5, and __vlibm_vcos_big is called
! before returning.  LIM_l6 is written by the .BIG0/.BIG1/.BIG2 handlers.
2939*25c28e83SPiotr Jasiukajtis.ENDLOOP0:
2940*25c28e83SPiotr Jasiukajtis
2941*25c28e83SPiotr Jasiukajtis! check for huge arguments remaining
2942*25c28e83SPiotr Jasiukajtis
2943*25c28e83SPiotr Jasiukajtis	tst	LIM_l6
2944*25c28e83SPiotr Jasiukajtis	be,pt	%icc,.exit		! flag is zero: nothing left to do
2945*25c28e83SPiotr Jasiukajtis! delay slot
2946*25c28e83SPiotr Jasiukajtis	nop
2947*25c28e83SPiotr Jasiukajtis
2948*25c28e83SPiotr Jasiukajtis! ========== huge range (use C code) ==========
2949*25c28e83SPiotr Jasiukajtis
! The xsave/ysave slots are loaded as 64-bit values under __sparcv9 and as
! 32-bit values otherwise.
! NOTE(review): presumably these are the saved x and y pointers, and
! nsave/sxsave/sysave the saved count and strides.  Confirm against the
! function prologue, which is outside this view.
2950*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9
2951*25c28e83SPiotr Jasiukajtis	ldx	[%fp+xsave],%o1
2952*25c28e83SPiotr Jasiukajtis	ldx	[%fp+ysave],%o3
2953*25c28e83SPiotr Jasiukajtis#else
2954*25c28e83SPiotr Jasiukajtis	ld	[%fp+xsave],%o1
2955*25c28e83SPiotr Jasiukajtis	ld	[%fp+ysave],%o3
2956*25c28e83SPiotr Jasiukajtis#endif
2957*25c28e83SPiotr Jasiukajtis	ld	[%fp+nsave],%o0
2958*25c28e83SPiotr Jasiukajtis	ld	[%fp+sxsave],%o2
2959*25c28e83SPiotr Jasiukajtis	ld	[%fp+sysave],%o4
2960*25c28e83SPiotr Jasiukajtis	sra	%o2,0,%o2		! sign-extend for V9
2961*25c28e83SPiotr Jasiukajtis	sra	%o4,0,%o4		! same sign extension for the value from sysave
2962*25c28e83SPiotr Jasiukajtis	call	__vlibm_vcos_big
2963*25c28e83SPiotr Jasiukajtis	mov	%l7,%o5			! delay slot
2964*25c28e83SPiotr Jasiukajtis
! Return to the caller.  The restore sits in the delay slot of ret.
2965*25c28e83SPiotr Jasiukajtis.exit:
2966*25c28e83SPiotr Jasiukajtis	ret
2967*25c28e83SPiotr Jasiukajtis	restore
2968*25c28e83SPiotr Jasiukajtis
2969*25c28e83SPiotr Jasiukajtis
2970*25c28e83SPiotr Jasiukajtis	.align	32
! .SKIP0: step past one element without computing or storing a result for
! it.  Decrements the %i0 counter and leaves through .ENDLOOP0 when it
! reaches zero or below.  Otherwise the value held in %l1/%f10 is moved
! into %l0/%f0 and control returns to .LOOP0.  Reached from code outside
! this view.
2971*25c28e83SPiotr Jasiukajtis.SKIP0:
2972*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0		! count down and set condition codes
2973*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.ENDLOOP0		! counter <= 0: finish up
2974*25c28e83SPiotr Jasiukajtis! delay slot, harmless if branch taken
2975*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
2976*25c28e83SPiotr Jasiukajtis	andn	%l1,%i5,%l0		! hx &= ~0x80000000
2977*25c28e83SPiotr Jasiukajtis	fmovs	%f10,%f0		! copy only the first (high) word of the double
2978*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%f1		! second word comes from memory at x + 4
2979*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.LOOP0
2980*25c28e83SPiotr Jasiukajtis! delay slot
2981*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
2982*25c28e83SPiotr Jasiukajtis
2983*25c28e83SPiotr Jasiukajtis
2984*25c28e83SPiotr Jasiukajtis	.align	32
! .SKIP1: step past one element without computing or storing a result for
! it.  Decrements the %i0 counter and leaves through .ENDLOOP1 when it
! reaches zero or below.  Otherwise the value held in %l2/%f20 is moved
! into %l1/%f10 and control returns to .LOOP1.  Reached from code outside
! this view.
2985*25c28e83SPiotr Jasiukajtis.SKIP1:
2986*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0		! count down and set condition codes
2987*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.ENDLOOP1		! counter <= 0: finish up
2988*25c28e83SPiotr Jasiukajtis! delay slot, harmless if branch taken
2989*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
2990*25c28e83SPiotr Jasiukajtis	andn	%l2,%i5,%l1		! hx &= ~0x80000000
2991*25c28e83SPiotr Jasiukajtis	fmovs	%f20,%f10		! copy only the first (high) word of the double
2992*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%f11		! second word comes from memory at x + 4
2993*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.LOOP1
2994*25c28e83SPiotr Jasiukajtis! delay slot
2995*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
2996*25c28e83SPiotr Jasiukajtis
2997*25c28e83SPiotr Jasiukajtis
2998*25c28e83SPiotr Jasiukajtis	.align	32
! .SKIP2: step past one element without computing or storing a result for
! it.  Decrements the %i0 counter and leaves through .ENDLOOP2 when it
! reaches zero or below.  Otherwise a fresh element is loaded from x into
! %l2 (first word, sign masked off) and %f20/%f21, and control returns to
! .LOOP2.  Reached from code outside this view.
2999*25c28e83SPiotr Jasiukajtis.SKIP2:
3000*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0		! count down and set condition codes
3001*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.ENDLOOP2		! counter <= 0: finish up
3002*25c28e83SPiotr Jasiukajtis! delay slot, harmless if branch taken
3003*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
3004*25c28e83SPiotr Jasiukajtis	ld	[%i1],%l2		! first word of x as an integer
3005*25c28e83SPiotr Jasiukajtis	ld	[%i1],%f20		! same word again, into the FP register pair
3006*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%f21		! second word of x
3007*25c28e83SPiotr Jasiukajtis	andn	%l2,%i5,%l2		! hx &= ~0x80000000
3008*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.LOOP2
3009*25c28e83SPiotr Jasiukajtis! delay slot
3010*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
3011*25c28e83SPiotr Jasiukajtis
3012*25c28e83SPiotr Jasiukajtis
3013*25c28e83SPiotr Jasiukajtis	.align	32
! .BIG0: handler for an element whose masked high word is in %l0 and whose
! value is in %f0.  Reached from code outside this view.
!   hx <  0x7ff00000: copy %l7 into LIM_l6 and store nothing here.
!   hx >= 0x7ff00000: store x - x through %o0.  That range is infinity or
!                     NaN, so the difference is NaN.
! Afterwards the %i0 counter is decremented.  If it is zero or below,
! control leaves through .ENDLOOP0.  Otherwise the value in %l1/%f10 is
! moved into %l0/%f0 and control returns to .LOOP0.
3014*25c28e83SPiotr Jasiukajtis.BIG0:
3015*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ff00000),%o7
3016*25c28e83SPiotr Jasiukajtis	cmp	%l0,%o7
3017*25c28e83SPiotr Jasiukajtis	bl,a,pt	%icc,1f			! if hx < 0x7ff00000
3018*25c28e83SPiotr Jasiukajtis! delay slot, annulled if branch not taken
3019*25c28e83SPiotr Jasiukajtis	mov	%l7,LIM_l6	! set biguns flag or
3020*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f0,%f0		! y = x - x
3021*25c28e83SPiotr Jasiukajtis	st	%f0,[%o0]
3022*25c28e83SPiotr Jasiukajtis	st	%f1,[%o0+4]
3023*25c28e83SPiotr Jasiukajtis1:
3024*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0		! count down and set condition codes
3025*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.ENDLOOP0		! counter <= 0: finish up
3026*25c28e83SPiotr Jasiukajtis! delay slot, harmless if branch taken
3027*25c28e83SPiotr Jasiukajtis	andn	%l1,%i5,%l0		! hx &= ~0x80000000
3028*25c28e83SPiotr Jasiukajtis	fmovd	%f10,%f0		! move the whole double from %f10 into %f0
3029*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.LOOP0
3030*25c28e83SPiotr Jasiukajtis! delay slot
3031*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
3032*25c28e83SPiotr Jasiukajtis
3033*25c28e83SPiotr Jasiukajtis
3034*25c28e83SPiotr Jasiukajtis	.align	32
! .BIG1: handler for an element whose masked high word is in %l1 and whose
! value is in %f10.  Reached from code outside this view.
!   hx <  0x7ff00000: copy %l7 into LIM_l6 and store nothing here.
!   hx >= 0x7ff00000: store x - x through %o1.  That range is infinity or
!                     NaN, so the difference is NaN.
! Afterwards the %i0 counter is decremented.  If it is zero or below,
! control leaves through .ENDLOOP1.  Otherwise the value in %l2/%f20 is
! moved into %l1/%f10 and control returns to .LOOP1.
3035*25c28e83SPiotr Jasiukajtis.BIG1:
3036*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ff00000),%o7
3037*25c28e83SPiotr Jasiukajtis	cmp	%l1,%o7
3038*25c28e83SPiotr Jasiukajtis	bl,a,pt	%icc,1f			! if hx < 0x7ff00000
3039*25c28e83SPiotr Jasiukajtis! delay slot, annulled if branch not taken
3040*25c28e83SPiotr Jasiukajtis	mov	%l7,LIM_l6		! set biguns flag or
3041*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f10,%f10		! y = x - x
3042*25c28e83SPiotr Jasiukajtis	st	%f10,[%o1]
3043*25c28e83SPiotr Jasiukajtis	st	%f11,[%o1+4]
3044*25c28e83SPiotr Jasiukajtis1:
3045*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0		! count down and set condition codes
3046*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.ENDLOOP1		! counter <= 0: finish up
3047*25c28e83SPiotr Jasiukajtis! delay slot, harmless if branch taken
3048*25c28e83SPiotr Jasiukajtis	andn	%l2,%i5,%l1		! hx &= ~0x80000000
3049*25c28e83SPiotr Jasiukajtis	fmovd	%f20,%f10		! move the whole double from %f20 into %f10
3050*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.LOOP1
3051*25c28e83SPiotr Jasiukajtis! delay slot
3052*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
3053*25c28e83SPiotr Jasiukajtis
3054*25c28e83SPiotr Jasiukajtis
3055*25c28e83SPiotr Jasiukajtis	.align	32
! .BIG2: handler for an element whose masked high word is in %l2 and whose
! value is in %f20.  Reached from code outside this view.
!   hx <  0x7ff00000: copy %l7 into LIM_l6 and store nothing here.
!   hx >= 0x7ff00000: store x - x through %o2.  That range is infinity or
!                     NaN, so the difference is NaN.
! Afterwards the %i0 counter is decremented.  If it is zero or below,
! control leaves through .ENDLOOP2.  Otherwise a fresh element is loaded
! from x into %l2 and %f20/%f21, and control returns to .LOOP2.
3056*25c28e83SPiotr Jasiukajtis.BIG2:
3057*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ff00000),%o7
3058*25c28e83SPiotr Jasiukajtis	cmp	%l2,%o7
3059*25c28e83SPiotr Jasiukajtis	bl,a,pt	%icc,1f			! if hx < 0x7ff00000
3060*25c28e83SPiotr Jasiukajtis! delay slot, annulled if branch not taken
3061*25c28e83SPiotr Jasiukajtis	mov	%l7,LIM_l6		! set biguns flag or
3062*25c28e83SPiotr Jasiukajtis	fsubd	%f20,%f20,%f20		! y = x - x
3063*25c28e83SPiotr Jasiukajtis	st	%f20,[%o2]
3064*25c28e83SPiotr Jasiukajtis	st	%f21,[%o2+4]
3065*25c28e83SPiotr Jasiukajtis1:
3066*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0		! count down and set condition codes
3067*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.ENDLOOP2		! counter <= 0: finish up
3068*25c28e83SPiotr Jasiukajtis! delay slot
3069*25c28e83SPiotr Jasiukajtis	nop
3070*25c28e83SPiotr Jasiukajtis	ld	[%i1],%l2		! first word of x as an integer
3071*25c28e83SPiotr Jasiukajtis	ld	[%i1],%f20		! same word again, into the FP register pair
3072*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%f21		! second word of x
3073*25c28e83SPiotr Jasiukajtis	andn	%l2,%i5,%l2		! hx &= ~0x80000000
3074*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.LOOP2
3075*25c28e83SPiotr Jasiukajtis! delay slot
3076*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
3077*25c28e83SPiotr Jasiukajtis
3078*25c28e83SPiotr Jasiukajtis	SET_SIZE(__vcos)
3079*25c28e83SPiotr Jasiukajtis
3080