1*25c28e83SPiotr Jasiukajtis/*
2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis *
4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis *
8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis *
13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis *
19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis */
21*25c28e83SPiotr Jasiukajtis/*
22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23*25c28e83SPiotr Jasiukajtis */
24*25c28e83SPiotr Jasiukajtis/*
25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms.
27*25c28e83SPiotr Jasiukajtis */
28*25c28e83SPiotr Jasiukajtis
29*25c28e83SPiotr Jasiukajtis	.file	"__vsqrtf_ultra3.S"
30*25c28e83SPiotr Jasiukajtis
31*25c28e83SPiotr Jasiukajtis#include "libm.h"
/*
 * Publish __vsqrtf as a weak symbol of type function and make it an
 * alias for __vsqrtf_ultra3, the routine defined in this file.
 */
32*25c28e83SPiotr Jasiukajtis	.weak	__vsqrtf
33*25c28e83SPiotr Jasiukajtis	.type	__vsqrtf,#function
34*25c28e83SPiotr Jasiukajtis	__vsqrtf = __vsqrtf_ultra3
35*25c28e83SPiotr Jasiukajtis
36*25c28e83SPiotr Jasiukajtis	RO_DATA
37*25c28e83SPiotr Jasiukajtis	.align	64
38*25c28e83SPiotr Jasiukajtis
/*
 * Table of five 64-bit constants, each written as two 32-bit words with
 * the high word first (see the combined values in the DC0..DC2 comments).
 * The entry code of __vsqrtf_ultra3 reads them with ldd at byte offsets
 * 0, 8, 16, 24 and 32 into K1, K2, DC0, DC1 and DC2 respectively, so the
 * order of the entries must not change.
 */
39*25c28e83SPiotr Jasiukajtis.CONST_TBL:
40*25c28e83SPiotr Jasiukajtis	.word	0x3fe00001, 0x80007e00	! K1  =  5.00000715259318464227e-01
41*25c28e83SPiotr Jasiukajtis	.word	0xbfc00003, 0xc0017a01	! K2  = -1.25000447037521686593e-01
42*25c28e83SPiotr Jasiukajtis	.word	0x000fffff, 0xffffffff	! DC0 = 0x000fffffffffffff
43*25c28e83SPiotr Jasiukajtis	.word	0x3ff00000, 0x00000000	! DC1 = 0x3ff0000000000000
44*25c28e83SPiotr Jasiukajtis	.word	0x7ffff000, 0x00000000	! DC2 = 0x7ffff00000000000
45*25c28e83SPiotr Jasiukajtis
/*
 * Register aliases used by __vsqrtf_ultra3.
 *
 * DC0, DC1, DC2, K1, K2  floating-point registers loaded from .CONST_TBL
 *                        at function entry.
 * TBL                    set at entry by PIC_SET to __vlibm_TBL_sqrtf.
 * stridex, stridey       %i2 and %i4 shifted left by 2 at entry.
 * _0x1ff0                mask for the table offset si0; built at entry as
 *                        0xff8 shifted left by 1.
 * counter                reloaded from the tmp_counter stack slot at .begin.
 * _0x00800000,           bounds of the range check on ax; both are set
 * _0x7f800000            with sethi at entry.  %l7 is also the register
 *                        passed to PIC_SETUP/PIC_SET, so the constant is
 *                        loaded only after those macros have been used.
 */
46*25c28e83SPiotr Jasiukajtis#define DC0		%f6
47*25c28e83SPiotr Jasiukajtis#define DC1		%f4
48*25c28e83SPiotr Jasiukajtis#define DC2		%f2
49*25c28e83SPiotr Jasiukajtis#define K2		%f38
50*25c28e83SPiotr Jasiukajtis#define K1		%f36
51*25c28e83SPiotr Jasiukajtis#define TBL		%l2
52*25c28e83SPiotr Jasiukajtis#define stridex		%l3
53*25c28e83SPiotr Jasiukajtis#define stridey		%l4
54*25c28e83SPiotr Jasiukajtis#define _0x1ff0		%l5
55*25c28e83SPiotr Jasiukajtis#define counter		%l6
56*25c28e83SPiotr Jasiukajtis#define _0x00800000	%l7
57*25c28e83SPiotr Jasiukajtis#define _0x7f800000	%o0
58*25c28e83SPiotr Jasiukajtis
/*
 * Scratch slots addressed relative to %fp.
 *
 * tmp_px       receives %i1 (stx) at entry and is read back at .begin.
 * tmp_counter  receives %i0 (st) at entry; .begin loads it into counter
 *              and then stores zero to the slot.
 * tmp0..tmp4   8-byte slots: the integer lexp0 value is written with stx
 *              and read back into a floating-point register with ldd
 *              (dtmp1 in the algorithm description).
 *
 * The lowest slot sits at STACK_BIAS-0x40, which matches tmps, the amount
 * added to the frame size in the save instruction of __vsqrtf_ultra3.
 */
59*25c28e83SPiotr Jasiukajtis#define tmp_px		STACK_BIAS-0x40
60*25c28e83SPiotr Jasiukajtis#define tmp_counter	STACK_BIAS-0x38
61*25c28e83SPiotr Jasiukajtis#define tmp0		STACK_BIAS-0x30
62*25c28e83SPiotr Jasiukajtis#define tmp1		STACK_BIAS-0x28
63*25c28e83SPiotr Jasiukajtis#define tmp2		STACK_BIAS-0x20
64*25c28e83SPiotr Jasiukajtis#define tmp3		STACK_BIAS-0x18
65*25c28e83SPiotr Jasiukajtis#define tmp4		STACK_BIAS-0x10
66*25c28e83SPiotr Jasiukajtis
67*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9
68*25c28e83SPiotr Jasiukajtis#define tmps		0x40
69*25c28e83SPiotr Jasiukajtis
70*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
71*25c28e83SPiotr Jasiukajtis!      !!!!!   algorithm   !!!!!
72*25c28e83SPiotr Jasiukajtis!
73*25c28e83SPiotr Jasiukajtis!  x0 = *px;
74*25c28e83SPiotr Jasiukajtis!  ax = *(int*)px;
75*25c28e83SPiotr Jasiukajtis!  px += stridex;
76*25c28e83SPiotr Jasiukajtis!
77*25c28e83SPiotr Jasiukajtis!  if( ax >= 0x7f800000 )
78*25c28e83SPiotr Jasiukajtis!  {
79*25c28e83SPiotr Jasiukajtis!    *py = sqrtf(x0);
80*25c28e83SPiotr Jasiukajtis!    py += stridey;
81*25c28e83SPiotr Jasiukajtis!    continue;
82*25c28e83SPiotr Jasiukajtis!  }
83*25c28e83SPiotr Jasiukajtis!  if( ax < 0x00800000 )
84*25c28e83SPiotr Jasiukajtis!  {
85*25c28e83SPiotr Jasiukajtis!    *py = sqrtf(x0);
86*25c28e83SPiotr Jasiukajtis!    py += stridey;
87*25c28e83SPiotr Jasiukajtis!    continue;
88*25c28e83SPiotr Jasiukajtis!  }
89*25c28e83SPiotr Jasiukajtis!
90*25c28e83SPiotr Jasiukajtis!  db0 = (double)x0;
91*25c28e83SPiotr Jasiukajtis!  iexp0 = ax >> 24;
92*25c28e83SPiotr Jasiukajtis!  iexp0 += 0x3c0;
93*25c28e83SPiotr Jasiukajtis!  lexp0 = (long long)iexp0 << 52;
94*25c28e83SPiotr Jasiukajtis!
95*25c28e83SPiotr Jasiukajtis!  db0 = vis_fand(db0,DC0);
96*25c28e83SPiotr Jasiukajtis!  db0 = vis_for(db0,DC1);
97*25c28e83SPiotr Jasiukajtis!  hi0 = vis_fand(db0,DC2);
98*25c28e83SPiotr Jasiukajtis!
99*25c28e83SPiotr Jasiukajtis!  ax >>= 11;
100*25c28e83SPiotr Jasiukajtis!  si0 = ax & 0x1ff0;
101*25c28e83SPiotr Jasiukajtis!  dtmp0 = ((double*)((char*)TBL + si0))[0];
102*25c28e83SPiotr Jasiukajtis!  xx0 = (db0 - hi0);
103*25c28e83SPiotr Jasiukajtis!  xx0 *= dtmp0;
104*25c28e83SPiotr Jasiukajtis!  dtmp0 = ((double*)((char*)TBL + si0))[1];
105*25c28e83SPiotr Jasiukajtis!  res0 = K2 * xx0;
106*25c28e83SPiotr Jasiukajtis!  res0 += K1;
107*25c28e83SPiotr Jasiukajtis!  res0 *= xx0;
108*25c28e83SPiotr Jasiukajtis!  res0 += DC1;
109*25c28e83SPiotr Jasiukajtis!  res0 = dtmp0 * res0;
110*25c28e83SPiotr Jasiukajtis!  dtmp1 = *((double*)&lexp0);
111*25c28e83SPiotr Jasiukajtis!  res0 *= dtmp1;
112*25c28e83SPiotr Jasiukajtis!  fres0 = (float)res0;
113*25c28e83SPiotr Jasiukajtis!  *py = fres0;
114*25c28e83SPiotr Jasiukajtis!  py += stridey;
115*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
116*25c28e83SPiotr Jasiukajtis
117*25c28e83SPiotr Jasiukajtis	ENTRY(__vsqrtf_ultra3)
118*25c28e83SPiotr Jasiukajtis	save	%sp,-SA(MINFRAME)-tmps,%sp
119*25c28e83SPiotr Jasiukajtis	PIC_SETUP(l7)
120*25c28e83SPiotr Jasiukajtis	PIC_SET(l7,.CONST_TBL,o2)
121*25c28e83SPiotr Jasiukajtis	PIC_SET(l7,__vlibm_TBL_sqrtf,l2)
122*25c28e83SPiotr Jasiukajtis
123*25c28e83SPiotr Jasiukajtis	st	%i0,[%fp+tmp_counter]
124*25c28e83SPiotr Jasiukajtis	sll	%i2,2,stridex
125*25c28e83SPiotr Jasiukajtis	or	%g0,0xff8,%l5
126*25c28e83SPiotr Jasiukajtis
127*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
128*25c28e83SPiotr Jasiukajtis	sll	%l5,1,_0x1ff0
129*25c28e83SPiotr Jasiukajtis
130*25c28e83SPiotr Jasiukajtis	ldd	[%o2],K1
131*25c28e83SPiotr Jasiukajtis	sll	%i4,2,stridey
132*25c28e83SPiotr Jasiukajtis
133*25c28e83SPiotr Jasiukajtis	ldd	[%o2+8],K2
134*25c28e83SPiotr Jasiukajtis	or	%g0,%i3,%g5
135*25c28e83SPiotr Jasiukajtis
136*25c28e83SPiotr Jasiukajtis	ldd	[%o2+16],DC0
137*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%o0
138*25c28e83SPiotr Jasiukajtis
139*25c28e83SPiotr Jasiukajtis	ldd	[%o2+24],DC1
140*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00800000),%l7
141*25c28e83SPiotr Jasiukajtis
142*25c28e83SPiotr Jasiukajtis	ldd	[%o2+32],DC2
143*25c28e83SPiotr Jasiukajtis
144*25c28e83SPiotr Jasiukajtis.begin:
145*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp_counter],counter
146*25c28e83SPiotr Jasiukajtis	ldx	[%fp+tmp_px],%i1
147*25c28e83SPiotr Jasiukajtis	st	%g0,[%fp+tmp_counter]
148*25c28e83SPiotr Jasiukajtis.begin1:
149*25c28e83SPiotr Jasiukajtis	cmp	counter,0
150*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.exit
151*25c28e83SPiotr Jasiukajtis
152*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%o2		! (2_0) ax = *(int*)px;
153*25c28e83SPiotr Jasiukajtis
154*25c28e83SPiotr Jasiukajtis	or	%g0,%i1,%o7
155*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f25		! (2_0) x0 = *px;
156*25c28e83SPiotr Jasiukajtis
157*25c28e83SPiotr Jasiukajtis	cmp	%o2,_0x7f800000		! (2_0) ax ? 0x7f800000
158*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.spec		! (2_0) if( ax >= 0x7f800000 )
159*25c28e83SPiotr Jasiukajtis	nop
160*25c28e83SPiotr Jasiukajtis
161*25c28e83SPiotr Jasiukajtis	cmp	%o2,_0x00800000		! (2_0) ax ? 0x00800000
162*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.spec		! (2_0) if( ax < 0x00800000 )
163*25c28e83SPiotr Jasiukajtis	nop
164*25c28e83SPiotr Jasiukajtis
165*25c28e83SPiotr Jasiukajtis	fstod	%f25,%f56		! (2_0) db0 = (double)x0;
166*25c28e83SPiotr Jasiukajtis
167*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o7]0x82,%o1	! (3_0) ax = *(int*)px;
168*25c28e83SPiotr Jasiukajtis
169*25c28e83SPiotr Jasiukajtis	sra	%o2,24,%l1		! (2_0) iexp0 = ax >> 24;
170*25c28e83SPiotr Jasiukajtis
171*25c28e83SPiotr Jasiukajtis	add	%o7,stridex,%i1		! px += stridex
172*25c28e83SPiotr Jasiukajtis	add	%l1,960,%l0		! (2_0) iexp0 += 0x3c0;
173*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o7]0x82,%f0	! (3_0) x0 = *px;
174*25c28e83SPiotr Jasiukajtis	fand	%f56,DC0,%f60		! (2_0) db0 = vis_fand(db0,DC0);
175*25c28e83SPiotr Jasiukajtis
176*25c28e83SPiotr Jasiukajtis	cmp	%o1,_0x7f800000		! (3_0) ax ? 0x7f800000
177*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update0		! (3_0) if( ax >= 0x7f800000 )
178*25c28e83SPiotr Jasiukajtis	nop
179*25c28e83SPiotr Jasiukajtis.cont0:
180*25c28e83SPiotr Jasiukajtis	sllx	%l0,52,%o3		! (2_0) lexp0 = (long long)iexp0 << 52;
181*25c28e83SPiotr Jasiukajtis
182*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i2		! (2_0) ax >>= 11;
183*25c28e83SPiotr Jasiukajtis	stx	%o3,[%fp+tmp0]		! (2_0) dtmp1 = *((double*)&lexp0);
184*25c28e83SPiotr Jasiukajtis	for	%f60,DC1,%f40		! (2_0) db0 = vis_for(db0,DC1);
185*25c28e83SPiotr Jasiukajtis
186*25c28e83SPiotr Jasiukajtis	cmp	%o1,_0x00800000		! (3_0) ax ? 0x00800000
187*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update1		! (3_0) if( ax < 0x00800000 )
188*25c28e83SPiotr Jasiukajtis	nop
189*25c28e83SPiotr Jasiukajtis.cont1:
190*25c28e83SPiotr Jasiukajtis	fstod	%f0,%f48		! (3_0) db0 = (double)x0;
191*25c28e83SPiotr Jasiukajtis
192*25c28e83SPiotr Jasiukajtis	and	%i2,_0x1ff0,%o3		! (2_0) si0 = ax & 0x1ff0;
193*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%o2	! (4_0) ax = *(int*)px;
194*25c28e83SPiotr Jasiukajtis
195*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
196*25c28e83SPiotr Jasiukajtis	add	%o3,TBL,%i2		! (2_0) (char*)TBL + si0
197*25c28e83SPiotr Jasiukajtis	fand	%f40,DC2,%f46		! (2_0) hi0 = vis_fand(db0,DC2);
198*25c28e83SPiotr Jasiukajtis
199*25c28e83SPiotr Jasiukajtis	sra	%o1,24,%o4		! (3_0) iexp0 = ax >> 24;
200*25c28e83SPiotr Jasiukajtis
201*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f13		! (4_0) x0 = *px;
202*25c28e83SPiotr Jasiukajtis	fand	%f48,DC0,%f58		! (3_0) db0 = vis_fand(db0,DC0);
203*25c28e83SPiotr Jasiukajtis
204*25c28e83SPiotr Jasiukajtis	add	%o4,960,%i0		! (3_0) iexp0 += 0x3c0;
205*25c28e83SPiotr Jasiukajtis
206*25c28e83SPiotr Jasiukajtis	cmp	%o2,_0x7f800000		! (4_1) ax ? 0x7f800000
207*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update2		! (4_1) if( ax >= 0x7f800000 )
208*25c28e83SPiotr Jasiukajtis	nop
209*25c28e83SPiotr Jasiukajtis.cont2:
210*25c28e83SPiotr Jasiukajtis	fsubd	%f40,%f46,%f44		! (2_1) xx0 = (db0 - hi0);
211*25c28e83SPiotr Jasiukajtis	sllx	%i0,52,%g1		! (3_1) lexp0 = (long long)iexp0 << 52;
212*25c28e83SPiotr Jasiukajtis	ldd	[%i2],%f40		! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
213*25c28e83SPiotr Jasiukajtis
214*25c28e83SPiotr Jasiukajtis	sra	%o1,11,%l0		! (3_1) ax >>= 11;
215*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+tmp1]		! (3_1) dtmp1 = *((double*)&lexp0);
216*25c28e83SPiotr Jasiukajtis	for	%f58,DC1,%f48		! (3_1) db0 = vis_for(db0,DC1);
217*25c28e83SPiotr Jasiukajtis
218*25c28e83SPiotr Jasiukajtis	cmp	%o2,_0x00800000		! (4_1) ax ? 0x00800000
219*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update3		! (4_1) if( ax < 0x00800000 )
220*25c28e83SPiotr Jasiukajtis	nop
221*25c28e83SPiotr Jasiukajtis.cont3:
222*25c28e83SPiotr Jasiukajtis	fstod	%f13,%f50		! (4_1) db0 = (double)x0;
223*25c28e83SPiotr Jasiukajtis
224*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f40,%f46		! (2_1) xx0 *= dtmp0;
225*25c28e83SPiotr Jasiukajtis	and	%l0,_0x1ff0,%i0		! (3_1) si0 = ax & 0x1ff0;
226*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%l1	! (0_0) ax = *(int*)px;
227*25c28e83SPiotr Jasiukajtis
228*25c28e83SPiotr Jasiukajtis	add	%i0,TBL,%l0		! (3_1) (char*)TBL + si0
229*25c28e83SPiotr Jasiukajtis	fand	%f48,DC2,%f62		! (3_1) hi0 = vis_fand(db0,DC2);
230*25c28e83SPiotr Jasiukajtis
231*25c28e83SPiotr Jasiukajtis	sra	%o2,24,%o7		! (4_1) iexp0 = ax >> 24;
232*25c28e83SPiotr Jasiukajtis
233*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%o4		! px += stridex
234*25c28e83SPiotr Jasiukajtis	add	%o7,960,%o7		! (4_1) iexp0 += 0x3c0;
235*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%f17	! (0_0) x0 = *px;
236*25c28e83SPiotr Jasiukajtis	fand	%f50,DC0,%f54		! (4_1) db0 = vis_fand(db0,DC0);
237*25c28e83SPiotr Jasiukajtis
238*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f46,%f52		! (2_1) res0 = K2 * xx0;
239*25c28e83SPiotr Jasiukajtis	cmp	%l1,_0x7f800000		! (0_0) ax ? 0x7f800000
240*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update4		! (0_0) if( ax >= 0x7f800000 )
241*25c28e83SPiotr Jasiukajtis	fsubd	%f48,%f62,%f42		! (3_1) xx0 = (db0 - hi0);
242*25c28e83SPiotr Jasiukajtis.cont4:
243*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o1		! (4_1) lexp0 = (long long)iexp0 << 52;
244*25c28e83SPiotr Jasiukajtis	ldd	[%i0+TBL],%f40		! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
245*25c28e83SPiotr Jasiukajtis
246*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i5		! (4_1) ax >>= 11;
247*25c28e83SPiotr Jasiukajtis	stx	%o1,[%fp+tmp2]		! (4_1) dtmp1 = *((double*)&lexp0);
248*25c28e83SPiotr Jasiukajtis	for	%f54,DC1,%f34		! (4_1) db0 = vis_for(db0,DC1);
249*25c28e83SPiotr Jasiukajtis
250*25c28e83SPiotr Jasiukajtis	cmp	%l1,_0x00800000		! (0_0) ax ? 0x00800000
251*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update5		! (0_0) if( ax < 0x00800000 )
252*25c28e83SPiotr Jasiukajtis	nop
253*25c28e83SPiotr Jasiukajtis.cont5:
254*25c28e83SPiotr Jasiukajtis	fstod	%f17,%f56		! (0_0) db0 = (double)x0;
255*25c28e83SPiotr Jasiukajtis
256*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f40,%f42		! (3_1) xx0 *= dtmp0;
257*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o4]0x82,%i0	! (1_0) ax = *(int*)px;
258*25c28e83SPiotr Jasiukajtis	faddd	%f52,K1,%f52		! (2_1) res0 += K1;
259*25c28e83SPiotr Jasiukajtis
260*25c28e83SPiotr Jasiukajtis	sra	%l1,24,%g1		! (0_0) iexp0 = ax >> 24;
261*25c28e83SPiotr Jasiukajtis	and	%i5,_0x1ff0,%i5		! (4_1) si0 = ax & 0x1ff0;
262*25c28e83SPiotr Jasiukajtis	fand	%f34,DC2,%f62		! (4_1) hi0 = vis_fand(db0,DC2);
263*25c28e83SPiotr Jasiukajtis
264*25c28e83SPiotr Jasiukajtis	add	%o4,stridex,%i1		! px += stridex
265*25c28e83SPiotr Jasiukajtis
266*25c28e83SPiotr Jasiukajtis	add	%g1,960,%o5		! (0_0) iexp0 += 0x3c0;
267*25c28e83SPiotr Jasiukajtis	add	%i5,TBL,%i3		! (4_1) (char*)TBL + si0
268*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o4]0x82,%f21	! (1_0) x0 = *px;
269*25c28e83SPiotr Jasiukajtis	fand	%f56,DC0,%f32		! (0_0) db0 = vis_fand(db0,DC0);
270*25c28e83SPiotr Jasiukajtis
271*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f42,%f50		! (3_1) res0 = K2 * xx0;
272*25c28e83SPiotr Jasiukajtis	cmp	%i0,_0x7f800000		! (1_0) ax ? 0x7f800000
273*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update6		! (1_0) if( ax >= 0x7f800000 )
274*25c28e83SPiotr Jasiukajtis	fsubd	%f34,%f62,%f54		! (4_1) xx0 = (db0 - hi0);
275*25c28e83SPiotr Jasiukajtis.cont6:
276*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f46,%f52		! (2_1) res0 *= xx0;
277*25c28e83SPiotr Jasiukajtis	sllx	%o5,52,%o7		! (0_0) lexp0 = (long long)iexp0 << 52;
278*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%i5],%f62		! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
279*25c28e83SPiotr Jasiukajtis
280*25c28e83SPiotr Jasiukajtis	sra	%l1,11,%i4		! (0_0) ax >>= 11;
281*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp3]		! (0_0) dtmp1 = *((double*)&lexp0);
282*25c28e83SPiotr Jasiukajtis	for	%f32,DC1,%f48		! (0_0) db0 = vis_for(db0,DC1);
283*25c28e83SPiotr Jasiukajtis
284*25c28e83SPiotr Jasiukajtis	cmp	%i0,_0x00800000		! (1_0) ax ? 0x00800000
285*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update7		! (1_0) if( ax < 0x00800000 )
286*25c28e83SPiotr Jasiukajtis	nop
287*25c28e83SPiotr Jasiukajtis.cont7:
288*25c28e83SPiotr Jasiukajtis	fstod	%f21,%f56		! (1_0) db0 = (double)x0;
289*25c28e83SPiotr Jasiukajtis
290*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f62,%f46		! (4_1) xx0 *= dtmp0;
291*25c28e83SPiotr Jasiukajtis	and	%i4,_0x1ff0,%g1		! (0_0) si0 = ax & 0x1ff0;
292*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%o2	! (2_0) ax = *(int*)px;
293*25c28e83SPiotr Jasiukajtis	faddd	%f50,K1,%f62		! (3_1) res0 += K1;
294*25c28e83SPiotr Jasiukajtis
295*25c28e83SPiotr Jasiukajtis	add	%g1,TBL,%i5		! (0_0) (double*)((char*)TBL + si0
296*25c28e83SPiotr Jasiukajtis	fand	%f48,DC2,%f32		! (0_0) hi0 = vis_fand(db0,DC2);
297*25c28e83SPiotr Jasiukajtis
298*25c28e83SPiotr Jasiukajtis	sra	%i0,24,%o4		! (1_0) iexp0 = ax >> 24;
299*25c28e83SPiotr Jasiukajtis	ldd	[%i2+8],%f60		! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
300*25c28e83SPiotr Jasiukajtis	faddd	%f52,DC1,%f58		! (2_1) res0 += DC1;
301*25c28e83SPiotr Jasiukajtis
302*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%o7		! px += stridex
303*25c28e83SPiotr Jasiukajtis	add	%o4,960,%i2		! (1_0) iexp0 += 0x3c0;
304*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%f25	! (2_0) x0 = *px;
305*25c28e83SPiotr Jasiukajtis	fand	%f56,DC0,%f34		! (1_0) db0 = vis_fand(db0,DC0);
306*25c28e83SPiotr Jasiukajtis
307*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f46,%f50		! (4_1) res0 = K2 * xx0;
308*25c28e83SPiotr Jasiukajtis	cmp	%o2,_0x7f800000		! (2_0) ax ? 0x7f800000
309*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update8		! (2_0) if( ax >= 0x7f800000 )
310*25c28e83SPiotr Jasiukajtis	fsubd	%f48,%f32,%f52		! (0_0) xx0 = (db0 - hi0);
311*25c28e83SPiotr Jasiukajtis.cont8:
312*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f42,%f54		! (3_1) res0 *= xx0;
313*25c28e83SPiotr Jasiukajtis	sllx	%i2,52,%o4		! (1_0) lexp0 = (long long)iexp0 << 52;
314*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%g1],%f32		! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
315*25c28e83SPiotr Jasiukajtis
316*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f58,%f60		! (2_1) res0 = dtmp0 * res0;
317*25c28e83SPiotr Jasiukajtis	sra	%i0,11,%g1		! (1_0) ax >>= 11;
318*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+tmp4]		! (1_0) dtmp1 = *((double*)&lexp0);
319*25c28e83SPiotr Jasiukajtis	for	%f34,DC1,%f48		! (1_0) db0 = vis_for(db0,DC1);
320*25c28e83SPiotr Jasiukajtis
321*25c28e83SPiotr Jasiukajtis	cmp	%o2,_0x00800000		! (2_0) ax ? 0x00800000
322*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update9		! (2_0) if( ax < 0x00800000 )
323*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp0],%f40		! (2_1) dtmp1 = *((double*)&lexp0);
324*25c28e83SPiotr Jasiukajtis	fstod	%f25,%f56		! (2_0) db0 = (double)x0;
325*25c28e83SPiotr Jasiukajtis.cont9:
326*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f32,%f42		! (0_0) xx0 *= dtmp0;
327*25c28e83SPiotr Jasiukajtis	and	%g1,_0x1ff0,%o5		! (1_0) si0 = ax & 0x1ff0;
328*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o7]0x82,%o1	! (3_0) ax = *(int*)px;
329*25c28e83SPiotr Jasiukajtis	faddd	%f50,K1,%f34		! (4_1) res0 += K1;
330*25c28e83SPiotr Jasiukajtis
331*25c28e83SPiotr Jasiukajtis	add	%o5,TBL,%i4		! (1_0) (char*)TBL + si0
332*25c28e83SPiotr Jasiukajtis	fand	%f48,DC2,%f62		! (1_0) hi0 = vis_fand(db0,DC2);
333*25c28e83SPiotr Jasiukajtis
334*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f40,%f32		! (2_1) res0 *= dtmp1;
335*25c28e83SPiotr Jasiukajtis	sra	%o2,24,%l1		! (2_0) iexp0 = ax >> 24;
336*25c28e83SPiotr Jasiukajtis	ldd	[%l0+8],%f40		! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
337*25c28e83SPiotr Jasiukajtis	faddd	%f54,DC1,%f58		! (3_1) res0 += DC1;
338*25c28e83SPiotr Jasiukajtis
339*25c28e83SPiotr Jasiukajtis	add	%o7,stridex,%i1		! px += stridex
340*25c28e83SPiotr Jasiukajtis	add	%l1,960,%l0		! (2_0) iexp0 += 0x3c0;
341*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o7]0x82,%f0	! (3_0) x0 = *px;
342*25c28e83SPiotr Jasiukajtis	fand	%f56,DC0,%f60		! (2_0) db0 = vis_fand(db0,DC0);
343*25c28e83SPiotr Jasiukajtis
344*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f42,%f50		! (0_0) res0 = K2 * xx0;
345*25c28e83SPiotr Jasiukajtis	cmp	%o1,_0x7f800000		! (3_0) ax ? 0x7f800000
346*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update10		! (3_0) if( ax >= 0x7f800000 )
347*25c28e83SPiotr Jasiukajtis	fsubd	%f48,%f62,%f54		! (1_0) xx0 = (db0 - hi0);
348*25c28e83SPiotr Jasiukajtis.cont10:
349*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f46,%f52		! (4_1) res0 *= xx0;
350*25c28e83SPiotr Jasiukajtis	sllx	%l0,52,%o3		! (2_0) lexp0 = (long long)iexp0 << 52;
351*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%o5],%f56		! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
352*25c28e83SPiotr Jasiukajtis
353*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f58,%f34		! (3_1) res0 = dtmp0 * res0;
354*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i2		! (2_0) ax >>= 11;
355*25c28e83SPiotr Jasiukajtis	stx	%o3,[%fp+tmp0]		! (2_0) dtmp1 = *((double*)&lexp0);
356*25c28e83SPiotr Jasiukajtis	for	%f60,DC1,%f40		! (2_0) db0 = vis_for(db0,DC1);
357*25c28e83SPiotr Jasiukajtis
358*25c28e83SPiotr Jasiukajtis	cmp	%o1,_0x00800000		! (3_0) ax ? 0x00800000
359*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update11		! (3_0) if( ax < 0x00800000 )
360*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp1],%f62		! (3_1) dtmp1 = *((double*)&lexp0);
361*25c28e83SPiotr Jasiukajtis	fstod	%f0,%f48		! (3_0) db0 = (double)x0;
362*25c28e83SPiotr Jasiukajtis.cont11:
363*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f56,%f30		! (1_0) xx0 *= dtmp0;
364*25c28e83SPiotr Jasiukajtis	and	%i2,_0x1ff0,%o3		! (2_0) si0 = ax & 0x1ff0;
365*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%o2	! (4_0) ax = *(int*)px;
366*25c28e83SPiotr Jasiukajtis	faddd	%f50,K1,%f56		! (0_0) res0 += K1;
367*25c28e83SPiotr Jasiukajtis
368*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
369*25c28e83SPiotr Jasiukajtis	add	%o3,TBL,%i2		! (2_0) (char*)TBL + si0
370*25c28e83SPiotr Jasiukajtis	fand	%f40,DC2,%f46		! (2_0) hi0 = vis_fand(db0,DC2);
371*25c28e83SPiotr Jasiukajtis
372*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f62,%f28		! (3_1) res0 *= dtmp1;
373*25c28e83SPiotr Jasiukajtis	sra	%o1,24,%o4		! (3_0) iexp0 = ax >> 24;
374*25c28e83SPiotr Jasiukajtis	ldd	[%i3+8],%f50		! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
375*25c28e83SPiotr Jasiukajtis	faddd	%f52,DC1,%f54		! (4_1) res0 += DC1;
376*25c28e83SPiotr Jasiukajtis
377*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f13		! (4_0) x0 = *px;
378*25c28e83SPiotr Jasiukajtis	fand	%f48,DC0,%f58		! (3_0) db0 = vis_fand(db0,DC0);
379*25c28e83SPiotr Jasiukajtis
380*25c28e83SPiotr Jasiukajtis	or	%g0,%g5,%i3
381*25c28e83SPiotr Jasiukajtis	cmp	counter,5
382*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.tail
383*25c28e83SPiotr Jasiukajtis	add	%o4,960,%g5		! (3_0) iexp0 += 0x3c0;
384*25c28e83SPiotr Jasiukajtis
385*25c28e83SPiotr Jasiukajtis	ba	.main_loop
386*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter	! counter
387*25c28e83SPiotr Jasiukajtis
388*25c28e83SPiotr Jasiukajtis	.align	16
389*25c28e83SPiotr Jasiukajtis.main_loop:
390*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f30,%f60		! (1_1) res0 = K2 * xx0;
391*25c28e83SPiotr Jasiukajtis	cmp	%o2,_0x7f800000		! (4_1) ax ? 0x7f800000
392*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update12		! (4_1) if( ax >= 0x7f800000 )
393*25c28e83SPiotr Jasiukajtis	fsubd	%f40,%f46,%f44		! (2_1) xx0 = (db0 - hi0);
394*25c28e83SPiotr Jasiukajtis.cont12:
395*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f42,%f52		! (0_1) res0 *= xx0;
396*25c28e83SPiotr Jasiukajtis	sllx	%g5,52,%g5		! (3_1) lexp0 = (long long)iexp0 << 52;
397*25c28e83SPiotr Jasiukajtis	ldd	[%i2],%f40		! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
398*25c28e83SPiotr Jasiukajtis	fdtos	%f32,%f15		! (2_2) fres0 = (float)res0;
399*25c28e83SPiotr Jasiukajtis
400*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f54,%f42		! (4_2) res0 = dtmp0 * res0;
401*25c28e83SPiotr Jasiukajtis	sra	%o1,11,%l0		! (3_1) ax >>= 11;
402*25c28e83SPiotr Jasiukajtis	stx	%g5,[%fp+tmp1]		! (3_1) dtmp1 = *((double*)&lexp0);
403*25c28e83SPiotr Jasiukajtis	for	%f58,DC1,%f48		! (3_1) db0 = vis_for(db0,DC1);
404*25c28e83SPiotr Jasiukajtis
405*25c28e83SPiotr Jasiukajtis	cmp	%o2,_0x00800000		! (4_1) ax ? 0x00800000
406*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update13		! (4_1) if( ax < 0x00800000 )
407*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp2],%f56		! (4_2) dtmp1 = *((double*)&lexp0);
408*25c28e83SPiotr Jasiukajtis	fstod	%f13,%f50		! (4_1) db0 = (double)x0;
409*25c28e83SPiotr Jasiukajtis.cont13:
410*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f40,%f46		! (2_1) xx0 *= dtmp0;
411*25c28e83SPiotr Jasiukajtis	and	%l0,_0x1ff0,%i0		! (3_1) si0 = ax & 0x1ff0;
412*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%l1	! (0_0) ax = *(int*)px;
413*25c28e83SPiotr Jasiukajtis	faddd	%f60,K1,%f32		! (1_1) res0 += K1;
414*25c28e83SPiotr Jasiukajtis
415*25c28e83SPiotr Jasiukajtis	add	%i0,TBL,%l0		! (3_1) (char*)TBL + si0
416*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%o3		! py += stridey
417*25c28e83SPiotr Jasiukajtis	st	%f15,[%i3]		! (2_2) *py = fres0;
418*25c28e83SPiotr Jasiukajtis	fand	%f48,DC2,%f62		! (3_1) hi0 = vis_fand(db0,DC2);
419*25c28e83SPiotr Jasiukajtis
420*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f56,%f44		! (4_2) res0 *= dtmp1;
421*25c28e83SPiotr Jasiukajtis	sra	%o2,24,%o7		! (4_1) iexp0 = ax >> 24;
422*25c28e83SPiotr Jasiukajtis	ldd	[%i5+8],%f58		! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
423*25c28e83SPiotr Jasiukajtis	faddd	%f52,DC1,%f34		! (0_1) res0 += DC1;
424*25c28e83SPiotr Jasiukajtis
425*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%o4		! px += stridex
426*25c28e83SPiotr Jasiukajtis	add	%o7,960,%o7		! (4_1) iexp0 += 0x3c0;
427*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%f17	! (0_0) x0 = *px;
428*25c28e83SPiotr Jasiukajtis	fand	%f50,DC0,%f54		! (4_1) db0 = vis_fand(db0,DC0);
429*25c28e83SPiotr Jasiukajtis
430*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f46,%f52		! (2_1) res0 = K2 * xx0;
431*25c28e83SPiotr Jasiukajtis	cmp	%l1,_0x7f800000		! (0_0) ax ? 0x7f800000
432*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update14		! (0_0) if( ax >= 0x7f800000 )
433*25c28e83SPiotr Jasiukajtis	fsubd	%f48,%f62,%f42		! (3_1) xx0 = (db0 - hi0);
434*25c28e83SPiotr Jasiukajtis.cont14:
435*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f30,%f48		! (1_1) res0 *= xx0;
436*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o1		! (4_1) lexp0 = (long long)iexp0 << 52;
437*25c28e83SPiotr Jasiukajtis	ldd	[%i0+TBL],%f40		! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
438*25c28e83SPiotr Jasiukajtis	fdtos	%f28,%f19		! (3_2) fres0 = (float)res0;
439*25c28e83SPiotr Jasiukajtis
440*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f34,%f32		! (0_1) res0 = dtmp0 * res0;
441*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i5		! (4_1) ax >>= 11;
442*25c28e83SPiotr Jasiukajtis	stx	%o1,[%fp+tmp2]		! (4_1) dtmp1 = *((double*)&lexp0);
443*25c28e83SPiotr Jasiukajtis	for	%f54,DC1,%f34		! (4_1) db0 = vis_for(db0,DC1);
444*25c28e83SPiotr Jasiukajtis
445*25c28e83SPiotr Jasiukajtis	cmp	%l1,_0x00800000		! (0_0) ax ? 0x00800000
446*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update15		! (0_0) if( ax < 0x00800000 )
447*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp3],%f60		! (0_1) dtmp1 = *((double*)&lexp0);
448*25c28e83SPiotr Jasiukajtis	fstod	%f17,%f56		! (0_0) db0 = (double)x0;
449*25c28e83SPiotr Jasiukajtis.cont15:
450*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f40,%f42		! (3_1) xx0 *= dtmp0;
451*25c28e83SPiotr Jasiukajtis	add	%o3,stridey,%g5		! py += stridey
452*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o4]0x82,%i0	! (1_0) ax = *(int*)px;
453*25c28e83SPiotr Jasiukajtis	faddd	%f52,K1,%f52		! (2_1) res0 += K1;
454*25c28e83SPiotr Jasiukajtis
455*25c28e83SPiotr Jasiukajtis	sra	%l1,24,%g1		! (0_0) iexp0 = ax >> 24;
456*25c28e83SPiotr Jasiukajtis	and	%i5,_0x1ff0,%i5		! (4_1) si0 = ax & 0x1ff0;
457*25c28e83SPiotr Jasiukajtis	st	%f19,[%o3]		! (3_2) *py = fres0;
458*25c28e83SPiotr Jasiukajtis	fand	%f34,DC2,%f62		! (4_1) hi0 = vis_fand(db0,DC2);
459*25c28e83SPiotr Jasiukajtis
460*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f60,%f40		! (0_1) res0 *= dtmp1;
461*25c28e83SPiotr Jasiukajtis	add	%o4,stridex,%i1		! px += stridex
462*25c28e83SPiotr Jasiukajtis	ldd	[%i4+8],%f60		! (1_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
463*25c28e83SPiotr Jasiukajtis	faddd	%f48,DC1,%f58		! (1_1) res0 += DC1;
464*25c28e83SPiotr Jasiukajtis
465*25c28e83SPiotr Jasiukajtis	add	%g1,960,%o5		! (0_0) iexp0 += 0x3c0;
466*25c28e83SPiotr Jasiukajtis	add	%i5,TBL,%i3		! (4_1) (char*)TBL + si0
467*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o4]0x82,%f21	! (1_0) x0 = *px;
468*25c28e83SPiotr Jasiukajtis	fand	%f56,DC0,%f32		! (0_0) db0 = vis_fand(db0,DC0);
469*25c28e83SPiotr Jasiukajtis
470*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f42,%f50		! (3_1) res0 = K2 * xx0;
471*25c28e83SPiotr Jasiukajtis	cmp	%i0,_0x7f800000		! (1_0) ax ? 0x7f800000
472*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update16		! (1_0) if( ax >= 0x7f800000 )
473*25c28e83SPiotr Jasiukajtis	fsubd	%f34,%f62,%f54		! (4_1) xx0 = (db0 - hi0);
474*25c28e83SPiotr Jasiukajtis.cont16:
475*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f46,%f52		! (2_1) res0 *= xx0;
476*25c28e83SPiotr Jasiukajtis	sllx	%o5,52,%o7		! (0_0) lexp0 = (long long)iexp0 << 52;
477*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%i5],%f62		! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
478*25c28e83SPiotr Jasiukajtis	fdtos	%f44,%f23		! (4_2) fres0 = (float)res0;
479*25c28e83SPiotr Jasiukajtis
480*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f58,%f44		! (1_1) res0 = dtmp0 * res0;
481*25c28e83SPiotr Jasiukajtis	sra	%l1,11,%i4		! (0_0) ax >>= 11;
482*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp3]		! (0_0) dtmp1 = *((double*)&lexp0);
483*25c28e83SPiotr Jasiukajtis	for	%f32,DC1,%f48		! (0_0) db0 = vis_for(db0,DC1);
484*25c28e83SPiotr Jasiukajtis
485*25c28e83SPiotr Jasiukajtis	cmp	%i0,_0x00800000		! (1_0) ax ? 0x00800000
486*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update17		! (1_0) if( ax < 0x00800000 )
487*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp4],%f34		! (1_1) dtmp1 = *((double*)&lexp0);
488*25c28e83SPiotr Jasiukajtis	fstod	%f21,%f56		! (1_0) db0 = (double)x0;
489*25c28e83SPiotr Jasiukajtis.cont17:
490*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f62,%f46		! (4_1) xx0 *= dtmp0;
491*25c28e83SPiotr Jasiukajtis	and	%i4,_0x1ff0,%g1		! (0_0) si0 = ax & 0x1ff0;
492*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%o2	! (2_0) ax = *(int*)px;
493*25c28e83SPiotr Jasiukajtis	faddd	%f50,K1,%f62		! (3_1) res0 += K1;
494*25c28e83SPiotr Jasiukajtis
495*25c28e83SPiotr Jasiukajtis	add	%g1,TBL,%i5		! (0_0) (double*)((char*)TBL + si0
496*25c28e83SPiotr Jasiukajtis	add	%g5,stridey,%g5		! py += stridey
497*25c28e83SPiotr Jasiukajtis	st	%f23,[stridey+%o3]	! (4_2) *py = fres0;
498*25c28e83SPiotr Jasiukajtis	fand	%f48,DC2,%f32		! (0_0) hi0 = vis_fand(db0,DC2);
499*25c28e83SPiotr Jasiukajtis
500*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f34,%f44		! (1_1) res0 *= dtmp1;
501*25c28e83SPiotr Jasiukajtis	sra	%i0,24,%o4		! (1_0) iexp0 = ax >> 24;
502*25c28e83SPiotr Jasiukajtis	ldd	[%i2+8],%f60		! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
503*25c28e83SPiotr Jasiukajtis	faddd	%f52,DC1,%f58		! (2_1) res0 += DC1;
504*25c28e83SPiotr Jasiukajtis
505*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%o7		! px += stridex
506*25c28e83SPiotr Jasiukajtis	add	%o4,960,%i2		! (1_0) iexp0 += 0x3c0;
507*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%f25	! (2_0) x0 = *px;
508*25c28e83SPiotr Jasiukajtis	fand	%f56,DC0,%f34		! (1_0) db0 = vis_fand(db0,DC0);
509*25c28e83SPiotr Jasiukajtis
510*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f46,%f50		! (4_1) res0 = K2 * xx0;
511*25c28e83SPiotr Jasiukajtis	cmp	%o2,_0x7f800000		! (2_0) ax ? 0x7f800000
512*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update18		! (2_0) if( ax >= 0x7f800000 )
513*25c28e83SPiotr Jasiukajtis	fsubd	%f48,%f32,%f52		! (0_0) xx0 = (db0 - hi0);
514*25c28e83SPiotr Jasiukajtis.cont18:
515*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f42,%f54		! (3_1) res0 *= xx0;
516*25c28e83SPiotr Jasiukajtis	sllx	%i2,52,%o4		! (1_0) lexp0 = (long long)iexp0 << 52;
517*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%g1],%f32		! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
518*25c28e83SPiotr Jasiukajtis	fdtos	%f40,%f27		! (0_1) fres0 = (float)res0;
519*25c28e83SPiotr Jasiukajtis
520*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f58,%f60		! (2_1) res0 = dtmp0 * res0;
521*25c28e83SPiotr Jasiukajtis	sra	%i0,11,%g1		! (1_0) ax >>= 11;
522*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+tmp4]		! (1_0) dtmp1 = *((double*)&lexp0);
523*25c28e83SPiotr Jasiukajtis	for	%f34,DC1,%f48		! (1_0) db0 = vis_for(db0,DC1);
524*25c28e83SPiotr Jasiukajtis
525*25c28e83SPiotr Jasiukajtis	cmp	%o2,_0x00800000		! (2_0) ax ? 0x00800000
526*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update19		! (2_0) if( ax < 0x00800000 )
527*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp0],%f40		! (2_1) dtmp1 = *((double*)&lexp0);
528*25c28e83SPiotr Jasiukajtis	fstod	%f25,%f56		! (2_0) db0 = (double)x0;
529*25c28e83SPiotr Jasiukajtis.cont19:
530*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f32,%f42		! (0_0) xx0 *= dtmp0;
531*25c28e83SPiotr Jasiukajtis	and	%g1,_0x1ff0,%o5		! (1_0) si0 = ax & 0x1ff0;
532*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o7]0x82,%o1	! (3_0) ax = *(int*)px;
533*25c28e83SPiotr Jasiukajtis	faddd	%f50,K1,%f34		! (4_1) res0 += K1;
534*25c28e83SPiotr Jasiukajtis
535*25c28e83SPiotr Jasiukajtis	add	%o5,TBL,%i4		! (1_0) (char*)TBL + si0
536*25c28e83SPiotr Jasiukajtis	add	%g5,stridey,%g1		! py += stridey
537*25c28e83SPiotr Jasiukajtis	st	%f27,[%g5]		! (0_1) *py = fres0;
538*25c28e83SPiotr Jasiukajtis	fand	%f48,DC2,%f62		! (1_0) hi0 = vis_fand(db0,DC2);
539*25c28e83SPiotr Jasiukajtis
540*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f40,%f32		! (2_1) res0 *= dtmp1;
541*25c28e83SPiotr Jasiukajtis	sra	%o2,24,%l1		! (2_0) iexp0 = ax >> 24;
542*25c28e83SPiotr Jasiukajtis	ldd	[%l0+8],%f40		! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
543*25c28e83SPiotr Jasiukajtis	faddd	%f54,DC1,%f58		! (3_1) res0 += DC1;
544*25c28e83SPiotr Jasiukajtis
545*25c28e83SPiotr Jasiukajtis	add	%o7,stridex,%i1		! px += stridex
546*25c28e83SPiotr Jasiukajtis	add	%l1,960,%l0		! (2_0) iexp0 += 0x3c0;
547*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o7]0x82,%f0	! (3_0) x0 = *px;
548*25c28e83SPiotr Jasiukajtis	fand	%f56,DC0,%f60		! (2_0) db0 = vis_fand(db0,DC0);
549*25c28e83SPiotr Jasiukajtis
550*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f42,%f50		! (0_0) res0 = K2 * xx0;
551*25c28e83SPiotr Jasiukajtis	cmp	%o1,_0x7f800000		! (3_0) ax ? 0x7f800000
552*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update20		! (3_0) if( ax >= 0x7f800000 )
553*25c28e83SPiotr Jasiukajtis	fsubd	%f48,%f62,%f54		! (1_0) xx0 = (db0 - hi0);
554*25c28e83SPiotr Jasiukajtis.cont20:
555*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f46,%f52		! (4_1) res0 *= xx0;
556*25c28e83SPiotr Jasiukajtis	sllx	%l0,52,%o3		! (2_0) lexp0 = (long long)iexp0 << 52;
557*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%o5],%f56		! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
558*25c28e83SPiotr Jasiukajtis	fdtos	%f44,%f8		! (1_1) fres0 = (float)res0;
559*25c28e83SPiotr Jasiukajtis
560*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f58,%f34		! (3_1) res0 = dtmp0 * res0;
561*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i2		! (2_0) ax >>= 11;
562*25c28e83SPiotr Jasiukajtis	stx	%o3,[%fp+tmp0]		! (2_0) dtmp1 = *((double*)&lexp0);
563*25c28e83SPiotr Jasiukajtis	for	%f60,DC1,%f40		! (2_0) db0 = vis_for(db0,DC1);
564*25c28e83SPiotr Jasiukajtis
565*25c28e83SPiotr Jasiukajtis	cmp	%o1,_0x00800000		! (3_0) ax ? 0x00800000
566*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update21		! (3_0) if( ax < 0x00800000 )
567*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp1],%f62		! (3_1) dtmp1 = *((double*)&lexp0);
568*25c28e83SPiotr Jasiukajtis	fstod	%f0,%f48		! (3_0) db0 = (double)x0;
569*25c28e83SPiotr Jasiukajtis.cont21:
570*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f56,%f30		! (1_0) xx0 *= dtmp0;
571*25c28e83SPiotr Jasiukajtis	and	%i2,_0x1ff0,%o3		! (2_0) si0 = ax & 0x1ff0;
572*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%o2	! (4_0) ax = *(int*)px;
573*25c28e83SPiotr Jasiukajtis	faddd	%f50,K1,%f56		! (0_0) res0 += K1;
574*25c28e83SPiotr Jasiukajtis
575*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
576*25c28e83SPiotr Jasiukajtis	add	%o3,TBL,%i2		! (2_0) (char*)TBL + si0
577*25c28e83SPiotr Jasiukajtis	st	%f8,[stridey+%g5]	! (1_1) *py = fres0;
578*25c28e83SPiotr Jasiukajtis	fand	%f40,DC2,%f46		! (2_0) hi0 = vis_fand(db0,DC2);
579*25c28e83SPiotr Jasiukajtis
580*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f62,%f28		! (3_1) res0 *= dtmp1;
581*25c28e83SPiotr Jasiukajtis	sra	%o1,24,%o4		! (3_0) iexp0 = ax >> 24;
582*25c28e83SPiotr Jasiukajtis	ldd	[%i3+8],%f50		! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
583*25c28e83SPiotr Jasiukajtis	faddd	%f52,DC1,%f54		! (4_1) res0 += DC1;
584*25c28e83SPiotr Jasiukajtis
585*25c28e83SPiotr Jasiukajtis	add	%g1,stridey,%i3		! py += stridey
586*25c28e83SPiotr Jasiukajtis	subcc	counter,5,counter	! counter
587*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f13		! (4_0) x0 = *px;
588*25c28e83SPiotr Jasiukajtis	fand	%f48,DC0,%f58		! (3_0) db0 = vis_fand(db0,DC0);
589*25c28e83SPiotr Jasiukajtis
590*25c28e83SPiotr Jasiukajtis	bpos,pt	%icc,.main_loop
591*25c28e83SPiotr Jasiukajtis	add	%o4,960,%g5		! (3_0) iexp0 += 0x3c0;
592*25c28e83SPiotr Jasiukajtis
593*25c28e83SPiotr Jasiukajtis	add	counter,5,counter
/*
 * .tail: finish the elements whose results are still in flight, one per
 * step.  Each step decrements counter and, as soon as it goes negative,
 * leaves for .begin.  The first two exits use an annulled delay slot to
 * copy the current output pointer into %g5; on the later exits %g5
 * already holds it.  The (n_m) tags in the existing comments identify
 * which in-flight element an instruction belongs to.
 */
594*25c28e83SPiotr Jasiukajtis.tail:
/*
 * bneg,a: the "or" in the delay slot executes only when the branch is
 * taken, so %g5 = %i3 happens only on the way out.
 */
595*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
596*25c28e83SPiotr Jasiukajtis	bneg,a	.begin
597*25c28e83SPiotr Jasiukajtis	or	%g0,%i3,%g5
598*25c28e83SPiotr Jasiukajtis
599*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f42,%f52		! (0_1) res0 *= xx0;
600*25c28e83SPiotr Jasiukajtis	fdtos	%f32,%f15		! (2_2) fres0 = (float)res0;
601*25c28e83SPiotr Jasiukajtis
602*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f54,%f42		! (4_2) res0 = dtmp0 * res0;
603*25c28e83SPiotr Jasiukajtis
604*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp2],%f56		! (4_2) dtmp1 = *((double*)&lexp0);
605*25c28e83SPiotr Jasiukajtis
/* First drained result goes to [%i3]; %o3 becomes the next output slot. */
606*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%o3		! py += stridey
607*25c28e83SPiotr Jasiukajtis	st	%f15,[%i3]		! (2_2) *py = fres0;
608*25c28e83SPiotr Jasiukajtis
/* Second exit: annulled delay slot hands %o3 to .begin as py. */
609*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
610*25c28e83SPiotr Jasiukajtis	bneg,a	.begin
611*25c28e83SPiotr Jasiukajtis	or	%g0,%o3,%g5
612*25c28e83SPiotr Jasiukajtis
613*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f56,%f44		! (4_2) res0 *= dtmp1;
614*25c28e83SPiotr Jasiukajtis	ldd	[%i5+8],%f58		! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
615*25c28e83SPiotr Jasiukajtis	faddd	%f52,DC1,%f34		! (0_1) res0 += DC1;
616*25c28e83SPiotr Jasiukajtis
617*25c28e83SPiotr Jasiukajtis	fdtos	%f28,%f19		! (3_2) fres0 = (float)res0;
618*25c28e83SPiotr Jasiukajtis
619*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f34,%f32		! (0_1) res0 = dtmp0 * res0;
620*25c28e83SPiotr Jasiukajtis
621*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp3],%f60		! (0_1) dtmp1 = *((double*)&lexp0);
622*25c28e83SPiotr Jasiukajtis
/* From here on %g5 itself tracks the next output slot. */
623*25c28e83SPiotr Jasiukajtis	add	%o3,stridey,%g5		! py += stridey
624*25c28e83SPiotr Jasiukajtis
625*25c28e83SPiotr Jasiukajtis	st	%f19,[%o3]		! (3_2) *py = fres0;
626*25c28e83SPiotr Jasiukajtis
/* Third exit: %g5 is already correct, so the delay slot is a nop. */
627*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
628*25c28e83SPiotr Jasiukajtis	bneg,a	.begin
629*25c28e83SPiotr Jasiukajtis	nop
630*25c28e83SPiotr Jasiukajtis
631*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f60,%f40		! (0_1) res0 *= dtmp1;
632*25c28e83SPiotr Jasiukajtis
633*25c28e83SPiotr Jasiukajtis	fdtos	%f44,%f23		! (4_2) fres0 = (float)res0;
634*25c28e83SPiotr Jasiukajtis
/*
 * %g5 is advanced before the store, so the store addresses the slot
 * through %o3 + stridey (the value %g5 had before the add).
 */
635*25c28e83SPiotr Jasiukajtis	add	%g5,stridey,%g5		! py += stridey
636*25c28e83SPiotr Jasiukajtis	st	%f23,[stridey+%o3]	! (4_2) *py = fres0;
637*25c28e83SPiotr Jasiukajtis
/* Fourth exit: again nothing to fix up in the delay slot. */
638*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
639*25c28e83SPiotr Jasiukajtis	bneg,a	.begin
640*25c28e83SPiotr Jasiukajtis	nop
641*25c28e83SPiotr Jasiukajtis
642*25c28e83SPiotr Jasiukajtis	fdtos	%f40,%f27		! (0_1) fres0 = (float)res0;
643*25c28e83SPiotr Jasiukajtis
644*25c28e83SPiotr Jasiukajtis	st	%f27,[%g5]		! (0_1) *py = fres0;
645*25c28e83SPiotr Jasiukajtis
/* Last result stored: unconditional exit, py advanced in the delay slot. */
646*25c28e83SPiotr Jasiukajtis	ba	.begin
647*25c28e83SPiotr Jasiukajtis	add	%g5,stridey,%g5
648*25c28e83SPiotr Jasiukajtis
649*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .spec: process one element with the hardware single-precision square
 * root (fsqrts) instead of the table-driven pipeline.  Takes x in %f25,
 * stores the result to [%g5], advances the input pointer %i1 by stridex
 * and the output pointer %g5 by stridey (the latter in the ba delay
 * slot), decrements counter, and continues at .begin1.
 * NOTE(review): presumably reached for inputs the fast path cannot
 * handle (NaN/Inf/negative/denormal) - confirm at the branch site.
 */
650*25c28e83SPiotr Jasiukajtis.spec:
651*25c28e83SPiotr Jasiukajtis	fsqrts	%f25,%f25
652*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
653*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1
654*25c28e83SPiotr Jasiukajtis	st	%f25,[%g5]
655*25c28e83SPiotr Jasiukajtis	ba	.begin1
656*25c28e83SPiotr Jasiukajtis	add	%g5,stridey,%g5
657*25c28e83SPiotr Jasiukajtis
658*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update0: side path that rejoins the pipeline at .cont0.
 * The fzeros in the ble delay slot always executes, so %f0 is 0.0 on
 * both paths.  With counter <= 1 nothing else changes.  Otherwise the
 * batch is cut short: %i1 is saved in tmp_px, counter - 1 in
 * tmp_counter, %o1 is set to 0x7f800000, and counter becomes 1 (set in
 * the ba delay slot).
 * NOTE(review): presumably entered from the "ax >= 0x7f800000" test for
 * the element loaded into %f0/%o1 - confirm at the branch site.
 */
659*25c28e83SPiotr Jasiukajtis.update0:
660*25c28e83SPiotr Jasiukajtis	cmp	counter,1
661*25c28e83SPiotr Jasiukajtis	ble	.cont0
662*25c28e83SPiotr Jasiukajtis	fzeros	%f0
663*25c28e83SPiotr Jasiukajtis
664*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
665*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%o1
666*25c28e83SPiotr Jasiukajtis
667*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
668*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
669*25c28e83SPiotr Jasiukajtis
670*25c28e83SPiotr Jasiukajtis	ba	.cont0
671*25c28e83SPiotr Jasiukajtis	or	%g0,1,counter
672*25c28e83SPiotr Jasiukajtis
673*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update1: side path that rejoins the pipeline at .cont1.
 * The fzeros in the ble delay slot always executes, so %f0 is 0.0 on
 * both paths.  With counter <= 1 nothing else changes.  Otherwise the
 * batch is cut short: %i1 is saved in tmp_px, counter - 1 in
 * tmp_counter, %o1 is cleared to 0, and counter becomes 1 (set in the
 * ba delay slot).
 * NOTE(review): presumably entered from the "ax < 0x00800000" test for
 * the element loaded into %f0/%o1 - confirm at the branch site.
 */
674*25c28e83SPiotr Jasiukajtis.update1:
675*25c28e83SPiotr Jasiukajtis	cmp	counter,1
676*25c28e83SPiotr Jasiukajtis	ble	.cont1
677*25c28e83SPiotr Jasiukajtis	fzeros	%f0
678*25c28e83SPiotr Jasiukajtis
679*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
680*25c28e83SPiotr Jasiukajtis	clr	%o1
681*25c28e83SPiotr Jasiukajtis
682*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
683*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
684*25c28e83SPiotr Jasiukajtis
685*25c28e83SPiotr Jasiukajtis	ba	.cont1
686*25c28e83SPiotr Jasiukajtis	or	%g0,1,counter
687*25c28e83SPiotr Jasiukajtis
688*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update2: side path that rejoins the pipeline at .cont2.
 * The fzeros in the ble delay slot always executes, so %f13 is 0.0 on
 * both paths.  With counter <= 2 nothing else changes.  Otherwise the
 * batch is cut short: %i1 is saved in tmp_px, counter - 2 in
 * tmp_counter, %o2 is set to 0x7f800000, and counter becomes 2 (set in
 * the ba delay slot).
 * NOTE(review): presumably entered from the "ax >= 0x7f800000" test for
 * the element loaded into %f13/%o2 - confirm at the branch site.
 */
689*25c28e83SPiotr Jasiukajtis.update2:
690*25c28e83SPiotr Jasiukajtis	cmp	counter,2
691*25c28e83SPiotr Jasiukajtis	ble	.cont2
692*25c28e83SPiotr Jasiukajtis	fzeros	%f13
693*25c28e83SPiotr Jasiukajtis
694*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
695*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%o2
696*25c28e83SPiotr Jasiukajtis
697*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter
698*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
699*25c28e83SPiotr Jasiukajtis
700*25c28e83SPiotr Jasiukajtis	ba	.cont2
701*25c28e83SPiotr Jasiukajtis	or	%g0,2,counter
702*25c28e83SPiotr Jasiukajtis
703*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update3: side path that rejoins the pipeline at .cont3.
 * The fzeros in the ble delay slot always executes, so %f13 is 0.0 on
 * both paths.  With counter <= 2 nothing else changes.  Otherwise the
 * batch is cut short: %i1 is saved in tmp_px, counter - 2 in
 * tmp_counter, %o2 is cleared to 0, and counter becomes 2 (set in the
 * ba delay slot).
 * NOTE(review): presumably entered from the "ax < 0x00800000" test for
 * the element loaded into %f13/%o2 - confirm at the branch site.
 */
704*25c28e83SPiotr Jasiukajtis.update3:
705*25c28e83SPiotr Jasiukajtis	cmp	counter,2
706*25c28e83SPiotr Jasiukajtis	ble	.cont3
707*25c28e83SPiotr Jasiukajtis	fzeros	%f13
708*25c28e83SPiotr Jasiukajtis
709*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
710*25c28e83SPiotr Jasiukajtis	clr	%o2
711*25c28e83SPiotr Jasiukajtis
712*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter
713*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
714*25c28e83SPiotr Jasiukajtis
715*25c28e83SPiotr Jasiukajtis	ba	.cont3
716*25c28e83SPiotr Jasiukajtis	or	%g0,2,counter
717*25c28e83SPiotr Jasiukajtis
718*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update4: side path that rejoins the pipeline at .cont4.
 * The fzeros in the ble delay slot always executes, so %f17 is 0.0 on
 * both paths.  With counter <= 3 nothing else changes.  Otherwise the
 * batch is cut short: %o4 is saved in tmp_px, counter - 3 in
 * tmp_counter, %l1 is set to 0x7f800000, and counter becomes 3 (set in
 * the ba delay slot).
 * NOTE(review): presumably entered from the "ax >= 0x7f800000" test for
 * the element loaded into %f17/%l1, with %o4 holding its address -
 * confirm at the branch site.
 */
719*25c28e83SPiotr Jasiukajtis.update4:
720*25c28e83SPiotr Jasiukajtis	cmp	counter,3
721*25c28e83SPiotr Jasiukajtis	ble	.cont4
722*25c28e83SPiotr Jasiukajtis	fzeros	%f17
723*25c28e83SPiotr Jasiukajtis
724*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+tmp_px]
725*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%l1
726*25c28e83SPiotr Jasiukajtis
727*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter
728*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
729*25c28e83SPiotr Jasiukajtis
730*25c28e83SPiotr Jasiukajtis	ba	.cont4
731*25c28e83SPiotr Jasiukajtis	or	%g0,3,counter
732*25c28e83SPiotr Jasiukajtis
733*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update5: side path that rejoins the pipeline at .cont5.
 * The fzeros in the ble delay slot always executes, so %f17 is 0.0 on
 * both paths.  With counter <= 3 nothing else changes.  Otherwise the
 * batch is cut short: %o4 is saved in tmp_px, counter - 3 in
 * tmp_counter, %l1 is cleared to 0, and counter becomes 3 (set in the
 * ba delay slot).
 * NOTE(review): presumably entered from the "ax < 0x00800000" test for
 * the element loaded into %f17/%l1, with %o4 holding its address -
 * confirm at the branch site.
 */
734*25c28e83SPiotr Jasiukajtis.update5:
735*25c28e83SPiotr Jasiukajtis	cmp	counter,3
736*25c28e83SPiotr Jasiukajtis	ble	.cont5
737*25c28e83SPiotr Jasiukajtis	fzeros	%f17
738*25c28e83SPiotr Jasiukajtis
739*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+tmp_px]
740*25c28e83SPiotr Jasiukajtis	clr	%l1
741*25c28e83SPiotr Jasiukajtis
742*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter
743*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
744*25c28e83SPiotr Jasiukajtis
745*25c28e83SPiotr Jasiukajtis	ba	.cont5
746*25c28e83SPiotr Jasiukajtis	or	%g0,3,counter
747*25c28e83SPiotr Jasiukajtis
748*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update6: side path that rejoins the pipeline at .cont6.
 * The fzeros in the ble delay slot always executes, so %f21 is 0.0 on
 * both paths.  With counter <= 4 nothing else changes.  Otherwise the
 * batch is cut short: %i1 is saved in tmp_px, counter - 4 in
 * tmp_counter, %i0 is set to 0x7f800000, and counter becomes 4 (set in
 * the ba delay slot).
 * NOTE(review): presumably entered from the "ax >= 0x7f800000" test for
 * the element loaded into %f21/%i0 - confirm at the branch site.
 */
749*25c28e83SPiotr Jasiukajtis.update6:
750*25c28e83SPiotr Jasiukajtis	cmp	counter,4
751*25c28e83SPiotr Jasiukajtis	ble	.cont6
752*25c28e83SPiotr Jasiukajtis	fzeros	%f21
753*25c28e83SPiotr Jasiukajtis
754*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
755*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%i0
756*25c28e83SPiotr Jasiukajtis
757*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
758*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
759*25c28e83SPiotr Jasiukajtis
760*25c28e83SPiotr Jasiukajtis	ba	.cont6
761*25c28e83SPiotr Jasiukajtis	or	%g0,4,counter
762*25c28e83SPiotr Jasiukajtis
763*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update7: side path that rejoins the pipeline at .cont7.
 * The fzeros in the ble delay slot always executes, so %f21 is 0.0 on
 * both paths.  With counter <= 4 nothing else changes.  Otherwise the
 * batch is cut short: %i1 is saved in tmp_px, counter - 4 in
 * tmp_counter, %i0 is cleared to 0, and counter becomes 4 (set in the
 * ba delay slot).
 * NOTE(review): presumably entered from the "ax < 0x00800000" test for
 * the element loaded into %f21/%i0 - confirm at the branch site.
 */
764*25c28e83SPiotr Jasiukajtis.update7:
765*25c28e83SPiotr Jasiukajtis	cmp	counter,4
766*25c28e83SPiotr Jasiukajtis	ble	.cont7
767*25c28e83SPiotr Jasiukajtis	fzeros	%f21
768*25c28e83SPiotr Jasiukajtis
769*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
770*25c28e83SPiotr Jasiukajtis	clr	%i0
771*25c28e83SPiotr Jasiukajtis
772*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
773*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
774*25c28e83SPiotr Jasiukajtis
775*25c28e83SPiotr Jasiukajtis	ba	.cont7
776*25c28e83SPiotr Jasiukajtis	or	%g0,4,counter
777*25c28e83SPiotr Jasiukajtis
778*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update8: side path that rejoins the pipeline at .cont8.
 * The fzeros in the ble delay slot always executes, so %f25 is 0.0 on
 * both paths.  With counter <= 5 nothing else changes.  Otherwise the
 * batch is cut short: %o7 is saved in tmp_px, counter - 5 in
 * tmp_counter, %o2 is set to 0x7f800000, and counter becomes 5 (set in
 * the ba delay slot).
 * NOTE(review): presumably entered from the "ax >= 0x7f800000" test for
 * the element loaded into %f25/%o2, with %o7 holding its address -
 * confirm at the branch site.
 */
779*25c28e83SPiotr Jasiukajtis.update8:
780*25c28e83SPiotr Jasiukajtis	cmp	counter,5
781*25c28e83SPiotr Jasiukajtis	ble	.cont8
782*25c28e83SPiotr Jasiukajtis	fzeros	%f25
783*25c28e83SPiotr Jasiukajtis
784*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp_px]
785*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%o2
786*25c28e83SPiotr Jasiukajtis
787*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter
788*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
789*25c28e83SPiotr Jasiukajtis
790*25c28e83SPiotr Jasiukajtis	ba	.cont8
791*25c28e83SPiotr Jasiukajtis	or	%g0,5,counter
792*25c28e83SPiotr Jasiukajtis
793*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update9: side path that rejoins the pipeline at .cont9.
 * The fzeros in the ble delay slot always executes, so %f25 is 0.0 on
 * both paths.  With counter <= 5 nothing else changes.  Otherwise the
 * batch is cut short: %o7 is saved in tmp_px, counter - 5 in
 * tmp_counter, %o2 is cleared to 0, and counter becomes 5 (set in the
 * ba delay slot).
 * NOTE(review): presumably entered from the "ax < 0x00800000" test for
 * the element loaded into %f25/%o2, with %o7 holding its address -
 * confirm at the branch site.
 */
794*25c28e83SPiotr Jasiukajtis.update9:
795*25c28e83SPiotr Jasiukajtis	cmp	counter,5
796*25c28e83SPiotr Jasiukajtis	ble	.cont9
797*25c28e83SPiotr Jasiukajtis	fzeros	%f25
798*25c28e83SPiotr Jasiukajtis
799*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp_px]
800*25c28e83SPiotr Jasiukajtis	clr	%o2
801*25c28e83SPiotr Jasiukajtis
802*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter
803*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
804*25c28e83SPiotr Jasiukajtis
805*25c28e83SPiotr Jasiukajtis	ba	.cont9
806*25c28e83SPiotr Jasiukajtis	or	%g0,5,counter
807*25c28e83SPiotr Jasiukajtis
808*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update10: side path that rejoins the pipeline at .cont10.
 * The fzeros in the ble delay slot always executes, so %f0 is 0.0 on
 * both paths.  With counter <= 6 nothing else changes.  Otherwise the
 * batch is cut short: %i1 is saved in tmp_px, counter - 6 in
 * tmp_counter, %o1 is set to 0x7f800000, and counter becomes 6 (set in
 * the ba delay slot).
 * NOTE(review): presumably entered from the "ax >= 0x7f800000" test for
 * the element loaded into %f0/%o1 - confirm at the branch site.
 */
809*25c28e83SPiotr Jasiukajtis.update10:
810*25c28e83SPiotr Jasiukajtis	cmp	counter,6
811*25c28e83SPiotr Jasiukajtis	ble	.cont10
812*25c28e83SPiotr Jasiukajtis	fzeros	%f0
813*25c28e83SPiotr Jasiukajtis
814*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
815*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%o1
816*25c28e83SPiotr Jasiukajtis
817*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter
818*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
819*25c28e83SPiotr Jasiukajtis
820*25c28e83SPiotr Jasiukajtis	ba	.cont10
821*25c28e83SPiotr Jasiukajtis	or	%g0,6,counter
822*25c28e83SPiotr Jasiukajtis
823*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update11: side path that rejoins the pipeline at .cont11.
 * The fzeros in the ble delay slot always executes, so %f0 is 0.0 on
 * both paths.  With counter <= 6 nothing else changes.  Otherwise the
 * batch is cut short: %i1 is saved in tmp_px, counter - 6 in
 * tmp_counter, %o1 is cleared to 0, and counter becomes 6 (set in the
 * ba delay slot).
 * NOTE(review): presumably entered from the "ax < 0x00800000" test for
 * the element loaded into %f0/%o1 - confirm at the branch site.
 */
824*25c28e83SPiotr Jasiukajtis.update11:
825*25c28e83SPiotr Jasiukajtis	cmp	counter,6
826*25c28e83SPiotr Jasiukajtis	ble	.cont11
827*25c28e83SPiotr Jasiukajtis	fzeros	%f0
828*25c28e83SPiotr Jasiukajtis
829*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
830*25c28e83SPiotr Jasiukajtis	clr	%o1
831*25c28e83SPiotr Jasiukajtis
832*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter
833*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
834*25c28e83SPiotr Jasiukajtis
835*25c28e83SPiotr Jasiukajtis	ba	.cont11
836*25c28e83SPiotr Jasiukajtis	or	%g0,6,counter
837*25c28e83SPiotr Jasiukajtis
838*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update12: side path that rejoins the pipeline at .cont12.
 * The fzeros in the ble delay slot always executes, so %f13 is 0.0 on
 * both paths.  With counter <= 2 nothing else changes.  Otherwise the
 * batch is cut short: %i1 is saved in tmp_px, counter - 2 in
 * tmp_counter, %o2 is set to 0x7f800000, and counter becomes 2 (set in
 * the ba delay slot).
 * NOTE(review): presumably entered from the "ax >= 0x7f800000" test for
 * the element loaded into %f13/%o2 - confirm at the branch site.
 */
839*25c28e83SPiotr Jasiukajtis.update12:
840*25c28e83SPiotr Jasiukajtis	cmp	counter,2
841*25c28e83SPiotr Jasiukajtis	ble	.cont12
842*25c28e83SPiotr Jasiukajtis	fzeros	%f13
843*25c28e83SPiotr Jasiukajtis
844*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
845*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%o2
846*25c28e83SPiotr Jasiukajtis
847*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter
848*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
849*25c28e83SPiotr Jasiukajtis
850*25c28e83SPiotr Jasiukajtis	ba	.cont12
851*25c28e83SPiotr Jasiukajtis	or	%g0,2,counter
852*25c28e83SPiotr Jasiukajtis
853*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update13: side path that rejoins the pipeline at .cont13.
 * The fzeros in the ble delay slot always executes, so %f13 is 0.0 on
 * both paths.  With counter <= 2 nothing else changes.  Otherwise the
 * batch is cut short: %i1 is saved in tmp_px, counter - 2 in
 * tmp_counter, %o2 is cleared to 0, and counter becomes 2 (set in the
 * ba delay slot).
 * NOTE(review): presumably entered from the "ax < 0x00800000" test for
 * the element loaded into %f13/%o2 - confirm at the branch site.
 */
854*25c28e83SPiotr Jasiukajtis.update13:
855*25c28e83SPiotr Jasiukajtis	cmp	counter,2
856*25c28e83SPiotr Jasiukajtis	ble	.cont13
857*25c28e83SPiotr Jasiukajtis	fzeros	%f13
858*25c28e83SPiotr Jasiukajtis
859*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
860*25c28e83SPiotr Jasiukajtis	clr	%o2
861*25c28e83SPiotr Jasiukajtis
862*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter
863*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
864*25c28e83SPiotr Jasiukajtis
865*25c28e83SPiotr Jasiukajtis	ba	.cont13
866*25c28e83SPiotr Jasiukajtis	or	%g0,2,counter
867*25c28e83SPiotr Jasiukajtis
868*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update14: side path that rejoins the pipeline at .cont14.
 * The fzeros in the ble delay slot always executes, so %f17 is 0.0 on
 * both paths.  With counter <= 3 nothing else changes.  Otherwise the
 * batch is cut short: %o4 is saved in tmp_px, counter - 3 in
 * tmp_counter, %l1 is set to 0x7f800000, and counter becomes 3 (set in
 * the ba delay slot).
 * NOTE(review): presumably entered from the "ax >= 0x7f800000" test for
 * the element loaded into %f17/%l1, with %o4 holding its address -
 * confirm at the branch site.
 */
869*25c28e83SPiotr Jasiukajtis.update14:
870*25c28e83SPiotr Jasiukajtis	cmp	counter,3
871*25c28e83SPiotr Jasiukajtis	ble	.cont14
872*25c28e83SPiotr Jasiukajtis	fzeros	%f17
873*25c28e83SPiotr Jasiukajtis
874*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+tmp_px]
875*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%l1
876*25c28e83SPiotr Jasiukajtis
877*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter
878*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
879*25c28e83SPiotr Jasiukajtis
880*25c28e83SPiotr Jasiukajtis	ba	.cont14
881*25c28e83SPiotr Jasiukajtis	or	%g0,3,counter
882*25c28e83SPiotr Jasiukajtis
883*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update15: side path that rejoins the pipeline at .cont15.
 * The fzeros in the ble delay slot always executes, so %f17 is 0.0 on
 * both paths.  With counter <= 3 nothing else changes.  Otherwise the
 * batch is cut short: %o4 is saved in tmp_px, counter - 3 in
 * tmp_counter, %l1 is cleared to 0, and counter becomes 3 (set in the
 * ba delay slot).
 * NOTE(review): presumably entered from the "ax < 0x00800000" test for
 * the element loaded into %f17/%l1, with %o4 holding its address -
 * confirm at the branch site.
 */
884*25c28e83SPiotr Jasiukajtis.update15:
885*25c28e83SPiotr Jasiukajtis	cmp	counter,3
886*25c28e83SPiotr Jasiukajtis	ble	.cont15
887*25c28e83SPiotr Jasiukajtis	fzeros	%f17
888*25c28e83SPiotr Jasiukajtis
889*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+tmp_px]
890*25c28e83SPiotr Jasiukajtis	clr	%l1
891*25c28e83SPiotr Jasiukajtis
892*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter
893*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
894*25c28e83SPiotr Jasiukajtis
895*25c28e83SPiotr Jasiukajtis	ba	.cont15
896*25c28e83SPiotr Jasiukajtis	or	%g0,3,counter
897*25c28e83SPiotr Jasiukajtis
898*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update16: side path that rejoins the pipeline at .cont16.
 * The fzeros in the ble delay slot always executes, so %f21 is 0.0 on
 * both paths.  With counter <= 4 nothing else changes.  Otherwise the
 * batch is cut short: %i1 is saved in tmp_px, counter - 4 in
 * tmp_counter, %i0 is set to 0x7f800000, and counter becomes 4 (set in
 * the ba delay slot).
 * NOTE(review): presumably entered from the "ax >= 0x7f800000" test for
 * the element loaded into %f21/%i0 - confirm at the branch site.
 */
899*25c28e83SPiotr Jasiukajtis.update16:
900*25c28e83SPiotr Jasiukajtis	cmp	counter,4
901*25c28e83SPiotr Jasiukajtis	ble	.cont16
902*25c28e83SPiotr Jasiukajtis	fzeros	%f21
903*25c28e83SPiotr Jasiukajtis
904*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
905*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%i0
906*25c28e83SPiotr Jasiukajtis
907*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
908*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
909*25c28e83SPiotr Jasiukajtis
910*25c28e83SPiotr Jasiukajtis	ba	.cont16
911*25c28e83SPiotr Jasiukajtis	or	%g0,4,counter
912*25c28e83SPiotr Jasiukajtis
913*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update17: side path that rejoins the pipeline at .cont17.
 * The fzeros in the ble delay slot always executes, so %f21 is 0.0 on
 * both paths.  With counter <= 4 nothing else changes.  Otherwise the
 * batch is cut short: %i1 is saved in tmp_px, counter - 4 in
 * tmp_counter, %i0 is cleared to 0, and counter becomes 4 (set in the
 * ba delay slot).
 * NOTE(review): presumably entered from the "ax < 0x00800000" test for
 * the element loaded into %f21/%i0 - confirm at the branch site.
 */
914*25c28e83SPiotr Jasiukajtis.update17:
915*25c28e83SPiotr Jasiukajtis	cmp	counter,4
916*25c28e83SPiotr Jasiukajtis	ble	.cont17
917*25c28e83SPiotr Jasiukajtis	fzeros	%f21
918*25c28e83SPiotr Jasiukajtis
919*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
920*25c28e83SPiotr Jasiukajtis	clr	%i0
921*25c28e83SPiotr Jasiukajtis
922*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
923*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
924*25c28e83SPiotr Jasiukajtis
925*25c28e83SPiotr Jasiukajtis	ba	.cont17
926*25c28e83SPiotr Jasiukajtis	or	%g0,4,counter
927*25c28e83SPiotr Jasiukajtis
928*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update18: taken by the bge,pn just before .cont18 when the (2_0)
 * element has ax >= 0x7f800000; rejoins the pipeline at .cont18.
 * The fzeros in the ble delay slot always executes, so x0 (%f25) is
 * 0.0 on both paths.  With counter <= 5 nothing else changes.
 * Otherwise the batch is cut short: %o7 (the address this element was
 * loaded from) is saved in tmp_px, counter - 5 in tmp_counter, ax (%o2)
 * is set to exactly 0x7f800000, and counter becomes 5 (set in the ba
 * delay slot).  With ax at 0x7f800000 the "ax < 0x00800000" test that
 * follows .cont18 does not fire for this element.
 */
929*25c28e83SPiotr Jasiukajtis.update18:
930*25c28e83SPiotr Jasiukajtis	cmp	counter,5
931*25c28e83SPiotr Jasiukajtis	ble	.cont18
932*25c28e83SPiotr Jasiukajtis	fzeros	%f25
933*25c28e83SPiotr Jasiukajtis
934*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp_px]
935*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%o2
936*25c28e83SPiotr Jasiukajtis
937*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter
938*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
939*25c28e83SPiotr Jasiukajtis
940*25c28e83SPiotr Jasiukajtis	ba	.cont18
941*25c28e83SPiotr Jasiukajtis	or	%g0,5,counter
942*25c28e83SPiotr Jasiukajtis
943*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update19: taken by the bl,pn just before .cont19 when the (2_0)
 * element has ax < 0x00800000 (signed compare); rejoins the pipeline
 * at .cont19.  On this path the fstod that sits between that branch's
 * delay slot and .cont19 is skipped.
 * The fzeros in the ble delay slot always executes, so x0 (%f25) is
 * 0.0 on both paths.  With counter <= 5 nothing else changes.
 * Otherwise the batch is cut short: %o7 (the address this element was
 * loaded from) is saved in tmp_px, counter - 5 in tmp_counter, ax (%o2)
 * is cleared to 0, and counter becomes 5 (set in the ba delay slot).
 */
944*25c28e83SPiotr Jasiukajtis.update19:
945*25c28e83SPiotr Jasiukajtis	cmp	counter,5
946*25c28e83SPiotr Jasiukajtis	ble	.cont19
947*25c28e83SPiotr Jasiukajtis	fzeros	%f25
948*25c28e83SPiotr Jasiukajtis
949*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp_px]
950*25c28e83SPiotr Jasiukajtis	clr	%o2
951*25c28e83SPiotr Jasiukajtis
952*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter
953*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
954*25c28e83SPiotr Jasiukajtis
955*25c28e83SPiotr Jasiukajtis	ba	.cont19
956*25c28e83SPiotr Jasiukajtis	or	%g0,5,counter
957*25c28e83SPiotr Jasiukajtis
958*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update20: taken by the bge,pn just before .cont20 when the (3_0)
 * element has ax >= 0x7f800000; rejoins the pipeline at .cont20.
 * The fzeros in the ble delay slot always executes, so x0 (%f0) is
 * 0.0 on both paths.  With counter <= 6 nothing else changes.
 * Otherwise the batch is cut short: %i1 (the address this element was
 * loaded from) is saved in tmp_px, counter - 6 in tmp_counter, ax (%o1)
 * is set to exactly 0x7f800000, and counter becomes 6 (set in the ba
 * delay slot).  With ax at 0x7f800000 the "ax < 0x00800000" test that
 * follows .cont20 does not fire for this element.
 */
959*25c28e83SPiotr Jasiukajtis.update20:
960*25c28e83SPiotr Jasiukajtis	cmp	counter,6
961*25c28e83SPiotr Jasiukajtis	ble	.cont20
962*25c28e83SPiotr Jasiukajtis	fzeros	%f0
963*25c28e83SPiotr Jasiukajtis
964*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
965*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%o1
966*25c28e83SPiotr Jasiukajtis
967*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter
968*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
969*25c28e83SPiotr Jasiukajtis
970*25c28e83SPiotr Jasiukajtis	ba	.cont20
971*25c28e83SPiotr Jasiukajtis	or	%g0,6,counter
972*25c28e83SPiotr Jasiukajtis
973*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update21: taken by the bl,pn just before .cont21 when the (3_0)
 * element has ax < 0x00800000 (signed compare); rejoins the pipeline
 * at .cont21.  On this path the fstod that sits between that branch's
 * delay slot and .cont21 is skipped.
 * The fzeros in the ble delay slot always executes, so x0 (%f0) is
 * 0.0 on both paths.  With counter <= 6 nothing else changes.
 * Otherwise the batch is cut short: %i1 (the address this element was
 * loaded from) is saved in tmp_px, counter - 6 in tmp_counter, ax (%o1)
 * is cleared to 0, and counter becomes 6 (set in the ba delay slot).
 */
974*25c28e83SPiotr Jasiukajtis.update21:
975*25c28e83SPiotr Jasiukajtis	cmp	counter,6
976*25c28e83SPiotr Jasiukajtis	ble	.cont21
977*25c28e83SPiotr Jasiukajtis	fzeros	%f0
978*25c28e83SPiotr Jasiukajtis
979*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
980*25c28e83SPiotr Jasiukajtis	clr	%o1
981*25c28e83SPiotr Jasiukajtis
982*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter
983*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
984*25c28e83SPiotr Jasiukajtis
985*25c28e83SPiotr Jasiukajtis	ba	.cont21
986*25c28e83SPiotr Jasiukajtis	or	%g0,6,counter
987*25c28e83SPiotr Jasiukajtis
/*
 * .exit: return to the caller.  The restore in the delay slot of ret
 * releases this routine's register window as control leaves.
 * NOTE(review): SET_SIZE is a macro defined outside this file;
 * presumably it records the symbol size of __vsqrtf_ultra3 - confirm.
 */
988*25c28e83SPiotr Jasiukajtis.exit:
989*25c28e83SPiotr Jasiukajtis	ret
990*25c28e83SPiotr Jasiukajtis	restore
991*25c28e83SPiotr Jasiukajtis	SET_SIZE(__vsqrtf_ultra3)
992*25c28e83SPiotr Jasiukajtis
993