1*25c28e83SPiotr Jasiukajtis/*
2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis *
4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis *
8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis *
13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis *
19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis */
21*25c28e83SPiotr Jasiukajtis/*
22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23*25c28e83SPiotr Jasiukajtis */
24*25c28e83SPiotr Jasiukajtis/*
25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms.
27*25c28e83SPiotr Jasiukajtis */
28*25c28e83SPiotr Jasiukajtis
29*25c28e83SPiotr Jasiukajtis	.file	"__vrsqrt.S"
30*25c28e83SPiotr Jasiukajtis
31*25c28e83SPiotr Jasiukajtis#include "libm.h"
32*25c28e83SPiotr Jasiukajtis
33*25c28e83SPiotr Jasiukajtis	RO_DATA
34*25c28e83SPiotr Jasiukajtis	.align	64
35*25c28e83SPiotr Jasiukajtis
36*25c28e83SPiotr Jasiukajtis.CONST_TBL:
! Read-only constants used by __vrsqrt.  Each entry is one 64-bit value
! written as two 32-bit words, high word first, so entry i sits at byte
! offset 8*i from .CONST_TBL (the code loads them with ldd [%o3+8*i]).
! K1..K6 are the coefficients of the polynomial in xx that the algorithm
! notes below evaluate in Horner form.
37*25c28e83SPiotr Jasiukajtis	.word	0xbfe00000, 0x0000002f	! K1 =-5.00000000000005209867e-01;
38*25c28e83SPiotr Jasiukajtis	.word	0x3fd80000, 0x00000058	! K2 = 3.75000000000004884257e-01;
39*25c28e83SPiotr Jasiukajtis	.word	0xbfd3ffff, 0xff444bc8	! K3 =-3.12499999317136886551e-01;
40*25c28e83SPiotr Jasiukajtis	.word	0x3fd17fff, 0xff5006fe	! K4 = 2.73437499359815081532e-01;
41*25c28e83SPiotr Jasiukajtis	.word	0xbfcf80bb, 0xb33ef574	! K5 =-2.46116125605037803130e-01;
42*25c28e83SPiotr Jasiukajtis	.word	0x3fcce0af, 0xf8156949	! K6 = 2.25606914648617522896e-01;
43*25c28e83SPiotr Jasiukajtis
! Bit masks and increments applied with fand / for / fpadd32.
44*25c28e83SPiotr Jasiukajtis	.word	0x001fffff, 0xffffffff	! DC0: mask keeping the lowest exponent bit and the 52 fraction bits
45*25c28e83SPiotr Jasiukajtis	.word	0x3fe00000, 0x00000000	! DC1: bit pattern of 0.5; or-ed in so the exponent field becomes 0x3fe or 0x3ff
46*25c28e83SPiotr Jasiukajtis	.word	0x00002000, 0x00000000	! DC2: increment (bit 13 of the high word) added by fpadd32 before masking with DC3
47*25c28e83SPiotr Jasiukajtis	.word	0x7fffc000, 0x00000000	! DC3: mask keeping bits 30..14 of the high word and clearing the low word
48*25c28e83SPiotr Jasiukajtis	.word	0x0007ffff, 0xffffffff	! DC4: mask keeping the low 51 bits (subnormal path in the algorithm notes)
49*25c28e83SPiotr Jasiukajtis
50*25c28e83SPiotr Jasiukajtis	.word	0x43200000, 0x00000000	! D2ON51  = pow(2,51)
51*25c28e83SPiotr Jasiukajtis	.word	0x3ff00000, 0x00000000	! DONE   = 1.0
52*25c28e83SPiotr Jasiukajtis
! Integer register roles, as established by the setup code of __vrsqrt:
!   stride registers   - the stride arguments shifted left by 3 (byte strides)
!   counter            - element count, reloaded from tmp_counter at .begin
!   TBL                - address of __vlibm_TBL_rsqrt obtained through PIC_SET
!   %o0 / %o1 aliases  - hold 0x7ff00000 and 0x00100000 (built with sethi) for
!                        the range checks on the high word of each input
! NOTE: keep comments off the #define lines themselves; a trailing comment
! would become part of the macro expansion.
53*25c28e83SPiotr Jasiukajtis#define stridex		%l5
54*25c28e83SPiotr Jasiukajtis#define stridey		%l7
55*25c28e83SPiotr Jasiukajtis#define counter		%l0
56*25c28e83SPiotr Jasiukajtis#define TBL		%l3
57*25c28e83SPiotr Jasiukajtis#define _0x7ff00000	%o0
58*25c28e83SPiotr Jasiukajtis#define _0x00100000	%o1
59*25c28e83SPiotr Jasiukajtis
! Floating-point registers that hold the .CONST_TBL entries of the same
! name; they are filled with ldd from the table before the main loop.
60*25c28e83SPiotr Jasiukajtis#define DC0		%f56
61*25c28e83SPiotr Jasiukajtis#define DC1		%f54
62*25c28e83SPiotr Jasiukajtis#define DC2		%f48
63*25c28e83SPiotr Jasiukajtis#define DC3		%f46
64*25c28e83SPiotr Jasiukajtis#define K6		%f42
65*25c28e83SPiotr Jasiukajtis#define K5		%f20
66*25c28e83SPiotr Jasiukajtis#define K4		%f52
67*25c28e83SPiotr Jasiukajtis#define K3		%f50
68*25c28e83SPiotr Jasiukajtis#define K2		%f14
69*25c28e83SPiotr Jasiukajtis#define K1		%f12
70*25c28e83SPiotr Jasiukajtis#define DONE		%f4
71*25c28e83SPiotr Jasiukajtis
! Saved element count and input pointer.  At .begin they are copied into
! the working count and %i1, and the saved count is then cleared.
! NOTE(review): presumably the special-case paths outside this view store
! resume state here - confirm against the .update* handlers.
72*25c28e83SPiotr Jasiukajtis#define tmp_counter	%g5
73*25c28e83SPiotr Jasiukajtis#define tmp_px		%o5
74*25c28e83SPiotr Jasiukajtis
! Eight 8-byte scratch slots addressed relative to %fp, covering the 0x40
! bytes directly below %fp+STACK_BIAS.  The loop stores the 64-bit integer
! exponent pattern with stx and reloads it as a double with ldd.
75*25c28e83SPiotr Jasiukajtis#define tmp0		STACK_BIAS-0x40
76*25c28e83SPiotr Jasiukajtis#define tmp1		STACK_BIAS-0x38
77*25c28e83SPiotr Jasiukajtis#define tmp2		STACK_BIAS-0x30
78*25c28e83SPiotr Jasiukajtis#define tmp3		STACK_BIAS-0x28
79*25c28e83SPiotr Jasiukajtis#define tmp4		STACK_BIAS-0x20
80*25c28e83SPiotr Jasiukajtis#define tmp5		STACK_BIAS-0x18
81*25c28e83SPiotr Jasiukajtis#define tmp6		STACK_BIAS-0x10
82*25c28e83SPiotr Jasiukajtis#define tmp7		STACK_BIAS-0x08
83*25c28e83SPiotr Jasiukajtis
84*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9
! (this amount is added to the frame size in the save instruction)
85*25c28e83SPiotr Jasiukajtis#define tmps		0x40
86*25c28e83SPiotr Jasiukajtis
87*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
88*25c28e83SPiotr Jasiukajtis!      !!!!!   algorithm   !!!!!
89*25c28e83SPiotr Jasiukajtis!  ((float*)&res)[0] = ((float*)px)[0];
90*25c28e83SPiotr Jasiukajtis!  ((float*)&res)[1] = ((float*)px)[1];
91*25c28e83SPiotr Jasiukajtis!  hx = *(int*)px;
92*25c28e83SPiotr Jasiukajtis!  if ( hx >= 0x7ff00000 )
93*25c28e83SPiotr Jasiukajtis!  {
94*25c28e83SPiotr Jasiukajtis!    res = DONE / res;
95*25c28e83SPiotr Jasiukajtis!    ((float*)py)[0] = ((float*)&res)[0];
96*25c28e83SPiotr Jasiukajtis!    ((float*)py)[1] = ((float*)&res)[1];
97*25c28e83SPiotr Jasiukajtis!    px += stridex;
98*25c28e83SPiotr Jasiukajtis!    py += stridey;
99*25c28e83SPiotr Jasiukajtis!    continue;
100*25c28e83SPiotr Jasiukajtis!  }
101*25c28e83SPiotr Jasiukajtis!  if ( hx < 0x00100000 )
102*25c28e83SPiotr Jasiukajtis!  {
103*25c28e83SPiotr Jasiukajtis!    ax = hx & 0x7fffffff;
104*25c28e83SPiotr Jasiukajtis!    lx = ((int*)px)[1];
105*25c28e83SPiotr Jasiukajtis!
106*25c28e83SPiotr Jasiukajtis!    if ( (ax | lx) == 0 )
107*25c28e83SPiotr Jasiukajtis!    {
108*25c28e83SPiotr Jasiukajtis!      res = DONE / res;
109*25c28e83SPiotr Jasiukajtis!      ((float*)py)[0] = ((float*)&res)[0];
110*25c28e83SPiotr Jasiukajtis!      ((float*)py)[1] = ((float*)&res)[1];
111*25c28e83SPiotr Jasiukajtis!      px += stridex;
112*25c28e83SPiotr Jasiukajtis!      py += stridey;
113*25c28e83SPiotr Jasiukajtis!      continue;
114*25c28e83SPiotr Jasiukajtis!    }
115*25c28e83SPiotr Jasiukajtis!    else if ( hx >= 0 )
116*25c28e83SPiotr Jasiukajtis!    {
117*25c28e83SPiotr Jasiukajtis!      if ( hx < 0x00080000 )
118*25c28e83SPiotr Jasiukajtis!      {
119*25c28e83SPiotr Jasiukajtis!        res = *(long long*)&res;
120*25c28e83SPiotr Jasiukajtis!        hx = *(int*)&res - (537 << 21);
121*25c28e83SPiotr Jasiukajtis!      }
122*25c28e83SPiotr Jasiukajtis!      else
123*25c28e83SPiotr Jasiukajtis!      {
124*25c28e83SPiotr Jasiukajtis!        res = vis_fand(res,DC4);
125*25c28e83SPiotr Jasiukajtis!        res = *(long long*)&res;
126*25c28e83SPiotr Jasiukajtis!        res += D2ON51;
127*25c28e83SPiotr Jasiukajtis!        hx = *(int*)&res - (537 << 21);
128*25c28e83SPiotr Jasiukajtis!      }
129*25c28e83SPiotr Jasiukajtis!    }
130*25c28e83SPiotr Jasiukajtis!    else
131*25c28e83SPiotr Jasiukajtis!    {
132*25c28e83SPiotr Jasiukajtis!      res = sqrt(res);
133*25c28e83SPiotr Jasiukajtis!      ((float*)py)[0] = ((float*)&res)[0];
134*25c28e83SPiotr Jasiukajtis!      ((float*)py)[1] = ((float*)&res)[1];
135*25c28e83SPiotr Jasiukajtis!      px += stridex;
136*25c28e83SPiotr Jasiukajtis!      py += stridey;
137*25c28e83SPiotr Jasiukajtis!      continue;
138*25c28e83SPiotr Jasiukajtis!    }
139*25c28e83SPiotr Jasiukajtis!  }
140*25c28e83SPiotr Jasiukajtis!
141*25c28e83SPiotr Jasiukajtis!  iexp = hx >> 21;
142*25c28e83SPiotr Jasiukajtis!  iexp = -iexp;
143*25c28e83SPiotr Jasiukajtis!  iexp += 0x5fe;
144*25c28e83SPiotr Jasiukajtis!  lexp = iexp << 52;
145*25c28e83SPiotr Jasiukajtis!  dlexp = *(double*)&lexp;
146*25c28e83SPiotr Jasiukajtis!  hx >>= 10;
147*25c28e83SPiotr Jasiukajtis!  hx &= 0x7f8;
148*25c28e83SPiotr Jasiukajtis!  hx += 8;
149*25c28e83SPiotr Jasiukajtis!  hx &= -16;
150*25c28e83SPiotr Jasiukajtis!
151*25c28e83SPiotr Jasiukajtis!  res = vis_fand(res,DC0);
152*25c28e83SPiotr Jasiukajtis!  res = vis_for(res,DC1);
153*25c28e83SPiotr Jasiukajtis!  res_c = vis_fpadd32(res,DC2);
154*25c28e83SPiotr Jasiukajtis!  res_c = vis_fand(res_c,DC3);
155*25c28e83SPiotr Jasiukajtis!
156*25c28e83SPiotr Jasiukajtis!  addr = (char*)arr + hx;
157*25c28e83SPiotr Jasiukajtis!  dexp_hi = ((double*)addr)[0];
158*25c28e83SPiotr Jasiukajtis!  dexp_lo = ((double*)addr)[1];
159*25c28e83SPiotr Jasiukajtis!  dtmp0 = dexp_hi * dexp_hi;
160*25c28e83SPiotr Jasiukajtis!  xx = res - res_c;
161*25c28e83SPiotr Jasiukajtis!  xx *= dtmp0;
162*25c28e83SPiotr Jasiukajtis!  res = K6 * xx;
163*25c28e83SPiotr Jasiukajtis!  res += K5;
164*25c28e83SPiotr Jasiukajtis!  res *= xx;
165*25c28e83SPiotr Jasiukajtis!  res += K4;
166*25c28e83SPiotr Jasiukajtis!  res *= xx;
167*25c28e83SPiotr Jasiukajtis!  res += K3;
168*25c28e83SPiotr Jasiukajtis!  res *= xx;
169*25c28e83SPiotr Jasiukajtis!  res += K2;
170*25c28e83SPiotr Jasiukajtis!  res *= xx;
171*25c28e83SPiotr Jasiukajtis!  res += K1;
172*25c28e83SPiotr Jasiukajtis!  res *= xx;
173*25c28e83SPiotr Jasiukajtis!  res = dexp_hi * res;
174*25c28e83SPiotr Jasiukajtis!  res += dexp_lo;
175*25c28e83SPiotr Jasiukajtis!  res += dexp_hi;
176*25c28e83SPiotr Jasiukajtis!
177*25c28e83SPiotr Jasiukajtis!  res *= dlexp;
178*25c28e83SPiotr Jasiukajtis!
179*25c28e83SPiotr Jasiukajtis!  ((float*)py)[0] = ((float*)&res)[0];
180*25c28e83SPiotr Jasiukajtis!  ((float*)py)[1] = ((float*)&res)[1];
181*25c28e83SPiotr Jasiukajtis!
182*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
183*25c28e83SPiotr Jasiukajtis
184*25c28e83SPiotr Jasiukajtis	ENTRY(__vrsqrt)
185*25c28e83SPiotr Jasiukajtis	save	%sp,-SA(MINFRAME)-tmps,%sp
186*25c28e83SPiotr Jasiukajtis	PIC_SETUP(l7)
187*25c28e83SPiotr Jasiukajtis	PIC_SET(l7,.CONST_TBL,o3)
188*25c28e83SPiotr Jasiukajtis	PIC_SET(l7,__vlibm_TBL_rsqrt,l3)
189*25c28e83SPiotr Jasiukajtis	wr	%g0,0x82,%asi
190*25c28e83SPiotr Jasiukajtis
191*25c28e83SPiotr Jasiukajtis	ldd	[%o3],K1
192*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ff00000),%o0
193*25c28e83SPiotr Jasiukajtis	mov	%i3,%o4
194*25c28e83SPiotr Jasiukajtis
195*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x08],K2
196*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00100000),%o1
197*25c28e83SPiotr Jasiukajtis	mov	%i1,tmp_px
198*25c28e83SPiotr Jasiukajtis
199*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x10],K3
200*25c28e83SPiotr Jasiukajtis	sll	%i2,3,stridex
201*25c28e83SPiotr Jasiukajtis	mov	%i0,tmp_counter
202*25c28e83SPiotr Jasiukajtis
203*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x18],K4
204*25c28e83SPiotr Jasiukajtis	sll	%i4,3,stridey
205*25c28e83SPiotr Jasiukajtis
206*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x20],K5
207*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x28],K6
208*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x30],DC0
209*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x38],DC1
210*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x40],DC2
211*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x48],DC3
212*25c28e83SPiotr Jasiukajtis
213*25c28e83SPiotr Jasiukajtis.begin:
214*25c28e83SPiotr Jasiukajtis	mov	tmp_counter,counter
215*25c28e83SPiotr Jasiukajtis	mov	tmp_px,%i1
216*25c28e83SPiotr Jasiukajtis	clr	tmp_counter
217*25c28e83SPiotr Jasiukajtis.begin1:
218*25c28e83SPiotr Jasiukajtis	cmp	counter,0
219*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.exit
220*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x60],DONE
221*25c28e83SPiotr Jasiukajtis
222*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%f0		! (6_0) ((float*)res)[0] = ((float*)px)[0];
223*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%i0
224*25c28e83SPiotr Jasiukajtis
225*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%f1		! (6_0) ((float*)res)[1] = ((float*)px)[1];
226*25c28e83SPiotr Jasiukajtis	add	%i0,1023,%i0
227*25c28e83SPiotr Jasiukajtis
228*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
229*25c28e83SPiotr Jasiukajtis
230*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%g1		! (6_1) hx = *(int*)px;
231*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i4
232*25c28e83SPiotr Jasiukajtis
233*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%l4
234*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%l6		! px += stridex
235*25c28e83SPiotr Jasiukajtis
236*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
237*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f8		! (0_0) ((float*)res)[0] = ((float*)px)[0];
238*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (6_1) res = vis_for(res,DC1);
239*25c28e83SPiotr Jasiukajtis
240*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f9		! (0_0) ((float*)res)[1] = ((float*)px)[1];
241*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (6_1) hx >>= 10;
242*25c28e83SPiotr Jasiukajtis	and	%g1,%i0,%i2
243*25c28e83SPiotr Jasiukajtis
244*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (6_1) hx ? 0x7ff00000
245*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.spec0		! (6_1) if ( hx >= 0x7ff00000 )
246*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
247*25c28e83SPiotr Jasiukajtis
248*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (6_1) hx ? 0x00100000
249*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.spec1		! (6_1) if ( hx < 0x00100000 )
250*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
251*25c28e83SPiotr Jasiukajtis.cont_spec:
252*25c28e83SPiotr Jasiukajtis	fand	%f8,DC0,%f16		! (0_0) res = vis_fand(res,DC0);
253*25c28e83SPiotr Jasiukajtis
254*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
255*25c28e83SPiotr Jasiukajtis
256*25c28e83SPiotr Jasiukajtis	add	%o2,8,%l4		! (6_1) hx += 8;
257*25c28e83SPiotr Jasiukajtis
258*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (6_1) iexp += 0x5fe;
259*25c28e83SPiotr Jasiukajtis
260*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (0_0) hx = *(int*)px;
261*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (6_1) iexp << 52;
262*25c28e83SPiotr Jasiukajtis	and	%l4,-16,%l4		! (6_1) hx &= -16;
263*25c28e83SPiotr Jasiukajtis
264*25c28e83SPiotr Jasiukajtis	add	%l4,TBL,%l4		! (6_1) addr = (char*)arr + hx;
265*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp1]		! (6_1) dlexp = *(double*)lexp;
266*25c28e83SPiotr Jasiukajtis
267*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
268*25c28e83SPiotr Jasiukajtis	ldd	[%l4],%f30		! (6_1) dtmp0 = ((double*)addr)[0];
269*25c28e83SPiotr Jasiukajtis
270*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
271*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f0		! (1_0) ((float*)res)[0] = ((float*)px)[0];
272*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (0_0) res = vis_for(res,DC1);
273*25c28e83SPiotr Jasiukajtis
274*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (0_0) hx >>= 10;
275*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
276*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f1		! (1_0) ((float*)res)[1] = ((float*)px)[1];
277*25c28e83SPiotr Jasiukajtis
278*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (0_0) hx ? 0x7ff00000
279*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update0		! (0_0) if ( hx >= 0x7ff00000 )
280*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f6		! (6_1) res_c = vis_fand(res_c,DC3);
281*25c28e83SPiotr Jasiukajtis.cont0:
282*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
283*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f30,%f10		! (6_1) dtmp0 = dexp_hi * dexp_hi;
284*25c28e83SPiotr Jasiukajtis
285*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (0_0) hx ? 0x00100000
286*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update1		! (0_0) if ( hx < 0x00100000 )
287*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
288*25c28e83SPiotr Jasiukajtis.cont1:
289*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (1_0) res = vis_fand(res,DC0);
290*25c28e83SPiotr Jasiukajtis
291*25c28e83SPiotr Jasiukajtis	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
292*25c28e83SPiotr Jasiukajtis
293*25c28e83SPiotr Jasiukajtis	add	%o2,8,%l2		! (0_0) hx += 8;
294*25c28e83SPiotr Jasiukajtis	fsubd	%f44,%f6,%f6		! (6_1) xx = res - res_c;
295*25c28e83SPiotr Jasiukajtis
296*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (1_0) hx = *(int*)px;
297*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (0_0) iexp << 52;
298*25c28e83SPiotr Jasiukajtis	and	%l2,-16,%l2		! (0_0) hx &= -16;
299*25c28e83SPiotr Jasiukajtis
300*25c28e83SPiotr Jasiukajtis	add	%l2,TBL,%l2		! (0_0) addr = (char*)arr + hx;
301*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
302*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp2]		! (0_0) dlexp = *(double*)lexp;
303*25c28e83SPiotr Jasiukajtis
304*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f10,%f26		! (6_1) xx *= dtmp0;
305*25c28e83SPiotr Jasiukajtis	ldd	[%l2],%f10		! (0_0) dtmp0 = ((double*)addr)[0];
306*25c28e83SPiotr Jasiukajtis
307*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
308*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f6		! (2_0) ((float*)res)[0] = ((float*)px)[0];
309*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (1_0) res = vis_for(res,DC1);
310*25c28e83SPiotr Jasiukajtis
311*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (1_0) hx >>= 10;
312*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (1_0) hx ? 0x7ff00000
313*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update2		! (1_0) if ( hx >= 0x7ff00000 )
314*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f7		! (2_0) ((float*)res)[1] = ((float*)px)[1];
315*25c28e83SPiotr Jasiukajtis.cont2:
316*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (0_0) res_c = vis_fand(res_c,DC3);
317*25c28e83SPiotr Jasiukajtis
318*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f10		! (0_0) dtmp0 = dexp_hi * dexp_hi;
319*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (1_0) hx ? 0x00100000
320*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update3		! (1_0) if ( hx < 0x00100000 )
321*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
322*25c28e83SPiotr Jasiukajtis.cont3:
323*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (1_0) iexp = -iexp;
324*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
325*25c28e83SPiotr Jasiukajtis
326*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (1_0) iexp += 0x5fe;
327*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (1_0) res_c = vis_fpadd32(res,DC2);
328*25c28e83SPiotr Jasiukajtis
329*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f26,%f62		! (6_1) res = K6 * xx;
330*25c28e83SPiotr Jasiukajtis	add	%o2,8,%i2		! (1_0) hx += 8;
331*25c28e83SPiotr Jasiukajtis	fsubd	%f28,%f8,%f32		! (0_0) xx = res - res_c;
332*25c28e83SPiotr Jasiukajtis
333*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (2_0) hx = *(int*)px;
334*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (1_0) iexp << 52;
335*25c28e83SPiotr Jasiukajtis	and	%i2,-16,%i2		! (1_0) hx &= -16;
336*25c28e83SPiotr Jasiukajtis
337*25c28e83SPiotr Jasiukajtis	add	%i2,TBL,%i2		! (1_0) addr = (char*)arr + hx;
338*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp3]		! (1_0) dlexp = *(double*)lexp;
339*25c28e83SPiotr Jasiukajtis
340*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f10,%f32		! (0_0) xx *= dtmp0;
341*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
342*25c28e83SPiotr Jasiukajtis	ldd	[%i2],%f10		! (1_0) dtmp0 = ((double*)addr)[0];
343*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (6_1) res += K5;
344*25c28e83SPiotr Jasiukajtis
345*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
346*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f0		! (3_0) ((float*)res)[0] = ((float*)px)[0];
347*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (2_0) res = vis_for(res,DC1);
348*25c28e83SPiotr Jasiukajtis
349*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (2_0) hx >>= 10;
350*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (2_0) hx ? 0x7ff00000
351*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update4		! (2_0) if ( hx >= 0x7ff00000 )
352*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f1		! (3_0) ((float*)res)[1] = ((float*)px)[1];
353*25c28e83SPiotr Jasiukajtis.cont4:
354*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f26,%f40		! (6_1) res *= xx;
355*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (1_0) res_c = vis_fand(res_c,DC3);
356*25c28e83SPiotr Jasiukajtis
357*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f10		! (1_0) dtmp0 = dexp_hi * dexp_hi;
358*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (2_0) hx ? 0x00100000
359*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update5		! (2_0) if ( hx < 0x00100000 )
360*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
361*25c28e83SPiotr Jasiukajtis.cont5:
362*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (2_0) iexp = -iexp;
363*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
364*25c28e83SPiotr Jasiukajtis
365*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (2_0) iexp += 0x5fe;
366*25c28e83SPiotr Jasiukajtis	fpadd32	%f28,DC2,%f18		! (2_0) res_c = vis_fpadd32(res,DC2);
367*25c28e83SPiotr Jasiukajtis
368*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f32,%f62		! (0_0) res = K6 * xx;
369*25c28e83SPiotr Jasiukajtis	add	%o2,8,%i4		! (2_0) hx += 8;
370*25c28e83SPiotr Jasiukajtis	fsubd	%f44,%f8,%f6		! (1_0) xx = res - res_c;
371*25c28e83SPiotr Jasiukajtis
372*25c28e83SPiotr Jasiukajtis	faddd	%f40,K4,%f40		! (6_1) res += K4;
373*25c28e83SPiotr Jasiukajtis
374*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (3_0) hx = *(int*)px;
375*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (2_0) iexp << 52;
376*25c28e83SPiotr Jasiukajtis	and	%i4,-16,%i4		! (2_0) hx &= -16;
377*25c28e83SPiotr Jasiukajtis
378*25c28e83SPiotr Jasiukajtis	add	%i4,TBL,%i4		! (2_0) addr = (char*)arr + hx;
379*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp4]		! (2_0) dlexp = *(double*)lexp;
380*25c28e83SPiotr Jasiukajtis
381*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f10,%f38		! (1_0) xx *= dtmp0;
382*25c28e83SPiotr Jasiukajtis	ldd	[%i4],%f24		! (2_0) dtmp0 = ((double*)addr)[0];
383*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (0_0) res += K5;
384*25c28e83SPiotr Jasiukajtis
385*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f26,%f34		! (6_1) res *= xx;
386*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
387*25c28e83SPiotr Jasiukajtis
388*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
389*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f8		! (4_0) ((float*)res)[0] = ((float*)px)[0];
390*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (3_0) res = vis_for(res,DC1);
391*25c28e83SPiotr Jasiukajtis
392*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (3_0) hx >>= 10;
393*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (3_0) hx ? 0x7ff00000
394*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update6		! (3_0) if ( hx >= 0x7ff00000 )
395*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f9		! (4_0) ((float*)res)[1] = ((float*)px)[1];
396*25c28e83SPiotr Jasiukajtis.cont6:
397*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f32,%f60		! (0_0) res *= xx;
398*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (3_0) hx ? 0x00100000
399*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f22		! (2_0) res_c = vis_fand(res_c,DC3);
400*25c28e83SPiotr Jasiukajtis
401*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f24,%f24		! (2_0) dtmp0 = dexp_hi * dexp_hi;
402*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update7		! (3_0) if ( hx < 0x00100000 )
403*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
404*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f6		! (6_1) res += K3;
405*25c28e83SPiotr Jasiukajtis.cont7:
406*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (3_0) iexp = -iexp;
407*25c28e83SPiotr Jasiukajtis	fand	%f8,DC0,%f16		! (4_0) res = vis_fand(res,DC0);
408*25c28e83SPiotr Jasiukajtis
409*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (3_0) iexp += 0x5fe;
410*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (3_0) res_c = vis_fpadd32(res,DC2);
411*25c28e83SPiotr Jasiukajtis
412*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f38,%f62		! (1_0) res = K6 * xx;
413*25c28e83SPiotr Jasiukajtis	add	%o2,8,%i5		! (3_0) hx += 8;
414*25c28e83SPiotr Jasiukajtis	fsubd	%f28,%f22,%f28		! (2_0) xx = res - res_c;
415*25c28e83SPiotr Jasiukajtis
416*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f26,%f22		! (6_1) res *= xx;
417*25c28e83SPiotr Jasiukajtis	faddd	%f60,K4,%f60		! (0_0) res += K4;
418*25c28e83SPiotr Jasiukajtis
419*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (4_0) hx = *(int*)px;
420*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (3_0) iexp << 52;
421*25c28e83SPiotr Jasiukajtis	and	%i5,-16,%i5		! (3_0) hx &= -16;
422*25c28e83SPiotr Jasiukajtis
423*25c28e83SPiotr Jasiukajtis	add	%i5,TBL,%i5		! (3_0) addr = (char*)arr + hx;
424*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp5]		! (3_0) dlexp = *(double*)lexp;
425*25c28e83SPiotr Jasiukajtis
426*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f24,%f36		! (2_0) xx *= dtmp0;
427*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%i0		! px += stridex
428*25c28e83SPiotr Jasiukajtis	ldd	[%i5],%f28		! (3_0) dtmp0 = ((double*)addr)[0];
429*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (1_0) res += K5;
430*25c28e83SPiotr Jasiukajtis
431*25c28e83SPiotr Jasiukajtis	faddd	%f22,K2,%f10		! (6_1) res += K2;
432*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f32,%f34		! (0_0) res *= xx;
433*25c28e83SPiotr Jasiukajtis
434*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
435*25c28e83SPiotr Jasiukajtis	lda	[%i0]%asi,%f0		! (5_0) ((float*)res)[0] = ((float*)px)[0];
436*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f24		! (4_0) res = vis_for(res,DC1);
437*25c28e83SPiotr Jasiukajtis
438*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (4_0) hx >>= 10;
439*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (4_0) hx ? 0x7ff00000
440*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update8		! (4_0) if ( hx >= 0x7ff00000 )
441*25c28e83SPiotr Jasiukajtis	lda	[%i0+4]%asi,%f1		! (5_0) ((float*)res)[1] = ((float*)px)[1];
442*25c28e83SPiotr Jasiukajtis.cont8:
443*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f40		! (3_0) res_c = vis_fand(res_c,DC3);
444*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f38,%f62		! (1_0) res *= xx;
445*25c28e83SPiotr Jasiukajtis
446*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f26,%f58		! (6_1) res *= xx;
447*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (4_0) hx ? 0x00100000
448*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
449*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f60		! (0_0) res += K3;
450*25c28e83SPiotr Jasiukajtis
451*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f28,%f28		! (3_0) dtmp0 = dexp_hi * dexp_hi;
452*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update9		! (4_0) if ( hx < 0x00100000 )
453*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
454*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (5_0) res = vis_fand(res,DC0);
455*25c28e83SPiotr Jasiukajtis.cont9:
456*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (4_0) iexp += 0x5fe;
457*25c28e83SPiotr Jasiukajtis	fpadd32	%f24,DC2,%f18		! (4_0) res_c = vis_fpadd32(res,DC2);
458*25c28e83SPiotr Jasiukajtis
459*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f36,%f10		! (2_0) res = K6 * xx;
460*25c28e83SPiotr Jasiukajtis	add	%o2,8,%l1		! (4_0) hx += 8;
461*25c28e83SPiotr Jasiukajtis	fsubd	%f44,%f40,%f44		! (3_0) xx = res - res_c;
462*25c28e83SPiotr Jasiukajtis
463*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f32,%f60		! (0_0) res *= xx;
464*25c28e83SPiotr Jasiukajtis	faddd	%f62,K4,%f6		! (1_0) res += K4;
465*25c28e83SPiotr Jasiukajtis
466*25c28e83SPiotr Jasiukajtis	lda	[%i0]%asi,%g1		! (5_0) hx = *(int*)px;
467*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (4_0) iexp << 52;
468*25c28e83SPiotr Jasiukajtis	and	%l1,-16,%l1		! (4_0) hx &= -16;
469*25c28e83SPiotr Jasiukajtis	faddd	%f58,K1,%f58		! (6_1) res += K1;
470*25c28e83SPiotr Jasiukajtis
471*25c28e83SPiotr Jasiukajtis	add	%i0,stridex,%i1		! px += stridex
472*25c28e83SPiotr Jasiukajtis	add	%l1,TBL,%l1		! (4_0) addr = (char*)arr + hx;
473*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp6]		! (4_0) dlexp = *(double*)lexp;
474*25c28e83SPiotr Jasiukajtis
475*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f28,%f40		! (3_0) xx *= dtmp0;
476*25c28e83SPiotr Jasiukajtis	ldd	[%l1],%f44		! (4_0) dtmp0 = ((double*)addr)[0];
477*25c28e83SPiotr Jasiukajtis	faddd	%f10,K5,%f62		! (2_0) res += K5;
478*25c28e83SPiotr Jasiukajtis
479*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f38,%f34		! (1_0) res *= xx;
480*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
481*25c28e83SPiotr Jasiukajtis	nop
482*25c28e83SPiotr Jasiukajtis	faddd	%f60,K2,%f60		! (0_0) res += K2;
483*25c28e83SPiotr Jasiukajtis
484*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (5_0) res = vis_for(res,DC1);
485*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;
486*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%f6		! (6_0) ((float*)res)[0] = ((float*)px)[0];
487*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f26,%f26		! (6_1) res *= xx;
488*25c28e83SPiotr Jasiukajtis
489*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (5_0) hx >>= 10;
490*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (5_0) hx ? 0x7ff00000
491*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update10		! (5_0) if ( hx >= 0x7ff00000 )
492*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%f7		! (6_0) ((float*)res)[1] = ((float*)px)[1];
493*25c28e83SPiotr Jasiukajtis.cont10:
494*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (4_0) res_c = vis_fand(res_c,DC3);
495*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f36,%f62		! (2_0) res *= xx;
496*25c28e83SPiotr Jasiukajtis
497*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f32,%f58		! (0_0) res *= xx;
498*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (5_0) hx ? 0x00100000
499*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
500*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (1_0) res += K3;
501*25c28e83SPiotr Jasiukajtis
502*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f26,%f26		! (6_1) res = dexp_hi * res;
503*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update11		! (5_0) if ( hx < 0x00100000 )
504*25c28e83SPiotr Jasiukajtis	nop
505*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
506*25c28e83SPiotr Jasiukajtis.cont11:
507*25c28e83SPiotr Jasiukajtis	ldd	[%l4+8],%f60		! (6_1) dexp_lo = ((double*)addr)[1];
508*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f44,%f44		! (4_0) dtmp0 = dexp_hi * dexp_hi;
509*25c28e83SPiotr Jasiukajtis	fpadd32	%f28,DC2,%f18		! (5_0) res_c = vis_fpadd32(res,DC2);
510*25c28e83SPiotr Jasiukajtis
511*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f40,%f22		! (3_0) res = K6 * xx;
512*25c28e83SPiotr Jasiukajtis	add	%o2,8,%i3		! (5_0) hx += 8;
513*25c28e83SPiotr Jasiukajtis	fsubd	%f24,%f8,%f10		! (4_0) xx = res - res_c;
514*25c28e83SPiotr Jasiukajtis
515*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f38,%f24		! (1_0) res *= xx;
516*25c28e83SPiotr Jasiukajtis	or	%g0,%o4,%i0
517*25c28e83SPiotr Jasiukajtis
518*25c28e83SPiotr Jasiukajtis	cmp	counter,7
519*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.tail
520*25c28e83SPiotr Jasiukajtis	faddd	%f62,K4,%f34		! (2_0) res += K4;
521*25c28e83SPiotr Jasiukajtis
522*25c28e83SPiotr Jasiukajtis	ba	.main_loop
523*25c28e83SPiotr Jasiukajtis	sub	counter,7,counter	! counter
524*25c28e83SPiotr Jasiukajtis
525*25c28e83SPiotr Jasiukajtis	.align	16
526*25c28e83SPiotr Jasiukajtis.main_loop:
527*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (5_0) iexp += 0x5fe;
528*25c28e83SPiotr Jasiukajtis	and	%i3,-16,%i3		! (5_1) hx &= -16;
529*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%g1		! (6_1) hx = *(int*)px;
530*25c28e83SPiotr Jasiukajtis	faddd	%f58,K1,%f58		! (0_1) res += K1;
531*25c28e83SPiotr Jasiukajtis
532*25c28e83SPiotr Jasiukajtis	add	%i3,TBL,%i3		! (5_1) addr = (char*)arr + hx;
533*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (5_1) iexp << 52;
534*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp0]		! (5_1) dlexp = *(double*)lexp;
535*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f60,%f8		! (6_2) res += dexp_lo;
536*25c28e83SPiotr Jasiukajtis
537*25c28e83SPiotr Jasiukajtis	faddd	%f22,K5,%f62		! (3_1) res += K5;
538*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%l6		! px += stridex
539*25c28e83SPiotr Jasiukajtis	ldd	[%i3],%f22		! (5_1) dtmp0 = ((double*)addr)[0];
540*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f44,%f60		! (4_1) xx *= dtmp0;
541*25c28e83SPiotr Jasiukajtis
542*25c28e83SPiotr Jasiukajtis	faddd	%f24,K2,%f26		! (1_1) res += K2;
543*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i1		! px += stridey
544*25c28e83SPiotr Jasiukajtis	ldd	[%l2],%f24		! (0_1) dexp_hi = ((double*)addr)[0];
545*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f36,%f34		! (2_1) res *= xx;
546*25c28e83SPiotr Jasiukajtis
547*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f32,%f58		! (0_1) res *= xx;
548*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
549*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f0		! (0_0) ((float*)res)[0] = ((float*)px)[0];
550*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (6_1) res = vis_for(res,DC1);
551*25c28e83SPiotr Jasiukajtis
552*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f1		! (0_0) ((float*)res)[1] = ((float*)px)[1];
553*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (6_1) hx >>= 10;
554*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f22,%f10		! (5_1) dtmp0 = dexp_hi * dexp_hi;
555*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f30,%f30		! (6_2) res += dexp_hi;
556*25c28e83SPiotr Jasiukajtis
557*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f40,%f32		! (3_1) res *= xx;
558*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (6_1) hx ? 0x7ff00000
559*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp1],%f62		! (6_2) dlexp = *(double*)lexp;
560*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (5_1) res_c = vis_fand(res_c,DC3);
561*25c28e83SPiotr Jasiukajtis
562*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f38,%f26		! (1_1) res *= xx;
563*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update12		! (6_1) if ( hx >= 0x7ff00000 )
564*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
565*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (2_1) res += K3;
566*25c28e83SPiotr Jasiukajtis.cont12:
567*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f58,%f58		! (0_1) res = dexp_hi * res;
568*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (6_1) hx ? 0x00100000
569*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
570*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (0_0) res = vis_fand(res,DC0);
571*25c28e83SPiotr Jasiukajtis
572*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f62,%f2		! (6_2) res *= dlexp;
573*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update13		! (6_1) if ( hx < 0x00100000 )
574*25c28e83SPiotr Jasiukajtis	ldd	[%l2+8],%f30		! (0_1) dexp_lo = ((double*)addr)[1];
575*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
576*25c28e83SPiotr Jasiukajtis.cont13:
577*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f60,%f62		! (4_1) res = K6 * xx;
578*25c28e83SPiotr Jasiukajtis	add	%o2,8,%l4		! (6_1) hx += 8;
579*25c28e83SPiotr Jasiukajtis	st	%f2,[%i0]		! (6_2) ((float*)py)[0] = ((float*)res)[0];
580*25c28e83SPiotr Jasiukajtis	fsubd	%f28,%f8,%f6		! (5_1) xx = res - res_c;
581*25c28e83SPiotr Jasiukajtis
582*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f36,%f28		! (2_1) res *= xx;
583*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (6_1) iexp += 0x5fe;
584*25c28e83SPiotr Jasiukajtis	st	%f3,[%i0+4]		! (6_2) ((float*)py)[1] = ((float*)res)[1];
585*25c28e83SPiotr Jasiukajtis	faddd	%f32,K4,%f32		! (3_1) res += K4;
586*25c28e83SPiotr Jasiukajtis
587*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (0_0) hx = *(int*)px;
588*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (6_1) iexp << 52;
589*25c28e83SPiotr Jasiukajtis	and	%l4,-16,%l4		! (6_1) hx = -16;
590*25c28e83SPiotr Jasiukajtis	faddd	%f26,K1,%f26		! (1_1) res += K1;
591*25c28e83SPiotr Jasiukajtis
592*25c28e83SPiotr Jasiukajtis	add	%i1,stridey,%i0		! px += stridey
593*25c28e83SPiotr Jasiukajtis	add	%l4,TBL,%l4		! (6_1) addr = (char*)arr + hx;
594*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp1]		! (6_1) dlexp = *(double*)lexp;
595*25c28e83SPiotr Jasiukajtis	faddd	%f58,%f30,%f8		! (0_1) res += dexp_lo;
596*25c28e83SPiotr Jasiukajtis
597*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f10,%f58		! (5_1) xx *= dtmp0;
598*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
599*25c28e83SPiotr Jasiukajtis	ldd	[%l4],%f30		! (6_1) dtmp0 = ((double*)addr)[0];
600*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (4_1) res += K5;
601*25c28e83SPiotr Jasiukajtis
602*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f40,%f34		! (3_1) res *= xx;
603*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (0_0) hx >>= 10;
604*25c28e83SPiotr Jasiukajtis	ldd	[%i2],%f4		! (1_1) dexp_hi = ((double*)addr)[0];
605*25c28e83SPiotr Jasiukajtis	faddd	%f28,K2,%f32		! (2_1) res += K2;
606*25c28e83SPiotr Jasiukajtis
607*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f38,%f26		! (1_1) res *= xx;
608*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
609*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f6		! (1_0) ((float*)res)[0] = ((float*)px)[0];
610*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (0_0) res = vis_for(res,DC1);
611*25c28e83SPiotr Jasiukajtis
612*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f30,%f30		! (6_1) dtmp0 = dexp_hi * dexp_hi;
613*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
614*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f7		! (1_0) ((float*)res)[1] = ((float*)px)[1];
615*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f24,%f24		! (0_1) res += dexp_hi;
616*25c28e83SPiotr Jasiukajtis
617*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f60,%f38		! (4_1) res *= xx;
618*25c28e83SPiotr Jasiukajtis