1*25c28e83SPiotr Jasiukajtis/*
2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis *
4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis *
8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis *
13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis *
19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis */
21*25c28e83SPiotr Jasiukajtis/*
22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23*25c28e83SPiotr Jasiukajtis */
24*25c28e83SPiotr Jasiukajtis/*
25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms.
27*25c28e83SPiotr Jasiukajtis */
28*25c28e83SPiotr Jasiukajtis
29*25c28e83SPiotr Jasiukajtis	.file	"__vrhypot.S"
30*25c28e83SPiotr Jasiukajtis
31*25c28e83SPiotr Jasiukajtis#include "libm.h"
32*25c28e83SPiotr Jasiukajtis
33*25c28e83SPiotr Jasiukajtis	RO_DATA
34*25c28e83SPiotr Jasiukajtis	.align	64
35*25c28e83SPiotr Jasiukajtis
/*
 * .CONST_TBL - read-only data used by __vrhypot.
 *
 * First part: 128 32-bit words (512 bytes == TBL_SHIFT) holding seeds for
 * the reciprocal approximation.  Entry i is the high word of the double
 * 2**1023 / (1 + i/128); e.g. entry 0 is 0x7fe00000, entry 64 is
 * 0x7fd55555 (1/1.5) and entry 127 is 0x7fd01010 (128/255).
 *
 * The code forms a byte offset into this table from the high word of
 * dres with (hi >> 11) & 0x1fc, i.e. 4 * (top 7 mantissa bits), loads the
 * word, and subtracts the sign/exponent field of dres (dres & DA1) from it
 * with fpsub32 to obtain the first estimate dd of 1/dres.  That estimate
 * is then refined by the dd *= (DTWO - dd * dres) steps shown in the
 * algorithm description further down in this file.
 */
36*25c28e83SPiotr Jasiukajtis.CONST_TBL:
37*25c28e83SPiotr Jasiukajtis	.word	0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
38*25c28e83SPiotr Jasiukajtis	.word	0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
39*25c28e83SPiotr Jasiukajtis	.word	0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
40*25c28e83SPiotr Jasiukajtis	.word	0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
41*25c28e83SPiotr Jasiukajtis	.word	0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
42*25c28e83SPiotr Jasiukajtis	.word	0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
43*25c28e83SPiotr Jasiukajtis	.word	0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
44*25c28e83SPiotr Jasiukajtis	.word	0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
45*25c28e83SPiotr Jasiukajtis	.word	0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
46*25c28e83SPiotr Jasiukajtis	.word	0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
47*25c28e83SPiotr Jasiukajtis	.word	0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
48*25c28e83SPiotr Jasiukajtis	.word	0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
49*25c28e83SPiotr Jasiukajtis	.word	0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
50*25c28e83SPiotr Jasiukajtis	.word	0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
51*25c28e83SPiotr Jasiukajtis	.word	0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
52*25c28e83SPiotr Jasiukajtis	.word	0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
53*25c28e83SPiotr Jasiukajtis	.word	0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
54*25c28e83SPiotr Jasiukajtis	.word	0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
55*25c28e83SPiotr Jasiukajtis	.word	0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
56*25c28e83SPiotr Jasiukajtis	.word	0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
57*25c28e83SPiotr Jasiukajtis	.word	0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
58*25c28e83SPiotr Jasiukajtis	.word	0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
59*25c28e83SPiotr Jasiukajtis	.word	0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
60*25c28e83SPiotr Jasiukajtis	.word	0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
61*25c28e83SPiotr Jasiukajtis	.word	0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
62*25c28e83SPiotr Jasiukajtis	.word	0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
63*25c28e83SPiotr Jasiukajtis	.word	0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
64*25c28e83SPiotr Jasiukajtis	.word	0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
65*25c28e83SPiotr Jasiukajtis	.word	0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
66*25c28e83SPiotr Jasiukajtis	.word	0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
67*25c28e83SPiotr Jasiukajtis	.word	0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
68*25c28e83SPiotr Jasiukajtis	.word	0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,
69*25c28e83SPiotr Jasiukajtis
/*
 * Second part: 8-byte constants, each written as (high word, low word).
 * They start at byte offset TBL_SHIFT (512) from .CONST_TBL:
 *
 *   TBL_SHIFT+0   D2ON36    2**36, added to and subtracted from x0/y0 to
 *                           split off x_hi0/y_hi0
 *   TBL_SHIFT+8   DA0       bit mask 0xffffff0000000000 (sign, exponent and
 *                           the top 12 mantissa bits), used as
 *                           res0 = vis_fand(dres, DA0)
 *   TBL_SHIFT+16  DA1       bit mask 0xfff0000000000000 (sign and exponent),
 *                           used as dexp0 = vis_fand(dres, DA1)
 *   TBL_SHIFT+24  DONE      1.0
 *   TBL_SHIFT+32  DTWO      2.0
 *   TBL_SHIFT+40  D2ON1022  0x7fd0000000000000 = 2**1022; the scl0 value the
 *                           algorithm description assigns on the
 *                           subnormal-input path
 *   TBL_SHIFT+48  D2ONM52   2**-52
 *   TBL_SHIFT+56  D2ON51    2**51
 *   TBL_SHIFT+64  mask 0x0007ffffffffffff, applied with vis_fand to
 *                 subnormal inputs in the algorithm description
 *
 * The first five are loaded into FP registers by the function prologue;
 * the remaining four are referenced only by the subnormal handling in the
 * algorithm description (D2ON51, D2ONM52, the mask, and the 2**1022 scale).
 */
70*25c28e83SPiotr Jasiukajtis	.word	0x42300000, 0		! D2ON36 = 2**36
71*25c28e83SPiotr Jasiukajtis	.word	0xffffff00, 0		! DA0
72*25c28e83SPiotr Jasiukajtis	.word	0xfff00000, 0		! DA1
73*25c28e83SPiotr Jasiukajtis	.word	0x3ff00000, 0		! DONE = 1.0
74*25c28e83SPiotr Jasiukajtis	.word	0x40000000, 0		! DTWO = 2.0
75*25c28e83SPiotr Jasiukajtis	.word	0x7fd00000, 0		! D2ON1022
76*25c28e83SPiotr Jasiukajtis	.word	0x3cb00000, 0		! D2ONM52
77*25c28e83SPiotr Jasiukajtis	.word	0x43200000, 0		! D2ON51
78*25c28e83SPiotr Jasiukajtis	.word	0x0007ffff, 0xffffffff	! 0x0007ffffffffffff
79*25c28e83SPiotr Jasiukajtis
/*
 * Register aliases and stack-slot offsets used by __vrhypot.
 *
 * stridex/stridey/stridez: strides for the x, y and z vectors.  The
 * prologue shifts each incoming stride left by 3 (multiplies it by 8)
 * before storing it here, so these are added directly to the pointers.
 */
80*25c28e83SPiotr Jasiukajtis#define stridex		%l2
81*25c28e83SPiotr Jasiukajtis#define stridey		%l3
82*25c28e83SPiotr Jasiukajtis#define stridez		%l5
83*25c28e83SPiotr Jasiukajtis
/*
 * TBL_SHIFT: byte offset from .CONST_TBL to the 8-byte constants that
 * follow the 128-word (512-byte) reciprocal seed table.
 */
84*25c28e83SPiotr Jasiukajtis#define TBL_SHIFT	512
85*25c28e83SPiotr Jasiukajtis
/*
 * TBL: address of .CONST_TBL (set up with PIC_SET in the prologue).
 * counter: number of elements still to process; reloaded from the
 * tmp_counter stack slot at .begin.
 */
86*25c28e83SPiotr Jasiukajtis#define TBL		%l1
87*25c28e83SPiotr Jasiukajtis#define counter		%l4
88*25c28e83SPiotr Jasiukajtis
/*
 * Integer constants kept in registers for the whole routine.  Each alias
 * is named after the value the prologue puts in it (_0x7fffffff is built
 * as sethi %hi(0x7ffffc00) followed by add 1023).  They are compared with
 * or ANDed into the high words of x and y.
 */
89*25c28e83SPiotr Jasiukajtis#define _0x7ff00000	%l0
90*25c28e83SPiotr Jasiukajtis#define _0x00100000	%o5
91*25c28e83SPiotr Jasiukajtis#define _0x7fffffff	%l6
92*25c28e83SPiotr Jasiukajtis
/*
 * Double-precision constants held in FP registers; the prologue loads them
 * from TBL+TBL_SHIFT+{0,32,24,8,16} respectively.
 */
93*25c28e83SPiotr Jasiukajtis#define D2ON36		%f4
94*25c28e83SPiotr Jasiukajtis#define DTWO		%f6
95*25c28e83SPiotr Jasiukajtis#define DONE		%f8
96*25c28e83SPiotr Jasiukajtis#define DA0		%f58
97*25c28e83SPiotr Jasiukajtis#define DA1		%f56
98*25c28e83SPiotr Jasiukajtis
/*
 * dtmp0..dtmp15: sixteen consecutive 8-byte scratch slots addressed
 * relative to %fp.  The code writes 64-bit scale-factor bit patterns here
 * with stx and reads them back with ldd to move them from an integer
 * register into an FP register.
 */
99*25c28e83SPiotr Jasiukajtis#define dtmp0		STACK_BIAS-0x80
100*25c28e83SPiotr Jasiukajtis#define dtmp1		STACK_BIAS-0x78
101*25c28e83SPiotr Jasiukajtis#define dtmp2		STACK_BIAS-0x70
102*25c28e83SPiotr Jasiukajtis#define dtmp3		STACK_BIAS-0x68
103*25c28e83SPiotr Jasiukajtis#define dtmp4		STACK_BIAS-0x60
104*25c28e83SPiotr Jasiukajtis#define dtmp5		STACK_BIAS-0x58
105*25c28e83SPiotr Jasiukajtis#define dtmp6		STACK_BIAS-0x50
106*25c28e83SPiotr Jasiukajtis#define dtmp7		STACK_BIAS-0x48
107*25c28e83SPiotr Jasiukajtis#define dtmp8		STACK_BIAS-0x40
108*25c28e83SPiotr Jasiukajtis#define dtmp9		STACK_BIAS-0x38
109*25c28e83SPiotr Jasiukajtis#define dtmp10		STACK_BIAS-0x30
110*25c28e83SPiotr Jasiukajtis#define dtmp11		STACK_BIAS-0x28
111*25c28e83SPiotr Jasiukajtis#define dtmp12		STACK_BIAS-0x20
112*25c28e83SPiotr Jasiukajtis#define dtmp13		STACK_BIAS-0x18
113*25c28e83SPiotr Jasiukajtis#define dtmp14		STACK_BIAS-0x10
114*25c28e83SPiotr Jasiukajtis#define dtmp15		STACK_BIAS-0x08
115*25c28e83SPiotr Jasiukajtis
/*
 * ftmp0: scratch word used to pass the high 32 bits of dres from an FP
 * register to an integer register (st %f.. / ld ..) for the table lookup.
 * tmp_px, tmp_py: saved x and y pointers (written with stx, read back at
 * .begin).
 * tmp_counter: saved element count (written with st, read back at .begin,
 * then zeroed).
 */
116*25c28e83SPiotr Jasiukajtis#define ftmp0		STACK_BIAS-0x100
117*25c28e83SPiotr Jasiukajtis#define tmp_px		STACK_BIAS-0x98
118*25c28e83SPiotr Jasiukajtis#define tmp_py		STACK_BIAS-0x90
119*25c28e83SPiotr Jasiukajtis#define tmp_counter	STACK_BIAS-0x88
120*25c28e83SPiotr Jasiukajtis
/*
 * tmps is the number of bytes of scratch space reserved in the frame in
 * addition to SA(MINFRAME) by the save instruction at function entry; it
 * covers the lowest slot above (ftmp0 at -0x100).
 */
121*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9
122*25c28e83SPiotr Jasiukajtis#define tmps		0x100
123*25c28e83SPiotr Jasiukajtis
124*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
125*25c28e83SPiotr Jasiukajtis!      !!!!!   algorithm   !!!!!
126*25c28e83SPiotr Jasiukajtis!  hx0 = *(int*)px;
127*25c28e83SPiotr Jasiukajtis!  hy0 = *(int*)py;
128*25c28e83SPiotr Jasiukajtis!
129*25c28e83SPiotr Jasiukajtis!  ((float*)&x0)[0] = ((float*)px)[0];
130*25c28e83SPiotr Jasiukajtis!  ((float*)&x0)[1] = ((float*)px)[1];
131*25c28e83SPiotr Jasiukajtis!  ((float*)&y0)[0] = ((float*)py)[0];
132*25c28e83SPiotr Jasiukajtis!  ((float*)&y0)[1] = ((float*)py)[1];
133*25c28e83SPiotr Jasiukajtis!
134*25c28e83SPiotr Jasiukajtis!  hx0 &= 0x7fffffff;
135*25c28e83SPiotr Jasiukajtis!  hy0 &= 0x7fffffff;
136*25c28e83SPiotr Jasiukajtis!
137*25c28e83SPiotr Jasiukajtis!  diff0 = hy0 - hx0;
138*25c28e83SPiotr Jasiukajtis!  j0 = diff0 >> 31;
139*25c28e83SPiotr Jasiukajtis!  j0 &= diff0;
140*25c28e83SPiotr Jasiukajtis!  j0 = hy0 - j0;
141*25c28e83SPiotr Jasiukajtis!  j0 &= 0x7ff00000;
142*25c28e83SPiotr Jasiukajtis!
143*25c28e83SPiotr Jasiukajtis!  j0 = 0x7ff00000 - j0;
144*25c28e83SPiotr Jasiukajtis!  ll = (long long)j0 << 32;
145*25c28e83SPiotr Jasiukajtis!  *(long long*)&scl0 = ll;
146*25c28e83SPiotr Jasiukajtis!
147*25c28e83SPiotr Jasiukajtis!  if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 )
148*25c28e83SPiotr Jasiukajtis!  {
149*25c28e83SPiotr Jasiukajtis!    lx = ((int*)px)[1];
150*25c28e83SPiotr Jasiukajtis!    ly = ((int*)py)[1];
151*25c28e83SPiotr Jasiukajtis!
152*25c28e83SPiotr Jasiukajtis!    if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0;
153*25c28e83SPiotr Jasiukajtis!    else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0;
154*25c28e83SPiotr Jasiukajtis!    else res0 = fabs(x0) * fabs(y0);
155*25c28e83SPiotr Jasiukajtis!
156*25c28e83SPiotr Jasiukajtis!    ((float*)pz)[0] = ((float*)&res0)[0];
157*25c28e83SPiotr Jasiukajtis!    ((float*)pz)[1] = ((float*)&res0)[1];
158*25c28e83SPiotr Jasiukajtis!
159*25c28e83SPiotr Jasiukajtis!    px += stridex;
160*25c28e83SPiotr Jasiukajtis!    py += stridey;
161*25c28e83SPiotr Jasiukajtis!    pz += stridez;
162*25c28e83SPiotr Jasiukajtis!    continue;
163*25c28e83SPiotr Jasiukajtis!  }
164*25c28e83SPiotr Jasiukajtis!  if ( hx0 <  0x00100000 && hy0 <  0x00100000 )
165*25c28e83SPiotr Jasiukajtis!  {
166*25c28e83SPiotr Jasiukajtis!    lx = ((int*)px)[1];
167*25c28e83SPiotr Jasiukajtis!    ly = ((int*)py)[1];
168*25c28e83SPiotr Jasiukajtis!    ii = hx0 | hy0;
169*25c28e83SPiotr Jasiukajtis!    ii |= lx;
170*25c28e83SPiotr Jasiukajtis!    ii |= ly;
171*25c28e83SPiotr Jasiukajtis!    if ( ii == 0 )
172*25c28e83SPiotr Jasiukajtis!    {
173*25c28e83SPiotr Jasiukajtis!      res0 = 1.0 / 0.0;
174*25c28e83SPiotr Jasiukajtis!      ((float*)pz)[0] = ((float*)&res0)[0];
175*25c28e83SPiotr Jasiukajtis!      ((float*)pz)[1] = ((float*)&res0)[1];
176*25c28e83SPiotr Jasiukajtis!
177*25c28e83SPiotr Jasiukajtis!      px += stridex;
178*25c28e83SPiotr Jasiukajtis!      py += stridey;
179*25c28e83SPiotr Jasiukajtis!      pz += stridez;
180*25c28e83SPiotr Jasiukajtis!      continue;
181*25c28e83SPiotr Jasiukajtis!    }
182*25c28e83SPiotr Jasiukajtis!    x0 = fabs(x0);
183*25c28e83SPiotr Jasiukajtis!    y0 = fabs(y0);
184*25c28e83SPiotr Jasiukajtis!    if ( hx0 < 0x00080000 )
185*25c28e83SPiotr Jasiukajtis!    {
186*25c28e83SPiotr Jasiukajtis!      x0 = *(long long*)&x0;
187*25c28e83SPiotr Jasiukajtis!    }
188*25c28e83SPiotr Jasiukajtis!    else
189*25c28e83SPiotr Jasiukajtis!    {
190*25c28e83SPiotr Jasiukajtis!      ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
191*25c28e83SPiotr Jasiukajtis!      x0 = vis_fand(x0, dtmp0);
192*25c28e83SPiotr Jasiukajtis!      x0 = *(long long*)&x0;
193*25c28e83SPiotr Jasiukajtis!      x0 += D2ON51;
194*25c28e83SPiotr Jasiukajtis!    }
195*25c28e83SPiotr Jasiukajtis!    x0 *= D2ONM52;
196*25c28e83SPiotr Jasiukajtis!    if ( hy0 < 0x00080000 )
197*25c28e83SPiotr Jasiukajtis!    {
198*25c28e83SPiotr Jasiukajtis!      y0 = *(long long*)&y0;
199*25c28e83SPiotr Jasiukajtis!    }
200*25c28e83SPiotr Jasiukajtis!    else
201*25c28e83SPiotr Jasiukajtis!    {
202*25c28e83SPiotr Jasiukajtis!      ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
203*25c28e83SPiotr Jasiukajtis!      y0 = vis_fand(y0, dtmp0);
204*25c28e83SPiotr Jasiukajtis!      y0 = *(long long*)&y0;
205*25c28e83SPiotr Jasiukajtis!      y0 += D2ON51;
206*25c28e83SPiotr Jasiukajtis!    }
207*25c28e83SPiotr Jasiukajtis!    y0 *= D2ONM52;
208*25c28e83SPiotr Jasiukajtis!    *(long long*)&scl0 = 0x7fd0000000000000ULL;
209*25c28e83SPiotr Jasiukajtis!  }
210*25c28e83SPiotr Jasiukajtis!  else
211*25c28e83SPiotr Jasiukajtis!  {
212*25c28e83SPiotr Jasiukajtis!    x0 *= scl0;
213*25c28e83SPiotr Jasiukajtis!    y0 *= scl0;
214*25c28e83SPiotr Jasiukajtis!  }
215*25c28e83SPiotr Jasiukajtis!
216*25c28e83SPiotr Jasiukajtis!  x_hi0 = x0 + D2ON36;
217*25c28e83SPiotr Jasiukajtis!  y_hi0 = y0 + D2ON36;
218*25c28e83SPiotr Jasiukajtis!  x_hi0 -= D2ON36;
219*25c28e83SPiotr Jasiukajtis!  y_hi0 -= D2ON36;
220*25c28e83SPiotr Jasiukajtis!  x_lo0 = x0 - x_hi0;
221*25c28e83SPiotr Jasiukajtis!  y_lo0 = y0 - y_hi0;
222*25c28e83SPiotr Jasiukajtis!  res0_hi = x_hi0 * x_hi0;
223*25c28e83SPiotr Jasiukajtis!  dtmp0 = y_hi0 * y_hi0;
224*25c28e83SPiotr Jasiukajtis!  res0_hi += dtmp0;
225*25c28e83SPiotr Jasiukajtis!  res0_lo = x0 + x_hi0;
226*25c28e83SPiotr Jasiukajtis!  res0_lo *= x_lo0;
227*25c28e83SPiotr Jasiukajtis!  dtmp1 = y0 + y_hi0;
228*25c28e83SPiotr Jasiukajtis!  dtmp1 *= y_lo0;
229*25c28e83SPiotr Jasiukajtis!  res0_lo += dtmp1;
230*25c28e83SPiotr Jasiukajtis!
231*25c28e83SPiotr Jasiukajtis!  dres = res0_hi + res0_lo;
232*25c28e83SPiotr Jasiukajtis!  dexp0 = vis_fand(dres,DA1);
233*25c28e83SPiotr Jasiukajtis!  iarr = ((int*)&dres)[0];
234*25c28e83SPiotr Jasiukajtis!
235*25c28e83SPiotr Jasiukajtis!  iarr >>= 11;
236*25c28e83SPiotr Jasiukajtis!  iarr &= 0x1fc;
237*25c28e83SPiotr Jasiukajtis!  dtmp0 = ((double*)((char*)dll1 + iarr))[0];
238*25c28e83SPiotr Jasiukajtis!  dd = vis_fpsub32(dtmp0, dexp0);
239*25c28e83SPiotr Jasiukajtis!
240*25c28e83SPiotr Jasiukajtis!  dtmp0 = dd * dres;
241*25c28e83SPiotr Jasiukajtis!  dtmp0 = DTWO - dtmp0;
242*25c28e83SPiotr Jasiukajtis!  dd *= dtmp0;
243*25c28e83SPiotr Jasiukajtis!  dtmp1 = dd * dres;
244*25c28e83SPiotr Jasiukajtis!  dtmp1 = DTWO - dtmp1;
245*25c28e83SPiotr Jasiukajtis!  dd *= dtmp1;
246*25c28e83SPiotr Jasiukajtis!  dtmp2 = dd * dres;
247*25c28e83SPiotr Jasiukajtis!  dtmp2 = DTWO - dtmp2;
248*25c28e83SPiotr Jasiukajtis!  dres = dd * dtmp2;
249*25c28e83SPiotr Jasiukajtis!
250*25c28e83SPiotr Jasiukajtis!  res0 = vis_fand(dres,DA0);
251*25c28e83SPiotr Jasiukajtis!
252*25c28e83SPiotr Jasiukajtis!  dtmp0 = res0_hi * res0;
253*25c28e83SPiotr Jasiukajtis!  dtmp0 = DONE - dtmp0;
254*25c28e83SPiotr Jasiukajtis!  dtmp1 = res0_lo * res0;
255*25c28e83SPiotr Jasiukajtis!  dtmp0 -= dtmp1;
256*25c28e83SPiotr Jasiukajtis!  dtmp0 *= dres;
257*25c28e83SPiotr Jasiukajtis!  res0 += dtmp0;
258*25c28e83SPiotr Jasiukajtis!
259*25c28e83SPiotr Jasiukajtis!  res0 = sqrt ( res0 );
260*25c28e83SPiotr Jasiukajtis!
261*25c28e83SPiotr Jasiukajtis!  res0 = scl0 * res0;
262*25c28e83SPiotr Jasiukajtis!
263*25c28e83SPiotr Jasiukajtis!  ((float*)pz)[0] = ((float*)&res0)[0];
264*25c28e83SPiotr Jasiukajtis!  ((float*)pz)[1] = ((float*)&res0)[1];
265*25c28e83SPiotr Jasiukajtis!
266*25c28e83SPiotr Jasiukajtis!  px += stridex;
267*25c28e83SPiotr Jasiukajtis!  py += stridey;
268*25c28e83SPiotr Jasiukajtis!  pz += stridez;
269*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
270*25c28e83SPiotr Jasiukajtis
271*25c28e83SPiotr Jasiukajtis	ENTRY(__vrhypot)
272*25c28e83SPiotr Jasiukajtis	save	%sp,-SA(MINFRAME)-tmps,%sp
273*25c28e83SPiotr Jasiukajtis	PIC_SETUP(l7)
274*25c28e83SPiotr Jasiukajtis	PIC_SET(l7,.CONST_TBL,l1)
275*25c28e83SPiotr Jasiukajtis	wr	%g0,0x82,%asi
276*25c28e83SPiotr Jasiukajtis
277*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9
278*25c28e83SPiotr Jasiukajtis	ldx	[%fp+STACK_BIAS+176],stridez
279*25c28e83SPiotr Jasiukajtis#else
280*25c28e83SPiotr Jasiukajtis	ld	[%fp+STACK_BIAS+92],stridez
281*25c28e83SPiotr Jasiukajtis#endif
282*25c28e83SPiotr Jasiukajtis
283*25c28e83SPiotr Jasiukajtis	sll	%i2,3,stridex
284*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ff00000),_0x7ff00000
285*25c28e83SPiotr Jasiukajtis	st	%i0,[%fp+tmp_counter]
286*25c28e83SPiotr Jasiukajtis
287*25c28e83SPiotr Jasiukajtis	sll	%i4,3,stridey
288*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00100000),_0x00100000
289*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
290*25c28e83SPiotr Jasiukajtis
291*25c28e83SPiotr Jasiukajtis	sll	stridez,3,stridez
292*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),_0x7fffffff
293*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]
294*25c28e83SPiotr Jasiukajtis
295*25c28e83SPiotr Jasiukajtis	ldd	[TBL+TBL_SHIFT],D2ON36
296*25c28e83SPiotr Jasiukajtis	add	_0x7fffffff,1023,_0x7fffffff
297*25c28e83SPiotr Jasiukajtis
298*25c28e83SPiotr Jasiukajtis	ldd	[TBL+TBL_SHIFT+8],DA0
299*25c28e83SPiotr Jasiukajtis
300*25c28e83SPiotr Jasiukajtis	ldd	[TBL+TBL_SHIFT+16],DA1
301*25c28e83SPiotr Jasiukajtis
302*25c28e83SPiotr Jasiukajtis	ldd	[TBL+TBL_SHIFT+24],DONE
303*25c28e83SPiotr Jasiukajtis
304*25c28e83SPiotr Jasiukajtis	ldd	[TBL+TBL_SHIFT+32],DTWO
305*25c28e83SPiotr Jasiukajtis
306*25c28e83SPiotr Jasiukajtis.begin:
307*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp_counter],counter
308*25c28e83SPiotr Jasiukajtis	ldx	[%fp+tmp_px],%i4
309*25c28e83SPiotr Jasiukajtis	ldx	[%fp+tmp_py],%i3
310*25c28e83SPiotr Jasiukajtis	st	%g0,[%fp+tmp_counter]
311*25c28e83SPiotr Jasiukajtis.begin1:
312*25c28e83SPiotr Jasiukajtis	cmp	counter,0
313*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.exit
314*25c28e83SPiotr Jasiukajtis
315*25c28e83SPiotr Jasiukajtis	lda	[%i4]0x82,%o1		! (7_0) hx0 = *(int*)px;
316*25c28e83SPiotr Jasiukajtis	add	%i4,stridex,%i1
317*25c28e83SPiotr Jasiukajtis
318*25c28e83SPiotr Jasiukajtis	lda	[%i3]0x82,%o4		! (7_0) hy0 = *(int*)py;
319*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i0		! py += stridey
320*25c28e83SPiotr Jasiukajtis
321*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (7_0) hx0 &= 0x7fffffff;
322*25c28e83SPiotr Jasiukajtis
323*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (7_0) hx0 ? 0x7ff00000
324*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.spec0		! (7_0) if ( hx0 >= 0x7ff00000 )
325*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (7_0) hy0 &= 0x7fffffff;
326*25c28e83SPiotr Jasiukajtis
327*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (7_0) hy0 ? 0x7ff00000
328*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.spec0		! (7_0) if ( hy0 >= 0x7ff00000 )
329*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (7_0) diff0 = hy0 - hx0;
330*25c28e83SPiotr Jasiukajtis
331*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (7_0) j0 = diff0 >> 31;
332*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (7_0) hx0 ? 0x00100000
333*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.spec1		! (7_0) if ( hx0 < 0x00100000 )
334*25c28e83SPiotr Jasiukajtis
335*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (7_0) j0 &= diff0;
336*25c28e83SPiotr Jasiukajtis.cont_spec0:
337*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (7_0) j0 = hy0 - j0;
338*25c28e83SPiotr Jasiukajtis
339*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (7_0) j0 &= 0x7ff00000;
340*25c28e83SPiotr Jasiukajtis
341*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (7_0) j0 = 0x7ff00000 - j0;
342*25c28e83SPiotr Jasiukajtis
343*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
344*25c28e83SPiotr Jasiukajtis
345*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
346*25c28e83SPiotr Jasiukajtis
347*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp0]		! (7_1) *(long long*)&scl0 = ll;
348*25c28e83SPiotr Jasiukajtis.cont_spec1:
349*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%o1		! (0_0) hx0 = *(int*)px;
350*25c28e83SPiotr Jasiukajtis	mov	%i1,%i2
351*25c28e83SPiotr Jasiukajtis
352*25c28e83SPiotr Jasiukajtis	lda	[%i0]0x82,%o4		! (0_0) hy0 = *(int*)py;
353*25c28e83SPiotr Jasiukajtis
354*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (0_0) hx0 &= 0x7fffffff;
355*25c28e83SPiotr Jasiukajtis	mov	%i0,%o0
356*25c28e83SPiotr Jasiukajtis
357*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (0_0) hx0 ? 0x7ff00000
358*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update0		! (0_0) if ( hx0 >= 0x7ff00000 )
359*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (0_0) hy0 &= 0x7fffffff;
360*25c28e83SPiotr Jasiukajtis
361*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (0_0) hy0 ? 0x7ff00000
362*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (0_0) diff0 = hy0 - hx0;
363*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update0		! (0_0) if ( hy0 >= 0x7ff00000 )
364*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (0_0) j0 = diff0 >> 31;
365*25c28e83SPiotr Jasiukajtis
366*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (0_0) hx0 ? 0x00100000
367*25c28e83SPiotr Jasiukajtis
368*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (0_0) j0 &= diff0;
369*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update1		! (0_0) if ( hx0 < 0x00100000 )
370*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (0_0) j0 = hy0 - j0;
371*25c28e83SPiotr Jasiukajtis.cont0:
372*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (0_0) j0 &= 0x7ff00000;
373*25c28e83SPiotr Jasiukajtis
374*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%o4		! (0_0) j0 = 0x7ff00000 - j0;
375*25c28e83SPiotr Jasiukajtis.cont1:
376*25c28e83SPiotr Jasiukajtis	sllx	%o4,32,%o4		! (0_0) ll = (long long)j0 << 32;
377*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp1]		! (0_0) *(long long*)&scl0 = ll;
378*25c28e83SPiotr Jasiukajtis
379*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp15],%f62	! (7_1) *(long long*)&scl0 = ll;
380*25c28e83SPiotr Jasiukajtis
381*25c28e83SPiotr Jasiukajtis	lda	[%i4]%asi,%f10		! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
382*25c28e83SPiotr Jasiukajtis
383*25c28e83SPiotr Jasiukajtis	lda	[%i4+4]%asi,%f11	! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
384*25c28e83SPiotr Jasiukajtis
385*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f12		! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
386*25c28e83SPiotr Jasiukajtis
387*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i4		! px += stridex
388*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f13	! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
389*25c28e83SPiotr Jasiukajtis
390*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (7_1) x0 *= scl0;
391*25c28e83SPiotr Jasiukajtis	add	%i4,stridex,%i1		! px += stridex
392*25c28e83SPiotr Jasiukajtis
393*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (7_1) y0 *= scl0;
394*25c28e83SPiotr Jasiukajtis
395*25c28e83SPiotr Jasiukajtis	lda	[%i4]0x82,%o1		! (1_0) hx0 = *(int*)px;
396*25c28e83SPiotr Jasiukajtis
397*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i3		! py += stridey
398*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f46	! (7_1) x_hi0 = x0 + D2ON36;
399*25c28e83SPiotr Jasiukajtis
400*25c28e83SPiotr Jasiukajtis	lda	[%i3]0x82,%g1		! (1_0) hy0 = *(int*)py;
401*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i0		! py += stridey
402*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f50	! (7_1) y_hi0 = y0 + D2ON36;
403*25c28e83SPiotr Jasiukajtis
404*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (1_0) hx0 &= 0x7fffffff;
405*25c28e83SPiotr Jasiukajtis
406*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (1_0) hx0 ? 0x7ff00000
407*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp2]		! (0_0) *(long long*)&scl0 = ll;
408*25c28e83SPiotr Jasiukajtis
409*25c28e83SPiotr Jasiukajtis	and	%g1,_0x7fffffff,%l7	! (1_0) hy0 &= 0x7fffffff;
410*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update2		! (1_0) if ( hx0 >= 0x7ff00000 )
411*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (7_1) x_hi0 -= D2ON36;
412*25c28e83SPiotr Jasiukajtis
413*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (1_0) hy0 ? 0x7ff00000
414*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (1_0) diff0 = hy0 - hx0;
415*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update3		! (1_0) if ( hy0 >= 0x7ff00000 )
416*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;
417*25c28e83SPiotr Jasiukajtis
418*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (1_0) j0 = diff0 >> 31;
419*25c28e83SPiotr Jasiukajtis
420*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (1_0) j0 &= diff0;
421*25c28e83SPiotr Jasiukajtis
422*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
423*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (1_0) j0 = hy0 - j0;
424*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (1_0) hx0 ? 0x00100000
425*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
426*25c28e83SPiotr Jasiukajtis
427*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
428*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (1_0) j0 &= 0x7ff00000;
429*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update4		! (1_0) if ( hx0 < 0x00100000 )
430*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
431*25c28e83SPiotr Jasiukajtis
432*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%o4		! (1_0) j0 = 0x7ff00000 - j0;
433*25c28e83SPiotr Jasiukajtis.cont4:
434*25c28e83SPiotr Jasiukajtis	sllx	%o4,32,%o4		! (1_0) ll = (long long)j0 << 32;
435*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp3]		! (1_0) *(long long*)&scl0 = ll;
436*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (7_1) dtmp1 = y0 + y_hi0;
437*25c28e83SPiotr Jasiukajtis
438*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (7_1) y_lo0 = y0 - y_hi0;
439*25c28e83SPiotr Jasiukajtis
440*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f0,%f0		! (7_1) res0_lo *= x_lo0;
441*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp1],%f62	! (0_0) *(long long*)&scl0 = ll;
442*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f44		! (7_1) res0_hi += dtmp0;
443*25c28e83SPiotr Jasiukajtis
444*25c28e83SPiotr Jasiukajtis	lda	[%i2]%asi,%f10		! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
445*25c28e83SPiotr Jasiukajtis
446*25c28e83SPiotr Jasiukajtis	lda	[%i2+4]%asi,%f11	! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
447*25c28e83SPiotr Jasiukajtis
448*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f12,%f26		! (7_1) dtmp1 *= y_lo0;
449*25c28e83SPiotr Jasiukajtis	lda	[%o0]%asi,%f12		! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
450*25c28e83SPiotr Jasiukajtis
451*25c28e83SPiotr Jasiukajtis	lda	[%o0+4]%asi,%f13	! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
452*25c28e83SPiotr Jasiukajtis
453*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (0_0) x0 *= scl0;
454*25c28e83SPiotr Jasiukajtis
455*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (0_0) y0 *= scl0;
456*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f26,%f38		! (7_1) res0_lo += dtmp1;
457*25c28e83SPiotr Jasiukajtis
458*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%o1		! (2_0) hx0 = *(int*)px;
459*25c28e83SPiotr Jasiukajtis	mov	%i1,%i2
460*25c28e83SPiotr Jasiukajtis
461*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f46	! (0_0) x_hi0 = x0 + D2ON36;
462*25c28e83SPiotr Jasiukajtis
463*25c28e83SPiotr Jasiukajtis	lda	[%i0]0x82,%g1		! (2_0) hy0 = *(int*)py;
464*25c28e83SPiotr Jasiukajtis	mov	%i0,%o0
465*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f12	! (0_0) y_hi0 = y0 + D2ON36;
466*25c28e83SPiotr Jasiukajtis
467*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f38,%f14		! (7_1) dres = res0_hi + res0_lo;
468*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (2_0) hx0 &= 0x7fffffff;
469*25c28e83SPiotr Jasiukajtis
470*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (2_0) hx0 ? 0x7ff00000
471*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update5		! (2_0) if ( hx0 >= 0x7ff00000 )
472*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp4]		! (1_0) *(long long*)&scl0 = ll;
473*25c28e83SPiotr Jasiukajtis
474*25c28e83SPiotr Jasiukajtis	and	%g1,_0x7fffffff,%l7	! (2_0) hy0 &= 0x7fffffff;
475*25c28e83SPiotr Jasiukajtis	st	%f14,[%fp+ftmp0]	! (7_1) iarr = ((int*)&dres)[0];
476*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (0_0) x_hi0 -= D2ON36;
477*25c28e83SPiotr Jasiukajtis
478*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (2_0) diff0 = hy0 - hx0;
479*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (2_0) hy0 ? 0x7ff00000
480*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update6		! (2_0) if ( hy0 >= 0x7ff00000 )
481*25c28e83SPiotr Jasiukajtis	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;
482*25c28e83SPiotr Jasiukajtis
483*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (2_0) j0 = diff0 >> 31;
484*25c28e83SPiotr Jasiukajtis
485*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (2_0) j0 &= diff0;
486*25c28e83SPiotr Jasiukajtis
487*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
488*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (2_0) hx0 ? 0x00100000
489*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (2_0) j0 = hy0 - j0;
490*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
491*25c28e83SPiotr Jasiukajtis
492*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
493*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (2_0) j0 &= 0x7ff00000;
494*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update7		! (2_0) if ( hx0 < 0x00100000 )
495*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
496*25c28e83SPiotr Jasiukajtis.cont7:
497*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (2_0) j0 = 0x7ff00000 - j0;
498*25c28e83SPiotr Jasiukajtis
499*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (2_0) ll = (long long)j0 << 32;
500*25c28e83SPiotr Jasiukajtis.cont8:
501*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp5]		! (2_0) *(long long*)&scl0 = ll;
502*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (0_0) dtmp1 = y0 + y_hi0;
503*25c28e83SPiotr Jasiukajtis
504*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (0_0) y_lo0 = y0 - y_hi0;
505*25c28e83SPiotr Jasiukajtis
506*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f0,%f0		! (0_0) res0_lo *= x_lo0;
507*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp3],%f62	! (1_0) *(long long*)&scl0 = ll;
508*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f32		! (0_0) res0_hi += dtmp0;
509*25c28e83SPiotr Jasiukajtis
510*25c28e83SPiotr Jasiukajtis	lda	[%i4]%asi,%f10		! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
511*25c28e83SPiotr Jasiukajtis
512*25c28e83SPiotr Jasiukajtis	lda	[%i4+4]%asi,%f11	! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
513*25c28e83SPiotr Jasiukajtis
514*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f12,%f28		! (0_0) dtmp1 *= y_lo0;
515*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f12		! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
516*25c28e83SPiotr Jasiukajtis
517*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i4		! px += stridex
518*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f13	! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
519*25c28e83SPiotr Jasiukajtis
520*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (7_1) iarr = ((int*)&dres)[0];
521*25c28e83SPiotr Jasiukajtis	add	%i4,stridex,%i1		! px += stridex
522*25c28e83SPiotr Jasiukajtis	fand	%f14,DA1,%f2		! (7_1) dexp0 = vis_fand(dres,DA1);
523*25c28e83SPiotr Jasiukajtis
524*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (1_0) x0 *= scl0;
525*25c28e83SPiotr Jasiukajtis
526*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (1_0) y0 *= scl0;
527*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i3		! (7_1) iarr >>= 11;
528*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f28,%f36		! (0_0) res0_lo += dtmp1;
529*25c28e83SPiotr Jasiukajtis
530*25c28e83SPiotr Jasiukajtis	and	%i3,0x1fc,%i3		! (7_1) iarr &= 0x1fc;
531*25c28e83SPiotr Jasiukajtis
532*25c28e83SPiotr Jasiukajtis	add	%i3,TBL,%o4		! (7_1) (char*)dll1 + iarr
533*25c28e83SPiotr Jasiukajtis	lda	[%i4]0x82,%o1		! (3_0) hx0 = *(int*)px;
534*25c28e83SPiotr Jasiukajtis
535*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i3		! py += stridey
536*25c28e83SPiotr Jasiukajtis	ld	[%o4],%f26		! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
537*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f46	! (1_0) x_hi0 = x0 + D2ON36;
538*25c28e83SPiotr Jasiukajtis
539*25c28e83SPiotr Jasiukajtis	lda	[%i3]0x82,%o4		! (3_0) hy0 = *(int*)py;
540*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i0		! py += stridey
541*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f12	! (1_0) y_hi0 = y0 + D2ON36;
542*25c28e83SPiotr Jasiukajtis
543*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f36,%f22		! (0_0) dres = res0_hi + res0_lo;
544*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (3_0) hx0 &= 0x7fffffff;
545*25c28e83SPiotr Jasiukajtis
546*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (3_0) hx0 ? 0x7ff00000
547*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp6]		! (2_0) *(long long*)&scl0 = ll;
548*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update9		! (3_0) if ( hx0 >= 0x7ff00000 )
549*25c28e83SPiotr Jasiukajtis	fpsub32	%f26,%f2,%f26		! (7_1) dd = vis_fpsub32(dtmp0, dexp0);
550*25c28e83SPiotr Jasiukajtis
551*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (3_0) hy0 &= 0x7fffffff;
552*25c28e83SPiotr Jasiukajtis	st	%f22,[%fp+ftmp0]	! (0_0) iarr = ((int*)&dres)[0];
553*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (1_0) x_hi0 -= D2ON36;
554*25c28e83SPiotr Jasiukajtis
555*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (3_0) diff0 = hy0 - hx0;
556*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (3_0) hy0 ? 0x7ff00000
557*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update10		! (3_0) if ( hy0 >= 0x7ff00000 )
558*25c28e83SPiotr Jasiukajtis	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;
559*25c28e83SPiotr Jasiukajtis
560*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres;
561*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (3_0) j0 = diff0 >> 31;
562*25c28e83SPiotr Jasiukajtis
563*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (3_0) j0 &= diff0;
564*25c28e83SPiotr Jasiukajtis
565*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
566*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (3_0) hx0 ? 0x00100000
567*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (3_0) j0 = hy0 - j0;
568*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
569*25c28e83SPiotr Jasiukajtis
570*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
571*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (3_0) j0 &= 0x7ff00000;
572*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update11		! (3_0) if ( hx0 < 0x00100000 )
573*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
574*25c28e83SPiotr Jasiukajtis.cont11:
575*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (3_0) j0 = 0x7ff00000 - j0;
576*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
577*25c28e83SPiotr Jasiukajtis.cont12:
578*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (3_0) ll = (long long)j0 << 32;
579*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp7]		! (3_0) *(long long*)&scl0 = ll;
580*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (1_0) dtmp1 = y0 + y_hi0;
581*25c28e83SPiotr Jasiukajtis
582*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (1_0) y_lo0 = y0 - y_hi0
583*25c28e83SPiotr Jasiukajtis
584*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f0,%f0		! (1_0) res0_lo *= x_lo0;
585*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp5],%f62	! (2_0) *(long long*)&scl0 = ll;
586*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f42		! (1_0) res0_hi += dtmp0;
587*25c28e83SPiotr Jasiukajtis
588*25c28e83SPiotr Jasiukajtis	lda	[%i2]%asi,%f10		! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
589*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f20,%f54		! (7_1) dd *= dtmp0;
590*25c28e83SPiotr Jasiukajtis
591*25c28e83SPiotr Jasiukajtis	lda	[%i2+4]%asi,%f11	! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
592*25c28e83SPiotr Jasiukajtis
593*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f12,%f26		! (1_0) dtmp1 *= y_lo0;
594*25c28e83SPiotr Jasiukajtis	lda	[%o0]%asi,%f12		! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
595*25c28e83SPiotr Jasiukajtis
596*25c28e83SPiotr Jasiukajtis	lda	[%o0+4]%asi,%f13	! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
597*25c28e83SPiotr Jasiukajtis
598*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f14,%f50		! (7_1) dtmp1 = dd * dres;
599*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (0_0) iarr = ((int*)&dres)[0];
600*25c28e83SPiotr Jasiukajtis	fand	%f22,DA1,%f2		! (0_0) dexp0 = vis_fand(dres,DA1);
601*25c28e83SPiotr Jasiukajtis
602*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (2_0) x0 *= scl0;
603*25c28e83SPiotr Jasiukajtis
604*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (2_0) y0 *= scl0;
605*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%o4		! (0_0) iarr >>= 11;
606*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f26,%f34		! (1_0) res0_lo += dtmp1;
607*25c28e83SPiotr Jasiukajtis
608*25c28e83SPiotr Jasiukajtis	and	%o4,0x1fc,%o4		! (0_0) iarr &= 0x1fc;
609*25c28e83SPiotr Jasiukajtis
610*25c28e83SPiotr Jasiukajtis	add	%o4,TBL,%o4		! (0_0) (char*)dll1 + iarr
611*25c28e83SPiotr Jasiukajtis	mov	%i1,%i2
612*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%o1		! (4_0) hx0 = *(int*)px;
613*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (7_1) dtmp1 = DTWO - dtmp1;
614*25c28e83SPiotr Jasiukajtis
615*25c28e83SPiotr Jasiukajtis	ld	[%o4],%f28		! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
616*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f46	! (2_0) x_hi0 = x0 + D2ON36;
617*25c28e83SPiotr Jasiukajtis
618*25c28e83SPiotr Jasiukajtis	lda	[%i0]0x82,%o4		! (4_0) hy0 = *(int*)py;
619*25c28e83SPiotr Jasiukajtis	mov	%i0,%o0
620*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f50	! (2_0) y_hi0 = y0 + D2ON36;
621*25c28e83SPiotr Jasiukajtis
622*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (4_0) hx0 &= 0x7fffffff;
623*25c28e83SPiotr Jasiukajtis	faddd	%f42,%f34,%f18		! (1_0) dres = res0_hi + res0_lo;
624*25c28e83SPiotr Jasiukajtis
625*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f20,%f16		! (7_1) dd *= dtmp1;
626*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (4_0) hx0 ? 0x7ff00000
627*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
628*25c28e83SPiotr Jasiukajtis	fpsub32	%f28,%f2,%f28		! (0_0) dd = vis_fpsub32(dtmp0, dexp0);
629*25c28e83SPiotr Jasiukajtis
630*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (4_0) hy0 &= 0x7fffffff;
631*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update13		! (4_0) if ( hx0 >= 0x7ff00000 )
632*25c28e83SPiotr Jasiukajtis	st	%f18,[%fp+ftmp0]	! (1_0) iarr = ((int*)&dres)[0];
633*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (2_0) x_hi0 -= D2ON36;
634*25c28e83SPiotr Jasiukajtis
635*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (4_0) diff0 = hy0 - hx0;
636*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (4_0) hy0 ? 0x7ff00000
637*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update14		! (4_0) if ( hy0 >= 0x7ff00000 )
638*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;
639*25c28e83SPiotr Jasiukajtis
640*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
641*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (4_0) j0 = diff0 >> 31;
642*25c28e83SPiotr Jasiukajtis
643*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (4_0) j0 &= diff0;
644*25c28e83SPiotr Jasiukajtis
645*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
646*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (4_0) j0 = hy0 - j0;
647*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (4_0) hx0 ? 0x00100000
648*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
649*25c28e83SPiotr Jasiukajtis
650*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
651*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (4_0) j0 &= 0x7ff00000;
652*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update15		! (4_0) if ( hx0 < 0x00100000 )
653*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
654*25c28e83SPiotr Jasiukajtis.cont15:
655*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (4_0) j0 = 0x7ff00000 - j0;
656*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
657*25c28e83SPiotr Jasiukajtis.cont16:
658*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f14,%f14		! (7_1) dtmp2 = dd * dres;
659*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (4_0) ll = (long long)j0 << 32;
660*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp9]		! (4_0) *(long long*)&scl0 = ll;
661*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (2_0) dtmp1 = y0 + y_hi0;
662*25c28e83SPiotr Jasiukajtis
663*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (2_0) y_lo0 = y0 - y_hi0;
664*25c28e83SPiotr Jasiukajtis
665*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f0,%f0		! (2_0) res0_lo *= x_lo0;
666*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp7],%f62	! (3_0) *(long long*)&scl0 = ll;
667*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f30		! (2_0) res0_hi += dtmp0;
668*25c28e83SPiotr Jasiukajtis
669*25c28e83SPiotr Jasiukajtis	lda	[%i4]%asi,%f10		! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
670*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f20,%f54		! (0_0) dd *= dtmp0;
671*25c28e83SPiotr Jasiukajtis
672*25c28e83SPiotr Jasiukajtis	lda	[%i4+4]%asi,%f11	! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
673*25c28e83SPiotr Jasiukajtis
674*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f12,%f28		! (2_0) dtmp1 *= y_lo0;
675*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f12		! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
676*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f14,%f20		! (7_1) dtmp2 = DTWO - dtmp2;
677*25c28e83SPiotr Jasiukajtis
678*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f13	! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
679*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i4		! px += stridex
680*25c28e83SPiotr Jasiukajtis
681*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f22,%f50		! (0_0) dtmp1 = dd * dres;
682*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (1_0) iarr = ((int*)&dres)[0];
683*25c28e83SPiotr Jasiukajtis	add	%i4,stridex,%i1		! px += stridex
684*25c28e83SPiotr Jasiukajtis	fand	%f18,DA1,%f2		! (1_0) dexp0 = vis_fand(dres,DA1);
685*25c28e83SPiotr Jasiukajtis
686*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (3_0) x0 *= scl0;
687*25c28e83SPiotr Jasiukajtis
688*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (3_0) y0 *= scl0;
689*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i3		! (1_0) iarr >>= 11;
690*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f28,%f40		! (2_0) res0_lo += dtmp1;
691*25c28e83SPiotr Jasiukajtis
692*25c28e83SPiotr Jasiukajtis	and	%i3,0x1fc,%i3		! (1_0) iarr &= 0x1fc;
693*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f20,%f28		! (7_1) dres = dd * dtmp2;
694*25c28e83SPiotr Jasiukajtis
695*25c28e83SPiotr Jasiukajtis	add	%i3,TBL,%o4		! (1_0) (char*)dll1 + iarr
696*25c28e83SPiotr Jasiukajtis	lda	[%i4]0x82,%o1		! (5_0) hx0 = *(int*)px;
697*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (0_0) dtmp1 = DTWO - dtmp1;
698*25c28e83SPiotr Jasiukajtis
699*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i3		! py += stridey
700*25c28e83SPiotr Jasiukajtis	ld	[%o4],%f26		! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
701*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f46	! (3_0) x_hi0 = x0 + D2ON36;
702*25c28e83SPiotr Jasiukajtis
703*25c28e83SPiotr Jasiukajtis	lda	[%i3]0x82,%o4		! (5_0) hy0 = *(int*)py;
704*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i0		! py += stridey
705*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f50	! (3_0) y_hi0 = y0 + D2ON36;
706*25c28e83SPiotr Jasiukajtis
707*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (5_0) hx0 &= 0x7fffffff;
708*25c28e83SPiotr Jasiukajtis	faddd	%f30,%f40,%f14		! (2_0) dres = res0_hi + res0_lo;
709*25c28e83SPiotr Jasiukajtis
710*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f20,%f24		! (0_0) dd *= dtmp1;
711*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (5_0) hx0 ? 0x7ff00000
712*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
713*25c28e83SPiotr Jasiukajtis	fpsub32	%f26,%f2,%f26		! (1_0) dd = vis_fpsub32(dtmp0, dexp0);
714*25c28e83SPiotr Jasiukajtis
715*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (5_0) hy0 &= 0x7fffffff;
716*25c28e83SPiotr Jasiukajtis	st	%f14,[%fp+ftmp0]	! (2_0) iarr = ((int*)&dres)[0];
717*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update17		! (5_0) if ( hx0 >= 0x7ff00000 )
718*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (3_0) x_hi0 -= D2ON36;
719*25c28e83SPiotr Jasiukajtis
720*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (5_0) diff0 = hy0 - hx0;
721*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (5_0) hy0 ? 0x7ff00000
722*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update18		! (5_0) if ( hy0 >= 0x7ff00000 )
723*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;
724*25c28e83SPiotr Jasiukajtis
725*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
726*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (5_0) j0 = diff0 >> 31;
727*25c28e83SPiotr Jasiukajtis
728*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (5_0) j0 &= diff0;
729*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
730*25c28e83SPiotr Jasiukajtis
731*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
732*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (5_0) j0 = hy0 - j0;
733*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (5_0) hx0 ? 0x00100000
734*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
735*25c28e83SPiotr Jasiukajtis
736*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
737*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (5_0) j0 &= 0x7ff00000;
738*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update19		! (5_0) if ( hx0 < 0x00100000 )
739*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
740*25c28e83SPiotr Jasiukajtis.cont19a:
741*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
742*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (5_0) j0 = 0x7ff00000 - j0;
743*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
744*25c28e83SPiotr Jasiukajtis.cont19b:
745*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
746*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
747*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
748*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
749*25c28e83SPiotr Jasiukajtis
750*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
751*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
752*25c28e83SPiotr Jasiukajtis.cont20:
753*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f0,%f0		! (3_0) res0_lo *= x_lo0;
754*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp9],%f62	! (4_0) *(long long*)&scl0 = ll;
755*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f44		! (3_0) res0_hi += dtmp0;
756*25c28e83SPiotr Jasiukajtis
757*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f10,%f60		! (7_1) dtmp0 = DONE - dtmp0;
758*25c28e83SPiotr Jasiukajtis	lda	[%i2]%asi,%f10		! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
759*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f20,%f54		! (1_0) dd *= dtmp0;
760*25c28e83SPiotr Jasiukajtis
761*25c28e83SPiotr Jasiukajtis	lda	[%i2+4]%asi,%f11	! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
762*25c28e83SPiotr Jasiukajtis
763*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f12,%f26		! (3_0) dtmp1 *= y_lo0;
764*25c28e83SPiotr Jasiukajtis	lda	[%o0]%asi,%f12		! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
765*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f22,%f20		! (0_0) dtmp2 = DTWO - dtmp2;
766*25c28e83SPiotr Jasiukajtis
767*25c28e83SPiotr Jasiukajtis	lda	[%o0+4]%asi,%f13	! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
768*25c28e83SPiotr Jasiukajtis
769*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f18,%f50		! (1_0) dtmp1 = dd * dres;
770*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (2_0) iarr = ((int*)&dres)[0];
771*25c28e83SPiotr Jasiukajtis	fand	%f14,DA1,%f2		! (2_0) dexp0 = vis_fand(dres,DA1);
772*25c28e83SPiotr Jasiukajtis
773*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (4_0) x0 *= scl0;
774*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f38,%f46		! (7_1) dtmp0 -= dtmp1;
775*25c28e83SPiotr Jasiukajtis
776*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (4_0) y0 *= scl0;
777*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%o4		! (2_0) iarr >>= 11;
778*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f26,%f38		! (3_0) res0_lo += dtmp1;
779*25c28e83SPiotr Jasiukajtis
780*25c28e83SPiotr Jasiukajtis	and	%o4,0x1fc,%o4		! (2_0) iarr &= 0x1fc;
781*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f20,%f26		! (0_0) dres = dd * dtmp2;
782*25c28e83SPiotr Jasiukajtis
783*25c28e83SPiotr Jasiukajtis	add	%o4,TBL,%o4		! (2_0) (char*)dll1 + iarr
784*25c28e83SPiotr Jasiukajtis	mov	%i1,%i2
785*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%o1		! (6_0) hx0 = *(int*)px;
786*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f52		! (1_0) dtmp1 = DTWO - dtmp1;
787*25c28e83SPiotr Jasiukajtis
788*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f28,%f28		! (7_1) dtmp0 *= dres;
789*25c28e83SPiotr Jasiukajtis	ld	[%o4],%f20		! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
790*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f46	! (4_0) x_hi0 = x0 + D2ON36;
791*25c28e83SPiotr Jasiukajtis
792*25c28e83SPiotr Jasiukajtis	lda	[%i0]0x82,%o4		! (6_0) hy0 = *(int*)py;
793*25c28e83SPiotr Jasiukajtis	mov	%i0,%o0
794*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f50	! (4_0) y_hi0 = y0 + D2ON36;
795*25c28e83SPiotr Jasiukajtis
796*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (6_0) hx0 &= 0x7fffffff;
797*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f38,%f22		! (3_0) dres = res0_hi + res0_lo;
798*25c28e83SPiotr Jasiukajtis
799*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f52,%f16		! (1_0) dd *= dtmp1;
800*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (6_0) hx0 ? 0x7ff00000
801*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
802*25c28e83SPiotr Jasiukajtis	fpsub32	%f20,%f2,%f52		! (2_0) dd = vis_fpsub32(dtmp0, dexp0);
803*25c28e83SPiotr Jasiukajtis
804*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (6_0) hy0 &= 0x7fffffff;
805*25c28e83SPiotr Jasiukajtis	st	%f22,[%fp+ftmp0]	! (3_0) iarr = ((int*)&dres)[0];
806*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update21		! (6_0) if ( hx0 >= 0x7ff00000 )
807*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f46	! (4_0) x_hi0 -= D2ON36;
808*25c28e83SPiotr Jasiukajtis
809*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (6_0) diff0 = hy0 - hx0;
810*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (6_0) hy0 ? 0x7ff00000
811*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update22		! (6_0) if ( hy0 >= 0x7ff00000 )
812*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36;
813*25c28e83SPiotr Jasiukajtis
814*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
815*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (6_0) j0 = diff0 >> 31;
816*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
817*25c28e83SPiotr Jasiukajtis
818*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (6_0) j0 &= diff0;
819*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
820*25c28e83SPiotr Jasiukajtis
821*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
822*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (6_0) j0 = hy0 - j0;
823*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (6_0) hx0 ? 0x00100000
824*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
825*25c28e83SPiotr Jasiukajtis
826*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
827*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (6_0) j0 &= 0x7ff00000;
828*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update23		! (6_0) if ( hx0 < 0x00100000 )
829*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
830*25c28e83SPiotr Jasiukajtis.cont23a:
831*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
832*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (6_0) j0 = 0x7ff00000 - j0;
833*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
834*25c28e83SPiotr Jasiukajtis.cont23b:
835*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
836*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
837*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
838*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
839*25c28e83SPiotr Jasiukajtis
840*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
841*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
842*25c28e83SPiotr Jasiukajtis.cont24:
843*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f2,%f2		! (4_0) res0_lo *= x_lo0;
844*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp11],%f62	! (5_0) *(long long*)&scl0 = ll;
845*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f20,%f32		! (4_0) res0_hi += dtmp0;
846*25c28e83SPiotr Jasiukajtis
847*25c28e83SPiotr Jasiukajtis	lda	[%i4]%asi,%f0		! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
848*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f10,%f10		! (2_0) dd *= dtmp0;
849*25c28e83SPiotr Jasiukajtis
850*25c28e83SPiotr Jasiukajtis	lda	[%i4+4]%asi,%f1		! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
851*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f50,%f52		! (0_0) dtmp0 = DONE - dtmp0;
852*25c28e83SPiotr Jasiukajtis
853*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f60,%f46		! (4_0) dtmp1 *= y_lo0;
854*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f12		! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
855*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f18,%f18		! (1_0) dtmp2 = DTWO - dtmp2;
856*25c28e83SPiotr Jasiukajtis
857*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i4		! px += stridex
858*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f13	! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
859*25c28e83SPiotr Jasiukajtis
860*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f50		! (2_0) dtmp1 = dd * dres;
861*25c28e83SPiotr Jasiukajtis	add	%i4,stridex,%i1		! px += stridex
862*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (3_0) iarr = ((int*)&dres)[0];
863*25c28e83SPiotr Jasiukajtis	fand	%f22,DA1,%f54		! (3_0) dexp0 = vis_fand(dres,DA1);
864*25c28e83SPiotr Jasiukajtis
865*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f62,%f60		! (5_0) x0 *= scl0;
866*25c28e83SPiotr Jasiukajtis	fsubd	%f52,%f36,%f20		! (0_0) dtmp0 -= dtmp1;
867*25c28e83SPiotr Jasiukajtis
868*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f52		! (5_0) y0 *= scl0;
869*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i3		! (3_0) iarr >>= 11;
870*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f36		! (4_0) res0_lo += dtmp1;
871*25c28e83SPiotr Jasiukajtis
872*25c28e83SPiotr Jasiukajtis	and	%i3,0x1fc,%i3		! (3_0) iarr &= 0x1fc;
873*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f18,%f16		! (1_0) dres = dd * dtmp2;
874*25c28e83SPiotr Jasiukajtis
875*25c28e83SPiotr Jasiukajtis	fsqrtd	%f48,%f18		! (7_1) res0 = sqrt ( res0 );
876*25c28e83SPiotr Jasiukajtis	add	%i3,TBL,%o4		! (3_0) (char*)dll1 + iarr
877*25c28e83SPiotr Jasiukajtis	lda	[%i4]0x82,%o1		! (7_0) hx0 = *(int*)px;
878*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f46		! (2_0) dtmp1 = DTWO - dtmp1;
879*25c28e83SPiotr Jasiukajtis
880*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f26,%f48		! (0_0) dtmp0 *= dres;
881*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i3		! py += stridey
882*25c28e83SPiotr Jasiukajtis	ld	[%o4],%f20		! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
883*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f50	! (5_0) x_hi0 = x0 + D2ON36;
884*25c28e83SPiotr Jasiukajtis
885*25c28e83SPiotr Jasiukajtis	lda	[%i3]0x82,%o4		! (7_0) hy0 = *(int*)py;
886*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i0		! py += stridey
887*25c28e83SPiotr Jasiukajtis	faddd	%f52,D2ON36,%f12	! (5_0) y_hi0 = y0 + D2ON36;
888*25c28e83SPiotr Jasiukajtis
889*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (7_0) hx0 &= 0x7fffffff;
890*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f36,%f24		! (4_0) dres = res0_hi + res0_lo;
891*25c28e83SPiotr Jasiukajtis
892*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f46,%f26		! (2_0) dd *= dtmp1;
893*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (7_0) hx0 ? 0x7ff00000
894*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
895*25c28e83SPiotr Jasiukajtis	fpsub32	%f20,%f54,%f10		! (3_0) dd = vis_fpsub32(dtmp0, dexp0);
896*25c28e83SPiotr Jasiukajtis
897*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (7_0) hy0 &= 0x7fffffff;
898*25c28e83SPiotr Jasiukajtis	st	%f24,[%fp+ftmp0]	! (4_0) iarr = ((int*)&dres)[0];
899*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update25		! (7_0) if ( hx0 >= 0x7ff00000 )
900*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f20	! (5_0) x_hi0 -= D2ON36;
901*25c28e83SPiotr Jasiukajtis
902*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (7_0) diff0 = hy0 - hx0;
903*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (7_0) hy0 ? 0x7ff00000
904*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update26		! (7_0) if ( hy0 >= 0x7ff00000 )
905*25c28e83SPiotr Jasiukajtis	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;
906*25c28e83SPiotr Jasiukajtis
907*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
908*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (7_0) j0 = diff0 >> 31;
909*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;
910*25c28e83SPiotr Jasiukajtis
911*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (7_0) j0 &= diff0;
912*25c28e83SPiotr Jasiukajtis	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);
913*25c28e83SPiotr Jasiukajtis
914*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
915*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (7_0) j0 = hy0 - j0;
916*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (7_0) hx0 ? 0x00100000
917*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
918*25c28e83SPiotr Jasiukajtis
919*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
920*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (7_0) j0 &= 0x7ff00000;
921*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update27		! (7_0) if ( hx0 < 0x00100000 )
922*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
923*25c28e83SPiotr Jasiukajtis.cont27a:
924*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
925*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (7_0) j0 = 0x7ff00000 - j0;
926*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
927*25c28e83SPiotr Jasiukajtis.cont27b:
928*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
929*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
930*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
931*25c28e83SPiotr Jasiukajtis	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;
932*25c28e83SPiotr Jasiukajtis
933*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
934*25c28e83SPiotr Jasiukajtis	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
935*25c28e83SPiotr Jasiukajtis.cont28:
936*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f2,%f2		! (5_0) res0_lo *= x_lo0;
937*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp13],%f62	! (6_0) *(long long*)&scl0 = ll;
938*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f46,%f42		! (5_0) res0_hi += dtmp0;
939*25c28e83SPiotr Jasiukajtis
940*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f20,%f52		! (3_0) dd *= dtmp0;
941*25c28e83SPiotr Jasiukajtis	lda	[%i2]%asi,%f10		! (6_0) ((float*)&x0)[0] = ((float*)px)[0];
942*25c28e83SPiotr Jasiukajtis
943*25c28e83SPiotr Jasiukajtis	lda	[%i2+4]%asi,%f11	! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
944*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f60,%f60		! (1_0) dtmp0 = DONE - dtmp0;
945*25c28e83SPiotr Jasiukajtis
946*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f54,%f46		! (5_0) dtmp1 *= y_lo0;
947*25c28e83SPiotr Jasiukajtis	lda	[%o0]%asi,%f12		! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
948*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f14,%f14		! (2_0) dtmp2 = DTWO - dtmp2;
949*25c28e83SPiotr Jasiukajtis
950*25c28e83SPiotr Jasiukajtis	lda	[%o0+4]%asi,%f13	! (6_0) ((float*)&y0)[1] = ((float*)py)[1];
951*25c28e83SPiotr Jasiukajtis
952*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f22,%f50		! (3_0) dtmp1 = dd * dres;
953*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (4_0) iarr = ((int*)&dres)[0];
954*25c28e83SPiotr Jasiukajtis	fand	%f24,DA1,%f54		! (4_0) dexp0 = vis_fand(dres,DA1);
955*25c28e83SPiotr Jasiukajtis
956*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (6_0) x0 *= scl0;
957*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp0],%f0		! (7_1) *(long long*)&scl0 = ll;
958*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f34,%f20		! (1_0) dtmp0 -= dtmp1;
959*25c28e83SPiotr Jasiukajtis
960*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (6_0) y0 *= scl0;
961*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%o4		! (4_0) iarr >>= 11;
962*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f34		! (5_0) res0_lo += dtmp1;
963*25c28e83SPiotr Jasiukajtis
964*25c28e83SPiotr Jasiukajtis	and	%o4,0x1fc,%o4		! (4_0) iarr &= 0x1fc;
965*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f26		! (2_0) dres = dd * dtmp2;
966*25c28e83SPiotr Jasiukajtis
967*25c28e83SPiotr Jasiukajtis	cmp	counter,8		! counter ? 8
968*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.tail		! if ( counter < 8 ) go to .tail (signed test, predicted not taken)
969*25c28e83SPiotr Jasiukajtis	nop				! delay slot of the bl above; nothing to do
970*25c28e83SPiotr Jasiukajtis
971*25c28e83SPiotr Jasiukajtis	ba	.main_loop		! otherwise enter .main_loop
972*25c28e83SPiotr Jasiukajtis	sub	counter,8,counter	! ba delay slot: counter -= 8 on the way in (presumably 8 elements per pass, matching the (0_x)..(7_x) stage tags - confirm)
973*25c28e83SPiotr Jasiukajtis
974*25c28e83SPiotr Jasiukajtis	.align	16
975*25c28e83SPiotr Jasiukajtis.main_loop:
976*25c28e83SPiotr Jasiukajtis	fsqrtd	%f48,%f14		! (0_1) res0 = sqrt ( res0 );
977*25c28e83SPiotr Jasiukajtis	add	%o4,TBL,%o4		! (4_1) (char*)dll1 + iarr
978*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%o1		! (0_0) hx0 = *(int*)px;
979*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f46		! (3_1) dtmp1 = DTWO - dtmp1;
980*25c28e83SPiotr Jasiukajtis
981*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f16,%f48		! (1_1) dtmp0 *= dres;
982*25c28e83SPiotr Jasiukajtis	mov	%i1,%i2
983*25c28e83SPiotr Jasiukajtis	ld	[%o4],%f20		! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
984*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f50	! (6_1) x_hi0 = x0 + D2ON36;
985*25c28e83SPiotr Jasiukajtis
986*25c28e83SPiotr Jasiukajtis	nop
987*25c28e83SPiotr Jasiukajtis	mov	%i0,%o0
988*25c28e83SPiotr Jasiukajtis	lda	[%i0]0x82,%o4		! (0_0) hy0 = *(int*)py;
989*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f2		! (6_1) y_hi0 = y0 + D2ON36;
990*25c28e83SPiotr Jasiukajtis
991*25c28e83SPiotr Jasiukajtis	faddd	%f42,%f34,%f16		! (5_1) dres = res0_hi + res0_lo;
992*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (0_0) hx0 &= 0x7fffffff;
993*25c28e83SPiotr Jasiukajtis	st	%f16,[%fp+ftmp0]	! (5_1) iarr = ((int*)&dres)[0];
994*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f18,%f0		! (7_2) res0 = scl0 * res0;
995*25c28e83SPiotr Jasiukajtis
996*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f46,%f18		! (3_1) dd *= dtmp1;
997*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (0_0) hx0 ? 0x7ff00000
998*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
999*25c28e83SPiotr Jasiukajtis	fpsub32	%f20,%f54,%f54		! (4_1) dd = vis_fpsub32(dtmp0, dexp0);
1000*25c28e83SPiotr Jasiukajtis
1001*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (0_0) hy0 &= 0x7fffffff;
1002*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
1003*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update29		! (0_0) if ( hx0 >= 0x7ff00000 )
1004*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f20	! (6_1) x_hi0 -= D2ON36;
1005*25c28e83SPiotr Jasiukajtis
1006*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (0_0) hy0 ? 0x7ff00000
1007*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (0_0) diff0 = hy0 - hx0;
1008*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update30		! (0_0) if ( hy0 >= 0x7ff00000 )
1009*25c28e83SPiotr Jasiukajtis	fsubd	%f2,D2ON36,%f2		! (6_1) y_hi0 -= D2ON36;
1010*25c28e83SPiotr Jasiukajtis
1011*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f24,%f50		! (4_1) dtmp0 = dd * dres;
1012*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (0_0) j0 = diff0 >> 31;
1013*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp0]		! (7_1) *(long long*)&scl0 = ll;
1014*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f48,%f52		! (1_1) res0 += dtmp0;
1015*25c28e83SPiotr Jasiukajtis
1016*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (0_0) j0 &= diff0;
1017*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (0_0) hx0 ? 0x00100000
1018*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update31		! (0_0) if ( hx0 < 0x00100000 )
1019*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f48		! (2_1) res0 = vis_fand(dres,DA0);
1020*25c28e83SPiotr Jasiukajtis.cont31:
1021*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f0		! (6_1) res0_hi = x_hi0 * x_hi0;
1022*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (0_0) j0 = hy0 - j0;
1023*25c28e83SPiotr Jasiukajtis	nop
1024*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f28		! (6_1) x_lo0 = x0 - x_hi0;
1025*25c28e83SPiotr Jasiukajtis
1026*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f2,%f46		! (6_1) dtmp0 = y_hi0 * y_hi0;
1027*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
1028*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (0_0) j0 &= 0x7ff00000;
1029*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (6_1) res0_lo = x0 + x_hi0;
1030*25c28e83SPiotr Jasiukajtis
1031*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f22,%f22		! (3_1) dtmp2 = dd * dres;
1032*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%o4		! (0_0) j0 = 0x7ff00000 - j0;
1033*25c28e83SPiotr Jasiukajtis	nop
1034*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (4_1) dtmp0 = DTWO - dtmp0;
1035*25c28e83SPiotr Jasiukajtis.cont32:
1036*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f48,%f12		! (2_1) dtmp0 = res0_hi * res0;
1037*25c28e83SPiotr Jasiukajtis	sllx	%o4,32,%o4		! (0_0) ll = (long long)j0 << 32;
1038*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp1]		! (0_0) *(long long*)&scl0 = ll;
1039*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f2,%f50		! (6_1) dtmp1 = y0 + y_hi0;
1040*25c28e83SPiotr Jasiukajtis
1041*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f48,%f40		! (2_1) dtmp1 = res0_lo * res0;
1042*25c28e83SPiotr Jasiukajtis	nop
1043*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1044*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f2,%f2		! (6_1) y_lo0 = y0 - y_hi0;
1045*25c28e83SPiotr Jasiukajtis
1046*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f28,%f28		! (6_1) res0_lo *= x_lo0;
1047*25c28e83SPiotr Jasiukajtis	nop
1048*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp15],%f62	! (7_1) *(long long*)&scl0 = ll;
1049*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f46,%f30		! (6_1) res0_hi += dtmp0;
1050*25c28e83SPiotr Jasiukajtis
1051*25c28e83SPiotr Jasiukajtis	nop
1052*25c28e83SPiotr Jasiukajtis	nop
1053*25c28e83SPiotr Jasiukajtis	lda	[%i4]%asi,%f10		! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
1054*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f20,%f54		! (4_1) dd *= dtmp0;
1055*25c28e83SPiotr Jasiukajtis
1056*25c28e83SPiotr Jasiukajtis	nop
1057*25c28e83SPiotr Jasiukajtis	nop
1058*25c28e83SPiotr Jasiukajtis	lda	[%i4+4]%asi,%f11	! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
1059*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f12,%f60		! (2_1) dtmp0 = DONE - dtmp0;
1060*25c28e83SPiotr Jasiukajtis
1061*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f2,%f46		! (6_1) dtmp1 *= y_lo0;
1062*25c28e83SPiotr Jasiukajtis	nop
1063*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f12		! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
1064*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f22,%f22		! (3_1) dtmp2 = DTWO - dtmp2;
1065*25c28e83SPiotr Jasiukajtis
1066*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i4		! px += stridex
1067*25c28e83SPiotr Jasiukajtis	nop
1068*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f13	! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
1069*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1070*25c28e83SPiotr Jasiukajtis
1071*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f24,%f50		! (4_1) dtmp1 = dd * dres;
1072*25c28e83SPiotr Jasiukajtis	add	%i4,stridex,%i1		! px += stridex
1073*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (5_1) iarr = ((int*)&dres)[0];
1074*25c28e83SPiotr Jasiukajtis	fand	%f16,DA1,%f2		! (5_1) dexp0 = vis_fand(dres,DA1);
1075*25c28e83SPiotr Jasiukajtis
1076*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (7_1) x0 *= scl0;
1077*25c28e83SPiotr Jasiukajtis	nop
1078*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp2],%f0		! (0_1) *(long long*)&scl0 = ll;
1079*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f40,%f20		! (2_1) dtmp0 -= dtmp1;
1080*25c28e83SPiotr Jasiukajtis
1081*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (7_1) y0 *= scl0;
1082*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i3		! (5_1) iarr >>= 11;
1083*25c28e83SPiotr Jasiukajtis	nop
1084*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f46,%f40		! (6_1) res0_lo += dtmp1;
1085*25c28e83SPiotr Jasiukajtis
1086*25c28e83SPiotr Jasiukajtis	and	%i3,0x1fc,%i3		! (5_1) iarr &= 0x1fc;
1087*25c28e83SPiotr Jasiukajtis	nop
1088*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1089*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f22,%f28		! (3_1) dres = dd * dtmp2;
1090*25c28e83SPiotr Jasiukajtis
1091*25c28e83SPiotr Jasiukajtis	fsqrtd	%f52,%f22		! (1_1) res0 = sqrt ( res0 );
1092*25c28e83SPiotr Jasiukajtis	lda	[%i4]0x82,%o1		! (1_0) hx0 = *(int*)px;
1093*25c28e83SPiotr Jasiukajtis	add	%i3,TBL,%g1		! (5_1) (char*)dll1 + iarr
1094*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f62		! (4_1) dtmp1 = DTWO - dtmp1;
1095*25c28e83SPiotr Jasiukajtis
1096*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f26,%f52		! (2_1) dtmp0 *= dres;
1097*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i3		! py += stridey
1098*25c28e83SPiotr Jasiukajtis	ld	[%g1],%f26		! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1099*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f46	! (7_1) x_hi0 = x0 + D2ON36;
1100*25c28e83SPiotr Jasiukajtis
1101*25c28e83SPiotr Jasiukajtis	nop
1102*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i0		! py += stridey
1103*25c28e83SPiotr Jasiukajtis	lda	[%i3]0x82,%g1		! (1_0) hy0 = *(int*)py;
1104*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f50	! (7_1) y_hi0 = y0 + D2ON36;
1105*25c28e83SPiotr Jasiukajtis
1106*25c28e83SPiotr Jasiukajtis	faddd	%f30,%f40,%f18		! (6_1) dres = res0_hi + res0_lo;
1107*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (1_0) hx0 &= 0x7fffffff;
1108*25c28e83SPiotr Jasiukajtis	st	%f18,[%fp+ftmp0]	! (6_1) iarr = ((int*)&dres)[0];
1109*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f14,%f0		! (0_1) res0 = scl0 * res0;
1110*25c28e83SPiotr Jasiukajtis
1111*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f62,%f14		! (4_1) dd *= dtmp1;
1112*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (1_0) hx0 ? 0x7ff00000
1113*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];
1114*25c28e83SPiotr Jasiukajtis	fpsub32	%f26,%f2,%f26		! (5_1) dd = vis_fpsub32(dtmp0, dexp0);
1115*25c28e83SPiotr Jasiukajtis
1116*25c28e83SPiotr Jasiukajtis	and	%g1,_0x7fffffff,%l7	! (1_0) hy0 &= 0x7fffffff;
1117*25c28e83SPiotr Jasiukajtis	nop
1118*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update33		! (1_0) if ( hx0 >= 0x7ff00000 )
1119*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (7_1) x_hi0 -= D2ON36;
1120*25c28e83SPiotr Jasiukajtis
1121*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (1_0) hy0 ? 0x7ff00000
1122*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (1_0) diff0 = hy0 - hx0;
1123*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
1124*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;
1125*25c28e83SPiotr Jasiukajtis
1126*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f16,%f50		! (5_1) dtmp0 = dd * dres;
1127*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (1_0) j0 = diff0 >> 31;
1128*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update34		! (1_0) if ( hy0 >= 0x7ff00000 )
1129*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (2_1) res0 += dtmp0;
1130*25c28e83SPiotr Jasiukajtis
1131*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (1_0) j0 &= diff0;
1132*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
1133*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp2]		! (0_0) *(long long*)&scl0 = ll;
1134*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (3_1) res0 = vis_fand(dres,DA0);
1135*25c28e83SPiotr Jasiukajtis
1136*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
1137*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (1_0) j0 = hy0 - j0;
1138*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (1_0) hx0 ? 0x00100000
1139*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
1140*25c28e83SPiotr Jasiukajtis
1141*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
1142*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (1_0) j0 &= 0x7ff00000;
1143*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update35		! (1_0) if ( hx0 < 0x00100000 )
1144*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
1145*25c28e83SPiotr Jasiukajtis.cont35a:
1146*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (3_1) dtmp0 = res0_hi * res0;
1147*25c28e83SPiotr Jasiukajtis	nop
1148*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%o4		! (1_0) j0 = 0x7ff00000 - j0;
1149*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (5_1) dtmp0 = DTWO - dtmp0;
1150*25c28e83SPiotr Jasiukajtis.cont35b:
1151*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f24,%f24		! (4_1) dtmp2 = dd * dres;
1152*25c28e83SPiotr Jasiukajtis	sllx	%o4,32,%o4		! (1_0) ll = (long long)j0 << 32;
1153*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp3]		! (1_0) *(long long*)&scl0 = ll;
1154*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (7_1) dtmp1 = y0 + y_hi0;
1155*25c28e83SPiotr Jasiukajtis
1156*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f48,%f38		! (3_1) dtmp1 = res0_lo * res0;
1157*25c28e83SPiotr Jasiukajtis	nop
1158*25c28e83SPiotr Jasiukajtis	nop
1159*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (7_1) y_lo0 = y0 - y_hi0;
1160*25c28e83SPiotr Jasiukajtis.cont36:
1161*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f0,%f0		! (7_1) res0_lo *= x_lo0;
1162*25c28e83SPiotr Jasiukajtis	nop
1163*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp1],%f62	! (0_0) *(long long*)&scl0 = ll;
1164*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f44		! (7_1) res0_hi += dtmp0;
1165*25c28e83SPiotr Jasiukajtis
1166*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f10,%f60		! (3_1) dtmp0 = DONE - dtmp0;
1167*25c28e83SPiotr Jasiukajtis	nop
1168*25c28e83SPiotr Jasiukajtis	lda	[%i2]%asi,%f10		! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
1169*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f20,%f54		! (5_1) dd *= dtmp0;
1170*25c28e83SPiotr Jasiukajtis
1171*25c28e83SPiotr Jasiukajtis	nop
1172*25c28e83SPiotr Jasiukajtis	nop
1173*25c28e83SPiotr Jasiukajtis	lda	[%i2+4]%asi,%f11	! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
1174*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1175*25c28e83SPiotr Jasiukajtis
1176*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f12,%f26		! (7_1) dtmp1 *= y_lo0;
1177*25c28e83SPiotr Jasiukajtis	nop
1178*25c28e83SPiotr Jasiukajtis	lda	[%o0]%asi,%f12		! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
1179*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f24,%f24		! (4_1) dtmp2 = DTWO - dtmp2;
1180*25c28e83SPiotr Jasiukajtis
1181*25c28e83SPiotr Jasiukajtis	nop
1182*25c28e83SPiotr Jasiukajtis	nop
1183*25c28e83SPiotr Jasiukajtis	lda	[%o0+4]%asi,%f13	! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
1184*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1185*25c28e83SPiotr Jasiukajtis
1186*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f16,%f46		! (5_1) dtmp1 = dd * dres;
1187*25c28e83SPiotr Jasiukajtis	nop
1188*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (6_1) iarr = ((int*)&dres)[0];
1189*25c28e83SPiotr Jasiukajtis	fand	%f18,DA1,%f2		! (6_1) dexp0 = vis_fand(dres,DA1);
1190*25c28e83SPiotr Jasiukajtis
1191*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (0_0) x0 *= scl0;
1192*25c28e83SPiotr Jasiukajtis	nop
1193*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp4],%f50	! (1_1) *(long long*)&scl0 = ll;
1194*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f38,%f20		! (3_1) dtmp0 -= dtmp1;
1195*25c28e83SPiotr Jasiukajtis
1196*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (0_0) y0 *= scl0;
1197*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%g1		! (6_1) iarr >>= 11;
1198*25c28e83SPiotr Jasiukajtis	nop
1199*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f26,%f38		! (7_1) res0_lo += dtmp1;
1200*25c28e83SPiotr Jasiukajtis
1201*25c28e83SPiotr Jasiukajtis	nop
1202*25c28e83SPiotr Jasiukajtis	and	%g1,0x1fc,%g1		! (6_1) iarr &= 0x1fc;
1203*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1204*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f24,%f26		! (4_1) dres = dd * dtmp2;
1205*25c28e83SPiotr Jasiukajtis
1206*25c28e83SPiotr Jasiukajtis	fsqrtd	%f52,%f24		! (2_1) res0 = sqrt ( res0 );
1207*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%o1		! (2_0) hx0 = *(int*)px;
1208*25c28e83SPiotr Jasiukajtis	add	%g1,TBL,%g1		! (6_1) (char*)dll1 + iarr
1209*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f46,%f62		! (5_1) dtmp1 = DTWO - dtmp1;
1210*25c28e83SPiotr Jasiukajtis
1211*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f28,%f52		! (3_1) dtmp0 *= dres;
1212*25c28e83SPiotr Jasiukajtis	mov	%i1,%i2
1213*25c28e83SPiotr Jasiukajtis	ld	[%g1],%f28		! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1214*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f46	! (0_0) x_hi0 = x0 + D2ON36;
1215*25c28e83SPiotr Jasiukajtis
1216*25c28e83SPiotr Jasiukajtis	nop
1217*25c28e83SPiotr Jasiukajtis	mov	%i0,%o0
1218*25c28e83SPiotr Jasiukajtis	lda	[%i0]0x82,%g1		! (2_0) hy0 = *(int*)py;
1219*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f12	! (0_0) y_hi0 = y0 + D2ON36;
1220*25c28e83SPiotr Jasiukajtis
1221*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f38,%f14		! (7_1) dres = res0_hi + res0_lo;
1222*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (2_0) hx0 &= 0x7fffffff;
1223*25c28e83SPiotr Jasiukajtis	st	%f14,[%fp+ftmp0]	! (7_1) iarr = ((int*)&dres)[0];
1224*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f22,%f0		! (1_1) res0 = scl0 * res0;
1225*25c28e83SPiotr Jasiukajtis
1226*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f62,%f22		! (5_1) dd *= dtmp1;
1227*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (2_0) hx0 ? 0x7ff00000
1228*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
1229*25c28e83SPiotr Jasiukajtis	fpsub32	%f28,%f2,%f28		! (6_1) dd = vis_fpsub32(dtmp0, dexp0);
1230*25c28e83SPiotr Jasiukajtis
1231*25c28e83SPiotr Jasiukajtis	and	%g1,_0x7fffffff,%l7	! (2_0) hy0 &= 0x7fffffff;
1232*25c28e83SPiotr Jasiukajtis	nop
1233*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update37		! (2_0) if ( hx0 >= 0x7ff00000 )
1234*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (0_0) x_hi0 -= D2ON36;
1235*25c28e83SPiotr Jasiukajtis
1236*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (2_0) diff0 = hy0 - hx0;
1237*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (2_0) hy0 ? 0x7ff00000
1238*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
1239*25c28e83SPiotr Jasiukajtis	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;
1240*25c28e83SPiotr Jasiukajtis
1241*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f18,%f50		! (6_1) dtmp0 = dd * dres;
1242*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (2_0) j0 = diff0 >> 31;
1243*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update38		! (2_0) if ( hy0 >= 0x7ff00000 )
1244*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (3_1) res0 += dtmp0;
1245*25c28e83SPiotr Jasiukajtis
1246*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (2_0) j0 &= diff0;
1247*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
1248*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp4]		! (1_0) *(long long*)&scl0 = ll;
1249*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f48		! (4_1) res0 = vis_fand(dres,DA0);
1250*25c28e83SPiotr Jasiukajtis
1251*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
1252*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (2_0) hx0 ? 0x00100000
1253*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (2_0) j0 = hy0 - j0;
1254*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
1255*25c28e83SPiotr Jasiukajtis
1256*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
1257*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (2_0) j0 &= 0x7ff00000;
1258*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update39		! (2_0) if ( hx0 < 0x00100000 )
1259*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
1260*25c28e83SPiotr Jasiukajtis.cont39a:
1261*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f48,%f10		! (4_1) dtmp0 = res0_hi * res0;
1262*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (2_0) j0 = 0x7ff00000 - j0;
1263*25c28e83SPiotr Jasiukajtis	nop
1264*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (6_1) dtmp0 = DTWO - dtmp0;
1265*25c28e83SPiotr Jasiukajtis.cont39b:
1266*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f16,%f16		! (5_1) dtmp2 = dd * dres;
1267*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (2_0) ll = (long long)j0 << 32;
1268*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp5]		! (2_0) *(long long*)&scl0 = ll;
1269*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (0_0) dtmp1 = y0 + y_hi0;
1270*25c28e83SPiotr Jasiukajtis
1271*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f48,%f36		! (4_1) dtmp1 = res0_lo * res0;
1272*25c28e83SPiotr Jasiukajtis	nop
1273*25c28e83SPiotr Jasiukajtis	nop
1274*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (0_0) y_lo0 = y0 - y_hi0;
1275*25c28e83SPiotr Jasiukajtis.cont40:
1276*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f0,%f0		! (0_0) res0_lo *= x_lo0;
1277*25c28e83SPiotr Jasiukajtis	nop
1278*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp3],%f62	! (1_0) *(long long*)&scl0 = ll;
1279*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f32		! (0_0) res0_hi += dtmp0;
1280*25c28e83SPiotr Jasiukajtis
1281*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f10,%f60		! (4_1) dtmp0 = DONE - dtmp0;
1282*25c28e83SPiotr Jasiukajtis	nop
1283*25c28e83SPiotr Jasiukajtis	lda	[%i4]%asi,%f10		! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
1284*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f20,%f54		! (6_1) dd *= dtmp0;
1285*25c28e83SPiotr Jasiukajtis
1286*25c28e83SPiotr Jasiukajtis	nop
1287*25c28e83SPiotr Jasiukajtis	nop
1288*25c28e83SPiotr Jasiukajtis	lda	[%i4+4]%asi,%f11	! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
1289*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1290*25c28e83SPiotr Jasiukajtis
1291*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f12,%f28		! (0_0) dtmp1 *= y_lo0;
1292*25c28e83SPiotr Jasiukajtis	nop
1293*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f12		! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
1294*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f16,%f16		! (5_1) dtmp2 = DTWO - dtmp2;
1295*25c28e83SPiotr Jasiukajtis
1296*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i4		! px += stridex
1297*25c28e83SPiotr Jasiukajtis	nop
1298*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f13	! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
1299*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1300*25c28e83SPiotr Jasiukajtis
1301*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f18,%f46		! (6_1) dtmp1 = dd * dres;
1302*25c28e83SPiotr Jasiukajtis	add	%i4,stridex,%i1		! px += stridex
1303*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (7_1) iarr = ((int*)&dres)[0];
1304*25c28e83SPiotr Jasiukajtis	fand	%f14,DA1,%f2		! (7_1) dexp0 = vis_fand(dres,DA1);
1305*25c28e83SPiotr Jasiukajtis
1306*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (1_0) x0 *= scl0;
1307*25c28e83SPiotr Jasiukajtis	nop
1308*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp6],%f50	! (2_1) *(long long*)&scl0 = ll;
1309*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f36,%f20		! (4_1) dtmp0 -= dtmp1;
1310*25c28e83SPiotr Jasiukajtis
1311*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (1_0) y0 *= scl0;
1312*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i3		! (7_1) iarr >>= 11;
1313*25c28e83SPiotr Jasiukajtis	nop
1314*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f28,%f36		! (0_0) res0_lo += dtmp1;
1315*25c28e83SPiotr Jasiukajtis
1316*25c28e83SPiotr Jasiukajtis	and	%i3,0x1fc,%i3		! (7_1) iarr &= 0x1fc;
1317*25c28e83SPiotr Jasiukajtis	nop
1318*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1319*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f16,%f28		! (5_1) dres = dd * dtmp2;
1320*25c28e83SPiotr Jasiukajtis
1321*25c28e83SPiotr Jasiukajtis	fsqrtd	%f52,%f16		! (3_1) res0 = sqrt ( res0 );
1322*25c28e83SPiotr Jasiukajtis	add	%i3,TBL,%o4		! (7_1) (char*)dll1 + iarr
1323*25c28e83SPiotr Jasiukajtis	lda	[%i4]0x82,%o1		! (3_0) hx0 = *(int*)px;
1324*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f46,%f62		! (6_1) dtmp1 = DTWO - dtmp1;
1325*25c28e83SPiotr Jasiukajtis
1326*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f26,%f52		! (4_1) dtmp0 *= dres;
1327*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i3		! py += stridey
1328*25c28e83SPiotr Jasiukajtis	ld	[%o4],%f26		! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1329*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f46	! (1_0) x_hi0 = x0 + D2ON36;
1330*25c28e83SPiotr Jasiukajtis
1331*25c28e83SPiotr Jasiukajtis	nop
1332*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i0		! py += stridey
1333*25c28e83SPiotr Jasiukajtis	lda	[%i3]0x82,%o4		! (3_0) hy0 = *(int*)py;
1334*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f12	! (1_0) y_hi0 = y0 + D2ON36;
1335*25c28e83SPiotr Jasiukajtis
1336*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f36,%f22		! (0_0) dres = res0_hi + res0_lo;
1337*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (3_0) hx0 &= 0x7fffffff;
1338*25c28e83SPiotr Jasiukajtis	st	%f22,[%fp+ftmp0]	! (0_0) iarr = ((int*)&dres)[0];
1339*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f24,%f0		! (2_1) res0 = scl0 * res0;
1340*25c28e83SPiotr Jasiukajtis
1341*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f62,%f24		! (6_1) dd *= dtmp1;
1342*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (3_0) hx0 ? 0x7ff00000
1343*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
1344*25c28e83SPiotr Jasiukajtis	fpsub32	%f26,%f2,%f26		! (7_1) dd = vis_fpsub32(dtmp0, dexp0);
1345*25c28e83SPiotr Jasiukajtis
1346*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (3_0) hy0 &= 0x7fffffff;
1347*25c28e83SPiotr Jasiukajtis	nop
1348*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update41		! (3_0) if ( hx0 >= 0x7ff00000 )
1349*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (1_0) x_hi0 -= D2ON36;
1350*25c28e83SPiotr Jasiukajtis
1351*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (3_0) diff0 = hy0 - hx0;
1352*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (3_0) hy0 ? 0x7ff00000
1353*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
1354*25c28e83SPiotr Jasiukajtis	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;
1355*25c28e83SPiotr Jasiukajtis
1356*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres;
1357*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (3_0) j0 = diff0 >> 31;
1358*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update42		! (3_0) if ( hy0 >= 0x7ff00000 )
1359*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (4_1) res0 += dtmp0;
1360*25c28e83SPiotr Jasiukajtis
1361*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (3_0) j0 &= diff0;
1362*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
1363*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp6]		! (2_0) *(long long*)&scl0 = ll;
1364*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (5_1) res0 = vis_fand(dres,DA0);
1365*25c28e83SPiotr Jasiukajtis
1366*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
1367*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (3_0) hx0 ? 0x00100000
1368*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (3_0) j0 = hy0 - j0;
1369*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
1370*25c28e83SPiotr Jasiukajtis
1371*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
1372*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (3_0) j0 &= 0x7ff00000;
1373*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update43		! (3_0) if ( hx0 < 0x00100000 )
1374*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
1375*25c28e83SPiotr Jasiukajtis.cont43a:
1376*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
1377*25c28e83SPiotr Jasiukajtis	nop
1378*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (3_0) j0 = 0x7ff00000 - j0;
1379*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
1380*25c28e83SPiotr Jasiukajtis.cont43b:
1381*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f18,%f18		! (6_1) dtmp2 = dd * dres;
1382*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (3_0) ll = (long long)j0 << 32;
1383*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp7]		! (3_0) *(long long*)&scl0 = ll;
1384*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (1_0) dtmp1 = y0 + y_hi0;
1385*25c28e83SPiotr Jasiukajtis
1386*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f48,%f34		! (5_1) dtmp1 = res0_lo * res0;
1387*25c28e83SPiotr Jasiukajtis	nop
1388*25c28e83SPiotr Jasiukajtis	nop
1389*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (1_0) y_lo0 = y0 - y_hi0
1390*25c28e83SPiotr Jasiukajtis.cont44:
1391*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f0,%f0		! (1_0) res0_lo *= x_lo0;
1392*25c28e83SPiotr Jasiukajtis	nop
1393*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp5],%f62	! (2_0) *(long long*)&scl0 = ll;
1394*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f42		! (1_0) res0_hi += dtmp0;
1395*25c28e83SPiotr Jasiukajtis
1396*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f10,%f60		! (5_1) dtmp0 = DONE - dtmp0;
1397*25c28e83SPiotr Jasiukajtis	nop
1398*25c28e83SPiotr Jasiukajtis	lda	[%i2]%asi,%f10		! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
1399*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f20,%f54		! (7_1) dd *= dtmp0;
1400*25c28e83SPiotr Jasiukajtis
1401*25c28e83SPiotr Jasiukajtis	nop
1402*25c28e83SPiotr Jasiukajtis	nop
1403*25c28e83SPiotr Jasiukajtis	lda	[%i2+4]%asi,%f11	! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
1404*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1405*25c28e83SPiotr Jasiukajtis
1406*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f12,%f26		! (1_0) dtmp1 *= y_lo0;
1407*25c28e83SPiotr Jasiukajtis	nop
1408*25c28e83SPiotr Jasiukajtis	lda	[%o0]%asi,%f12		! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
1409*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f18,%f20		! (6_1) dtmp2 = DTWO - dtmp2;
1410*25c28e83SPiotr Jasiukajtis
1411*25c28e83SPiotr Jasiukajtis	nop
1412*25c28e83SPiotr Jasiukajtis	nop
1413*25c28e83SPiotr Jasiukajtis	lda	[%o0+4]%asi,%f13	! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
1414*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1415*25c28e83SPiotr Jasiukajtis
1416*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f14,%f50		! (7_1) dtmp1 = dd * dres;
1417*25c28e83SPiotr Jasiukajtis	nop
1418*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (0_0) iarr = ((int*)&dres)[0];
1419*25c28e83SPiotr Jasiukajtis	fand	%f22,DA1,%f2		! (0_0) dexp0 = vis_fand(dres,DA1);
1420*25c28e83SPiotr Jasiukajtis
1421*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (2_0) x0 *= scl0;
1422*25c28e83SPiotr Jasiukajtis	nop
1423*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp8],%f18	! (3_1) *(long long*)&scl0 = ll;
1424*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f34,%f46		! (5_1) dtmp0 -= dtmp1;
1425*25c28e83SPiotr Jasiukajtis
1426*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (2_0) y0 *= scl0;
1427*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%o4		! (0_0) iarr >>= 11;
1428*25c28e83SPiotr Jasiukajtis	nop
1429*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f26,%f34		! (1_0) res0_lo += dtmp1;
1430*25c28e83SPiotr Jasiukajtis
1431*25c28e83SPiotr Jasiukajtis	and	%o4,0x1fc,%o4		! (0_0) iarr &= 0x1fc;
1432*25c28e83SPiotr Jasiukajtis	nop
1433*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1434*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f20,%f26		! (6_1) dres = dd * dtmp2;
1435*25c28e83SPiotr Jasiukajtis
1436*25c28e83SPiotr Jasiukajtis	fsqrtd	%f52,%f24		! (4_1) res0 = sqrt ( res0 );
1437*25c28e83SPiotr Jasiukajtis	add	%o4,TBL,%o4		! (0_0) (char*)dll1 + iarr
1438*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%o1		! (4_0) hx0 = *(int*)px;
1439*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (7_1) dtmp1 = DTWO - dtmp1;
1440*25c28e83SPiotr Jasiukajtis
1441*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f28,%f52		! (5_1) dtmp0 *= dres;
1442*25c28e83SPiotr Jasiukajtis	mov	%i1,%i2
1443*25c28e83SPiotr Jasiukajtis	ld	[%o4],%f28		! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1444*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f46	! (2_0) x_hi0 = x0 + D2ON36;
1445*25c28e83SPiotr Jasiukajtis
1446*25c28e83SPiotr Jasiukajtis	nop
1447*25c28e83SPiotr Jasiukajtis	mov	%i0,%o0
1448*25c28e83SPiotr Jasiukajtis	lda	[%i0]0x82,%o4		! (4_0) hy0 = *(int*)py;
1449*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f50	! (2_0) y_hi0 = y0 + D2ON36;
1450*25c28e83SPiotr Jasiukajtis
1451*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f16,%f0		! (3_1) res0 = scl0 * res0;
1452*25c28e83SPiotr Jasiukajtis	nop
1453*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (4_0) hx0 &= 0x7fffffff;
1454*25c28e83SPiotr Jasiukajtis	faddd	%f42,%f34,%f18		! (1_0) dres = res0_hi + res0_lo;
1455*25c28e83SPiotr Jasiukajtis
1456*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f20,%f16		! (7_1) dd *= dtmp1;
1457*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (4_0) hx0 ? 0x7ff00000
1458*25c28e83SPiotr Jasiukajtis	st	%f18,[%fp+ftmp0]	! (1_0) iarr = ((int*)&dres)[0];
1459*25c28e83SPiotr Jasiukajtis	fpsub32	%f28,%f2,%f28		! (0_0) dd = vis_fpsub32(dtmp0, dexp0);
1460*25c28e83SPiotr Jasiukajtis
1461*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (4_0) hy0 &= 0x7fffffff;
1462*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
1463*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update45		! (4_0) if ( hx0 >= 0x7ff00000 )
1464*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (2_0) x_hi0 -= D2ON36;
1465*25c28e83SPiotr Jasiukajtis
1466*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (4_0) diff0 = hy0 - hx0;
1467*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (4_0) hy0 ? 0x7ff00000
1468*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update46		! (4_0) if ( hy0 >= 0x7ff00000 )
1469*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;
1470*25c28e83SPiotr Jasiukajtis
1471*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
1472*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (4_0) j0 = diff0 >> 31;
1473*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
1474*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (5_1) res0 += dtmp0;
1475*25c28e83SPiotr Jasiukajtis
1476*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (4_0) j0 &= diff0;
1477*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (4_0) hx0 ? 0x00100000
1478*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update47		! (4_0) if ( hx0 < 0x00100000 )
1479*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f48		! (6_1) res0 = vis_fand(dres,DA0);
1480*25c28e83SPiotr Jasiukajtis.cont47a:
1481*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
1482*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (4_0) j0 = hy0 - j0;
1483*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
1484*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
1485*25c28e83SPiotr Jasiukajtis
1486*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
1487*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (4_0) j0 &= 0x7ff00000;
1488*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
1489*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
1490*25c28e83SPiotr Jasiukajtis
1491*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f48,%f10		! (6_1) dtmp0 = res0_hi * res0;
1492*25c28e83SPiotr Jasiukajtis	nop
1493*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (4_0) j0 = 0x7ff00000 - j0;
1494*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
1495*25c28e83SPiotr Jasiukajtis.cont47b:
1496*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f14,%f14		! (7_1) dtmp2 = dd * dres;
1497*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (4_0) ll = (long long)j0 << 32;
1498*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp9]		! (4_0) *(long long*)&scl0 = ll;
1499*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (2_0) dtmp1 = y0 + y_hi0;
1500*25c28e83SPiotr Jasiukajtis
1501*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f48,%f40		! (6_1) dtmp1 = res0_lo * res0;
1502*25c28e83SPiotr Jasiukajtis	nop
1503*25c28e83SPiotr Jasiukajtis	nop
1504*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (2_0) y_lo0 = y0 - y_hi0;
/*
 * .cont48: pipeline slice that ends with the exceptional-input checks
 * for element (5_0).  Work for several elements is interleaved; the tag
 * in each line comment names the element an instruction belongs to.
 *
 * Per element, this slice:
 *  (2_0) finishes res0_hi and res0_lo, adds them into dres, and spills
 *        the upper word of dres to ftmp0 for a later integer load.
 *  (3_0) reloads scl0 from dtmp7, loads x0/y0 through %asi, scales both
 *        and begins the x_hi0/y_hi0 split by adding and then
 *        subtracting D2ON36.
 *  (1_0) turns the spilled word into a table offset:
 *        (word >> 11) & 0x1fc is a multiple of 4 in 0..0x1fc, added to
 *        TBL.  The word loaded from there has dexp0 subtracted with
 *        fpsub32 to give the starting estimate dd.
 *  (0_0), (7_1) refinement steps of the form dd *= (DTWO - dd * dres),
 *        i.e. the Newton-Raphson reciprocal iteration.
 *  (6_1) correction term: (DONE - res0_hi*res0 - res0_lo*res0) * dres
 *        is added to res0.
 *  (5_1) fsqrtd.
 *  (4_1) multiplies by its scale (from dtmp10) and stores the result to
 *        pz as two 32-bit halves.
 *  (5_0) reads the leading 32-bit word of the next x and y with ASI
 *        0x82 (ASI_PRIMARY_NOFAULT in SPARC V9: a non-faulting load),
 *        clears the sign bit and branches to .update49, .update50 or
 *        .update51 when hx0 >= 0x7ff00000, hy0 >= 0x7ff00000 or
 *        hx0 < 0x00100000 respectively.
 *
 * %i3 is used as the py pointer for the (3_0) loads, then as scratch
 * for the (1_0) table offset, and is set up again from %i0 + stridey
 * before the (5_0) look-ahead load.
 *
 * "bn,pn %icc,.exit" is a branch-never without the annul bit: control
 * never transfers to .exit and the next instruction executes normally.
 * It presumably only fills an issue slot -- confirm before removing.
 *
 * None of the bge/bl branches is annulled, so the instruction in each
 * delay slot executes on both the taken and the fall-through path.
 */
1505*25c28e83SPiotr Jasiukajtis.cont48:
1506*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f0,%f0		! (2_0) res0_lo *= x_lo0;
1507*25c28e83SPiotr Jasiukajtis	nop
1508*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp7],%f62	! (3_0) *(long long*)&scl0 = ll;
1509*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f30		! (2_0) res0_hi += dtmp0;
1510*25c28e83SPiotr Jasiukajtis
1511*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f10,%f60		! (6_1) dtmp0 = DONE - dtmp0;
1512*25c28e83SPiotr Jasiukajtis	nop
1513*25c28e83SPiotr Jasiukajtis	lda	[%i4]%asi,%f10		! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
1514*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f20,%f54		! (0_0) dd *= dtmp0;
1515*25c28e83SPiotr Jasiukajtis
1516*25c28e83SPiotr Jasiukajtis	nop
1517*25c28e83SPiotr Jasiukajtis	nop
1518*25c28e83SPiotr Jasiukajtis	lda	[%i4+4]%asi,%f11	! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
1519*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1520*25c28e83SPiotr Jasiukajtis
1521*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f12,%f28		! (2_0) dtmp1 *= y_lo0;
1522*25c28e83SPiotr Jasiukajtis	nop
1523*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f12		! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
1524*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f14,%f20		! (7_1) dtmp2 = DTWO - dtmp2;
1525*25c28e83SPiotr Jasiukajtis
1526*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f13	! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
1527*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i4		! px += stridex
1528*25c28e83SPiotr Jasiukajtis	nop
1529*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1530*25c28e83SPiotr Jasiukajtis
1531*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f22,%f50		! (0_0) dtmp1 = dd * dres;
1532*25c28e83SPiotr Jasiukajtis	add	%i4,stridex,%i1		! px += stridex
1533*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (1_0) iarr = ((int*)&dres)[0];
1534*25c28e83SPiotr Jasiukajtis	fand	%f18,DA1,%f2		! (1_0) dexp0 = vis_fand(dres,DA1);
1535*25c28e83SPiotr Jasiukajtis
1536*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (3_0) x0 *= scl0;
1537*25c28e83SPiotr Jasiukajtis	nop
1538*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp10],%f14	! (4_1) *(long long*)&scl0 = ll;
1539*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f40,%f46		! (6_1) dtmp0 -= dtmp1;
1540*25c28e83SPiotr Jasiukajtis
1541*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (3_0) y0 *= scl0;
1542*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i3		! (1_0) iarr >>= 11;
1543*25c28e83SPiotr Jasiukajtis	nop
1544*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f28,%f40		! (2_0) res0_lo += dtmp1;
1545*25c28e83SPiotr Jasiukajtis
1546*25c28e83SPiotr Jasiukajtis	and	%i3,0x1fc,%i3		! (1_0) iarr &= 0x1fc;
1547*25c28e83SPiotr Jasiukajtis	nop
1548*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1549*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f20,%f28		! (7_1) dres = dd * dtmp2;
1550*25c28e83SPiotr Jasiukajtis
1551*25c28e83SPiotr Jasiukajtis	fsqrtd	%f52,%f16		! (5_1) res0 = sqrt ( res0 );
1552*25c28e83SPiotr Jasiukajtis	add	%i3,TBL,%o4		! (1_0) (char*)dll1 + iarr
1553*25c28e83SPiotr Jasiukajtis	lda	[%i4]0x82,%o1		! (5_0) hx0 = *(int*)px;
1554*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (0_0) dtmp1 = DTWO - dtmp1;
1555*25c28e83SPiotr Jasiukajtis
1556*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f26,%f52		! (6_1) dtmp0 *= dres;
1557*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i3		! py += stridey
1558*25c28e83SPiotr Jasiukajtis	ld	[%o4],%f26		! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1559*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f46	! (3_0) x_hi0 = x0 + D2ON36;
1560*25c28e83SPiotr Jasiukajtis
1561*25c28e83SPiotr Jasiukajtis	nop
1562*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i0		! py += stridey
1563*25c28e83SPiotr Jasiukajtis	lda	[%i3]0x82,%o4		! (5_0) hy0 = *(int*)py;
1564*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f50	! (3_0) y_hi0 = y0 + D2ON36;
1565*25c28e83SPiotr Jasiukajtis
1566*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f24,%f0		! (4_1) res0 = scl0 * res0;
1567*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (5_0) hx0 &= 0x7fffffff;
1568*25c28e83SPiotr Jasiukajtis	nop
1569*25c28e83SPiotr Jasiukajtis	faddd	%f30,%f40,%f14		! (2_0) dres = res0_hi + res0_lo;
1570*25c28e83SPiotr Jasiukajtis
1571*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f20,%f24		! (0_0) dd *= dtmp1;
1572*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (5_0) hx0 ? 0x7ff00000
1573*25c28e83SPiotr Jasiukajtis	st	%f14,[%fp+ftmp0]	! (2_0) iarr = ((int*)&dres)[0];
1574*25c28e83SPiotr Jasiukajtis	fpsub32	%f26,%f2,%f26		! (1_0) dd = vis_fpsub32(dtmp0, dexp0);
1575*25c28e83SPiotr Jasiukajtis
1576*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (5_0) hy0 &= 0x7fffffff;
1577*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
1578*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update49		! (5_0) if ( hx0 >= 0x7ff00000 )
1579*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (3_0) x_hi0 -= D2ON36;
1580*25c28e83SPiotr Jasiukajtis
1581*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (5_0) diff0 = hy0 - hx0;
1582*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (5_0) hy0 ? 0x7ff00000
1583*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update50		! (5_0) if ( hy0 >= 0x7ff00000 )
1584*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;
1585*25c28e83SPiotr Jasiukajtis
1586*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
1587*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (5_0) j0 = diff0 >> 31;
1588*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
1589*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (6_1) res0 += dtmp0;
1590*25c28e83SPiotr Jasiukajtis
1591*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (5_0) j0 &= diff0;
1592*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (5_0) hx0 ? 0x00100000
1593*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update51		! (5_0) if ( hx0 < 0x00100000 )
1594*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
/*
 * .cont51a / .cont51b: re-entry labels inside the main loop body
 * (presumably targets of the .update51 fix-up path -- confirm against
 * that handler).
 *
 * Integer work, element (5_0): build the scale-factor bits.
 *  - On the fall-through path %o1 holds diff0 & (diff0 >> 31), which is
 *    diff0 = hy0 - hx0 when that is negative and 0 otherwise, so
 *    "hy0 - %o1" selects the larger of hx0 and hy0 without a branch.
 *  - The result is masked with %l0 (0x7ff00000 per the line comment),
 *    subtracted from %l0, shifted into the upper 32 bits and stored to
 *    dtmp11; "ldd [%fp+dtmp11],%f62" later reads it back as scl0.
 *  - Before %g1 is overwritten, its previous value (the (4_0) scale
 *    bits) is copied to dtmp10, which the ldd tagged (4_1) near the top
 *    of the loop body reads.
 *
 * pz is advanced by stridez here; on the fall-through path both halves
 * of the previous result have already been stored through %i5.
 *
 * FP work: first product terms of the hi/lo sum for (3_0), plus steps
 * for (7_1), (1_0) and (0_0), as tagged per line.
 */
1595*25c28e83SPiotr Jasiukajtis.cont51a:
1596*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
1597*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (5_0) j0 = hy0 - j0;
1598*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
1599*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
1600*25c28e83SPiotr Jasiukajtis
1601*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
1602*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (5_0) j0 &= 0x7ff00000;
1603*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
1604*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
1605*25c28e83SPiotr Jasiukajtis
1606*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
1607*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (5_0) j0 = 0x7ff00000 - j0;
1608*25c28e83SPiotr Jasiukajtis	nop
1609*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
1610*25c28e83SPiotr Jasiukajtis.cont51b:
1611*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
1612*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
1613*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
1614*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
1615*25c28e83SPiotr Jasiukajtis
1616*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
1617*25c28e83SPiotr Jasiukajtis	nop
1618*25c28e83SPiotr Jasiukajtis	nop
1619*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
/*
 * .cont52: pipeline slice that ends with the exceptional-input checks
 * for element (6_0).  Work for several elements is interleaved; the tag
 * in each line comment names the element an instruction belongs to.
 *
 * Per element, this slice:
 *  (3_0) finishes res0_hi and res0_lo, adds them into dres, and spills
 *        the upper word of dres to ftmp0.
 *  (4_0) reloads scl0 from dtmp9, loads x0 via %i2 and y0 via %o0
 *        through %asi, scales both and begins the x_hi0/y_hi0 split by
 *        adding and then subtracting D2ON36.
 *  (2_0) reads ftmp0 back as an integer and forms the table offset
 *        (word >> 11) & 0x1fc, a multiple of 4 in 0..0x1fc, added to
 *        TBL; the word loaded from there has dexp0 subtracted with
 *        fpsub32 to give the starting estimate dd.
 *  (1_0), (0_0) refinement steps of the form dd *= (DTWO - dd * dres),
 *        i.e. the Newton-Raphson reciprocal iteration.
 *  (7_1) correction term added to res0.
 *  (6_1) fsqrtd.
 *  (5_1) multiplies by its scale (from dtmp12) and stores the result to
 *        pz as two 32-bit halves.
 *  (6_0) reads the leading 32-bit word of x and y with ASI 0x82
 *        (ASI_PRIMARY_NOFAULT in SPARC V9: a non-faulting load), clears
 *        the sign bit and branches to .update53, .update54 or .update55
 *        when hx0 >= 0x7ff00000, hy0 >= 0x7ff00000 or hx0 < 0x00100000
 *        respectively.
 *
 * "mov %i1,%i2" and "mov %i0,%o0" keep copies of the addresses just
 * probed; the operand loads tagged (6_0) later in the loop body read
 * through %i2 and %o0.
 *
 * "bn,pn %icc,.exit" is a branch-never without the annul bit: control
 * never transfers to .exit and the next instruction executes normally.
 * It presumably only fills an issue slot -- confirm before removing.
 *
 * None of the bge/bl branches is annulled, so the instruction in each
 * delay slot executes on both the taken and the fall-through path.
 */
1620*25c28e83SPiotr Jasiukajtis.cont52:
1621*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f0,%f0		! (3_0) res0_lo *= x_lo0;
1622*25c28e83SPiotr Jasiukajtis	nop
1623*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp9],%f62	! (4_0) *(long long*)&scl0 = ll;
1624*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f44		! (3_0) res0_hi += dtmp0;
1625*25c28e83SPiotr Jasiukajtis
1626*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f10,%f60		! (7_1) dtmp0 = DONE - dtmp0;
1627*25c28e83SPiotr Jasiukajtis	nop
1628*25c28e83SPiotr Jasiukajtis	lda	[%i2]%asi,%f10		! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
1629*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f20,%f54		! (1_0) dd *= dtmp0;
1630*25c28e83SPiotr Jasiukajtis
1631*25c28e83SPiotr Jasiukajtis	nop
1632*25c28e83SPiotr Jasiukajtis	nop
1633*25c28e83SPiotr Jasiukajtis	lda	[%i2+4]%asi,%f11	! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
1634*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1635*25c28e83SPiotr Jasiukajtis
1636*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f12,%f26		! (3_0) dtmp1 *= y_lo0;
1637*25c28e83SPiotr Jasiukajtis	nop
1638*25c28e83SPiotr Jasiukajtis	lda	[%o0]%asi,%f12		! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
1639*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f22,%f20		! (0_0) dtmp2 = DTWO - dtmp2;
1640*25c28e83SPiotr Jasiukajtis
1641*25c28e83SPiotr Jasiukajtis	nop
1642*25c28e83SPiotr Jasiukajtis	nop
1643*25c28e83SPiotr Jasiukajtis	lda	[%o0+4]%asi,%f13	! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
1644*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1645*25c28e83SPiotr Jasiukajtis
1646*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f18,%f50		! (1_0) dtmp1 = dd * dres;
1647*25c28e83SPiotr Jasiukajtis	nop
1648*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (2_0) iarr = ((int*)&dres)[0];
1649*25c28e83SPiotr Jasiukajtis	fand	%f14,DA1,%f2		! (2_0) dexp0 = vis_fand(dres,DA1);
1650*25c28e83SPiotr Jasiukajtis
1651*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (4_0) x0 *= scl0;
1652*25c28e83SPiotr Jasiukajtis	nop
1653*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp12],%f22	! (5_1) *(long long*)&scl0 = ll;
1654*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f38,%f46		! (7_1) dtmp0 -= dtmp1;
1655*25c28e83SPiotr Jasiukajtis
1656*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (4_0) y0 *= scl0;
1657*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%o4		! (2_0) iarr >>= 11;
1658*25c28e83SPiotr Jasiukajtis	nop
1659*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f26,%f38		! (3_0) res0_lo += dtmp1;
1660*25c28e83SPiotr Jasiukajtis
1661*25c28e83SPiotr Jasiukajtis	and	%o4,0x1fc,%o4		! (2_0) iarr &= 0x1fc;
1662*25c28e83SPiotr Jasiukajtis	nop
1663*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1664*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f20,%f26		! (0_0) dres = dd * dtmp2;
1665*25c28e83SPiotr Jasiukajtis
1666*25c28e83SPiotr Jasiukajtis	fsqrtd	%f52,%f24		! (6_1) res0 = sqrt ( res0 );
1667*25c28e83SPiotr Jasiukajtis	add	%o4,TBL,%o4		! (2_0) (char*)dll1 + iarr
1668*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%o1		! (6_0) hx0 = *(int*)px;
1669*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f52		! (1_0) dtmp1 = DTWO - dtmp1;
1670*25c28e83SPiotr Jasiukajtis
1671*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f28,%f28		! (7_1) dtmp0 *= dres;
1672*25c28e83SPiotr Jasiukajtis	mov	%i1,%i2
1673*25c28e83SPiotr Jasiukajtis	ld	[%o4],%f20		! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1674*25c28e83SPiotr Jasiukajtis	faddd	%f10,D2ON36,%f46	! (4_0) x_hi0 = x0 + D2ON36;
1675*25c28e83SPiotr Jasiukajtis
1676*25c28e83SPiotr Jasiukajtis	nop
1677*25c28e83SPiotr Jasiukajtis	mov	%i0,%o0
1678*25c28e83SPiotr Jasiukajtis	lda	[%i0]0x82,%o4		! (6_0) hy0 = *(int*)py;
1679*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f50	! (4_0) y_hi0 = y0 + D2ON36;
1680*25c28e83SPiotr Jasiukajtis
1681*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f16,%f0		! (5_1) res0 = scl0 * res0;
1682*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (6_0) hx0 &= 0x7fffffff;
1683*25c28e83SPiotr Jasiukajtis	nop
1684*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f38,%f22		! (3_0) dres = res0_hi + res0_lo;
1685*25c28e83SPiotr Jasiukajtis
1686*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f52,%f16		! (1_0) dd *= dtmp1;
1687*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (6_0) hx0 ? 0x7ff00000
1688*25c28e83SPiotr Jasiukajtis	st	%f22,[%fp+ftmp0]	! (3_0) iarr = ((int*)&dres)[0];
1689*25c28e83SPiotr Jasiukajtis	fpsub32	%f20,%f2,%f52		! (2_0) dd = vis_fpsub32(dtmp0, dexp0);
1690*25c28e83SPiotr Jasiukajtis
1691*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (6_0) hy0 &= 0x7fffffff;
1692*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
1693*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update53		! (6_0) if ( hx0 >= 0x7ff00000 )
1694*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f46	! (4_0) x_hi0 -= D2ON36;
1695*25c28e83SPiotr Jasiukajtis
1696*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (6_0) diff0 = hy0 - hx0;
1697*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (6_0) hy0 ? 0x7ff00000
1698*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update54		! (6_0) if ( hy0 >= 0x7ff00000 )
1699*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36;
1700*25c28e83SPiotr Jasiukajtis
1701*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
1702*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (6_0) j0 = diff0 >> 31;
1703*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
1704*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
1705*25c28e83SPiotr Jasiukajtis
1706*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (6_0) j0 &= diff0;
1707*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (6_0) hx0 ? 0x00100000
1708*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update55		! (6_0) if ( hx0 < 0x00100000 )
1709*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
/*
 * .cont55a / .cont55b: re-entry labels inside the main loop body
 * (presumably targets of the .update55 fix-up path -- confirm against
 * that handler).
 *
 * Integer work, element (6_0): build the scale-factor bits.
 *  - On the fall-through path %o1 holds diff0 & (diff0 >> 31), which is
 *    diff0 = hy0 - hx0 when that is negative and 0 otherwise, so
 *    "hy0 - %o1" selects the larger of hx0 and hy0 without a branch.
 *  - The result is masked with %l0 (0x7ff00000 per the line comment),
 *    subtracted from %l0, shifted into the upper 32 bits and stored to
 *    dtmp13; "ldd [%fp+dtmp13],%f62" later reads it back as scl0.
 *  - Before %g1 is overwritten, its previous value (the (5_0) scale
 *    bits) is copied to dtmp12, which the ldd tagged (5_1) earlier in
 *    the loop body reads.
 *
 * pz is advanced by stridez here; on the fall-through path both halves
 * of the previous result have already been stored through %i5.
 *
 * FP work: first product terms of the hi/lo sum for (4_0), plus steps
 * for (0_0), (1_0) and (2_0), as tagged per line.
 */
1710*25c28e83SPiotr Jasiukajtis.cont55a:
1711*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
1712*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (6_0) j0 = hy0 - j0;
1713*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
1714*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
1715*25c28e83SPiotr Jasiukajtis
1716*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
1717*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (6_0) j0 &= 0x7ff00000;
1718*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
1719*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
1720*25c28e83SPiotr Jasiukajtis
1721*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
1722*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (6_0) j0 = 0x7ff00000 - j0;
1723*25c28e83SPiotr Jasiukajtis	nop
1724*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
1725*25c28e83SPiotr Jasiukajtis.cont55b:
1726*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
1727*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
1728*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
1729*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
1730*25c28e83SPiotr Jasiukajtis
1731*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
1732*25c28e83SPiotr Jasiukajtis	nop
1733*25c28e83SPiotr Jasiukajtis	nop
1734*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
/*
 * .cont56: pipeline slice that ends with the exceptional-input checks
 * for element (7_0).  Work for several elements is interleaved; the tag
 * in each line comment names the element an instruction belongs to.
 *
 * Per element, this slice:
 *  (4_0) finishes res0_hi and res0_lo, adds them into dres, and spills
 *        the upper word of dres to ftmp0.
 *  (5_0) reloads scl0 from dtmp11, loads x0 via %i4 and y0 via %i3
 *        through %asi, scales them into %f60 and %f52 and begins the
 *        x_hi0/y_hi0 split by adding and then subtracting D2ON36.
 *  (3_0) reads ftmp0 back as an integer and forms the table offset
 *        (word >> 11) & 0x1fc, a multiple of 4 in 0..0x1fc, added to
 *        TBL; the word loaded from there has dexp0 subtracted with
 *        fpsub32 to give the starting estimate dd.
 *  (2_0), (1_0) refinement steps of the form dd *= (DTWO - dd * dres),
 *        i.e. the Newton-Raphson reciprocal iteration.
 *  (0_0) correction term added to res0.
 *  (7_1) fsqrtd.
 *  (6_1) multiplies by its scale (from dtmp14) into %f2 and stores the
 *        result to pz as two 32-bit halves (%f2, %f3).
 *  (7_0) reads the leading 32-bit word of the next x and y with ASI
 *        0x82 (ASI_PRIMARY_NOFAULT in SPARC V9: a non-faulting load),
 *        clears the sign bit and branches to .update57, .update58 or
 *        .update59 when hx0 >= 0x7ff00000, hy0 >= 0x7ff00000 or
 *        hx0 < 0x00100000 respectively.
 *
 * %i3 is used as the py pointer for the (5_0) loads, then as scratch
 * for the (3_0) table offset, and is set up again from %i0 + stridey
 * before the (7_0) look-ahead load.
 *
 * "bn,pn %icc,.exit" is a branch-never without the annul bit: control
 * never transfers to .exit and the next instruction executes normally.
 * It presumably only fills an issue slot -- confirm before removing.
 *
 * None of the bge/bl branches is annulled, so the instruction in each
 * delay slot executes on both the taken and the fall-through path.
 */
1735*25c28e83SPiotr Jasiukajtis.cont56:
1736*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f2,%f2		! (4_0) res0_lo *= x_lo0;
1737*25c28e83SPiotr Jasiukajtis	nop
1738*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp11],%f62	! (5_0) *(long long*)&scl0 = ll;
1739*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f20,%f32		! (4_0) res0_hi += dtmp0;
1740*25c28e83SPiotr Jasiukajtis
1741*25c28e83SPiotr Jasiukajtis	lda	[%i4]%asi,%f0		! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
1742*25c28e83SPiotr Jasiukajtis	nop
1743*25c28e83SPiotr Jasiukajtis	nop
1744*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f10,%f10		! (2_0) dd *= dtmp0;
1745*25c28e83SPiotr Jasiukajtis
1746*25c28e83SPiotr Jasiukajtis	lda	[%i4+4]%asi,%f1		! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
1747*25c28e83SPiotr Jasiukajtis	nop
1748*25c28e83SPiotr Jasiukajtis	nop
1749*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f50,%f52		! (0_0) dtmp0 = DONE - dtmp0;
1750*25c28e83SPiotr Jasiukajtis
1751*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f60,%f46		! (4_0) dtmp1 *= y_lo0;
1752*25c28e83SPiotr Jasiukajtis	nop
1753*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f12		! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
1754*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f18,%f18		! (1_0) dtmp2 = DTWO - dtmp2;
1755*25c28e83SPiotr Jasiukajtis
1756*25c28e83SPiotr Jasiukajtis	nop
1757*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i4		! px += stridex
1758*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f13	! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
1759*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1760*25c28e83SPiotr Jasiukajtis
1761*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f50		! (2_0) dtmp1 = dd * dres;
1762*25c28e83SPiotr Jasiukajtis	add	%i4,stridex,%i1		! px += stridex
1763*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (3_0) iarr = ((int*)&dres)[0];
1764*25c28e83SPiotr Jasiukajtis	fand	%f22,DA1,%f54		! (3_0) dexp0 = vis_fand(dres,DA1);
1765*25c28e83SPiotr Jasiukajtis
1766*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f62,%f60		! (5_0) x0 *= scl0;
1767*25c28e83SPiotr Jasiukajtis	nop
1768*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp14],%f0	! (6_1) *(long long*)&scl0 = ll;
1769*25c28e83SPiotr Jasiukajtis	fsubd	%f52,%f36,%f20		! (0_0) dtmp0 -= dtmp1;
1770*25c28e83SPiotr Jasiukajtis
1771*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f52		! (5_0) y0 *= scl0;
1772*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i3		! (3_0) iarr >>= 11;
1773*25c28e83SPiotr Jasiukajtis	nop
1774*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f36		! (4_0) res0_lo += dtmp1;
1775*25c28e83SPiotr Jasiukajtis
1776*25c28e83SPiotr Jasiukajtis	and	%i3,0x1fc,%i3		! (3_0) iarr &= 0x1fc;
1777*25c28e83SPiotr Jasiukajtis	nop
1778*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1779*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f18,%f16		! (1_0) dres = dd * dtmp2;
1780*25c28e83SPiotr Jasiukajtis
1781*25c28e83SPiotr Jasiukajtis	fsqrtd	%f48,%f18		! (7_1) res0 = sqrt ( res0 );
1782*25c28e83SPiotr Jasiukajtis	add	%i3,TBL,%o4		! (3_0) (char*)dll1 + iarr
1783*25c28e83SPiotr Jasiukajtis	lda	[%i4]0x82,%o1		! (7_0) hx0 = *(int*)px;
1784*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f46		! (2_0) dtmp1 = DTWO - dtmp1;
1785*25c28e83SPiotr Jasiukajtis
1786*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f26,%f48		! (0_0) dtmp0 *= dres;
1787*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i3		! py += stridey
1788*25c28e83SPiotr Jasiukajtis	ld	[%o4],%f20		! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1789*25c28e83SPiotr Jasiukajtis	faddd	%f60,D2ON36,%f50	! (5_0) x_hi0 = x0 + D2ON36;
1790*25c28e83SPiotr Jasiukajtis
1791*25c28e83SPiotr Jasiukajtis	nop
1792*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i0		! py += stridey
1793*25c28e83SPiotr Jasiukajtis	lda	[%i3]0x82,%o4		! (7_0) hy0 = *(int*)py;
1794*25c28e83SPiotr Jasiukajtis	faddd	%f52,D2ON36,%f12	! (5_0) y_hi0 = y0 + D2ON36;
1795*25c28e83SPiotr Jasiukajtis
1796*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f24,%f2		! (6_1) res0 = scl0 * res0;
1797*25c28e83SPiotr Jasiukajtis	and	%o1,_0x7fffffff,%o7	! (7_0) hx0 &= 0x7fffffff;
1798*25c28e83SPiotr Jasiukajtis	nop
1799*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f36,%f24		! (4_0) dres = res0_hi + res0_lo;
1800*25c28e83SPiotr Jasiukajtis
1801*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f46,%f26		! (2_0) dd *= dtmp1;
1802*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! (7_0) hx0 ? 0x7ff00000
1803*25c28e83SPiotr Jasiukajtis	st	%f24,[%fp+ftmp0]	! (4_0) iarr = ((int*)&dres)[0];
1804*25c28e83SPiotr Jasiukajtis	fpsub32	%f20,%f54,%f10		! (3_0) dd = vis_fpsub32(dtmp0, dexp0);
1805*25c28e83SPiotr Jasiukajtis
1806*25c28e83SPiotr Jasiukajtis	and	%o4,_0x7fffffff,%l7	! (7_0) hy0 &= 0x7fffffff;
1807*25c28e83SPiotr Jasiukajtis	st	%f2,[%i5]		! (6_1) ((float*)pz)[0] = ((float*)&res0)[0];
1808*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update57		! (7_0) if ( hx0 >= 0x7ff00000 )
1809*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f20	! (5_0) x_hi0 -= D2ON36;
1810*25c28e83SPiotr Jasiukajtis
1811*25c28e83SPiotr Jasiukajtis	sub	%l7,%o7,%o1		! (7_0) diff0 = hy0 - hx0;
1812*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! (7_0) hy0 ? 0x7ff00000
1813*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update58		! (7_0) if ( hy0 >= 0x7ff00000 )
1814*25c28e83SPiotr Jasiukajtis	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;
1815*25c28e83SPiotr Jasiukajtis
1816*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
1817*25c28e83SPiotr Jasiukajtis	sra	%o1,31,%o3		! (7_0) j0 = diff0 >> 31;
1818*25c28e83SPiotr Jasiukajtis	st	%f3,[%i5+4]		! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
1819*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;
1820*25c28e83SPiotr Jasiukajtis
1821*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (7_0) j0 &= diff0;
1822*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x00100000		! (7_0) hx0 ? 0x00100000
1823*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update59		! (7_0) if ( hx0 < 0x00100000 )
1824*25c28e83SPiotr Jasiukajtis	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);
/*
 * .cont59a / .cont59b: re-entry labels inside the main loop body
 * (presumably targets of the .update59 fix-up path -- confirm against
 * that handler).
 *
 * Integer work, element (7_0): build the scale-factor bits.
 *  - On the fall-through path %o1 holds diff0 & (diff0 >> 31), which is
 *    diff0 = hy0 - hx0 when that is negative and 0 otherwise, so
 *    "hy0 - %o1" selects the larger of hx0 and hy0 without a branch.
 *  - The result is masked with %l0 (0x7ff00000 per the line comment),
 *    subtracted from %l0, shifted into the upper 32 bits and stored to
 *    the dtmp15 stack slot.
 *  - Before %g1 is overwritten, its previous value (the (6_0) scale
 *    bits) is copied to dtmp14, which the ldd tagged (6_1) earlier in
 *    the loop body reads.
 *
 * pz is advanced by stridez here; on the fall-through path both halves
 * of the previous result have already been stored through %i5.
 *
 * FP work: first product terms of the hi/lo sum for (5_0), plus steps
 * for (1_0), (2_0) and (3_0), as tagged per line.
 */
1825*25c28e83SPiotr Jasiukajtis.cont59a:
1826*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
1827*25c28e83SPiotr Jasiukajtis	sub	%l7,%o1,%o4		! (7_0) j0 = hy0 - j0;
1828*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
1829*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
1830*25c28e83SPiotr Jasiukajtis
1831*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
1832*25c28e83SPiotr Jasiukajtis	and	%o4,%l0,%o4		! (7_0) j0 &= 0x7ff00000;
1833*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
1834*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
1835*25c28e83SPiotr Jasiukajtis
1836*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
1837*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%g1		! (7_0) j0 = 0x7ff00000 - j0;
1838*25c28e83SPiotr Jasiukajtis	nop
1839*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
1840*25c28e83SPiotr Jasiukajtis.cont59b:
1841*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
1842*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
1843*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
1844*25c28e83SPiotr Jasiukajtis	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;
1845*25c28e83SPiotr Jasiukajtis
1846*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
1847*25c28e83SPiotr Jasiukajtis	nop
1848*25c28e83SPiotr Jasiukajtis	nop
1849*25c28e83SPiotr Jasiukajtis	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
/*
 * .cont60: last slice of the main loop body, ending with the loop
 * back-edge.  Work for several elements is interleaved; the tag in each
 * line comment names the element an instruction belongs to.
 *
 * Per element, this slice:
 *  (5_0) finishes res0_hi and res0_lo.
 *  (6_0) reloads scl0 from dtmp13, loads x0 via %i2 and y0 via %o0
 *        through %asi, and scales both.
 *  (3_0), (2_0) refinement steps of the form dd *= (DTWO - dd * dres),
 *        i.e. the Newton-Raphson reciprocal iteration.
 *  (1_0) first part of the correction term,
 *        DONE - res0_hi*res0 - res0_lo*res0.
 *  (4_0) reads ftmp0 as an integer and forms the table offset
 *        (word >> 11) & 0x1fc in %o4; the TBL base is added later.
 *  (7_1) reloads its scale into %f0.  In "[%fp+dtmp0]" dtmp0 is a
 *        stack-offset symbol, unrelated to the C temporary of the same
 *        name in the line comments.
 *
 * Loop control: counter is decremented by 8 and bpos branches back to
 * .main_loop while the result is non-negative.  The branch is not
 * annulled, so the fmuld in its delay slot also executes on the final
 * pass.  On exit the "add counter,8,counter" undoes the last
 * subtraction before execution falls through into .tail.
 *
 * "bn,pn %icc,.exit" is a branch-never without the annul bit: control
 * never transfers to .exit and the next instruction executes normally.
 * It presumably only fills an issue slot -- confirm before removing.
 */
1850*25c28e83SPiotr Jasiukajtis.cont60:
1851*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f2,%f2		! (5_0) res0_lo *= x_lo0;
1852*25c28e83SPiotr Jasiukajtis	nop
1853*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp13],%f62	! (6_0) *(long long*)&scl0 = ll;
1854*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f46,%f42		! (5_0) res0_hi += dtmp0;
1855*25c28e83SPiotr Jasiukajtis
1856*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f20,%f52		! (3_0) dd *= dtmp0;
1857*25c28e83SPiotr Jasiukajtis	nop
1858*25c28e83SPiotr Jasiukajtis	lda	[%i2]%asi,%f10		! (6_0) ((float*)&x0)[0] = ((float*)px)[0];
1859*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1860*25c28e83SPiotr Jasiukajtis
1861*25c28e83SPiotr Jasiukajtis	lda	[%i2+4]%asi,%f11	! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
1862*25c28e83SPiotr Jasiukajtis	nop
1863*25c28e83SPiotr Jasiukajtis	nop
1864*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f60,%f60		! (1_0) dtmp0 = DONE - dtmp0;
1865*25c28e83SPiotr Jasiukajtis
1866*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f54,%f46		! (5_0) dtmp1 *= y_lo0;
1867*25c28e83SPiotr Jasiukajtis	nop
1868*25c28e83SPiotr Jasiukajtis	lda	[%o0]%asi,%f12		! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
1869*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f14,%f14		! (2_0) dtmp2 = DTWO - dtmp2;
1870*25c28e83SPiotr Jasiukajtis
1871*25c28e83SPiotr Jasiukajtis	nop
1872*25c28e83SPiotr Jasiukajtis	nop
1873*25c28e83SPiotr Jasiukajtis	lda	[%o0+4]%asi,%f13	! (6_0) ((float*)&y0)[1] = ((float*)py)[1];
1874*25c28e83SPiotr Jasiukajtis	bn,pn	%icc,.exit
1875*25c28e83SPiotr Jasiukajtis
1876*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f22,%f50		! (3_0) dtmp1 = dd * dres;
1877*25c28e83SPiotr Jasiukajtis	nop
1878*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (4_0) iarr = ((int*)&dres)[0];
1879*25c28e83SPiotr Jasiukajtis	fand	%f24,DA1,%f54		! (4_0) dexp0 = vis_fand(dres,DA1);
1880*25c28e83SPiotr Jasiukajtis
1881*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f62,%f10		! (6_0) x0 *= scl0;
1882*25c28e83SPiotr Jasiukajtis	nop
1883*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp0],%f0		! (7_1) *(long long*)&scl0 = ll;
1884*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f34,%f20		! (1_0) dtmp0 -= dtmp1;
1885*25c28e83SPiotr Jasiukajtis
1886*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f62,%f60		! (6_0) y0 *= scl0;
1887*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%o4		! (4_0) iarr >>= 11;
1888*25c28e83SPiotr Jasiukajtis	nop
1889*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f46,%f34		! (5_0) res0_lo += dtmp1;
1890*25c28e83SPiotr Jasiukajtis
1891*25c28e83SPiotr Jasiukajtis	and	%o4,0x1fc,%o4		! (4_0) iarr &= 0x1fc;
1892*25c28e83SPiotr Jasiukajtis	subcc	counter,8,counter	! counter -= 8;
1893*25c28e83SPiotr Jasiukajtis	bpos,pt	%icc,.main_loop
1894*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f26		! (2_0) dres = dd * dtmp2;
1895*25c28e83SPiotr Jasiukajtis
1896*25c28e83SPiotr Jasiukajtis	add	counter,8,counter
1897*25c28e83SPiotr Jasiukajtis
1898*25c28e83SPiotr Jasiukajtis.tail:
1899*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
1900*25c28e83SPiotr Jasiukajtis	bneg	.begin
1901*25c28e83SPiotr Jasiukajtis	nop
1902*25c28e83SPiotr Jasiukajtis
1903*25c28e83SPiotr Jasiukajtis	fsqrtd	%f48,%f14		! (0_1) res0 = sqrt ( res0 );
1904*25c28e83SPiotr Jasiukajtis	add	%o4,TBL,%o4		! (4_1) (char*)dll1 + iarr
1905*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f46		! (3_1) dtmp1 = DTWO - dtmp1;
1906*25c28e83SPiotr Jasiukajtis
1907*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f16,%f48		! (1_1) dtmp0 *= dres;
1908*25c28e83SPiotr Jasiukajtis	ld	[%o4],%f20		! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1909*25c28e83SPiotr Jasiukajtis
1910*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f18,%f0		! (7_2) res0 = scl0 * res0;
1911*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
1912*25c28e83SPiotr Jasiukajtis	faddd	%f42,%f34,%f16		! (5_1) dres = res0_hi + res0_lo;
1913*25c28e83SPiotr Jasiukajtis
1914*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
1915*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
1916*25c28e83SPiotr Jasiukajtis	bneg	.begin
1917*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
1918*25c28e83SPiotr Jasiukajtis
1919*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f46,%f18		! (3_1) dd *= dtmp1;
1920*25c28e83SPiotr Jasiukajtis	st	%f16,[%fp+ftmp0]	! (5_1) iarr = ((int*)&dres)[0];
1921*25c28e83SPiotr Jasiukajtis	fpsub32	%f20,%f54,%f54		! (4_1) dd = vis_fpsub32(dtmp0, dexp0);
1922*25c28e83SPiotr Jasiukajtis
1923*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f24,%f50		! (4_1) dtmp0 = dd * dres;
1924*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f48,%f52		! (1_1) res0 += dtmp0;
1925*25c28e83SPiotr Jasiukajtis
1926*25c28e83SPiotr Jasiukajtis
1927*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f48		! (2_1) res0 = vis_fand(dres,DA0);
1928*25c28e83SPiotr Jasiukajtis
1929*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f22,%f22		! (3_1) dtmp2 = dd * dres;
1930*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (4_1) dtmp0 = DTWO - dtmp0;
1931*25c28e83SPiotr Jasiukajtis
1932*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f48,%f12		! (2_1) dtmp0 = res0_hi * res0;
1933*25c28e83SPiotr Jasiukajtis
1934*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f48,%f40		! (2_1) dtmp1 = res0_lo * res0;
1935*25c28e83SPiotr Jasiukajtis
1936*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f20,%f54		! (4_1) dd *= dtmp0;
1937*25c28e83SPiotr Jasiukajtis
1938*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f12,%f60		! (2_1) dtmp0 = DONE - dtmp0;
1939*25c28e83SPiotr Jasiukajtis
1940*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f22,%f22		! (3_1) dtmp2 = DTWO - dtmp2;
1941*25c28e83SPiotr Jasiukajtis
1942*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f24,%f50		! (4_1) dtmp1 = dd * dres;
1943*25c28e83SPiotr Jasiukajtis	ld	[%fp+ftmp0],%o2		! (5_1) iarr = ((int*)&dres)[0];
1944*25c28e83SPiotr Jasiukajtis	fand	%f16,DA1,%f2		! (5_1) dexp0 = vis_fand(dres,DA1);
1945*25c28e83SPiotr Jasiukajtis
1946*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp2],%f0		! (0_1) *(long long*)&scl0 = ll;
1947*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f40,%f20		! (2_1) dtmp0 -= dtmp1;
1948*25c28e83SPiotr Jasiukajtis
1949*25c28e83SPiotr Jasiukajtis	sra	%o2,11,%i3		! (5_1) iarr >>= 11;
1950*25c28e83SPiotr Jasiukajtis
1951*25c28e83SPiotr Jasiukajtis	and	%i3,0x1fc,%i3		! (5_1) iarr &= 0x1fc;
1952*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f22,%f28		! (3_1) dres = dd * dtmp2;
1953*25c28e83SPiotr Jasiukajtis
1954*25c28e83SPiotr Jasiukajtis	fsqrtd	%f52,%f22		! (1_1) res0 = sqrt ( res0 );
1955*25c28e83SPiotr Jasiukajtis	add	%i3,TBL,%g1		! (5_1) (char*)dll1 + iarr
1956*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f62		! (4_1) dtmp1 = DTWO - dtmp1;
1957*25c28e83SPiotr Jasiukajtis
1958*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f26,%f52		! (2_1) dtmp0 *= dres;
1959*25c28e83SPiotr Jasiukajtis	ld	[%g1],%f26		! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1960*25c28e83SPiotr Jasiukajtis
1961*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f14,%f0		! (0_1) res0 = scl0 * res0;
1962*25c28e83SPiotr Jasiukajtis
1963*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f62,%f14		! (4_1) dd *= dtmp1;
1964*25c28e83SPiotr Jasiukajtis	fpsub32	%f26,%f2,%f26		! (5_1) dd = vis_fpsub32(dtmp0, dexp0);
1965*25c28e83SPiotr Jasiukajtis
1966*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];
1967*25c28e83SPiotr Jasiukajtis
1968*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f16,%f50		! (5_1) dtmp0 = dd * dres;
1969*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
1970*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (2_1) res0 += dtmp0;
1971*25c28e83SPiotr Jasiukajtis
1972*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
1973*25c28e83SPiotr Jasiukajtis	bneg	.begin
1974*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
1975*25c28e83SPiotr Jasiukajtis
1976*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (3_1) res0 = vis_fand(dres,DA0);
1977*25c28e83SPiotr Jasiukajtis
1978*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (3_1) dtmp0 = res0_hi * res0;
1979*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (5_1) dtmp0 = DTWO - dtmp0;
1980*25c28e83SPiotr Jasiukajtis
1981*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f24,%f24		! (4_1) dtmp2 = dd * dres;
1982*25c28e83SPiotr Jasiukajtis
1983*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f48,%f38		! (3_1) dtmp1 = res0_lo * res0;
1984*25c28e83SPiotr Jasiukajtis
1985*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f10,%f60		! (3_1) dtmp0 = DONE - dtmp0;
1986*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f20,%f54		! (5_1) dd *= dtmp0;
1987*25c28e83SPiotr Jasiukajtis
1988*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f24,%f24		! (4_1) dtmp2 = DTWO - dtmp2;
1989*25c28e83SPiotr Jasiukajtis
1990*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f16,%f46		! (5_1) dtmp1 = dd * dres;
1991*25c28e83SPiotr Jasiukajtis
1992*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp4],%f50	! (1_1) *(long long*)&scl0 = ll;
1993*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f38,%f20		! (3_1) dtmp0 -= dtmp1;
1994*25c28e83SPiotr Jasiukajtis
1995*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f24,%f26		! (4_1) dres = dd * dtmp2;
1996*25c28e83SPiotr Jasiukajtis
1997*25c28e83SPiotr Jasiukajtis	fsqrtd	%f52,%f24		! (2_1) res0 = sqrt ( res0 );
1998*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f46,%f62		! (5_1) dtmp1 = DTWO - dtmp1;
1999*25c28e83SPiotr Jasiukajtis
2000*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f28,%f52		! (3_1) dtmp0 *= dres;
2001*25c28e83SPiotr Jasiukajtis
2002*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f22,%f0		! (1_1) res0 = scl0 * res0;
2003*25c28e83SPiotr Jasiukajtis
2004*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f62,%f22		! (5_1) dd *= dtmp1;
2005*25c28e83SPiotr Jasiukajtis
2006*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
2007*25c28e83SPiotr Jasiukajtis
2008*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
2009*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
2010*25c28e83SPiotr Jasiukajtis	bneg	.begin
2011*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
2012*25c28e83SPiotr Jasiukajtis
2013*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (3_1) res0 += dtmp0;
2014*25c28e83SPiotr Jasiukajtis
2015*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f48		! (4_1) res0 = vis_fand(dres,DA0);
2016*25c28e83SPiotr Jasiukajtis
2017*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f48,%f10		! (4_1) dtmp0 = res0_hi * res0;
2018*25c28e83SPiotr Jasiukajtis
2019*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f16,%f16		! (5_1) dtmp2 = dd * dres;
2020*25c28e83SPiotr Jasiukajtis
2021*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f48,%f36		! (4_1) dtmp1 = res0_lo * res0;
2022*25c28e83SPiotr Jasiukajtis
2023*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f10,%f60		! (4_1) dtmp0 = DONE - dtmp0;
2024*25c28e83SPiotr Jasiukajtis
2025*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f16,%f16		! (5_1) dtmp2 = DTWO - dtmp2;
2026*25c28e83SPiotr Jasiukajtis
2027*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp6],%f50	! (2_1) *(long long*)&scl0 = ll;
2028*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f36,%f20		! (4_1) dtmp0 -= dtmp1;
2029*25c28e83SPiotr Jasiukajtis
2030*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f16,%f28		! (5_1) dres = dd * dtmp2;
2031*25c28e83SPiotr Jasiukajtis
2032*25c28e83SPiotr Jasiukajtis	fsqrtd	%f52,%f16		! (3_1) res0 = sqrt ( res0 );
2033*25c28e83SPiotr Jasiukajtis
2034*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f26,%f52		! (4_1) dtmp0 *= dres;
2035*25c28e83SPiotr Jasiukajtis
2036*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f24,%f0		! (2_1) res0 = scl0 * res0;
2037*25c28e83SPiotr Jasiukajtis
2038*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
2039*25c28e83SPiotr Jasiukajtis
2040*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
2041*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (4_1) res0 += dtmp0;
2042*25c28e83SPiotr Jasiukajtis
2043*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
2044*25c28e83SPiotr Jasiukajtis	bneg	.begin
2045*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
2046*25c28e83SPiotr Jasiukajtis
2047*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (5_1) res0 = vis_fand(dres,DA0);
2048*25c28e83SPiotr Jasiukajtis
2049*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
2050*25c28e83SPiotr Jasiukajtis
2051*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f48,%f34		! (5_1) dtmp1 = res0_lo * res0;
2052*25c28e83SPiotr Jasiukajtis
2053*25c28e83SPiotr Jasiukajtis	fsubd	DONE,%f10,%f60		! (5_1) dtmp0 = DONE - dtmp0;
2054*25c28e83SPiotr Jasiukajtis
2055*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp8],%f18	! (3_1) *(long long*)&scl0 = ll;
2056*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f34,%f46		! (5_1) dtmp0 -= dtmp1;
2057*25c28e83SPiotr Jasiukajtis
2058*25c28e83SPiotr Jasiukajtis	fsqrtd	%f52,%f24		! (4_1) res0 = sqrt ( res0 );
2059*25c28e83SPiotr Jasiukajtis
2060*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f28,%f52		! (5_1) dtmp0 *= dres;
2061*25c28e83SPiotr Jasiukajtis
2062*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f16,%f0		! (3_1) res0 = scl0 * res0;
2063*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
2064*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
2065*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (5_1) res0 += dtmp0;
2066*25c28e83SPiotr Jasiukajtis
2067*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
2068*25c28e83SPiotr Jasiukajtis	bneg	.begin
2069*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
2070*25c28e83SPiotr Jasiukajtis
2071*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp10],%f14	! (4_1) *(long long*)&scl0 = ll;
2072*25c28e83SPiotr Jasiukajtis
2073*25c28e83SPiotr Jasiukajtis	fsqrtd	%f52,%f16		! (5_1) res0 = sqrt ( res0 );
2074*25c28e83SPiotr Jasiukajtis
2075*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f24,%f0		! (4_1) res0 = scl0 * res0
2076*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
2077*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
2078*25c28e83SPiotr Jasiukajtis
2079*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
2080*25c28e83SPiotr Jasiukajtis	bneg	.begin
2081*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
2082*25c28e83SPiotr Jasiukajtis
2083*25c28e83SPiotr Jasiukajtis	ldd	[%fp+dtmp12],%f22	! (5_1) *(long long*)&scl0 = ll;
2084*25c28e83SPiotr Jasiukajtis
2085*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f16,%f0		! (5_1) res0 = scl0 * res0;
2086*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
2087*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
2088*25c28e83SPiotr Jasiukajtis
2089*25c28e83SPiotr Jasiukajtis	ba	.begin
2090*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5
2091*25c28e83SPiotr Jasiukajtis
! .spec0: scalar handling of one element with a non-finite operand.
! On entry %o7 = hx0 and %l7 = hy0 (high words of x and y), %i4 = px,
! %i3 = py, %i5 = pz.
! NOTE(review): the entry condition is outside this view; presumably
! hx0 or hy0 >= 0x7ff00000 with the sign bit already cleared - confirm.
!   - high word == 0x7ff00000 and low word == 0 for x or for y
!     (an infinity): both words of *pz are set to 0 (label 3).
!   - otherwise: *pz = fabs(x) * fabs(y) (label 2).
! Both paths advance px, py and pz by their strides, decrement counter
! and continue at .begin1.
2092*25c28e83SPiotr Jasiukajtis	.align	16
2093*25c28e83SPiotr Jasiukajtis.spec0:
2094*25c28e83SPiotr Jasiukajtis	cmp	%o7,_0x7ff00000		! hx0 ? 0x7ff00000
2095*25c28e83SPiotr Jasiukajtis	bne	1f			! if ( hx0 != 0x7ff00000 )
2096*25c28e83SPiotr Jasiukajtis	ld	[%i4+4],%i2		! (delay slot) lx = ((int*)px)[1];
2097*25c28e83SPiotr Jasiukajtis
2098*25c28e83SPiotr Jasiukajtis	cmp	%i2,0			! lx ? 0
2099*25c28e83SPiotr Jasiukajtis	be	3f			! if ( lx == 0 )
2100*25c28e83SPiotr Jasiukajtis	nop
2101*25c28e83SPiotr Jasiukajtis1:
2102*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x7ff00000		! hy0 ? 0x7ff00000
2103*25c28e83SPiotr Jasiukajtis	bne	2f			! if ( hy0 != 0x7ff00000 )
2104*25c28e83SPiotr Jasiukajtis	ld	[%i3+4],%o2		! (delay slot) ly = ((int*)py)[1];
2105*25c28e83SPiotr Jasiukajtis
2106*25c28e83SPiotr Jasiukajtis	cmp	%o2,0			! ly ? 0
2107*25c28e83SPiotr Jasiukajtis	be	3f			! if ( ly == 0 ); the load at 2: is its delay slot
2108*25c28e83SPiotr Jasiukajtis2:
2109*25c28e83SPiotr Jasiukajtis	ld	[%i4],%f0		! ((float*)&x0)[0] = ((float*)px)[0];
2110*25c28e83SPiotr Jasiukajtis	ld	[%i4+4],%f1		! ((float*)&x0)[1] = ((float*)px)[1];
2111*25c28e83SPiotr Jasiukajtis
2112*25c28e83SPiotr Jasiukajtis	ld	[%i3],%f2		! ((float*)&y0)[0] = ((float*)py)[0];
2113*25c28e83SPiotr Jasiukajtis	add	%i4,stridex,%i4		! px += stridex
2114*25c28e83SPiotr Jasiukajtis	ld	[%i3+4],%f3		! ((float*)&y0)[1] = ((float*)py)[1];
2115*25c28e83SPiotr Jasiukajtis
2116*25c28e83SPiotr Jasiukajtis	fabsd	%f0,%f0			! x0 = fabs(x0);
2117*25c28e83SPiotr Jasiukajtis
2118*25c28e83SPiotr Jasiukajtis	fabsd	%f2,%f2			! y0 = fabs(y0);
2119*25c28e83SPiotr Jasiukajtis
2120*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f2,%f0		! res0 = fabs(x0) * fabs(y0);
2121*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i3		! py += stridey;
2122*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! ((float*)pz)[0] = ((float*)&res0)[0];
2123*25c28e83SPiotr Jasiukajtis
2124*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! ((float*)pz)[1] = ((float*)&res0)[1];
2125*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
2126*25c28e83SPiotr Jasiukajtis	ba	.begin1
2127*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! (delay slot) counter--
2128*25c28e83SPiotr Jasiukajtis3:
2129*25c28e83SPiotr Jasiukajtis	add	%i4,stridex,%i4		! px += stridex
2130*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i3		! py += stridey
2131*25c28e83SPiotr Jasiukajtis	st	%g0,[%i5]		! ((int*)pz)[0] = 0;
2132*25c28e83SPiotr Jasiukajtis
2133*25c28e83SPiotr Jasiukajtis	st	%g0,[%i5+4]		! ((int*)pz)[1] = 0; both words go to the current element
2134*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez; only after both stores
2135*25c28e83SPiotr Jasiukajtis	ba	.begin1
2136*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! (delay slot) counter--
2137*25c28e83SPiotr Jasiukajtis
! .spec1: handling of one element with a very small operand.
! On entry %o7 = hx0, %l7 = hy0, %i4 = px, %i3 = py, %i5 = pz.
! NOTE(review): the entry condition is outside this view; presumably
! hx0 < 0x00100000 (x subnormal or zero) - confirm at the branch site.
!   - hy0 >= 0x00100000: resume the normal path at .cont_spec0.
!   - all four words of x and y OR to zero: store 1.0 / 0.0, advance
!     px/py/pz, decrement counter and continue at .begin1.
!   - otherwise: x and y are rebuilt as doubles from their integer bit
!     patterns, written to the stack slots dtmp2 / dtmp3, and %i4 / %i3
!     are pointed at those slots.  The D2ONM52 and D2ON1022 table values
!     are stored to dtmp15 and dtmp0, then control goes to .cont_spec1.
2138*25c28e83SPiotr Jasiukajtis	.align	16
2139*25c28e83SPiotr Jasiukajtis.spec1:
2140*25c28e83SPiotr Jasiukajtis	and	%o1,%o3,%o1		! (7_0) j0 &= diff0;
2141*25c28e83SPiotr Jasiukajtis
2142*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (7_0) hy0 ? 0x00100000
2143*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont_spec0	! (7_0) if ( hy0 < 0x00100000 )
2144*25c28e83SPiotr Jasiukajtis
2145*25c28e83SPiotr Jasiukajtis	ld	[%i4+4],%i2		! (delay slot) lx = ((int*)px)[1];
2146*25c28e83SPiotr Jasiukajtis	or	%o7,%l7,%g5		! ii = hx0 | hy0;
2147*25c28e83SPiotr Jasiukajtis	fzero	%f0			! %f0 = 0.0, divisor for the all-zero case
2148*25c28e83SPiotr Jasiukajtis
2149*25c28e83SPiotr Jasiukajtis	ld	[%i3+4],%o2		! ly = ((int*)py)[1];
2150*25c28e83SPiotr Jasiukajtis	or	%i2,%g5,%g5		! ii |= lx;
2151*25c28e83SPiotr Jasiukajtis
2152*25c28e83SPiotr Jasiukajtis	orcc	%o2,%g5,%g5		! ii |= ly;
2153*25c28e83SPiotr Jasiukajtis	bnz,a,pn	%icc,1f		! if ( ii != 0 )
2154*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i2	! (annulled slot, taken path only) %i2 = 0x00080000
2155*25c28e83SPiotr Jasiukajtis
2156*25c28e83SPiotr Jasiukajtis	fdivd	DONE,%f0,%f0		! res0 = 1.0 / 0.0;
2157*25c28e83SPiotr Jasiukajtis
2158*25c28e83SPiotr Jasiukajtis	st	%f0,[%i5]		! ((float*)pz)[0] = ((float*)&res0)[0];
2159*25c28e83SPiotr Jasiukajtis
2160*25c28e83SPiotr Jasiukajtis	add	%i4,stridex,%i4		! px += stridex;
2161*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i3		! py += stridey;
2162*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! ((float*)pz)[1] = ((float*)&res0)[1];
2163*25c28e83SPiotr Jasiukajtis
2164*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez;
2165*25c28e83SPiotr Jasiukajtis	ba	.begin1
2166*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! (delay slot) counter--
2167*25c28e83SPiotr Jasiukajtis1:
2168*25c28e83SPiotr Jasiukajtis	ld	[%i4],%f0		! ((float*)&x0)[0] = ((float*)px)[0];
2169*25c28e83SPiotr Jasiukajtis
2170*25c28e83SPiotr Jasiukajtis	ld	[%i4+4],%f1		! ((float*)&x0)[1] = ((float*)px)[1];
2171*25c28e83SPiotr Jasiukajtis
2172*25c28e83SPiotr Jasiukajtis	ld	[%i3],%f2		! ((float*)&y0)[0] = ((float*)py)[0];
2173*25c28e83SPiotr Jasiukajtis
2174*25c28e83SPiotr Jasiukajtis	fabsd	%f0,%f0			! x0 = fabs(x0);
2175*25c28e83SPiotr Jasiukajtis	ld	[%i3+4],%f3		! ((float*)&y0)[1] = ((float*)py)[1];
2176*25c28e83SPiotr Jasiukajtis
2177*25c28e83SPiotr Jasiukajtis	ldd	[TBL+TBL_SHIFT+64],%f12	! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
2178*25c28e83SPiotr Jasiukajtis	add	%fp,dtmp2,%i4		! px now points at the dtmp2 stack slot
2179*25c28e83SPiotr Jasiukajtis	add	%fp,dtmp3,%i3		! py now points at the dtmp3 stack slot
2180*25c28e83SPiotr Jasiukajtis
2181*25c28e83SPiotr Jasiukajtis	fabsd	%f2,%f2			! y0 = fabs(y0);
2182*25c28e83SPiotr Jasiukajtis	ldd	[TBL+TBL_SHIFT+56],%f10	! D2ON51
2183*25c28e83SPiotr Jasiukajtis
2184*25c28e83SPiotr Jasiukajtis	ldx	[TBL+TBL_SHIFT+48],%g5	! D2ONM52
2185*25c28e83SPiotr Jasiukajtis	cmp	%o7,%i2			! hx0 ? 0x00080000
2186*25c28e83SPiotr Jasiukajtis	bl,a	1f			! if ( hx0 < 0x00080000 )
2187*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! (annulled slot, taken path only) x0 = *(long long*)&x0;
2188*25c28e83SPiotr Jasiukajtis
2189*25c28e83SPiotr Jasiukajtis	fand	%f0,%f12,%f0		! x0 = vis_fand(x0, dtmp0);
2190*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! x0 = *(long long*)&x0;
2191*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f10,%f0		! x0 += D2ON51;
2192*25c28e83SPiotr Jasiukajtis1:
2193*25c28e83SPiotr Jasiukajtis	std	%f0,[%i4]		! store rebuilt x0 into the dtmp2 slot
2194*25c28e83SPiotr Jasiukajtis
2195*25c28e83SPiotr Jasiukajtis	ldx	[TBL+TBL_SHIFT+40],%g1	! D2ON1022
2196*25c28e83SPiotr Jasiukajtis	cmp	%l7,%i2			! hy0 ? 0x00080000
2197*25c28e83SPiotr Jasiukajtis	bl,a	1f			! if ( hy0 < 0x00080000 )
2198*25c28e83SPiotr Jasiukajtis	fxtod	%f2,%f2			! (annulled slot, taken path only) y0 = *(long long*)&y0;
2199*25c28e83SPiotr Jasiukajtis
2200*25c28e83SPiotr Jasiukajtis	fand	%f2,%f12,%f2		! y0 = vis_fand(y0, dtmp0);
2201*25c28e83SPiotr Jasiukajtis	fxtod	%f2,%f2			! y0 = *(long long*)&y0;
2202*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f10,%f2		! y0 += D2ON51;
2203*25c28e83SPiotr Jasiukajtis1:
2204*25c28e83SPiotr Jasiukajtis	std	%f2,[%i3]		! store rebuilt y0 into the dtmp3 slot
2205*25c28e83SPiotr Jasiukajtis
2206*25c28e83SPiotr Jasiukajtis	stx	%g5,[%fp+dtmp15]	! D2ONM52
2207*25c28e83SPiotr Jasiukajtis
2208*25c28e83SPiotr Jasiukajtis	ba	.cont_spec1
2209*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp0]		! (delay slot) D2ON1022
2210*25c28e83SPiotr Jasiukajtis
! .update0: limit counter to 1 and substitute a table operand.
! If counter > 1, counter - 1 is saved in tmp_counter, the pointers in
! %i2 / %o0 are saved in tmp_px / tmp_py, and counter becomes 1.
! In every case %o4 is set to 0x3ff00000 and %i2 / %o0 are pointed at
! TBL+TBL_SHIFT+24 before resuming at .cont1.
! NOTE(review): presumably reached from the pipelined loop when a
! prefetched element needs the special-case paths - confirm at the
! branch site, which is outside this view.
2211*25c28e83SPiotr Jasiukajtis	.align	16
2212*25c28e83SPiotr Jasiukajtis.update0:
2213*25c28e83SPiotr Jasiukajtis	cmp	counter,1		! counter ? 1
2214*25c28e83SPiotr Jasiukajtis	ble	1f			! if ( counter <= 1 ) nothing to save
2215*25c28e83SPiotr Jasiukajtis	nop
2216*25c28e83SPiotr Jasiukajtis
2217*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! counter -= 1
2218*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2219*25c28e83SPiotr Jasiukajtis
2220*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2221*25c28e83SPiotr Jasiukajtis
2222*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2223*25c28e83SPiotr Jasiukajtis
2224*25c28e83SPiotr Jasiukajtis	mov	1,counter		! counter = 1
2225*25c28e83SPiotr Jasiukajtis1:
2226*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
2227*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = address of table entry
2228*25c28e83SPiotr Jasiukajtis	ba	.cont1
2229*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = same table entry
2230*25c28e83SPiotr Jasiukajtis
! .update1: re-check hy0, then limit counter to 1.
! If hy0 (%l7) >= 0x00100000 execution resumes at .cont0.  Otherwise,
! when counter > 1, counter - 1 is saved in tmp_counter and %i2 / %o0
! in tmp_px / tmp_py, and counter becomes 1.  Then %o4 is set to
! 0x3ff00000, %i2 / %o0 are pointed at TBL+TBL_SHIFT+24 and control
! goes to .cont1.
2231*25c28e83SPiotr Jasiukajtis	.align	16
2232*25c28e83SPiotr Jasiukajtis.update1:
2233*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2234*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont0		! (0_0) if ( hy0 < 0x00100000 )
2235*25c28e83SPiotr Jasiukajtis
2236*25c28e83SPiotr Jasiukajtis	cmp	counter,1		! (delay slot, runs on both paths) counter ? 1
2237*25c28e83SPiotr Jasiukajtis	ble,a	1f			! if ( counter <= 1 ) nothing to save
2238*25c28e83SPiotr Jasiukajtis	nop
2239*25c28e83SPiotr Jasiukajtis
2240*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! counter -= 1
2241*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2242*25c28e83SPiotr Jasiukajtis
2243*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2244*25c28e83SPiotr Jasiukajtis
2245*25c28e83SPiotr Jasiukajtis	mov	1,counter		! counter = 1
2246*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2247*25c28e83SPiotr Jasiukajtis1:
2248*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
2249*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = address of table entry
2250*25c28e83SPiotr Jasiukajtis	ba	.cont1
2251*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = same table entry
2252*25c28e83SPiotr Jasiukajtis
! .update2: limit counter to 2 and substitute a table operand.
! If counter > 2, counter - 2 is saved in tmp_counter, %i4 / %i3 are
! saved in tmp_px / tmp_py, and counter becomes 2.  The (7_1) stage
! arithmetic below is then performed, %o4 is set to 0x3ff00000,
! %i4 / %i3 are pointed at TBL+TBL_SHIFT+24 and control goes to .cont4.
! NOTE(review): the FP instructions presumably duplicate main-loop work
! that lies between the branch site and .cont4 - confirm there.
2253*25c28e83SPiotr Jasiukajtis	.align	16
2254*25c28e83SPiotr Jasiukajtis.update2:
2255*25c28e83SPiotr Jasiukajtis	cmp	counter,2		! counter ? 2
2256*25c28e83SPiotr Jasiukajtis	ble	1f			! if ( counter <= 2 ) nothing to save
2257*25c28e83SPiotr Jasiukajtis	nop
2258*25c28e83SPiotr Jasiukajtis
2259*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter	! counter -= 2
2260*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2261*25c28e83SPiotr Jasiukajtis
2262*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
2263*25c28e83SPiotr Jasiukajtis
2264*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
2265*25c28e83SPiotr Jasiukajtis
2266*25c28e83SPiotr Jasiukajtis	mov	2,counter		! counter = 2
2267*25c28e83SPiotr Jasiukajtis1:
2268*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;
2269*25c28e83SPiotr Jasiukajtis
2270*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
2271*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
2272*25c28e83SPiotr Jasiukajtis
2273*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
2274*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
2275*25c28e83SPiotr Jasiukajtis
2276*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
2277*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = address of table entry
2278*25c28e83SPiotr Jasiukajtis	ba	.cont4
2279*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = same table entry
2280*25c28e83SPiotr Jasiukajtis
! .update3: limit counter to 2 and substitute a table operand.
! If counter > 2, counter - 2 is saved in tmp_counter, %i4 / %i3 are
! saved in tmp_px / tmp_py, and counter becomes 2.  The (7_1) stage
! products and sums below are then computed (y_hi0 in %f54 is used as
! is), %o4 is set to 0x3ff00000, %i4 / %i3 are pointed at
! TBL+TBL_SHIFT+24 and control goes to .cont4.
2281*25c28e83SPiotr Jasiukajtis	.align	16
2282*25c28e83SPiotr Jasiukajtis.update3:
2283*25c28e83SPiotr Jasiukajtis	cmp	counter,2		! counter ? 2
2284*25c28e83SPiotr Jasiukajtis	ble	1f			! if ( counter <= 2 ) nothing to save
2285*25c28e83SPiotr Jasiukajtis	nop
2286*25c28e83SPiotr Jasiukajtis
2287*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter	! counter -= 2
2288*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2289*25c28e83SPiotr Jasiukajtis
2290*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
2291*25c28e83SPiotr Jasiukajtis
2292*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
2293*25c28e83SPiotr Jasiukajtis
2294*25c28e83SPiotr Jasiukajtis	mov	2,counter		! counter = 2
2295*25c28e83SPiotr Jasiukajtis1:
2296*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
2297*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
2298*25c28e83SPiotr Jasiukajtis
2299*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
2300*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
2301*25c28e83SPiotr Jasiukajtis
2302*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
2303*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = address of table entry
2304*25c28e83SPiotr Jasiukajtis	ba	.cont4
2305*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = same table entry
2306*25c28e83SPiotr Jasiukajtis
! .update4: re-check hy0, then limit counter to 2.
! If hy0 (%l7) >= 0x00100000 the annulled delay slot finishes the j0
! computation and execution resumes at .cont4.  Otherwise, when
! counter > 2, counter - 2 is saved in tmp_counter and %i4 / %i3 in
! tmp_px / tmp_py, and counter becomes 2.  Then %o4 is set to
! 0x3ff00000, %i4 / %i3 are pointed at TBL+TBL_SHIFT+24 and control
! goes to .cont4.
2307*25c28e83SPiotr Jasiukajtis	.align	16
2308*25c28e83SPiotr Jasiukajtis.update4:
2309*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2310*25c28e83SPiotr Jasiukajtis	bge,a,pn	%icc,.cont4	! (0_0) if ( hy0 < 0x00100000 )
2311*25c28e83SPiotr Jasiukajtis	sub	%l0,%o4,%o4		! (1_0) j0 = 0x7ff00000 - j0; (annulled slot, taken path only)
2312*25c28e83SPiotr Jasiukajtis
2313*25c28e83SPiotr Jasiukajtis	cmp	counter,2		! counter ? 2
2314*25c28e83SPiotr Jasiukajtis	ble,a	1f			! if ( counter <= 2 ) nothing to save
2315*25c28e83SPiotr Jasiukajtis	nop
2316*25c28e83SPiotr Jasiukajtis
2317*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter	! counter -= 2
2318*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2319*25c28e83SPiotr Jasiukajtis
2320*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
2321*25c28e83SPiotr Jasiukajtis
2322*25c28e83SPiotr Jasiukajtis	mov	2,counter		! counter = 2
2323*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
2324*25c28e83SPiotr Jasiukajtis1:
2325*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
2326*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = address of table entry
2327*25c28e83SPiotr Jasiukajtis	ba	.cont4
2328*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = same table entry
2329*25c28e83SPiotr Jasiukajtis
! .update5: limit counter to 3 and substitute a table operand.
! If counter > 3, counter - 3 is saved in tmp_counter, %i2 / %o0 are
! saved in tmp_px / tmp_py, and counter becomes 3.  The high word of
! %f14 is then stored to ftmp0, the (0_0) stage arithmetic below is
! performed, %g1 is set to 0x3ff00000 << 32, %i2 / %o0 are pointed at
! TBL+TBL_SHIFT+24 and control goes to .cont8.
! NOTE(review): the FP instructions presumably duplicate main-loop work
! that lies between the branch site and .cont8 - confirm there.
2330*25c28e83SPiotr Jasiukajtis	.align	16
2331*25c28e83SPiotr Jasiukajtis.update5:
2332*25c28e83SPiotr Jasiukajtis	cmp	counter,3		! counter ? 3
2333*25c28e83SPiotr Jasiukajtis	ble	1f			! if ( counter <= 3 ) nothing to save
2334*25c28e83SPiotr Jasiukajtis	nop
2335*25c28e83SPiotr Jasiukajtis
2336*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter	! counter -= 3
2337*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2338*25c28e83SPiotr Jasiukajtis
2339*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2340*25c28e83SPiotr Jasiukajtis
2341*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2342*25c28e83SPiotr Jasiukajtis
2343*25c28e83SPiotr Jasiukajtis	mov	3,counter		! counter = 3
2344*25c28e83SPiotr Jasiukajtis1:
2345*25c28e83SPiotr Jasiukajtis	st	%f14,[%fp+ftmp0]	! (7_1) iarr = ((int*)&dres)[0];
2346*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (0_0) x_hi0 -= D2ON36;
2347*25c28e83SPiotr Jasiukajtis
2348*25c28e83SPiotr Jasiukajtis	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;
2349*25c28e83SPiotr Jasiukajtis
2350*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
2351*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
2352*25c28e83SPiotr Jasiukajtis
2353*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
2354*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
2355*25c28e83SPiotr Jasiukajtis
2356*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2357*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = address of table entry
2358*25c28e83SPiotr Jasiukajtis
2359*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! %g1 <<= 32
2360*25c28e83SPiotr Jasiukajtis	ba	.cont8
2361*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = same table entry
2362*25c28e83SPiotr Jasiukajtis
! .update6: limit counter to 3 and substitute a table operand.
! If counter > 3, counter - 3 is saved in tmp_counter, %i2 / %o0 are
! saved in tmp_px / tmp_py, and counter becomes 3.  The (0_0) stage
! products and sums below are then computed from the x_hi0 / y_hi0
! values already in %f20 / %f54, %g1 is set to 0x3ff00000 << 32,
! %i2 / %o0 are pointed at TBL+TBL_SHIFT+24 and control goes to .cont8.
2363*25c28e83SPiotr Jasiukajtis	.align	16
2364*25c28e83SPiotr Jasiukajtis.update6:
2365*25c28e83SPiotr Jasiukajtis	cmp	counter,3		! counter ? 3
2366*25c28e83SPiotr Jasiukajtis	ble	1f			! if ( counter <= 3 ) nothing to save
2367*25c28e83SPiotr Jasiukajtis	nop
2368*25c28e83SPiotr Jasiukajtis
2369*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter	! counter -= 3
2370*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2371*25c28e83SPiotr Jasiukajtis
2372*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2373*25c28e83SPiotr Jasiukajtis
2374*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2375*25c28e83SPiotr Jasiukajtis
2376*25c28e83SPiotr Jasiukajtis	mov	3,counter		! counter = 3
2377*25c28e83SPiotr Jasiukajtis1:
2378*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
2379*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
2380*25c28e83SPiotr Jasiukajtis
2381*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
2382*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
2383*25c28e83SPiotr Jasiukajtis
2384*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2385*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = address of table entry
2386*25c28e83SPiotr Jasiukajtis
2387*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! %g1 <<= 32
2388*25c28e83SPiotr Jasiukajtis	ba	.cont8
2389*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = same table entry
2390*25c28e83SPiotr Jasiukajtis
! .update7: re-check hy0, then limit counter to 3.
! If hy0 (%l7) >= 0x00100000 execution resumes at .cont7.  Otherwise,
! when counter > 3, counter - 3 is saved in tmp_counter and %i2 / %o0
! in tmp_px / tmp_py, and counter becomes 3.  Then %g1 is set to
! 0x3ff00000 << 32, %i2 / %o0 are pointed at TBL+TBL_SHIFT+24 and
! control goes to .cont8.
2391*25c28e83SPiotr Jasiukajtis	.align	16
2392*25c28e83SPiotr Jasiukajtis.update7:
2393*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2394*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont7		! (0_0) if ( hy0 < 0x00100000 )
2395*25c28e83SPiotr Jasiukajtis
2396*25c28e83SPiotr Jasiukajtis	cmp	counter,3		! (delay slot, runs on both paths) counter ? 3
2397*25c28e83SPiotr Jasiukajtis	ble,a	1f			! if ( counter <= 3 ) nothing to save
2398*25c28e83SPiotr Jasiukajtis	nop
2399*25c28e83SPiotr Jasiukajtis
2400*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter	! counter -= 3
2401*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2402*25c28e83SPiotr Jasiukajtis
2403*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2404*25c28e83SPiotr Jasiukajtis
2405*25c28e83SPiotr Jasiukajtis	mov	3,counter		! counter = 3
2406*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2407*25c28e83SPiotr Jasiukajtis1:
2408*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2409*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = address of table entry
2410*25c28e83SPiotr Jasiukajtis
2411*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! %g1 <<= 32
2412*25c28e83SPiotr Jasiukajtis	ba	.cont8
2413*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = same table entry
2414*25c28e83SPiotr Jasiukajtis
! .update9: limit counter to 4 and substitute a table operand.
! If counter > 4, counter - 4 is saved in tmp_counter, %i4 / %i3 are
! saved in tmp_px / tmp_py, and counter becomes 4.  The high word of
! %f22 is then stored to ftmp0, the (1_0) and (7_1) stage arithmetic
! below is performed, %g1 is set to 0x3ff00000, %i4 / %i3 are pointed
! at TBL+TBL_SHIFT+24 and control goes to .cont12.
! NOTE(review): the FP instructions presumably duplicate main-loop work
! that lies between the branch site and .cont12 - confirm there.
2415*25c28e83SPiotr Jasiukajtis	.align	16
2416*25c28e83SPiotr Jasiukajtis.update9:
2417*25c28e83SPiotr Jasiukajtis	cmp	counter,4		! counter ? 4
2418*25c28e83SPiotr Jasiukajtis	ble	1f			! if ( counter <= 4 ) nothing to save
2419*25c28e83SPiotr Jasiukajtis	nop
2420*25c28e83SPiotr Jasiukajtis
2421*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter	! counter -= 4
2422*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2423*25c28e83SPiotr Jasiukajtis
2424*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
2425*25c28e83SPiotr Jasiukajtis
2426*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
2427*25c28e83SPiotr Jasiukajtis
2428*25c28e83SPiotr Jasiukajtis	mov	4,counter		! counter = 4
2429*25c28e83SPiotr Jasiukajtis1:
2430*25c28e83SPiotr Jasiukajtis	st	%f22,[%fp+ftmp0]	! (0_0) iarr = ((int*)&dres)[0];
2431*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (1_0) x_hi0 -= D2ON36;
2432*25c28e83SPiotr Jasiukajtis
2433*25c28e83SPiotr Jasiukajtis	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;
2434*25c28e83SPiotr Jasiukajtis
2435*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres;
2436*25c28e83SPiotr Jasiukajtis
2437*25c28e83SPiotr Jasiukajtis
2438*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
2439*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
2440*25c28e83SPiotr Jasiukajtis
2441*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
2442*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
2443*25c28e83SPiotr Jasiukajtis
2444*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
2445*25c28e83SPiotr Jasiukajtis
2446*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2447*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = address of table entry
2448*25c28e83SPiotr Jasiukajtis	ba	.cont12
2449*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = same table entry
2450*25c28e83SPiotr Jasiukajtis
! .update10: limit counter to 4 and substitute a table operand.
! If counter > 4, counter - 4 is saved in tmp_counter, %i4 / %i3 are
! saved in tmp_px / tmp_py, and counter becomes 4.  The (7_1) and (1_0)
! stage arithmetic below is then performed using the x_hi0 / y_hi0
! values already in %f20 / %f54, %g1 is set to 0x3ff00000, %i4 / %i3
! are pointed at TBL+TBL_SHIFT+24 and control goes to .cont12.
2451*25c28e83SPiotr Jasiukajtis	.align	16
2452*25c28e83SPiotr Jasiukajtis.update10:
2453*25c28e83SPiotr Jasiukajtis	cmp	counter,4		! counter ? 4
2454*25c28e83SPiotr Jasiukajtis	ble	1f			! if ( counter <= 4 ) nothing to save
2455*25c28e83SPiotr Jasiukajtis	nop
2456*25c28e83SPiotr Jasiukajtis
2457*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter	! counter -= 4
2458*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2459*25c28e83SPiotr Jasiukajtis
2460*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
2461*25c28e83SPiotr Jasiukajtis
2462*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
2463*25c28e83SPiotr Jasiukajtis
2464*25c28e83SPiotr Jasiukajtis	mov	4,counter		! counter = 4
2465*25c28e83SPiotr Jasiukajtis1:
2466*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres;
2467*25c28e83SPiotr Jasiukajtis
2468*25c28e83SPiotr Jasiukajtis
2469*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
2470*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
2471*25c28e83SPiotr Jasiukajtis
2472*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
2473*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
2474*25c28e83SPiotr Jasiukajtis
2475*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
2476*25c28e83SPiotr Jasiukajtis
2477*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2478*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = address of table entry
2479*25c28e83SPiotr Jasiukajtis	ba	.cont12
2480*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = same table entry
2481*25c28e83SPiotr Jasiukajtis
! .update11: re-check hy0, then limit counter to 4.
! If hy0 (%l7) >= 0x00100000 execution resumes at .cont11.  Otherwise,
! when counter > 4, counter - 4 is saved in tmp_counter and %i4 / %i3
! in tmp_px / tmp_py, and counter becomes 4.  Then %g1 is set to
! 0x3ff00000, the (7_1) DTWO - dtmp0 step is performed, %i4 / %i3 are
! pointed at TBL+TBL_SHIFT+24 and control goes to .cont12.
2482*25c28e83SPiotr Jasiukajtis	.align	16
2483*25c28e83SPiotr Jasiukajtis.update11:
2484*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2485*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont11		! (0_0) if ( hy0 < 0x00100000 )
2486*25c28e83SPiotr Jasiukajtis
2487*25c28e83SPiotr Jasiukajtis	cmp	counter,4		! (delay slot, runs on both paths) counter ? 4
2488*25c28e83SPiotr Jasiukajtis	ble,a	1f			! if ( counter <= 4 ) nothing to save
2489*25c28e83SPiotr Jasiukajtis	nop
2490*25c28e83SPiotr Jasiukajtis
2491*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter	! counter -= 4
2492*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2493*25c28e83SPiotr Jasiukajtis
2494*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
2495*25c28e83SPiotr Jasiukajtis
2496*25c28e83SPiotr Jasiukajtis	mov	4,counter		! counter = 4
2497*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
2498*25c28e83SPiotr Jasiukajtis1:
2499*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2500*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = address of table entry
2501*25c28e83SPiotr Jasiukajtis
2502*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
2503*25c28e83SPiotr Jasiukajtis	ba	.cont12
2504*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = same table entry
2505*25c28e83SPiotr Jasiukajtis
! .update13: limit counter to 5 and substitute a table operand.
! If counter > 5, counter - 5 is saved in tmp_counter, %i2 / %o0 are
! saved in tmp_px / tmp_py, and counter becomes 5.  The (2_0) and (0_0)
! stage arithmetic below is then performed, %g1 is set to 0x3ff00000,
! %i2 / %o0 are pointed at TBL+TBL_SHIFT+24 and control goes to .cont16.
! NOTE(review): the FP instructions presumably duplicate main-loop work
! that lies between the branch site and .cont16 - confirm there.
2506*25c28e83SPiotr Jasiukajtis	.align	16
2507*25c28e83SPiotr Jasiukajtis.update13:
2508*25c28e83SPiotr Jasiukajtis	cmp	counter,5		! counter ? 5
2509*25c28e83SPiotr Jasiukajtis	ble	1f			! if ( counter <= 5 ) nothing to save
2510*25c28e83SPiotr Jasiukajtis	nop
2511*25c28e83SPiotr Jasiukajtis
2512*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter	! counter -= 5
2513*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2514*25c28e83SPiotr Jasiukajtis
2515*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2516*25c28e83SPiotr Jasiukajtis
2517*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2518*25c28e83SPiotr Jasiukajtis
2519*25c28e83SPiotr Jasiukajtis	mov	5,counter		! counter = 5
2520*25c28e83SPiotr Jasiukajtis1:
2521*25c28e83SPiotr Jasiukajtis	fsubd	%f46,D2ON36,%f20	! (2_0) x_hi0 -= D2ON36;
2522*25c28e83SPiotr Jasiukajtis
2523*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;
2524*25c28e83SPiotr Jasiukajtis
2525*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
2526*25c28e83SPiotr Jasiukajtis
2527*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
2528*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
2529*25c28e83SPiotr Jasiukajtis
2530*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
2531*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
2532*25c28e83SPiotr Jasiukajtis
2533*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
2534*25c28e83SPiotr Jasiukajtis
2535*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2536*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = address of table entry
2537*25c28e83SPiotr Jasiukajtis	ba	.cont16
2538*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = same table entry
2539*25c28e83SPiotr Jasiukajtis
! .update14: limit counter to 5 and substitute a table operand.
! If counter > 5, counter - 5 is saved in tmp_counter, %i2 / %o0 are
! saved in tmp_px / tmp_py, and counter becomes 5.  The (0_0) and (2_0)
! stage arithmetic below is then performed using the x_hi0 / y_hi0
! values already in %f20 / %f54, %g1 is set to 0x3ff00000, %i2 / %o0
! are pointed at TBL+TBL_SHIFT+24 and control goes to .cont16.
2540*25c28e83SPiotr Jasiukajtis	.align	16
2541*25c28e83SPiotr Jasiukajtis.update14:
2542*25c28e83SPiotr Jasiukajtis	cmp	counter,5		! counter ? 5
2543*25c28e83SPiotr Jasiukajtis	ble	1f			! if ( counter <= 5 ) nothing to save
2544*25c28e83SPiotr Jasiukajtis	nop
2545*25c28e83SPiotr Jasiukajtis
2546*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter	! counter -= 5
2547*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2548*25c28e83SPiotr Jasiukajtis
2549*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2550*25c28e83SPiotr Jasiukajtis
2551*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2552*25c28e83SPiotr Jasiukajtis
2553*25c28e83SPiotr Jasiukajtis	mov	5,counter		! counter = 5
2554*25c28e83SPiotr Jasiukajtis1:
2555*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
2556*25c28e83SPiotr Jasiukajtis
2557*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
2558*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
2559*25c28e83SPiotr Jasiukajtis
2560*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
2561*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
2562*25c28e83SPiotr Jasiukajtis
2563*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
2564*25c28e83SPiotr Jasiukajtis
2565*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2566*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = address of table entry
2567*25c28e83SPiotr Jasiukajtis	ba	.cont16
2568*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = same table entry
2569*25c28e83SPiotr Jasiukajtis
! .update15: re-check hy0, then limit counter to 5.
! If hy0 (%l7) >= 0x00100000 execution resumes at .cont15.  Otherwise,
! when counter > 5, counter - 5 is saved in tmp_counter and %i2 / %o0
! in tmp_px / tmp_py, and counter becomes 5.  Then %g1 is set to
! 0x3ff00000, the (0_0) DTWO - dtmp0 step is performed, %i2 / %o0 are
! pointed at TBL+TBL_SHIFT+24 and control goes to .cont16.
2570*25c28e83SPiotr Jasiukajtis	.align	16
2571*25c28e83SPiotr Jasiukajtis.update15:
2572*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2573*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont15		! (0_0) if ( hy0 < 0x00100000 )
2574*25c28e83SPiotr Jasiukajtis
2575*25c28e83SPiotr Jasiukajtis	cmp	counter,5		! (delay slot, runs on both paths) counter ? 5
2576*25c28e83SPiotr Jasiukajtis	ble,a	1f			! if ( counter <= 5 ) nothing to save
2577*25c28e83SPiotr Jasiukajtis	nop
2578*25c28e83SPiotr Jasiukajtis
2579*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter	! counter -= 5
2580*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2581*25c28e83SPiotr Jasiukajtis
2582*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2583*25c28e83SPiotr Jasiukajtis
2584*25c28e83SPiotr Jasiukajtis	mov	5,counter		! counter = 5
2585*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2586*25c28e83SPiotr Jasiukajtis1:
2587*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2588*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = address of table entry
2589*25c28e83SPiotr Jasiukajtis
2590*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
2591*25c28e83SPiotr Jasiukajtis	ba	.cont16
2592*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = same table entry
2593*25c28e83SPiotr Jasiukajtis
! .update17: limit counter to 6 and substitute a table operand.
! If counter > 6, counter - 6 is saved in tmp_counter, %i4 / %i3 are
! saved in tmp_px / tmp_py, and counter becomes 6.  The (3_0), (1_0),
! (0_0) and (7_1) stage arithmetic below is then performed, the value
! 0x3ff00000 << 32 is stored to dtmp11 as the (5_0) scale, %i4 / %i3
! are pointed at TBL+TBL_SHIFT+24 and control goes to .cont20.
! NOTE(review): the FP instructions presumably duplicate main-loop work
! that lies between the branch site and .cont20 - confirm there.
2594*25c28e83SPiotr Jasiukajtis	.align	16
2595*25c28e83SPiotr Jasiukajtis.update17:
2596*25c28e83SPiotr Jasiukajtis	cmp	counter,6		! counter ? 6
2597*25c28e83SPiotr Jasiukajtis	ble	1f			! if ( counter <= 6 ) nothing to save
2598*25c28e83SPiotr Jasiukajtis	nop
2599*25c28e83SPiotr Jasiukajtis
2600*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter	! counter -= 6
2601*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = remaining count
2602*25c28e83SPiotr Jasiukajtis
2603*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
2604*25c28e83SPiotr Jasiukajtis
2605*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
2606*25c28e83SPiotr Jasiukajtis
2607*25c28e83SPiotr Jasiukajtis	mov	6,counter		! counter = 6
2608*25c28e83SPiotr Jasiukajtis1:
2609*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;
2610*25c28e83SPiotr Jasiukajtis
2611*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
2612*25c28e83SPiotr Jasiukajtis
2613*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
2614*25c28e83SPiotr Jasiukajtis
2615*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
2616*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
2617*25c28e83SPiotr Jasiukajtis
2618*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
2619*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
2620*25c28e83SPiotr Jasiukajtis
2621*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
2622*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
2623*25c28e83SPiotr Jasiukajtis
2624*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
2625*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
2626*25c28e83SPiotr Jasiukajtis
2627*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
2628*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
2629*25c28e83SPiotr Jasiukajtis
2630*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2631*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = address of table entry
2632*25c28e83SPiotr Jasiukajtis
2633*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
2634*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
2635*25c28e83SPiotr Jasiukajtis	ba	.cont20
2636*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = same table entry
2637*25c28e83SPiotr Jasiukajtis	! .update18: cap counter at 6 (saving the excess and %i4/%i3), aim %i4/%i3 at TBL+TBL_SHIFT+24, branch to .cont20.
2638*25c28e83SPiotr Jasiukajtis	.align	16
2639*25c28e83SPiotr Jasiukajtis.update18:
2640*25c28e83SPiotr Jasiukajtis	cmp	counter,6		! counter vs. 6
2641*25c28e83SPiotr Jasiukajtis	ble	1f			! counter <= 6 (signed): nothing to save
2642*25c28e83SPiotr Jasiukajtis	nop				! delay slot
2643*25c28e83SPiotr Jasiukajtis	! counter > 6: record the excess count and the current %i4/%i3 in the frame.
2644*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter	! counter -= 6
2645*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
2646*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
2647*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
2648*25c28e83SPiotr Jasiukajtis
2649*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
2650*25c28e83SPiotr Jasiukajtis
2651*25c28e83SPiotr Jasiukajtis	mov	6,counter		! counter = 6
2652*25c28e83SPiotr Jasiukajtis1:
2653*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
2654*25c28e83SPiotr Jasiukajtis	! NOTE(review): the (n_m) tags look like pipeline-stage labels; these FP ops presumably mirror in-line code bypassed by branching here - confirm.
2655*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
2656*25c28e83SPiotr Jasiukajtis
2657*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
2658*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
2659*25c28e83SPiotr Jasiukajtis
2660*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
2661*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
2662*25c28e83SPiotr Jasiukajtis
2663*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
2664*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
2665*25c28e83SPiotr Jasiukajtis
2666*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
2667*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
2668*25c28e83SPiotr Jasiukajtis
2669*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
2670*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
2671*25c28e83SPiotr Jasiukajtis
2672*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2673*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
2674*25c28e83SPiotr Jasiukajtis	! %g1 << 32 = 0x3ff0000000000000, the IEEE-754 bit pattern of the double 1.0.
2675*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
2676*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
2677*25c28e83SPiotr Jasiukajtis	ba	.cont20			! continue at .cont20
2678*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL + TBL_SHIFT + 24
2679*25c28e83SPiotr Jasiukajtis	! .update19: if %l7 >= _0x00100000 go to .cont19a; else cap counter at 6 (saving the excess and %i4/%i3), aim %i4/%i3 at TBL+TBL_SHIFT+24, branch to .cont19b.
2680*25c28e83SPiotr Jasiukajtis	.align	16
2681*25c28e83SPiotr Jasiukajtis.update19:
2682*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2683*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont19a		! (0_0) if ( hy0 < 0x00100000 )
2684*25c28e83SPiotr Jasiukajtis	! The cmp below occupies the bge delay slot, so it executes on both paths.
2685*25c28e83SPiotr Jasiukajtis	cmp	counter,6		! counter vs. 6
2686*25c28e83SPiotr Jasiukajtis	ble,a	1f			! counter <= 6 (signed): nothing to save
2687*25c28e83SPiotr Jasiukajtis	nop				! annulled delay slot: runs only if the ble is taken
2688*25c28e83SPiotr Jasiukajtis	! counter > 6: record the excess count and the current %i4/%i3 in the frame.
2689*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter	! counter -= 6
2690*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
2691*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
2692*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
2693*25c28e83SPiotr Jasiukajtis
2694*25c28e83SPiotr Jasiukajtis	mov	6,counter		! counter = 6
2695*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
2696*25c28e83SPiotr Jasiukajtis1:
2697*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
2698*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (not shifted or stored within this block)
2699*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
2700*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
2701*25c28e83SPiotr Jasiukajtis	! NOTE(review): the two FP ops above presumably mirror in-line code bypassed by branching here - confirm.
2702*25c28e83SPiotr Jasiukajtis	ba	.cont19b		! continue at .cont19b
2703*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL + TBL_SHIFT + 24
2704*25c28e83SPiotr Jasiukajtis	! .update21: cap counter at 7 (saving the excess and %i2/%o0), aim %i2/%o0 at TBL+TBL_SHIFT+24, branch to .cont24.
2705*25c28e83SPiotr Jasiukajtis	.align	16
2706*25c28e83SPiotr Jasiukajtis.update21:
2707*25c28e83SPiotr Jasiukajtis	cmp	counter,7		! counter vs. 7
2708*25c28e83SPiotr Jasiukajtis	ble	1f			! counter <= 7 (signed): nothing to save
2709*25c28e83SPiotr Jasiukajtis	nop				! delay slot
2710*25c28e83SPiotr Jasiukajtis	! counter > 7: record the excess count and the current %i2/%o0 in the frame.
2711*25c28e83SPiotr Jasiukajtis	sub	counter,7,counter	! counter -= 7
2712*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
2713*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
2714*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2715*25c28e83SPiotr Jasiukajtis
2716*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2717*25c28e83SPiotr Jasiukajtis
2718*25c28e83SPiotr Jasiukajtis	mov	7,counter		! counter = 7
2719*25c28e83SPiotr Jasiukajtis1:
2720*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36;
2721*25c28e83SPiotr Jasiukajtis	! NOTE(review): the (n_m) tags look like pipeline-stage labels; these FP ops presumably mirror in-line code bypassed by branching here - confirm.
2722*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
2723*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
2724*25c28e83SPiotr Jasiukajtis
2725*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
2726*25c28e83SPiotr Jasiukajtis
2727*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
2728*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
2729*25c28e83SPiotr Jasiukajtis
2730*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
2731*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
2732*25c28e83SPiotr Jasiukajtis
2733*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
2734*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
2735*25c28e83SPiotr Jasiukajtis
2736*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
2737*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
2738*25c28e83SPiotr Jasiukajtis
2739*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
2740*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2741*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
2742*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
2743*25c28e83SPiotr Jasiukajtis	! %g1 << 32 = 0x3ff0000000000000, the IEEE-754 bit pattern of the double 1.0.
2744*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
2745*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
2746*25c28e83SPiotr Jasiukajtis	ba	.cont24			! continue at .cont24
2747*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL + TBL_SHIFT + 24
2748*25c28e83SPiotr Jasiukajtis	! .update22: cap counter at 7 (saving the excess and %i2/%o0), aim %i2/%o0 at TBL+TBL_SHIFT+24, branch to .cont24.
2749*25c28e83SPiotr Jasiukajtis	.align	16
2750*25c28e83SPiotr Jasiukajtis.update22:
2751*25c28e83SPiotr Jasiukajtis	cmp	counter,7		! counter vs. 7
2752*25c28e83SPiotr Jasiukajtis	ble	1f			! counter <= 7 (signed): nothing to save
2753*25c28e83SPiotr Jasiukajtis	nop				! delay slot
2754*25c28e83SPiotr Jasiukajtis	! counter > 7: record the excess count and the current %i2/%o0 in the frame.
2755*25c28e83SPiotr Jasiukajtis	sub	counter,7,counter	! counter -= 7
2756*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
2757*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
2758*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2759*25c28e83SPiotr Jasiukajtis
2760*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2761*25c28e83SPiotr Jasiukajtis
2762*25c28e83SPiotr Jasiukajtis	mov	7,counter		! counter = 7
2763*25c28e83SPiotr Jasiukajtis1:
2764*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
2765*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
2766*25c28e83SPiotr Jasiukajtis	! NOTE(review): the (n_m) tags look like pipeline-stage labels; these FP ops presumably mirror in-line code bypassed by branching here - confirm.
2767*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
2768*25c28e83SPiotr Jasiukajtis
2769*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
2770*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
2771*25c28e83SPiotr Jasiukajtis
2772*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
2773*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
2774*25c28e83SPiotr Jasiukajtis
2775*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
2776*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
2777*25c28e83SPiotr Jasiukajtis
2778*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
2779*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
2780*25c28e83SPiotr Jasiukajtis
2781*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
2782*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2783*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
2784*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
2785*25c28e83SPiotr Jasiukajtis	! %g1 << 32 = 0x3ff0000000000000, the IEEE-754 bit pattern of the double 1.0.
2786*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
2787*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
2788*25c28e83SPiotr Jasiukajtis	ba	.cont24			! continue at .cont24
2789*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL + TBL_SHIFT + 24
2790*25c28e83SPiotr Jasiukajtis	! .update23: if %l7 >= _0x00100000 go to .cont23a; else cap counter at 7 (saving the excess and %i2/%o0), aim %i2/%o0 at TBL+TBL_SHIFT+24, branch to .cont23b.
2791*25c28e83SPiotr Jasiukajtis	.align	16
2792*25c28e83SPiotr Jasiukajtis.update23:
2793*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2794*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont23a		! (0_0) if ( hy0 < 0x00100000 )
2795*25c28e83SPiotr Jasiukajtis	! The cmp below occupies the bge delay slot, so it executes on both paths.
2796*25c28e83SPiotr Jasiukajtis	cmp	counter,7		! counter vs. 7
2797*25c28e83SPiotr Jasiukajtis	ble,a	1f			! counter <= 7 (signed): nothing to save
2798*25c28e83SPiotr Jasiukajtis	nop				! annulled delay slot: runs only if the ble is taken
2799*25c28e83SPiotr Jasiukajtis	! counter > 7: record the excess count and the current %i2/%o0 in the frame.
2800*25c28e83SPiotr Jasiukajtis	sub	counter,7,counter	! counter -= 7
2801*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
2802*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
2803*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2804*25c28e83SPiotr Jasiukajtis
2805*25c28e83SPiotr Jasiukajtis	mov	7,counter		! counter = 7
2806*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2807*25c28e83SPiotr Jasiukajtis1:
2808*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
2809*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (not shifted or stored within this block)
2810*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
2811*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
2812*25c28e83SPiotr Jasiukajtis	! NOTE(review): the two FP ops above presumably mirror in-line code bypassed by branching here - confirm.
2813*25c28e83SPiotr Jasiukajtis	ba	.cont23b		! continue at .cont23b
2814*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL + TBL_SHIFT + 24
2815*25c28e83SPiotr Jasiukajtis	! .update25: cap counter at 8 (saving the excess and %i4/%i3), aim %i4/%i3 at TBL+TBL_SHIFT+24, branch to .cont28.
2816*25c28e83SPiotr Jasiukajtis	.align	16
2817*25c28e83SPiotr Jasiukajtis.update25:
2818*25c28e83SPiotr Jasiukajtis	cmp	counter,8		! counter vs. 8
2819*25c28e83SPiotr Jasiukajtis	ble	1f			! counter <= 8 (signed): nothing to save
2820*25c28e83SPiotr Jasiukajtis	nop				! delay slot
2821*25c28e83SPiotr Jasiukajtis	! counter > 8: record the excess count and the current %i4/%i3 in the frame.
2822*25c28e83SPiotr Jasiukajtis	sub	counter,8,counter	! counter -= 8
2823*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
2824*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
2825*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
2826*25c28e83SPiotr Jasiukajtis
2827*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
2828*25c28e83SPiotr Jasiukajtis
2829*25c28e83SPiotr Jasiukajtis	mov	8,counter		! counter = 8
2830*25c28e83SPiotr Jasiukajtis1:
2831*25c28e83SPiotr Jasiukajtis	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;
2832*25c28e83SPiotr Jasiukajtis	! NOTE(review): the (n_m) tags look like pipeline-stage labels; these FP ops presumably mirror in-line code bypassed by branching here - confirm.
2833*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
2834*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;
2835*25c28e83SPiotr Jasiukajtis
2836*25c28e83SPiotr Jasiukajtis	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);
2837*25c28e83SPiotr Jasiukajtis
2838*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
2839*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
2840*25c28e83SPiotr Jasiukajtis
2841*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
2842*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
2843*25c28e83SPiotr Jasiukajtis
2844*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
2845*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
2846*25c28e83SPiotr Jasiukajtis
2847*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
2848*25c28e83SPiotr Jasiukajtis	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;
2849*25c28e83SPiotr Jasiukajtis
2850*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
2851*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2852*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
2853*25c28e83SPiotr Jasiukajtis	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
2854*25c28e83SPiotr Jasiukajtis	! %g1 << 32 = 0x3ff0000000000000, the IEEE-754 bit pattern of the double 1.0.
2855*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
2856*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
2857*25c28e83SPiotr Jasiukajtis	ba	.cont28			! continue at .cont28
2858*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL + TBL_SHIFT + 24
2859*25c28e83SPiotr Jasiukajtis	! .update26: cap counter at 8 (saving the excess and %i4/%i3), aim %i4/%i3 at TBL+TBL_SHIFT+24, branch to .cont28.
2860*25c28e83SPiotr Jasiukajtis	.align	16
2861*25c28e83SPiotr Jasiukajtis.update26:
2862*25c28e83SPiotr Jasiukajtis	cmp	counter,8		! counter vs. 8
2863*25c28e83SPiotr Jasiukajtis	ble	1f			! counter <= 8 (signed): nothing to save
2864*25c28e83SPiotr Jasiukajtis	nop				! delay slot
2865*25c28e83SPiotr Jasiukajtis	! counter > 8: record the excess count and the current %i4/%i3 in the frame.
2866*25c28e83SPiotr Jasiukajtis	sub	counter,8,counter	! counter -= 8
2867*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
2868*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
2869*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
2870*25c28e83SPiotr Jasiukajtis
2871*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
2872*25c28e83SPiotr Jasiukajtis
2873*25c28e83SPiotr Jasiukajtis	mov	8,counter		! counter = 8
2874*25c28e83SPiotr Jasiukajtis1:
2875*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
2876*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;
2877*25c28e83SPiotr Jasiukajtis	! NOTE(review): the (n_m) tags look like pipeline-stage labels; these FP ops presumably mirror in-line code bypassed by branching here - confirm.
2878*25c28e83SPiotr Jasiukajtis	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);
2879*25c28e83SPiotr Jasiukajtis
2880*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
2881*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
2882*25c28e83SPiotr Jasiukajtis
2883*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
2884*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
2885*25c28e83SPiotr Jasiukajtis
2886*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
2887*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
2888*25c28e83SPiotr Jasiukajtis
2889*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
2890*25c28e83SPiotr Jasiukajtis	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;
2891*25c28e83SPiotr Jasiukajtis
2892*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
2893*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
2894*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
2895*25c28e83SPiotr Jasiukajtis	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
2896*25c28e83SPiotr Jasiukajtis	! %g1 << 32 = 0x3ff0000000000000, the IEEE-754 bit pattern of the double 1.0.
2897*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
2898*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
2899*25c28e83SPiotr Jasiukajtis	ba	.cont28			! continue at .cont28
2900*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL + TBL_SHIFT + 24
2901*25c28e83SPiotr Jasiukajtis	! .update27: if %l7 >= _0x00100000 go to .cont27a; else cap counter at 8 (saving the excess and %i4/%i3), aim %i4/%i3 at TBL+TBL_SHIFT+24, branch to .cont27b.
2902*25c28e83SPiotr Jasiukajtis	.align	16
2903*25c28e83SPiotr Jasiukajtis.update27:
2904*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2905*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont27a		! (0_0) if ( hy0 < 0x00100000 )
2906*25c28e83SPiotr Jasiukajtis	! The cmp below occupies the bge delay slot, so it executes on both paths.
2907*25c28e83SPiotr Jasiukajtis	cmp	counter,8		! counter vs. 8
2908*25c28e83SPiotr Jasiukajtis	ble,a	1f			! counter <= 8 (signed): nothing to save
2909*25c28e83SPiotr Jasiukajtis	nop				! annulled delay slot: runs only if the ble is taken
2910*25c28e83SPiotr Jasiukajtis	! counter > 8: record the excess count and the current %i4/%i3 in the frame.
2911*25c28e83SPiotr Jasiukajtis	sub	counter,8,counter	! counter -= 8
2912*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
2913*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
2914*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
2915*25c28e83SPiotr Jasiukajtis
2916*25c28e83SPiotr Jasiukajtis	mov	8,counter		! counter = 8
2917*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
2918*25c28e83SPiotr Jasiukajtis1:
2919*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
2920*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (not shifted or stored within this block)
2921*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
2922*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
2923*25c28e83SPiotr Jasiukajtis	! NOTE(review): the two FP ops above presumably mirror in-line code bypassed by branching here - confirm.
2924*25c28e83SPiotr Jasiukajtis	ba	.cont27b		! continue at .cont27b
2925*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL + TBL_SHIFT + 24
2926*25c28e83SPiotr Jasiukajtis	! .update29: cap counter at 1 (saving the excess and %i2/%o0), aim %i2/%o0 at TBL+TBL_SHIFT+24, branch to .cont32.
2927*25c28e83SPiotr Jasiukajtis	.align	16
2928*25c28e83SPiotr Jasiukajtis.update29:
2929*25c28e83SPiotr Jasiukajtis	cmp	counter,1		! counter vs. 1
2930*25c28e83SPiotr Jasiukajtis	ble	1f			! counter <= 1 (signed): nothing to save
2931*25c28e83SPiotr Jasiukajtis	nop				! delay slot
2932*25c28e83SPiotr Jasiukajtis	! counter > 1: record the excess count and the current %i2/%o0 in the frame.
2933*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! counter -= 1
2934*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
2935*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
2936*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2937*25c28e83SPiotr Jasiukajtis
2938*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2939*25c28e83SPiotr Jasiukajtis
2940*25c28e83SPiotr Jasiukajtis	mov	1,counter		! counter = 1
2941*25c28e83SPiotr Jasiukajtis1:
2942*25c28e83SPiotr Jasiukajtis	fsubd	%f2,D2ON36,%f2		! (6_1) y_hi0 -= D2ON36;
2943*25c28e83SPiotr Jasiukajtis	! NOTE(review): the (n_m) tags look like pipeline-stage labels; these FP ops presumably mirror in-line code bypassed by branching here - confirm.
2944*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f24,%f50		! (4_1) dtmp0 = dd * dres;
2945*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp0]		! (7_1) *(long long*)&scl0 = ll;
2946*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f48,%f52		! (1_1) res0 += dtmp0;
2947*25c28e83SPiotr Jasiukajtis	! The %g1 stored to dtmp0 above is whatever value %g1 held on entry to this block.
2948*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f48		! (2_1) res0 = vis_fand(dres,DA0);
2949*25c28e83SPiotr Jasiukajtis
2950*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f0		! (6_1) res0_hi = x_hi0 * x_hi0;
2951*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f28		! (6_1) x_lo0 = x0 - x_hi0;
2952*25c28e83SPiotr Jasiukajtis
2953*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f2,%f46		! (6_1) dtmp0 = y_hi0 * y_hi0;
2954*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
2955*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (6_1) res0_lo = x0 + x_hi0;
2956*25c28e83SPiotr Jasiukajtis
2957*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f22,%f22		! (3_1) dtmp2 = dd * dres;
2958*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000 (not shifted or stored within this block)
2959*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
2960*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (4_1) dtmp0 = DTWO - dtmp0;
2961*25c28e83SPiotr Jasiukajtis
2962*25c28e83SPiotr Jasiukajtis	ba	.cont32			! continue at .cont32
2963*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL + TBL_SHIFT + 24
2964*25c28e83SPiotr Jasiukajtis	! .update30: cap counter at 1 (saving the excess and %i2/%o0), aim %i2/%o0 at TBL+TBL_SHIFT+24, branch to .cont32.
2965*25c28e83SPiotr Jasiukajtis	.align	16
2966*25c28e83SPiotr Jasiukajtis.update30:
2967*25c28e83SPiotr Jasiukajtis	cmp	counter,1		! counter vs. 1
2968*25c28e83SPiotr Jasiukajtis	ble	1f			! counter <= 1 (signed): nothing to save
2969*25c28e83SPiotr Jasiukajtis	nop				! delay slot
2970*25c28e83SPiotr Jasiukajtis	! counter > 1: record the excess count and the current %i2/%o0 in the frame.
2971*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! counter -= 1
2972*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
2973*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
2974*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
2975*25c28e83SPiotr Jasiukajtis
2976*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
2977*25c28e83SPiotr Jasiukajtis
2978*25c28e83SPiotr Jasiukajtis	mov	1,counter		! counter = 1
2979*25c28e83SPiotr Jasiukajtis1:
2980*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f24,%f50		! (4_1) dtmp0 = dd * dres;
2981*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp0]		! (7_1) *(long long*)&scl0 = ll;
2982*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f48,%f52		! (1_1) res0 += dtmp0;
2983*25c28e83SPiotr Jasiukajtis	! NOTE(review): the (n_m) tags look like pipeline-stage labels; these FP ops presumably mirror in-line code bypassed by branching here - confirm.
2984*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f48		! (2_1) res0 = vis_fand(dres,DA0);
2985*25c28e83SPiotr Jasiukajtis
2986*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f0		! (6_1) res0_hi = x_hi0 * x_hi0;
2987*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f28		! (6_1) x_lo0 = x0 - x_hi0;
2988*25c28e83SPiotr Jasiukajtis
2989*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f2,%f46		! (6_1) dtmp0 = y_hi0 * y_hi0;
2990*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
2991*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (6_1) res0_lo = x0 + x_hi0;
2992*25c28e83SPiotr Jasiukajtis
2993*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f22,%f22		! (3_1) dtmp2 = dd * dres;
2994*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000 (not shifted or stored within this block)
2995*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
2996*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (4_1) dtmp0 = DTWO - dtmp0;
2997*25c28e83SPiotr Jasiukajtis
2998*25c28e83SPiotr Jasiukajtis	ba	.cont32			! continue at .cont32
2999*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL + TBL_SHIFT + 24
3000*25c28e83SPiotr Jasiukajtis	! .update31: if %l7 >= _0x00100000 go to .cont31; else cap counter at 1 (saving the excess and %i2/%o0), aim %i2/%o0 at TBL+TBL_SHIFT+24, branch to .cont32.
3001*25c28e83SPiotr Jasiukajtis	.align	16
3002*25c28e83SPiotr Jasiukajtis.update31:
3003*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3004*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont31		! (0_0) if ( hy0 < 0x00100000 )
3005*25c28e83SPiotr Jasiukajtis	! The cmp below occupies the bge delay slot, so it executes on both paths.
3006*25c28e83SPiotr Jasiukajtis	cmp	counter,1		! counter vs. 1
3007*25c28e83SPiotr Jasiukajtis	ble,a	1f			! counter <= 1 (signed): nothing to save
3008*25c28e83SPiotr Jasiukajtis	nop				! annulled delay slot: runs only if the ble is taken
3009*25c28e83SPiotr Jasiukajtis	! counter > 1: record the excess count and the current %i2/%o0 in the frame.
3010*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! counter -= 1
3011*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
3012*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
3013*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
3014*25c28e83SPiotr Jasiukajtis
3015*25c28e83SPiotr Jasiukajtis	mov	1,counter		! counter = 1
3016*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
3017*25c28e83SPiotr Jasiukajtis1:
3018*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f0		! (6_1) res0_hi = x_hi0 * x_hi0;
3019*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f28		! (6_1) x_lo0 = x0 - x_hi0;
3020*25c28e83SPiotr Jasiukajtis	! NOTE(review): the (n_m) tags look like pipeline-stage labels; these FP ops presumably mirror in-line code bypassed by branching here - confirm.
3021*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f2,%f46		! (6_1) dtmp0 = y_hi0 * y_hi0;
3022*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3023*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (6_1) res0_lo = x0 + x_hi0;
3024*25c28e83SPiotr Jasiukajtis
3025*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f22,%f22		! (3_1) dtmp2 = dd * dres;
3026*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000 (not shifted or stored within this block)
3027*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
3028*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (4_1) dtmp0 = DTWO - dtmp0;
3029*25c28e83SPiotr Jasiukajtis
3030*25c28e83SPiotr Jasiukajtis	ba	.cont32			! continue at .cont32
3031*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL + TBL_SHIFT + 24
3032*25c28e83SPiotr Jasiukajtis	! .update33: cap counter at 2 (saving the excess and %i4/%i3), aim %i4/%i3 at TBL+TBL_SHIFT+24, branch to .cont36.
3033*25c28e83SPiotr Jasiukajtis	.align	16
3034*25c28e83SPiotr Jasiukajtis.update33:
3035*25c28e83SPiotr Jasiukajtis	cmp	counter,2		! counter vs. 2
3036*25c28e83SPiotr Jasiukajtis	ble	1f			! counter <= 2 (signed): nothing to save
3037*25c28e83SPiotr Jasiukajtis	nop				! delay slot
3038*25c28e83SPiotr Jasiukajtis	! counter > 2: record the excess count and the current %i4/%i3 in the frame.
3039*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter	! counter -= 2
3040*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
3041*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
3042*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
3043*25c28e83SPiotr Jasiukajtis
3044*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
3045*25c28e83SPiotr Jasiukajtis
3046*25c28e83SPiotr Jasiukajtis	mov	2,counter		! counter = 2
3047*25c28e83SPiotr Jasiukajtis1:
3048*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
3049*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;
3050*25c28e83SPiotr Jasiukajtis	! NOTE(review): the (n_m) tags look like pipeline-stage labels; these FP ops presumably mirror in-line code bypassed by branching here - confirm.
3051*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f16,%f50		! (5_1) dtmp0 = dd * dres;
3052*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (2_1) res0 += dtmp0;
3053*25c28e83SPiotr Jasiukajtis	! The %o4 stored to dtmp2 below is the value held on entry; %o4 is reloaded further down.
3054*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3055*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp2]		! (0_0) *(long long*)&scl0 = ll;
3056*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (3_1) res0 = vis_fand(dres,DA0);
3057*25c28e83SPiotr Jasiukajtis
3058*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
3059*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
3060*25c28e83SPiotr Jasiukajtis
3061*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
3062*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
3063*25c28e83SPiotr Jasiukajtis
3064*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (3_1) dtmp0 = res0_hi * res0;
3065*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (5_1) dtmp0 = DTWO - dtmp0;
3066*25c28e83SPiotr Jasiukajtis
3067*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f24,%f24		! (4_1) dtmp2 = dd * dres;
3068*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (7_1) dtmp1 = y0 + y_hi0;
3069*25c28e83SPiotr Jasiukajtis
3070*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f48,%f38		! (3_1) dtmp1 = res0_lo * res0;
3071*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
3072*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
3073*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (7_1) y_lo0 = y0 - y_hi0;
3074*25c28e83SPiotr Jasiukajtis	! %o4 << 32 = 0x3ff0000000000000, the IEEE-754 bit pattern of the double 1.0.
3075*25c28e83SPiotr Jasiukajtis	sllx	%o4,32,%o4		! (1_0) ll = (long long)j0 << 32;
3076*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp3]		! (1_0) *(long long*)&scl0 = ll;
3077*25c28e83SPiotr Jasiukajtis	ba	.cont36			! continue at .cont36
3078*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL + TBL_SHIFT + 24
3079*25c28e83SPiotr Jasiukajtis	! .update34: cap counter at 2 (saving the excess and %i4/%i3), aim %i4/%i3 at TBL+TBL_SHIFT+24, branch to .cont36.
3080*25c28e83SPiotr Jasiukajtis	.align	16
3081*25c28e83SPiotr Jasiukajtis.update34:
3082*25c28e83SPiotr Jasiukajtis	cmp	counter,2		! counter vs. 2
3083*25c28e83SPiotr Jasiukajtis	ble	1f			! counter <= 2 (signed): nothing to save
3084*25c28e83SPiotr Jasiukajtis	nop				! delay slot
3085*25c28e83SPiotr Jasiukajtis	! counter > 2: record the excess count and the current %i4/%i3 in the frame.
3086*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter	! counter -= 2
3087*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
3088*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
3089*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
3090*25c28e83SPiotr Jasiukajtis
3091*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
3092*25c28e83SPiotr Jasiukajtis
3093*25c28e83SPiotr Jasiukajtis	mov	2,counter		! counter = 2
3094*25c28e83SPiotr Jasiukajtis1:
3095*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3096*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp2]		! (0_0) *(long long*)&scl0 = ll;
3097*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (3_1) res0 = vis_fand(dres,DA0);
3098*25c28e83SPiotr Jasiukajtis	! NOTE(review): the (n_m) tags look like pipeline-stage labels; these FP ops presumably mirror in-line code bypassed by branching here - confirm.
3099*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
3100*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
3101*25c28e83SPiotr Jasiukajtis
3102*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
3103*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
3104*25c28e83SPiotr Jasiukajtis
3105*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (3_1) dtmp0 = res0_hi * res0;
3106*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (5_1) dtmp0 = DTWO - dtmp0;
3107*25c28e83SPiotr Jasiukajtis
3108*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f24,%f24		! (4_1) dtmp2 = dd * dres;
3109*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (7_1) dtmp1 = y0 + y_hi0;
3110*25c28e83SPiotr Jasiukajtis
3111*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f48,%f38		! (3_1) dtmp1 = res0_lo * res0;
3112*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
3113*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
3114*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (7_1) y_lo0 = y0 - y_hi0;
3115*25c28e83SPiotr Jasiukajtis	! %o4 << 32 = 0x3ff0000000000000, the IEEE-754 bit pattern of the double 1.0.
3116*25c28e83SPiotr Jasiukajtis	sllx	%o4,32,%o4		! (1_0) ll = (long long)j0 << 32;
3117*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp3]		! (1_0) *(long long*)&scl0 = ll;
3118*25c28e83SPiotr Jasiukajtis	ba	.cont36			! continue at .cont36
3119*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL + TBL_SHIFT + 24
3120*25c28e83SPiotr Jasiukajtis	! .update35: if %l7 >= _0x00100000 go to .cont35a; else cap counter at 2 (saving the excess and %i4/%i3), aim %i4/%i3 at TBL+TBL_SHIFT+24, branch to .cont35b.
3121*25c28e83SPiotr Jasiukajtis	.align	16
3122*25c28e83SPiotr Jasiukajtis.update35:
3123*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3124*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont35a		! (0_0) if ( hy0 < 0x00100000 )
3125*25c28e83SPiotr Jasiukajtis	! The cmp below occupies the bge delay slot, so it executes on both paths.
3126*25c28e83SPiotr Jasiukajtis	cmp	counter,2		! counter vs. 2
3127*25c28e83SPiotr Jasiukajtis	ble,a	1f			! counter <= 2 (signed): nothing to save
3128*25c28e83SPiotr Jasiukajtis	nop				! annulled delay slot: runs only if the ble is taken
3129*25c28e83SPiotr Jasiukajtis	! counter > 2: record the excess count and the current %i4/%i3 in the frame.
3130*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter	! counter -= 2
3131*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
3132*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
3133*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4
3134*25c28e83SPiotr Jasiukajtis
3135*25c28e83SPiotr Jasiukajtis	mov	2,counter		! counter = 2
3136*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
3137*25c28e83SPiotr Jasiukajtis1:
3138*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (3_1) dtmp0 = res0_hi * res0;
3139*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000 (not shifted or stored within this block)
3140*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
3141*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (5_1) dtmp0 = DTWO - dtmp0;
3142*25c28e83SPiotr Jasiukajtis	! NOTE(review): the two FP ops above presumably mirror in-line code bypassed by branching here - confirm.
3143*25c28e83SPiotr Jasiukajtis	ba	.cont35b		! continue at .cont35b
3144*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL + TBL_SHIFT + 24
3145*25c28e83SPiotr Jasiukajtis	! .update37: cap counter at 3 (saving the excess and %i2/%o0), aim %i2/%o0 at TBL+TBL_SHIFT+24, branch to .cont40.
3146*25c28e83SPiotr Jasiukajtis	.align	16
3147*25c28e83SPiotr Jasiukajtis.update37:
3148*25c28e83SPiotr Jasiukajtis	cmp	counter,3		! counter vs. 3
3149*25c28e83SPiotr Jasiukajtis	ble	1f			! counter <= 3 (signed): nothing to save
3150*25c28e83SPiotr Jasiukajtis	nop				! delay slot
3151*25c28e83SPiotr Jasiukajtis	! counter > 3: record the excess count and the current %i2/%o0 in the frame.
3152*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter	! counter -= 3
3153*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
3154*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
3155*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
3156*25c28e83SPiotr Jasiukajtis
3157*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
3158*25c28e83SPiotr Jasiukajtis
3159*25c28e83SPiotr Jasiukajtis	mov	3,counter		! counter = 3
3160*25c28e83SPiotr Jasiukajtis1:
3161*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
3162*25c28e83SPiotr Jasiukajtis	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;
3163*25c28e83SPiotr Jasiukajtis	! NOTE(review): the (n_m) tags look like pipeline-stage labels; these FP ops presumably mirror in-line code bypassed by branching here - confirm.
3164*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f18,%f50		! (6_1) dtmp0 = dd * dres;
3165*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (3_1) res0 += dtmp0;
3166*25c28e83SPiotr Jasiukajtis	! The %o4 stored to dtmp4 below is whatever value %o4 held on entry to this block.
3167*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3168*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp4]		! (1_0) *(long long*)&scl0 = ll;
3169*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f48		! (4_1) res0 = vis_fand(dres,DA0);
3170*25c28e83SPiotr Jasiukajtis
3171*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
3172*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
3173*25c28e83SPiotr Jasiukajtis
3174*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
3175*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
3176*25c28e83SPiotr Jasiukajtis
3177*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f48,%f10		! (4_1) dtmp0 = res0_hi * res0;
3178*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (6_1) dtmp0 = DTWO - dtmp0;
3179*25c28e83SPiotr Jasiukajtis
3180*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f16,%f16		! (5_1) dtmp2 = dd * dres;
3181*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (0_0) dtmp1 = y0 + y_hi0;
3182*25c28e83SPiotr Jasiukajtis
3183*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f48,%f36		! (4_1) dtmp1 = res0_lo * res0;
3184*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
3185*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
3186*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (0_0) y_lo0 = y0 - y_hi0;
3187*25c28e83SPiotr Jasiukajtis	! %g1 << 32 = 0x3ff0000000000000, the IEEE-754 bit pattern of the double 1.0.
3188*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (2_0) ll = (long long)j0 << 32;
3189*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp5]		! (2_0) *(long long*)&scl0 = ll;
3190*25c28e83SPiotr Jasiukajtis	ba	.cont40			! continue at .cont40
3191*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL + TBL_SHIFT + 24
3192*25c28e83SPiotr Jasiukajtis	! .update38: cap counter at 3 (saving the excess and %i2/%o0), aim %i2/%o0 at TBL+TBL_SHIFT+24, branch to .cont40.
3193*25c28e83SPiotr Jasiukajtis	.align	16
3194*25c28e83SPiotr Jasiukajtis.update38:
3195*25c28e83SPiotr Jasiukajtis	cmp	counter,3		! counter vs. 3
3196*25c28e83SPiotr Jasiukajtis	ble	1f			! counter <= 3 (signed): nothing to save
3197*25c28e83SPiotr Jasiukajtis	nop				! delay slot
3198*25c28e83SPiotr Jasiukajtis	! counter > 3: record the excess count and the current %i2/%o0 in the frame.
3199*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter	! counter -= 3
3200*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = excess count
3201*25c28e83SPiotr Jasiukajtis	! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded by code outside this view - confirm.
3202*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2
3203*25c28e83SPiotr Jasiukajtis
3204*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
3205*25c28e83SPiotr Jasiukajtis
3206*25c28e83SPiotr Jasiukajtis	mov	3,counter		! counter = 3
3207*25c28e83SPiotr Jasiukajtis1:
3208*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3209*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+dtmp4]		! (1_0) *(long long*)&scl0 = ll;
3210*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f48		! (4_1) res0 = vis_fand(dres,DA0);
3211*25c28e83SPiotr Jasiukajtis	! NOTE(review): the (n_m) tags look like pipeline-stage labels; these FP ops presumably mirror in-line code bypassed by branching here - confirm.
3212*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
3213*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
3214*25c28e83SPiotr Jasiukajtis
3215*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
3216*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
3217*25c28e83SPiotr Jasiukajtis
3218*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f48,%f10		! (4_1) dtmp0 = res0_hi * res0;
3219*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (6_1) dtmp0 = DTWO - dtmp0;
3220*25c28e83SPiotr Jasiukajtis
3221*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f16,%f16		! (5_1) dtmp2 = dd * dres;
3222*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (0_0) dtmp1 = y0 + y_hi0;
3223*25c28e83SPiotr Jasiukajtis
3224*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f48,%f36		! (4_1) dtmp1 = res0_lo * res0;
3225*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
3226*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
3227*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (0_0) y_lo0 = y0 - y_hi0;
3228*25c28e83SPiotr Jasiukajtis	! %g1 << 32 = 0x3ff0000000000000, the IEEE-754 bit pattern of the double 1.0.
3229*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (2_0) ll = (long long)j0 << 32;
3230*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp5]		! (2_0) *(long long*)&scl0 = ll;
3231*25c28e83SPiotr Jasiukajtis	ba	.cont40			! continue at .cont40
3232*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL + TBL_SHIFT + 24
3233*25c28e83SPiotr Jasiukajtis
3234*25c28e83SPiotr Jasiukajtis	.align	16
3235*25c28e83SPiotr Jasiukajtis.update39:		! if %l7 >= _0x00100000 resume at .cont39a; otherwise cap counter at 3 and go to .cont39b
3236*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3237*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont39a		! (0_0) if ( hy0 < 0x00100000 )
3238*25c28e83SPiotr Jasiukajtis
3239*25c28e83SPiotr Jasiukajtis	cmp	counter,3		! delay slot of the bge above (not annulled), so it runs on both paths; counter <= 3 ?
3240*25c28e83SPiotr Jasiukajtis	ble,a	1f		! yes: nothing to park, skip to 1:
3241*25c28e83SPiotr Jasiukajtis	nop		! annulled delay slot: executed only when the ble is taken
3242*25c28e83SPiotr Jasiukajtis
3243*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter		! counter -= 3
3244*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3245*25c28e83SPiotr Jasiukajtis
3246*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]		! tmp_px = %i2 (presumably the x pointer to resume from - TODO confirm at the branch site)
3247*25c28e83SPiotr Jasiukajtis
3248*25c28e83SPiotr Jasiukajtis	mov	3,counter		! counter = 3
3249*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]		! tmp_py = %o0 (presumably the matching y pointer - TODO confirm)
3250*25c28e83SPiotr Jasiukajtis1:		! here counter <= 3
3251*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f48,%f10		! (4_1) dtmp0 = res0_hi * res0;
3252*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3253*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2		! %i2 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3254*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (6_1) dtmp0 = DTWO - dtmp0;
3255*25c28e83SPiotr Jasiukajtis
3256*25c28e83SPiotr Jasiukajtis	ba	.cont39b		! continue at .cont39b
3257*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0		! delay slot: %o0 = TBL + TBL_SHIFT + 24
3258*25c28e83SPiotr Jasiukajtis
3259*25c28e83SPiotr Jasiukajtis	.align	16
3260*25c28e83SPiotr Jasiukajtis.update41:		! cap counter at 4 (parking the excess and %i4/%i3 in the frame), then continue at .cont44
3261*25c28e83SPiotr Jasiukajtis	cmp	counter,4		! counter <= 4 ?
3262*25c28e83SPiotr Jasiukajtis	ble	1f		! yes: nothing to park, skip to 1:
3263*25c28e83SPiotr Jasiukajtis	nop		! branch delay slot
3264*25c28e83SPiotr Jasiukajtis
3265*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter		! counter -= 4
3266*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3267*25c28e83SPiotr Jasiukajtis
3268*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]		! tmp_px = %i4 (presumably the x pointer to resume from - TODO confirm at the branch site)
3269*25c28e83SPiotr Jasiukajtis
3270*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]		! tmp_py = %i3 (presumably the matching y pointer - TODO confirm)
3271*25c28e83SPiotr Jasiukajtis
3272*25c28e83SPiotr Jasiukajtis	mov	4,counter		! counter = 4
3273*25c28e83SPiotr Jasiukajtis1:		! here counter <= 4
3274*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
3275*25c28e83SPiotr Jasiukajtis	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;
3276*25c28e83SPiotr Jasiukajtis
3277*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres;
3278*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (4_1) res0 += dtmp0;
3279*25c28e83SPiotr Jasiukajtis
3280*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3281*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp6]		! (2_0) *(long long*)&scl0 = ll;
3282*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (5_1) res0 = vis_fand(dres,DA0);
3283*25c28e83SPiotr Jasiukajtis
3284*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
3285*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
3286*25c28e83SPiotr Jasiukajtis
3287*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
3288*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
3289*25c28e83SPiotr Jasiukajtis
3290*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
3291*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
3292*25c28e83SPiotr Jasiukajtis
3293*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f18,%f18		! (6_1) dtmp2 = dd * dres;
3294*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (1_0) dtmp1 = y0 + y_hi0;
3295*25c28e83SPiotr Jasiukajtis
3296*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f48,%f34		! (5_1) dtmp1 = res0_lo * res0;
3297*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3298*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4		! %i4 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3299*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (1_0) y_lo0 = y0 - y_hi0
3300*25c28e83SPiotr Jasiukajtis
3301*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (3_0) ll = (long long)j0 << 32;
3302*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp7]		! (3_0) *(long long*)&scl0 = ll;
3303*25c28e83SPiotr Jasiukajtis	ba	.cont44		! continue at .cont44
3304*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3		! delay slot: %i3 = TBL + TBL_SHIFT + 24
3305*25c28e83SPiotr Jasiukajtis
3306*25c28e83SPiotr Jasiukajtis	.align	16
3307*25c28e83SPiotr Jasiukajtis.update42:		! cap counter at 4 (parking the excess and %i4/%i3 in the frame), then continue at .cont44
3308*25c28e83SPiotr Jasiukajtis	cmp	counter,4		! counter <= 4 ?
3309*25c28e83SPiotr Jasiukajtis	ble	1f		! yes: nothing to park, skip to 1:
3310*25c28e83SPiotr Jasiukajtis	nop		! branch delay slot
3311*25c28e83SPiotr Jasiukajtis
3312*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter		! counter -= 4
3313*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3314*25c28e83SPiotr Jasiukajtis
3315*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]		! tmp_px = %i4 (presumably the x pointer to resume from - TODO confirm at the branch site)
3316*25c28e83SPiotr Jasiukajtis
3317*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]		! tmp_py = %i3 (presumably the matching y pointer - TODO confirm)
3318*25c28e83SPiotr Jasiukajtis
3319*25c28e83SPiotr Jasiukajtis	mov	4,counter		! counter = 4
3320*25c28e83SPiotr Jasiukajtis1:		! here counter <= 4
3321*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3322*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp6]		! (2_0) *(long long*)&scl0 = ll;
3323*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (5_1) res0 = vis_fand(dres,DA0);
3324*25c28e83SPiotr Jasiukajtis
3325*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
3326*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
3327*25c28e83SPiotr Jasiukajtis
3328*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
3329*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
3330*25c28e83SPiotr Jasiukajtis
3331*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
3332*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
3333*25c28e83SPiotr Jasiukajtis
3334*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f18,%f18		! (6_1) dtmp2 = dd * dres;
3335*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (1_0) dtmp1 = y0 + y_hi0;
3336*25c28e83SPiotr Jasiukajtis
3337*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f48,%f34		! (5_1) dtmp1 = res0_lo * res0;
3338*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3339*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4		! %i4 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3340*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (1_0) y_lo0 = y0 - y_hi0
3341*25c28e83SPiotr Jasiukajtis
3342*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (3_0) ll = (long long)j0 << 32;
3343*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp7]		! (3_0) *(long long*)&scl0 = ll;
3344*25c28e83SPiotr Jasiukajtis	ba	.cont44		! continue at .cont44
3345*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3		! delay slot: %i3 = TBL + TBL_SHIFT + 24
3346*25c28e83SPiotr Jasiukajtis
3347*25c28e83SPiotr Jasiukajtis	.align	16
3348*25c28e83SPiotr Jasiukajtis.update43:		! if %l7 >= _0x00100000 resume at .cont43a; otherwise cap counter at 4 and go to .cont43b
3349*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3350*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont43a		! (0_0) if ( hy0 < 0x00100000 )
3351*25c28e83SPiotr Jasiukajtis
3352*25c28e83SPiotr Jasiukajtis	cmp	counter,4		! delay slot of the bge above (not annulled), so it runs on both paths; counter <= 4 ?
3353*25c28e83SPiotr Jasiukajtis	ble,a	1f		! yes: nothing to park, skip to 1:
3354*25c28e83SPiotr Jasiukajtis	nop		! annulled delay slot: executed only when the ble is taken
3355*25c28e83SPiotr Jasiukajtis
3356*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter		! counter -= 4
3357*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3358*25c28e83SPiotr Jasiukajtis
3359*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]		! tmp_px = %i4 (presumably the x pointer to resume from - TODO confirm at the branch site)
3360*25c28e83SPiotr Jasiukajtis
3361*25c28e83SPiotr Jasiukajtis	mov	4,counter		! counter = 4
3362*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]		! tmp_py = %i3 (presumably the matching y pointer - TODO confirm)
3363*25c28e83SPiotr Jasiukajtis1:		! here counter <= 4
3364*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
3365*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3366*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4		! %i4 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3367*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
3368*25c28e83SPiotr Jasiukajtis
3369*25c28e83SPiotr Jasiukajtis	ba	.cont43b		! continue at .cont43b
3370*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3		! delay slot: %i3 = TBL + TBL_SHIFT + 24
3371*25c28e83SPiotr Jasiukajtis
3372*25c28e83SPiotr Jasiukajtis	.align	16
3373*25c28e83SPiotr Jasiukajtis.update45:		! cap counter at 5 (parking the excess and %i2/%o0 in the frame), then continue at .cont48
3374*25c28e83SPiotr Jasiukajtis	cmp	counter,5		! counter <= 5 ?
3375*25c28e83SPiotr Jasiukajtis	ble	1f		! yes: nothing to park, skip to 1:
3376*25c28e83SPiotr Jasiukajtis	nop		! branch delay slot
3377*25c28e83SPiotr Jasiukajtis
3378*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter		! counter -= 5
3379*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3380*25c28e83SPiotr Jasiukajtis
3381*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]		! tmp_px = %i2 (presumably the x pointer to resume from - TODO confirm at the branch site)
3382*25c28e83SPiotr Jasiukajtis
3383*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]		! tmp_py = %o0 (presumably the matching y pointer - TODO confirm)
3384*25c28e83SPiotr Jasiukajtis
3385*25c28e83SPiotr Jasiukajtis	mov	5,counter		! counter = 5
3386*25c28e83SPiotr Jasiukajtis1:		! here counter <= 5
3387*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;
3388*25c28e83SPiotr Jasiukajtis
3389*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
3390*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
3391*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (5_1) res0 += dtmp0;
3392*25c28e83SPiotr Jasiukajtis
3393*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f48		! (6_1) res0 = vis_fand(dres,DA0);
3394*25c28e83SPiotr Jasiukajtis
3395*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
3396*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
3397*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
3398*25c28e83SPiotr Jasiukajtis
3399*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
3400*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3401*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
3402*25c28e83SPiotr Jasiukajtis
3403*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f48,%f10		! (6_1) dtmp0 = res0_hi * res0;
3404*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
3405*25c28e83SPiotr Jasiukajtis
3406*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f14,%f14		! (7_1) dtmp2 = dd * dres;
3407*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (2_0) dtmp1 = y0 + y_hi0;
3408*25c28e83SPiotr Jasiukajtis
3409*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f48,%f40		! (6_1) dtmp1 = res0_lo * res0;
3410*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3411*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2		! %i2 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3412*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (2_0) y_lo0 = y0 - y_hi0;
3413*25c28e83SPiotr Jasiukajtis
3414*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (4_0) ll = (long long)j0 << 32;
3415*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp9]		! (4_0) *(long long*)&scl0 = ll;
3416*25c28e83SPiotr Jasiukajtis	ba	.cont48		! continue at .cont48
3417*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0		! delay slot: %o0 = TBL + TBL_SHIFT + 24
3418*25c28e83SPiotr Jasiukajtis
3419*25c28e83SPiotr Jasiukajtis	.align	16
3420*25c28e83SPiotr Jasiukajtis.update46:		! cap counter at 5 (parking the excess and %i2/%o0 in the frame), then continue at .cont48
3421*25c28e83SPiotr Jasiukajtis	cmp	counter,5		! counter <= 5 ?
3422*25c28e83SPiotr Jasiukajtis	ble	1f		! yes: nothing to park, skip to 1:
3423*25c28e83SPiotr Jasiukajtis	nop		! branch delay slot
3424*25c28e83SPiotr Jasiukajtis
3425*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter		! counter -= 5
3426*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3427*25c28e83SPiotr Jasiukajtis
3428*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]		! tmp_px = %i2 (presumably the x pointer to resume from - TODO confirm at the branch site)
3429*25c28e83SPiotr Jasiukajtis
3430*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]		! tmp_py = %o0 (presumably the matching y pointer - TODO confirm)
3431*25c28e83SPiotr Jasiukajtis
3432*25c28e83SPiotr Jasiukajtis	mov	5,counter		! counter = 5
3433*25c28e83SPiotr Jasiukajtis1:		! here counter <= 5
3434*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
3435*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
3436*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (5_1) res0 += dtmp0;
3437*25c28e83SPiotr Jasiukajtis
3438*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f48		! (6_1) res0 = vis_fand(dres,DA0);
3439*25c28e83SPiotr Jasiukajtis
3440*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
3441*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
3442*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
3443*25c28e83SPiotr Jasiukajtis
3444*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
3445*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3446*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
3447*25c28e83SPiotr Jasiukajtis
3448*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f48,%f10		! (6_1) dtmp0 = res0_hi * res0;
3449*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
3450*25c28e83SPiotr Jasiukajtis
3451*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f14,%f14		! (7_1) dtmp2 = dd * dres;
3452*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (2_0) dtmp1 = y0 + y_hi0;
3453*25c28e83SPiotr Jasiukajtis
3454*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f48,%f40		! (6_1) dtmp1 = res0_lo * res0;
3455*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3456*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2		! %i2 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3457*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (2_0) y_lo0 = y0 - y_hi0;
3458*25c28e83SPiotr Jasiukajtis
3459*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (4_0) ll = (long long)j0 << 32;
3460*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp9]		! (4_0) *(long long*)&scl0 = ll;
3461*25c28e83SPiotr Jasiukajtis	ba	.cont48		! continue at .cont48
3462*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0		! delay slot: %o0 = TBL + TBL_SHIFT + 24
3463*25c28e83SPiotr Jasiukajtis
3464*25c28e83SPiotr Jasiukajtis	.align	16
3465*25c28e83SPiotr Jasiukajtis.update47:		! if %l7 >= _0x00100000 resume at .cont47a; otherwise cap counter at 5 and go to .cont47b
3466*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3467*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont47a		! (0_0) if ( hy0 < 0x00100000 )
3468*25c28e83SPiotr Jasiukajtis
3469*25c28e83SPiotr Jasiukajtis	cmp	counter,5		! delay slot of the bge above (not annulled), so it runs on both paths; counter <= 5 ?
3470*25c28e83SPiotr Jasiukajtis	ble,a	1f		! yes: nothing to park, skip to 1:
3471*25c28e83SPiotr Jasiukajtis	nop		! annulled delay slot: executed only when the ble is taken
3472*25c28e83SPiotr Jasiukajtis
3473*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter		! counter -= 5
3474*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3475*25c28e83SPiotr Jasiukajtis
3476*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]		! tmp_px = %i2 (presumably the x pointer to resume from - TODO confirm at the branch site)
3477*25c28e83SPiotr Jasiukajtis
3478*25c28e83SPiotr Jasiukajtis	mov	5,counter		! counter = 5
3479*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]		! tmp_py = %o0 (presumably the matching y pointer - TODO confirm)
3480*25c28e83SPiotr Jasiukajtis1:		! here counter <= 5
3481*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
3482*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
3483*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
3484*25c28e83SPiotr Jasiukajtis
3485*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
3486*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3487*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
3488*25c28e83SPiotr Jasiukajtis
3489*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f48,%f10		! (6_1) dtmp0 = res0_hi * res0;
3490*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3491*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2		! %i2 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3492*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
3493*25c28e83SPiotr Jasiukajtis
3494*25c28e83SPiotr Jasiukajtis	ba	.cont47b		! continue at .cont47b
3495*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0		! delay slot: %o0 = TBL + TBL_SHIFT + 24
3496*25c28e83SPiotr Jasiukajtis
3497*25c28e83SPiotr Jasiukajtis	.align	16
3498*25c28e83SPiotr Jasiukajtis.update49:		! cap counter at 6 (parking the excess and %i4/%i3 in the frame), then continue at .cont52
3499*25c28e83SPiotr Jasiukajtis	cmp	counter,6		! counter <= 6 ?
3500*25c28e83SPiotr Jasiukajtis	ble	1f		! yes: nothing to park, skip to 1:
3501*25c28e83SPiotr Jasiukajtis	nop		! branch delay slot
3502*25c28e83SPiotr Jasiukajtis
3503*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter		! counter -= 6
3504*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3505*25c28e83SPiotr Jasiukajtis
3506*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]		! tmp_px = %i4 (presumably the x pointer to resume from - TODO confirm at the branch site)
3507*25c28e83SPiotr Jasiukajtis
3508*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]		! tmp_py = %i3 (presumably the matching y pointer - TODO confirm)
3509*25c28e83SPiotr Jasiukajtis
3510*25c28e83SPiotr Jasiukajtis	mov	6,counter		! counter = 6
3511*25c28e83SPiotr Jasiukajtis1:		! here counter <= 6
3512*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;
3513*25c28e83SPiotr Jasiukajtis
3514*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
3515*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
3516*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (6_1) res0 += dtmp0;
3517*25c28e83SPiotr Jasiukajtis
3518*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
3519*25c28e83SPiotr Jasiukajtis
3520*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
3521*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
3522*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
3523*25c28e83SPiotr Jasiukajtis
3524*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
3525*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3526*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
3527*25c28e83SPiotr Jasiukajtis
3528*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
3529*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
3530*25c28e83SPiotr Jasiukajtis
3531*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
3532*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
3533*25c28e83SPiotr Jasiukajtis
3534*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
3535*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3536*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4		! %i4 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3537*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
3538*25c28e83SPiotr Jasiukajtis
3539*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
3540*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
3541*25c28e83SPiotr Jasiukajtis	ba	.cont52		! continue at .cont52
3542*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3		! delay slot: %i3 = TBL + TBL_SHIFT + 24
3543*25c28e83SPiotr Jasiukajtis
3544*25c28e83SPiotr Jasiukajtis	.align	16
3545*25c28e83SPiotr Jasiukajtis.update50:		! cap counter at 6 (parking the excess and %i4/%i3 in the frame), then continue at .cont52
3546*25c28e83SPiotr Jasiukajtis	cmp	counter,6		! counter <= 6 ?
3547*25c28e83SPiotr Jasiukajtis	ble	1f		! yes: nothing to park, skip to 1:
3548*25c28e83SPiotr Jasiukajtis	nop		! branch delay slot
3549*25c28e83SPiotr Jasiukajtis
3550*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter		! counter -= 6
3551*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3552*25c28e83SPiotr Jasiukajtis
3553*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]		! tmp_px = %i4 (presumably the x pointer to resume from - TODO confirm at the branch site)
3554*25c28e83SPiotr Jasiukajtis
3555*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]		! tmp_py = %i3 (presumably the matching y pointer - TODO confirm)
3556*25c28e83SPiotr Jasiukajtis
3557*25c28e83SPiotr Jasiukajtis	mov	6,counter		! counter = 6
3558*25c28e83SPiotr Jasiukajtis1:		! here counter <= 6
3559*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
3560*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
3561*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f52,%f52		! (6_1) res0 += dtmp0;
3562*25c28e83SPiotr Jasiukajtis
3563*25c28e83SPiotr Jasiukajtis	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
3564*25c28e83SPiotr Jasiukajtis
3565*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
3566*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
3567*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
3568*25c28e83SPiotr Jasiukajtis
3569*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
3570*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3571*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
3572*25c28e83SPiotr Jasiukajtis
3573*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
3574*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
3575*25c28e83SPiotr Jasiukajtis
3576*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
3577*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
3578*25c28e83SPiotr Jasiukajtis
3579*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
3580*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3581*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4		! %i4 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3582*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
3583*25c28e83SPiotr Jasiukajtis
3584*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
3585*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
3586*25c28e83SPiotr Jasiukajtis	ba	.cont52		! continue at .cont52
3587*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3		! delay slot: %i3 = TBL + TBL_SHIFT + 24
3588*25c28e83SPiotr Jasiukajtis
3589*25c28e83SPiotr Jasiukajtis	.align	16
3590*25c28e83SPiotr Jasiukajtis.update51:		! if %l7 >= _0x00100000 resume at .cont51a; otherwise cap counter at 6 and go to .cont51b
3591*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3592*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont51a		! (0_0) if ( hy0 < 0x00100000 )
3593*25c28e83SPiotr Jasiukajtis
3594*25c28e83SPiotr Jasiukajtis	cmp	counter,6		! delay slot of the bge above (not annulled), so it runs on both paths; counter <= 6 ?
3595*25c28e83SPiotr Jasiukajtis	ble,a	1f		! yes: nothing to park, skip to 1:
3596*25c28e83SPiotr Jasiukajtis	nop		! annulled delay slot: executed only when the ble is taken
3597*25c28e83SPiotr Jasiukajtis
3598*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter		! counter -= 6
3599*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3600*25c28e83SPiotr Jasiukajtis
3601*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]		! tmp_px = %i4 (presumably the x pointer to resume from - TODO confirm at the branch site)
3602*25c28e83SPiotr Jasiukajtis
3603*25c28e83SPiotr Jasiukajtis	mov	6,counter		! counter = 6
3604*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]		! tmp_py = %i3 (presumably the matching y pointer - TODO confirm)
3605*25c28e83SPiotr Jasiukajtis1:		! here counter <= 6
3606*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
3607*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
3608*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
3609*25c28e83SPiotr Jasiukajtis
3610*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
3611*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3612*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
3613*25c28e83SPiotr Jasiukajtis
3614*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
3615*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3616*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4		! %i4 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3617*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
3618*25c28e83SPiotr Jasiukajtis
3619*25c28e83SPiotr Jasiukajtis	ba	.cont51b		! continue at .cont51b
3620*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3		! delay slot: %i3 = TBL + TBL_SHIFT + 24
3621*25c28e83SPiotr Jasiukajtis
3622*25c28e83SPiotr Jasiukajtis	.align	16
3623*25c28e83SPiotr Jasiukajtis.update53:		! cap counter at 7 (parking the excess and %i2/%o0 in the frame), then continue at .cont56
3624*25c28e83SPiotr Jasiukajtis	cmp	counter,7		! counter <= 7 ?
3625*25c28e83SPiotr Jasiukajtis	ble	1f		! yes: nothing to park, skip to 1:
3626*25c28e83SPiotr Jasiukajtis	nop		! branch delay slot
3627*25c28e83SPiotr Jasiukajtis
3628*25c28e83SPiotr Jasiukajtis	sub	counter,7,counter		! counter -= 7
3629*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3630*25c28e83SPiotr Jasiukajtis
3631*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]		! tmp_px = %i2 (presumably the x pointer to resume from - TODO confirm at the branch site)
3632*25c28e83SPiotr Jasiukajtis
3633*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]		! tmp_py = %o0 (presumably the matching y pointer - TODO confirm)
3634*25c28e83SPiotr Jasiukajtis
3635*25c28e83SPiotr Jasiukajtis	mov	7,counter		! counter = 7
3636*25c28e83SPiotr Jasiukajtis1:		! here counter <= 7
3637*25c28e83SPiotr Jasiukajtis	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36;
3638*25c28e83SPiotr Jasiukajtis
3639*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
3640*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
3641*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
3642*25c28e83SPiotr Jasiukajtis
3643*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
3644*25c28e83SPiotr Jasiukajtis
3645*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
3646*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
3647*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
3648*25c28e83SPiotr Jasiukajtis
3649*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
3650*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3651*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
3652*25c28e83SPiotr Jasiukajtis
3653*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
3654*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
3655*25c28e83SPiotr Jasiukajtis
3656*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
3657*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
3658*25c28e83SPiotr Jasiukajtis
3659*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
3660*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3661*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2		! %i2 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3662*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
3663*25c28e83SPiotr Jasiukajtis
3664*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
3665*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
3666*25c28e83SPiotr Jasiukajtis	ba	.cont56		! continue at .cont56
3667*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0		! delay slot: %o0 = TBL + TBL_SHIFT + 24
3668*25c28e83SPiotr Jasiukajtis
3669*25c28e83SPiotr Jasiukajtis	.align	16
3670*25c28e83SPiotr Jasiukajtis.update54:		! cap counter at 7 (parking the excess and %i2/%o0 in the frame), then continue at .cont56
3671*25c28e83SPiotr Jasiukajtis	cmp	counter,7		! counter <= 7 ?
3672*25c28e83SPiotr Jasiukajtis	ble	1f		! yes: nothing to park, skip to 1:
3673*25c28e83SPiotr Jasiukajtis	nop		! branch delay slot
3674*25c28e83SPiotr Jasiukajtis
3675*25c28e83SPiotr Jasiukajtis	sub	counter,7,counter		! counter -= 7
3676*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3677*25c28e83SPiotr Jasiukajtis
3678*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]		! tmp_px = %i2 (presumably the x pointer to resume from - TODO confirm at the branch site)
3679*25c28e83SPiotr Jasiukajtis
3680*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]		! tmp_py = %o0 (presumably the matching y pointer - TODO confirm)
3681*25c28e83SPiotr Jasiukajtis
3682*25c28e83SPiotr Jasiukajtis	mov	7,counter		! counter = 7
3683*25c28e83SPiotr Jasiukajtis1:		! here counter <= 7
3684*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
3685*25c28e83SPiotr Jasiukajtis	st	%f1,[%i5+4]		! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
3686*25c28e83SPiotr Jasiukajtis	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
3687*25c28e83SPiotr Jasiukajtis
3688*25c28e83SPiotr Jasiukajtis	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
3689*25c28e83SPiotr Jasiukajtis
3690*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
3691*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
3692*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
3693*25c28e83SPiotr Jasiukajtis
3694*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
3695*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3696*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
3697*25c28e83SPiotr Jasiukajtis
3698*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
3699*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
3700*25c28e83SPiotr Jasiukajtis
3701*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
3702*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
3703*25c28e83SPiotr Jasiukajtis
3704*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
3705*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3706*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2		! %i2 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3707*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
3708*25c28e83SPiotr Jasiukajtis
3709*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
3710*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
3711*25c28e83SPiotr Jasiukajtis	ba	.cont56		! continue at .cont56
3712*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0		! delay slot: %o0 = TBL + TBL_SHIFT + 24
3713*25c28e83SPiotr Jasiukajtis
3714*25c28e83SPiotr Jasiukajtis	.align	16
3715*25c28e83SPiotr Jasiukajtis.update55:		! if %l7 >= _0x00100000 resume at .cont55a; otherwise cap counter at 7 and go to .cont55b
3716*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3717*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont55a		! (0_0) if ( hy0 < 0x00100000 )
3718*25c28e83SPiotr Jasiukajtis
3719*25c28e83SPiotr Jasiukajtis	cmp	counter,7		! delay slot of the bge above (not annulled), so it runs on both paths; counter <= 7 ?
3720*25c28e83SPiotr Jasiukajtis	ble,a	1f		! yes: nothing to park, skip to 1:
3721*25c28e83SPiotr Jasiukajtis	nop		! annulled delay slot: executed only when the ble is taken
3722*25c28e83SPiotr Jasiukajtis
3723*25c28e83SPiotr Jasiukajtis	sub	counter,7,counter		! counter -= 7
3724*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3725*25c28e83SPiotr Jasiukajtis
3726*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]		! tmp_px = %i2 (presumably the x pointer to resume from - TODO confirm at the branch site)
3727*25c28e83SPiotr Jasiukajtis
3728*25c28e83SPiotr Jasiukajtis	mov	7,counter		! counter = 7
3729*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp_py]		! tmp_py = %o0 (presumably the matching y pointer - TODO confirm)
3730*25c28e83SPiotr Jasiukajtis1:		! here counter <= 7
3731*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
3732*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
3733*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
3734*25c28e83SPiotr Jasiukajtis
3735*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
3736*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3737*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
3738*25c28e83SPiotr Jasiukajtis
3739*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
3740*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3741*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i2		! %i2 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3742*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
3743*25c28e83SPiotr Jasiukajtis
3744*25c28e83SPiotr Jasiukajtis	ba	.cont55b		! continue at .cont55b
3745*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%o0		! delay slot: %o0 = TBL + TBL_SHIFT + 24
3746*25c28e83SPiotr Jasiukajtis
3747*25c28e83SPiotr Jasiukajtis	.align	16
3748*25c28e83SPiotr Jasiukajtis.update57:		! cap counter at 8 (parking the excess and %i4/%i3 in the frame), then continue at .cont60
3749*25c28e83SPiotr Jasiukajtis	cmp	counter,8		! counter <= 8 ?
3750*25c28e83SPiotr Jasiukajtis	ble	1f		! yes: nothing to park, skip to 1:
3751*25c28e83SPiotr Jasiukajtis	nop		! branch delay slot
3752*25c28e83SPiotr Jasiukajtis
3753*25c28e83SPiotr Jasiukajtis	sub	counter,8,counter		! counter -= 8
3754*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]		! tmp_counter = reduced counter
3755*25c28e83SPiotr Jasiukajtis
3756*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]		! tmp_px = %i4 (presumably the x pointer to resume from - TODO confirm at the branch site)
3757*25c28e83SPiotr Jasiukajtis
3758*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]		! tmp_py = %i3 (presumably the matching y pointer - TODO confirm)
3759*25c28e83SPiotr Jasiukajtis
3760*25c28e83SPiotr Jasiukajtis	mov	8,counter		! counter = 8
3761*25c28e83SPiotr Jasiukajtis1:		! here counter <= 8
3762*25c28e83SPiotr Jasiukajtis	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;
3763*25c28e83SPiotr Jasiukajtis
3764*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
3765*25c28e83SPiotr Jasiukajtis	st	%f3,[%i5+4]		! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
3766*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;
3767*25c28e83SPiotr Jasiukajtis
3768*25c28e83SPiotr Jasiukajtis	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);
3769*25c28e83SPiotr Jasiukajtis
3770*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
3771*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
3772*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
3773*25c28e83SPiotr Jasiukajtis
3774*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
3775*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3776*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
3777*25c28e83SPiotr Jasiukajtis
3778*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
3779*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
3780*25c28e83SPiotr Jasiukajtis
3781*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
3782*25c28e83SPiotr Jasiukajtis	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;
3783*25c28e83SPiotr Jasiukajtis
3784*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
3785*25c28e83SPiotr Jasiukajtis	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
3786*25c28e83SPiotr Jasiukajtis
3787*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1		! %g1 = 0x3ff00000 (high word of IEEE-754 double 1.0)
3788*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4		! %i4 = TBL + TBL_SHIFT + 24 (replaces the pointer parked above)
3789*25c28e83SPiotr Jasiukajtis
3790*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
3791*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
3792*25c28e83SPiotr Jasiukajtis	ba	.cont60		! continue at .cont60
3793*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3		! delay slot: %i3 = TBL + TBL_SHIFT + 24
3794*25c28e83SPiotr Jasiukajtis
3795*25c28e83SPiotr Jasiukajtis	.align	16
! .update58: out-of-line fix-up path that rejoins the pipelined code at
! .cont60.  The branch that enters here is outside this excerpt.
!   1. If counter > 8 (signed compare), store counter - 8 to tmp_counter and
!      %i4 / %i3 to tmp_px / tmp_py, then set counter to 8.
!   2. Execute the floating-point instructions below; each keeps its original
!      "(stage_iteration)" pseudo-C annotation.
!   3. Store 0x3ff0000000000000 (the IEEE-754 bit pattern of 1.0) to dtmp15,
!      set %i4 and %i3 to TBL+TBL_SHIFT+24, and branch to .cont60.
! NOTE(review): the tmp_counter/tmp_px/tmp_py names suggest the saved state
! lets a later pass resume with the remaining elements -- confirm against the
! code that reloads these slots.
3796*25c28e83SPiotr Jasiukajtis.update58:
3797*25c28e83SPiotr Jasiukajtis	cmp	counter,8		! counter ? 8
3798*25c28e83SPiotr Jasiukajtis	ble	1f			! if ( counter <= 8 ) skip the save below
3799*25c28e83SPiotr Jasiukajtis	nop				! branch delay slot
3800*25c28e83SPiotr Jasiukajtis
3801*25c28e83SPiotr Jasiukajtis	sub	counter,8,counter	! counter -= 8
3802*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = counter (32-bit store)
3803*25c28e83SPiotr Jasiukajtis
3804*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4 (64-bit store)
3805*25c28e83SPiotr Jasiukajtis
3806*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3 (64-bit store)
3807*25c28e83SPiotr Jasiukajtis
3808*25c28e83SPiotr Jasiukajtis	mov	8,counter		! counter = 8
3809*25c28e83SPiotr Jasiukajtis1:
3810*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
3811*25c28e83SPiotr Jasiukajtis	st	%f3,[%i5+4]		! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
3812*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;
3813*25c28e83SPiotr Jasiukajtis
3814*25c28e83SPiotr Jasiukajtis	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);
3815*25c28e83SPiotr Jasiukajtis
3816*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
3817*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
3818*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
3819*25c28e83SPiotr Jasiukajtis
3820*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
3821*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3822*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
3823*25c28e83SPiotr Jasiukajtis
3824*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
3825*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
3826*25c28e83SPiotr Jasiukajtis
3827*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
3828*25c28e83SPiotr Jasiukajtis	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;
3829*25c28e83SPiotr Jasiukajtis
3830*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
3831*25c28e83SPiotr Jasiukajtis	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
3832*25c28e83SPiotr Jasiukajtis
3833*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (low 10 bits are already zero)
3834*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
3835*25c28e83SPiotr Jasiukajtis
3836*25c28e83SPiotr Jasiukajtis	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
3837*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
3838*25c28e83SPiotr Jasiukajtis	ba	.cont60			! rejoin at .cont60
3839*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL + TBL_SHIFT + 24
3840*25c28e83SPiotr Jasiukajtis
3841*25c28e83SPiotr Jasiukajtis	.align	16
! .update59: out-of-line fix-up path that ends by branching to .cont59b.
! The branch that enters here is outside this excerpt.
!   1. If %l7 (hy0, per the annotation below) >= _0x00100000, branch to
!      .cont59a instead.  That branch is not annulled, so the "cmp counter,8"
!      in its delay slot executes on both paths and overwrites the integer
!      condition codes.
!   2. Otherwise, if counter > 8 (signed compare), store counter - 8 to
!      tmp_counter and %i4 / %i3 to tmp_px / tmp_py, then set counter to 8.
!   3. Execute the floating-point instructions below, load 0x3ff00000 into
!      %g1, set %i4 and %i3 to TBL+TBL_SHIFT+24, and branch to .cont59b.
! NOTE(review): _0x00100000 is defined outside this excerpt -- presumably an
! alias for a register holding 0x00100000; confirm.
! NOTE(review): %g1 is neither shifted nor stored in this block; presumably the
! code at .cont59b does that -- confirm.
3842*25c28e83SPiotr Jasiukajtis.update59:
3843*25c28e83SPiotr Jasiukajtis	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3844*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.cont59a		! (0_0) if ( hy0 < 0x00100000 )
3845*25c28e83SPiotr Jasiukajtis
3846*25c28e83SPiotr Jasiukajtis	cmp	counter,8		! delay slot of the bge above: counter ? 8
3847*25c28e83SPiotr Jasiukajtis	ble,a	1f			! if ( counter <= 8 ) skip the save below
3848*25c28e83SPiotr Jasiukajtis	nop				! delay slot (annulled when the branch is not taken)
3849*25c28e83SPiotr Jasiukajtis
3850*25c28e83SPiotr Jasiukajtis	sub	counter,8,counter	! counter -= 8
3851*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = counter (32-bit store)
3852*25c28e83SPiotr Jasiukajtis
3853*25c28e83SPiotr Jasiukajtis	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4 (64-bit store)
3854*25c28e83SPiotr Jasiukajtis
3855*25c28e83SPiotr Jasiukajtis	mov	8,counter		! counter = 8
3856*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3 (64-bit store)
3857*25c28e83SPiotr Jasiukajtis1:
3858*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
3859*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
3860*25c28e83SPiotr Jasiukajtis	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
3861*25c28e83SPiotr Jasiukajtis
3862*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
3863*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5		! pz += stridez
3864*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
3865*25c28e83SPiotr Jasiukajtis
3866*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
3867*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (low 10 bits are already zero)
3868*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
3869*25c28e83SPiotr Jasiukajtis	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
3870*25c28e83SPiotr Jasiukajtis
3871*25c28e83SPiotr Jasiukajtis	ba	.cont59b		! continue at .cont59b
3872*25c28e83SPiotr Jasiukajtis	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL + TBL_SHIFT + 24
3873*25c28e83SPiotr Jasiukajtis
3874*25c28e83SPiotr Jasiukajtis	.align	16
! .exit: return sequence for __vrhypot.  "restore" sits in the delay slot of
! "ret", so the register window is popped as control returns to the caller.
! NOTE(review): SET_SIZE is a macro defined outside this excerpt; presumably
! it records the size of the __vrhypot symbol -- confirm.
3875*25c28e83SPiotr Jasiukajtis.exit:
3876*25c28e83SPiotr Jasiukajtis	ret				! return to the caller
3877*25c28e83SPiotr Jasiukajtis	restore				! delay slot: pop the register window
3878*25c28e83SPiotr Jasiukajtis	SET_SIZE(__vrhypot)
3879*25c28e83SPiotr Jasiukajtis
3880