1*25c28e83SPiotr Jasiukajtis/*
2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis *
4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis *
8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis *
13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis *
19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis */
21*25c28e83SPiotr Jasiukajtis/*
22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23*25c28e83SPiotr Jasiukajtis */
24*25c28e83SPiotr Jasiukajtis/*
25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms.
27*25c28e83SPiotr Jasiukajtis */
28*25c28e83SPiotr Jasiukajtis
29*25c28e83SPiotr Jasiukajtis	.file	"__vhypot.S"
30*25c28e83SPiotr Jasiukajtis
31*25c28e83SPiotr Jasiukajtis#include "libm.h"
32*25c28e83SPiotr Jasiukajtis
33*25c28e83SPiotr Jasiukajtis	RO_DATA
34*25c28e83SPiotr Jasiukajtis	.align	64
35*25c28e83SPiotr Jasiukajtis
36*25c28e83SPiotr Jasiukajtis.CONST_TBL:	! 8-byte constants (high word, low word); each is loaded with ldd at entry
37*25c28e83SPiotr Jasiukajtis	.word	0x7ff00000, 0	! DC0: fand mask that keeps only the exponent field of dmax
38*25c28e83SPiotr Jasiukajtis	.word	0x7fe00000, 0	! DC1: fcmple32 threshold for large |x|,|y|; minuend of fpsub32 giving dnorm
39*25c28e83SPiotr Jasiukajtis	.word	0x00100000, 0	! DC2: fcmpgt32 threshold for small |x|,|y|
40*25c28e83SPiotr Jasiukajtis	.word	0x41b00000, 0	! D2ON28 = 268435456.0 (2**28); added then subtracted to form x_hi, y_hi
41*25c28e83SPiotr Jasiukajtis	.word	0x7fd00000, 0	! DC3
42*25c28e83SPiotr Jasiukajtis
43*25c28e83SPiotr Jasiukajtis#define counter		%i0	/* element count for the current pass; tested against 0 and 4 */
44*25c28e83SPiotr Jasiukajtis#define tmp_counter	%l3	/* saved count; copied into counter at .begin, then cleared */
45*25c28e83SPiotr Jasiukajtis#define tmp_px		%l5	/* saved x pointer; copied into %i1 at .begin */
46*25c28e83SPiotr Jasiukajtis#define tmp_py		%o7	/* saved y pointer; copied into %i3 at .begin */
47*25c28e83SPiotr Jasiukajtis#define stridex		%i2	/* x stride; shifted left by 3 at entry, then added to px */
48*25c28e83SPiotr Jasiukajtis#define stridey		%i4	/* y stride; shifted left by 3 at entry, then added to py */
49*25c28e83SPiotr Jasiukajtis#define stridez		%l0	/* z stride; read from the caller frame, shifted left by 3 */
50*25c28e83SPiotr Jasiukajtis
51*25c28e83SPiotr Jasiukajtis#define DC0		%f8	/* loaded from .CONST_TBL+0 */
52*25c28e83SPiotr Jasiukajtis#define DC0_HI		%f8	/* high-word register of the DC0 pair */
53*25c28e83SPiotr Jasiukajtis#define DC0_LO		%f9	/* low-word register of the DC0 pair */
54*25c28e83SPiotr Jasiukajtis#define DC1		%f46	/* loaded from .CONST_TBL+8 */
55*25c28e83SPiotr Jasiukajtis#define DC2		%f48	/* loaded from .CONST_TBL+16 */
56*25c28e83SPiotr Jasiukajtis#define DC3		%f0	/* loaded from .CONST_TBL+32 */
57*25c28e83SPiotr Jasiukajtis#define D2ON28		%f62	/* loaded from .CONST_TBL+24 */
58*25c28e83SPiotr Jasiukajtis
59*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
60*25c28e83SPiotr Jasiukajtis!      !!!!!   algorithm   !!!!!
61*25c28e83SPiotr Jasiukajtis!  ((float*)&x)[0] = ((float*)px)[0];
62*25c28e83SPiotr Jasiukajtis!  ((float*)&x)[1] = ((float*)px)[1];
63*25c28e83SPiotr Jasiukajtis!
64*25c28e83SPiotr Jasiukajtis!  ((float*)&y)[0] = ((float*)py)[0];
65*25c28e83SPiotr Jasiukajtis!  ((float*)&y)[1] = ((float*)py)[1];
66*25c28e83SPiotr Jasiukajtis!
67*25c28e83SPiotr Jasiukajtis!  x = fabs(x);
68*25c28e83SPiotr Jasiukajtis!  y = fabs(y);
69*25c28e83SPiotr Jasiukajtis!
70*25c28e83SPiotr Jasiukajtis!  c0 = vis_fcmple32(DC1,x);
71*25c28e83SPiotr Jasiukajtis!  c2 = vis_fcmple32(DC1,y);
72*25c28e83SPiotr Jasiukajtis!  c1 = vis_fcmpgt32(DC2,x);
73*25c28e83SPiotr Jasiukajtis!  c3 = vis_fcmpgt32(DC2,y);
74*25c28e83SPiotr Jasiukajtis!
75*25c28e83SPiotr Jasiukajtis!  c0 |= c2;
76*25c28e83SPiotr Jasiukajtis!  c1 &= c3;
77*25c28e83SPiotr Jasiukajtis!  if ( (c0 & 2) != 0 )
78*25c28e83SPiotr Jasiukajtis!  {
79*25c28e83SPiotr Jasiukajtis!    lx = ((int*)px)[1];
80*25c28e83SPiotr Jasiukajtis!    ly = ((int*)py)[1];
81*25c28e83SPiotr Jasiukajtis!    hx = *(int*)px;
82*25c28e83SPiotr Jasiukajtis!    hy = *(int*)py;
83*25c28e83SPiotr Jasiukajtis!
84*25c28e83SPiotr Jasiukajtis!    hx &= 0x7fffffff;
85*25c28e83SPiotr Jasiukajtis!    hy &= 0x7fffffff;
86*25c28e83SPiotr Jasiukajtis!
87*25c28e83SPiotr Jasiukajtis!    j0 = hx;
88*25c28e83SPiotr Jasiukajtis!    if ( j0 < hy ) j0 = hy;
89*25c28e83SPiotr Jasiukajtis!    j0 &= 0x7ff00000;
90*25c28e83SPiotr Jasiukajtis!    if ( j0 >= 0x7ff00000 )
91*25c28e83SPiotr Jasiukajtis!    {
92*25c28e83SPiotr Jasiukajtis!      if ( hx == 0x7ff00000 && lx == 0 ) res = x == y ? y : x;
93*25c28e83SPiotr Jasiukajtis!      else if ( hy == 0x7ff00000 && ly == 0 ) res = x == y ? x : y;
94*25c28e83SPiotr Jasiukajtis!      else res = x * y;
95*25c28e83SPiotr Jasiukajtis!
96*25c28e83SPiotr Jasiukajtis!      ((float*)pz)[0] = ((float*)&res)[0];
97*25c28e83SPiotr Jasiukajtis!      ((float*)pz)[1] = ((float*)&res)[1];
98*25c28e83SPiotr Jasiukajtis!    }
99*25c28e83SPiotr Jasiukajtis!    else
100*25c28e83SPiotr Jasiukajtis!    {
101*25c28e83SPiotr Jasiukajtis!      diff = hy - hx;
102*25c28e83SPiotr Jasiukajtis!      j0 = diff >> 31;
103*25c28e83SPiotr Jasiukajtis!      if ( ((diff ^ j0) - j0) < 0x03600000 )
104*25c28e83SPiotr Jasiukajtis!      {
105*25c28e83SPiotr Jasiukajtis!        x *= D2ONM1022;
106*25c28e83SPiotr Jasiukajtis!        y *= D2ONM1022;
107*25c28e83SPiotr Jasiukajtis!
108*25c28e83SPiotr Jasiukajtis!        x_hi = ( x + two28 ) - two28;
109*25c28e83SPiotr Jasiukajtis!        x_lo = x - x_hi;
110*25c28e83SPiotr Jasiukajtis!        y_hi = ( y + two28 ) - two28;
111*25c28e83SPiotr Jasiukajtis!        y_lo = y - y_hi;
112*25c28e83SPiotr Jasiukajtis!        res = (x_hi * x_hi + y_hi * y_hi);
113*25c28e83SPiotr Jasiukajtis!        res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
114*25c28e83SPiotr Jasiukajtis!
115*25c28e83SPiotr Jasiukajtis!        res = sqrt(res);
116*25c28e83SPiotr Jasiukajtis!
117*25c28e83SPiotr Jasiukajtis!        res = D2ONP1022 * res;
118*25c28e83SPiotr Jasiukajtis!        ((float*)pz)[0] = ((float*)&res)[0];
119*25c28e83SPiotr Jasiukajtis!        ((float*)pz)[1] = ((float*)&res)[1];
120*25c28e83SPiotr Jasiukajtis!      }
121*25c28e83SPiotr Jasiukajtis!      else
122*25c28e83SPiotr Jasiukajtis!      {
123*25c28e83SPiotr Jasiukajtis!        res = x + y;
124*25c28e83SPiotr Jasiukajtis!        ((float*)pz)[0] = ((float*)&res)[0];
125*25c28e83SPiotr Jasiukajtis!        ((float*)pz)[1] = ((float*)&res)[1];
126*25c28e83SPiotr Jasiukajtis!      }
127*25c28e83SPiotr Jasiukajtis!    }
128*25c28e83SPiotr Jasiukajtis!    px += stridex;
129*25c28e83SPiotr Jasiukajtis!    py += stridey;
130*25c28e83SPiotr Jasiukajtis!    pz += stridez;
131*25c28e83SPiotr Jasiukajtis!    continue;
132*25c28e83SPiotr Jasiukajtis!  }
133*25c28e83SPiotr Jasiukajtis!  if ( (c1 & 2) != 0 )
134*25c28e83SPiotr Jasiukajtis!  {
135*25c28e83SPiotr Jasiukajtis!    x *= D2ONP1022;
136*25c28e83SPiotr Jasiukajtis!    y *= D2ONP1022;
137*25c28e83SPiotr Jasiukajtis!
138*25c28e83SPiotr Jasiukajtis!    x_hi = ( x + two28 ) - two28;
139*25c28e83SPiotr Jasiukajtis!    x_lo = x - x_hi;
140*25c28e83SPiotr Jasiukajtis!    y_hi = ( y + two28 ) - two28;
141*25c28e83SPiotr Jasiukajtis!    y_lo = y - y_hi;
142*25c28e83SPiotr Jasiukajtis!    res = (x_hi * x_hi + y_hi * y_hi);
143*25c28e83SPiotr Jasiukajtis!    res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
144*25c28e83SPiotr Jasiukajtis!
145*25c28e83SPiotr Jasiukajtis!    res = sqrt(res);
146*25c28e83SPiotr Jasiukajtis!
147*25c28e83SPiotr Jasiukajtis!    res = D2ONM1022 * res;
148*25c28e83SPiotr Jasiukajtis!    ((float*)pz)[0] = ((float*)&res)[0];
149*25c28e83SPiotr Jasiukajtis!    ((float*)pz)[1] = ((float*)&res)[1];
150*25c28e83SPiotr Jasiukajtis!    px += stridex;
151*25c28e83SPiotr Jasiukajtis!    py += stridey;
152*25c28e83SPiotr Jasiukajtis!    pz += stridez;
153*25c28e83SPiotr Jasiukajtis!    continue;
154*25c28e83SPiotr Jasiukajtis!  }
155*25c28e83SPiotr Jasiukajtis!
156*25c28e83SPiotr Jasiukajtis!  dmax = x;
157*25c28e83SPiotr Jasiukajtis!  if ( dmax < y ) dmax = y;
158*25c28e83SPiotr Jasiukajtis!
159*25c28e83SPiotr Jasiukajtis!  dmax = vis_fand(dmax,DC0);
160*25c28e83SPiotr Jasiukajtis!  dnorm = vis_fpsub32(DC1,dmax);
161*25c28e83SPiotr Jasiukajtis!
162*25c28e83SPiotr Jasiukajtis!  x *= dnorm;
163*25c28e83SPiotr Jasiukajtis!  y *= dnorm;
164*25c28e83SPiotr Jasiukajtis!
165*25c28e83SPiotr Jasiukajtis!  x_hi = x + D2ON28;
166*25c28e83SPiotr Jasiukajtis!  x_hi -= D2ON28;
167*25c28e83SPiotr Jasiukajtis!  x_lo = x - x_hi;
168*25c28e83SPiotr Jasiukajtis!
169*25c28e83SPiotr Jasiukajtis!  y_hi = y + D2ON28;
170*25c28e83SPiotr Jasiukajtis!  y_hi -= D2ON28;
171*25c28e83SPiotr Jasiukajtis!  y_lo = y - y_hi;
172*25c28e83SPiotr Jasiukajtis!
173*25c28e83SPiotr Jasiukajtis!  res = x_hi * x_hi;
174*25c28e83SPiotr Jasiukajtis!  dtmp1 = x + x_hi;
175*25c28e83SPiotr Jasiukajtis!  dtmp0 = y_hi * y_hi;
176*25c28e83SPiotr Jasiukajtis!  dtmp2 = y + y_hi;
177*25c28e83SPiotr Jasiukajtis!
178*25c28e83SPiotr Jasiukajtis!  res += dtmp0;
179*25c28e83SPiotr Jasiukajtis!  dtmp1 *= x_lo;
180*25c28e83SPiotr Jasiukajtis!  dtmp2 *= y_lo;
181*25c28e83SPiotr Jasiukajtis!  dtmp1 += dtmp2;
182*25c28e83SPiotr Jasiukajtis!  res += dtmp1;
183*25c28e83SPiotr Jasiukajtis!
184*25c28e83SPiotr Jasiukajtis!  res = sqrt(res);
185*25c28e83SPiotr Jasiukajtis!
186*25c28e83SPiotr Jasiukajtis!  res = dmax * res;
187*25c28e83SPiotr Jasiukajtis!  ((float*)pz)[0] = ((float*)&res)[0];
188*25c28e83SPiotr Jasiukajtis!  ((float*)pz)[1] = ((float*)&res)[1];
189*25c28e83SPiotr Jasiukajtis!
190*25c28e83SPiotr Jasiukajtis!  px += stridex;
191*25c28e83SPiotr Jasiukajtis!  py += stridey;
192*25c28e83SPiotr Jasiukajtis!  pz += stridez;
193*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
194*25c28e83SPiotr Jasiukajtis
195*25c28e83SPiotr Jasiukajtis	ENTRY(__vhypot)
196*25c28e83SPiotr Jasiukajtis	save	%sp,-SA(MINFRAME),%sp
197*25c28e83SPiotr Jasiukajtis	PIC_SETUP(l7)
198*25c28e83SPiotr Jasiukajtis	PIC_SET(l7,.CONST_TBL,o3)
199*25c28e83SPiotr Jasiukajtis	wr	%g0,0x82,%asi
200*25c28e83SPiotr Jasiukajtis
201*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9
202*25c28e83SPiotr Jasiukajtis	ldx	[%fp+STACK_BIAS+176],%l0
203*25c28e83SPiotr Jasiukajtis#else
204*25c28e83SPiotr Jasiukajtis	ld	[%fp+STACK_BIAS+92],%l0
205*25c28e83SPiotr Jasiukajtis#endif
206*25c28e83SPiotr Jasiukajtis	ldd	[%o3],DC0
207*25c28e83SPiotr Jasiukajtis	sll	%i2,3,stridex
208*25c28e83SPiotr Jasiukajtis	mov	%i0,tmp_counter
209*25c28e83SPiotr Jasiukajtis
210*25c28e83SPiotr Jasiukajtis	ldd	[%o3+8],DC1
211*25c28e83SPiotr Jasiukajtis	sll	%i4,3,stridey
212*25c28e83SPiotr Jasiukajtis	mov	%i1,tmp_px
213*25c28e83SPiotr Jasiukajtis
214*25c28e83SPiotr Jasiukajtis	ldd	[%o3+16],DC2
215*25c28e83SPiotr Jasiukajtis	sll	%l0,3,stridez
216*25c28e83SPiotr Jasiukajtis	mov	%i3,tmp_py
217*25c28e83SPiotr Jasiukajtis
218*25c28e83SPiotr Jasiukajtis	ldd	[%o3+24],D2ON28
219*25c28e83SPiotr Jasiukajtis
220*25c28e83SPiotr Jasiukajtis	ldd	[%o3+32],DC3
221*25c28e83SPiotr Jasiukajtis
222*25c28e83SPiotr Jasiukajtis.begin:
223*25c28e83SPiotr Jasiukajtis	mov	tmp_counter,counter
224*25c28e83SPiotr Jasiukajtis	mov	tmp_px,%i1
225*25c28e83SPiotr Jasiukajtis	mov	tmp_py,%i3
226*25c28e83SPiotr Jasiukajtis	clr	tmp_counter
227*25c28e83SPiotr Jasiukajtis.begin1:
228*25c28e83SPiotr Jasiukajtis	cmp	counter,0
229*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.exit
230*25c28e83SPiotr Jasiukajtis	nop
231*25c28e83SPiotr Jasiukajtis
232*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%o0
233*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%o5
234*25c28e83SPiotr Jasiukajtis
235*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%o2
236*25c28e83SPiotr Jasiukajtis	add	%o5,1023,%o5
237*25c28e83SPiotr Jasiukajtis
238*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%f26		! (1_0) ((float*)&x)[0] = ((float*)px)[0];
239*25c28e83SPiotr Jasiukajtis
240*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%f27	! (1_0) ((float*)&x)[1] = ((float*)px)[1];
241*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%o1		! px += stridex
242*25c28e83SPiotr Jasiukajtis
243*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f24		! (1_0) ((float*)&y)[0] = ((float*)py)[0];
244*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00100000),%l7
245*25c28e83SPiotr Jasiukajtis	and	%o0,%o5,%o0
246*25c28e83SPiotr Jasiukajtis
247*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f25	! (1_0) ((float*)&y)[1] = ((float*)py)[1];
248*25c28e83SPiotr Jasiukajtis	and	%o2,%o5,%o2
249*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7fe00000),%l6
250*25c28e83SPiotr Jasiukajtis
251*25c28e83SPiotr Jasiukajtis	fabsd	%f26,%f36		! (1_0) x = fabs(x);
252*25c28e83SPiotr Jasiukajtis	cmp	%o0,%o2
253*25c28e83SPiotr Jasiukajtis	mov	%o2,%l4
254*25c28e83SPiotr Jasiukajtis
255*25c28e83SPiotr Jasiukajtis	fabsd	%f24,%f54		! (1_0) y = fabs(y);
256*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%o5		! py += stridey
257*25c28e83SPiotr Jasiukajtis	movg	%icc,%o0,%o2
258*25c28e83SPiotr Jasiukajtis	lda	[%o5]%asi,%f28		! (2_0) ((float*)&y)[0] = ((float*)py)[0];
259*25c28e83SPiotr Jasiukajtis
260*25c28e83SPiotr Jasiukajtis	cmp	%o2,%l6
261*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ff00000),%o4
262*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.spec0
263*25c28e83SPiotr Jasiukajtis	lda	[%o5+4]%asi,%f29	! (2_0) ((float*)&y)[1] = ((float*)py)[1];
264*25c28e83SPiotr Jasiukajtis
265*25c28e83SPiotr Jasiukajtis	cmp	%o2,%l7
266*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.spec1
267*25c28e83SPiotr Jasiukajtis	nop
268*25c28e83SPiotr Jasiukajtis	lda	[%o1]%asi,%f26		! (2_0) ((float*)&x)[0] = ((float*)px)[0];
269*25c28e83SPiotr Jasiukajtis
270*25c28e83SPiotr Jasiukajtis	lda	[%o1+4]%asi,%f27	! (2_0) ((float*)&x)[1] = ((float*)px)[1];
271*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i3		! py += stridey
272*25c28e83SPiotr Jasiukajtis
273*25c28e83SPiotr Jasiukajtis	fabsd	%f28,%f34		! (2_0) y = fabs(y);
274*25c28e83SPiotr Jasiukajtis
275*25c28e83SPiotr Jasiukajtis	fabsd	%f26,%f50		! (2_0) x = fabs(x);
276*25c28e83SPiotr Jasiukajtis
277*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f50,%o3	! (2_0) c0 = vis_fcmple32(DC1,x);
278*25c28e83SPiotr Jasiukajtis
279*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f34,%o0	! (2_0) c2 = vis_fcmple32(DC1,y);
280*25c28e83SPiotr Jasiukajtis
281*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f50,%o4	! (2_0) c1 = vis_fcmpgt32(DC2,x);
282*25c28e83SPiotr Jasiukajtis
283*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f34,%o5	! (2_0) c3 = vis_fcmpgt32(DC2,y);
284*25c28e83SPiotr Jasiukajtis
285*25c28e83SPiotr Jasiukajtis	or	%o3,%o0,%o3		! (2_0) c0 |= c2;
286*25c28e83SPiotr Jasiukajtis
287*25c28e83SPiotr Jasiukajtis	andcc	%o3,2,%g0		! (2_0) c0 & 2
288*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update0		! (2_0) if ( (c0 & 2) != 0 )
289*25c28e83SPiotr Jasiukajtis	and	%o4,%o5,%o4		! (2_0) c1 &= c3;
290*25c28e83SPiotr Jasiukajtis.cont0:
291*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%l4		! py += stridey
292*25c28e83SPiotr Jasiukajtis	andcc	%o4,2,%g0		! (2_0) c1 & 2
293*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update1		! (2_0) if ( (c1 & 2) != 0 )
294*25c28e83SPiotr Jasiukajtis	fmovd	%f36,%f56		! (1_0) dmax = x;
295*25c28e83SPiotr Jasiukajtis.cont1:
296*25c28e83SPiotr Jasiukajtis	lda	[%l4]%asi,%f30		! (3_0) ((float*)&y)[0] = ((float*)py)[0];
297*25c28e83SPiotr Jasiukajtis	add	%o1,stridex,%l2		! px += stridex
298*25c28e83SPiotr Jasiukajtis
299*25c28e83SPiotr Jasiukajtis	lda	[%l4+4]%asi,%f31	! (3_0) ((float*)&y)[1] = ((float*)py)[1];
300*25c28e83SPiotr Jasiukajtis
301*25c28e83SPiotr Jasiukajtis	lda	[%l2]%asi,%f18		! (3_1) ((float*)&x)[0] = ((float*)px)[0];
302*25c28e83SPiotr Jasiukajtis
303*25c28e83SPiotr Jasiukajtis	lda	[%l2+4]%asi,%f19	! (3_1) ((float*)&x)[1] = ((float*)px)[1];
304*25c28e83SPiotr Jasiukajtis
305*25c28e83SPiotr Jasiukajtis	fabsd	%f30,%f30		! (3_1) y = fabs(y);
306*25c28e83SPiotr Jasiukajtis
307*25c28e83SPiotr Jasiukajtis	fabsd	%f18,%f18		! (3_1) x = fabs(x);
308*25c28e83SPiotr Jasiukajtis
309*25c28e83SPiotr Jasiukajtis	fcmped	%fcc2,%f54,%f56		! (1_1) dmax ? y
310*25c28e83SPiotr Jasiukajtis
311*25c28e83SPiotr Jasiukajtis	fmovdg	%fcc2,%f54,%f56		! (1_1) if ( dmax < y ) dmax = y;
312*25c28e83SPiotr Jasiukajtis
313*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f18,%o3	! (3_1) c0 = vis_fcmple32(DC1,x);
314*25c28e83SPiotr Jasiukajtis
315*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f30,%o0	! (3_1) c2 = vis_fcmple32(DC1,y);
316*25c28e83SPiotr Jasiukajtis
317*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f18,%o4	! (3_1) c1 = vis_fcmpgt32(DC2,x);
318*25c28e83SPiotr Jasiukajtis
319*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f30,%o1	! (3_1) c3 = vis_fcmpgt32(DC2,y);
320*25c28e83SPiotr Jasiukajtis
321*25c28e83SPiotr Jasiukajtis	fand	%f56,DC0,%f38		! (1_1) dmax = vis_fand(dmax,DC0);
322*25c28e83SPiotr Jasiukajtis
323*25c28e83SPiotr Jasiukajtis	or	%o3,%o0,%o3		! (3_1) c0 |= c2;
324*25c28e83SPiotr Jasiukajtis
325*25c28e83SPiotr Jasiukajtis	andcc	%o3,2,%g0		! (3_1) c0 & 2
326*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update2		! (3_1) if ( (c0 & 2) != 0 )
327*25c28e83SPiotr Jasiukajtis	and	%o4,%o1,%o4		! (3_1) c1 &= c3;
328*25c28e83SPiotr Jasiukajtis.cont2:
329*25c28e83SPiotr Jasiukajtis	add	%l4,stridey,%i3		! py += stridey
330*25c28e83SPiotr Jasiukajtis	andcc	%o4,2,%g0		! (3_1) c1 & 2
331*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update3		! (3_1) if ( (c1 & 2) != 0 )
332*25c28e83SPiotr Jasiukajtis	fmovd	%f50,%f32		! (2_1) dmax = x;
333*25c28e83SPiotr Jasiukajtis.cont3:
334*25c28e83SPiotr Jasiukajtis	fpsub32	DC1,%f38,%f10		! (1_1) dnorm = vis_fpsub32(DC1,dmax);
335*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f20		! (0_0) ((float*)&y)[0] = ((float*)py)[0];
336*25c28e83SPiotr Jasiukajtis
337*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f21	! (0_0) ((float*)&y)[1] = ((float*)py)[1];
338*25c28e83SPiotr Jasiukajtis
339*25c28e83SPiotr Jasiukajtis	add	%l2,stridex,%l1		! px += stridex
340*25c28e83SPiotr Jasiukajtis
341*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f10,%f36		! (1_1) x *= dnorm;
342*25c28e83SPiotr Jasiukajtis	lda	[%l1]%asi,%f22		! (0_0) ((float*)&x)[0] = ((float*)px)[0]
343*25c28e83SPiotr Jasiukajtis
344*25c28e83SPiotr Jasiukajtis	lda	[%l1+4]%asi,%f23	! (0_0) ((float*)&x)[1] = ((float*)px)[1];
345*25c28e83SPiotr Jasiukajtis
346*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f10,%f56		! (1_1) y *= dnorm;
347*25c28e83SPiotr Jasiukajtis	fabsd	%f20,%f40		! (0_0) y = fabs(y);
348*25c28e83SPiotr Jasiukajtis
349*25c28e83SPiotr Jasiukajtis	fabsd	%f22,%f20		! (0_0) x = fabs(x);
350*25c28e83SPiotr Jasiukajtis
351*25c28e83SPiotr Jasiukajtis	fcmped	%fcc3,%f34,%f32		! (2_1) dmax ? y
352*25c28e83SPiotr Jasiukajtis
353*25c28e83SPiotr Jasiukajtis
354*25c28e83SPiotr Jasiukajtis	fmovdg	%fcc3,%f34,%f32		! (2_1) if ( dmax < y ) dmax = y;
355*25c28e83SPiotr Jasiukajtis
356*25c28e83SPiotr Jasiukajtis	faddd	%f36,D2ON28,%f58	! (1_1) x_hi = x + D2ON28;
357*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f20,%g5	! (0_0) c0 = vis_fcmple32(DC1,x);
358*25c28e83SPiotr Jasiukajtis
359*25c28e83SPiotr Jasiukajtis	faddd	%f56,D2ON28,%f22	! (1_1) y_hi = y + D2ON28;
360*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f40,%o2	! (0_0) c2 = vis_fcmple32(DC1,y);
361*25c28e83SPiotr Jasiukajtis
362*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f20,%g1	! (0_0) c1 = vis_fcmpgt32(DC2,x);
363*25c28e83SPiotr Jasiukajtis
364*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f40,%o4	! (0_0) c3 = vis_fcmpgt32(DC2,y);
365*25c28e83SPiotr Jasiukajtis
366*25c28e83SPiotr Jasiukajtis	fand	%f32,DC0,%f52		! (2_1) dmax = vis_fand(dmax,DC0);
367*25c28e83SPiotr Jasiukajtis
368*25c28e83SPiotr Jasiukajtis	or	%g5,%o2,%g5		! (0_0) c0 |= c2;
369*25c28e83SPiotr Jasiukajtis	fsubd	%f58,D2ON28,%f58	! (1_1) x_hi -= D2ON28;
370*25c28e83SPiotr Jasiukajtis
371*25c28e83SPiotr Jasiukajtis	andcc	%g5,2,%g0		! (0_0) c0 & 2
372*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update4		! (0_0) if ( (c0 & 2) != 0 )
373*25c28e83SPiotr Jasiukajtis	fsubd	%f22,D2ON28,%f22	! (1_1) y_hi -= D2ON28;
374*25c28e83SPiotr Jasiukajtis.cont4:
375*25c28e83SPiotr Jasiukajtis	and	%g1,%o4,%g1		! (0_0) c1 &= c3;
376*25c28e83SPiotr Jasiukajtis
377*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%l2		! py += stridey
378*25c28e83SPiotr Jasiukajtis	andcc	%g1,2,%g0		! (0_0) c1 & 2
379*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update5		! (0_0) if ( (c1 & 2) != 0 )
380*25c28e83SPiotr Jasiukajtis	fmovd	%f18,%f44		! (3_1) dmax = x;
381*25c28e83SPiotr Jasiukajtis.cont5:
382*25c28e83SPiotr Jasiukajtis	fpsub32	DC1,%f52,%f10		! (2_1) dnorm = vis_fpsub32(DC1,dmax);
383*25c28e83SPiotr Jasiukajtis	lda	[%l2]%asi,%f24		! (1_0) ((float*)&y)[0] = ((float*)py)[0];
384*25c28e83SPiotr Jasiukajtis
385*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f58,%f60		! (1_1) res = x_hi * x_hi;
386*25c28e83SPiotr Jasiukajtis	lda	[%l2+4]%asi,%f25	! (1_0) ((float*)&y)[1] = ((float*)py)[1];
387*25c28e83SPiotr Jasiukajtis	add	%l1,stridex,%l7		! px += stridex
388*25c28e83SPiotr Jasiukajtis	faddd	%f56,%f22,%f28		! (1_1) dtmp2 = y + y_hi;
389*25c28e83SPiotr Jasiukajtis
390*25c28e83SPiotr Jasiukajtis	faddd	%f36,%f58,%f6		! (1_1) dtmp1 = x + x_hi;
391*25c28e83SPiotr Jasiukajtis	lda	[%l7]%asi,%f26		! (1_0) ((float*)&x)[0] = ((float*)px)[0];
392*25c28e83SPiotr Jasiukajtis
393*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f10,%f50		! (2_1) x *= dnorm;
394*25c28e83SPiotr Jasiukajtis	fsubd	%f36,%f58,%f58		! (1_1) x_lo = x - x_hi;
395*25c28e83SPiotr Jasiukajtis	lda	[%l7+4]%asi,%f27	! (1_0) ((float*)&x)[1] = ((float*)px)[1];
396*25c28e83SPiotr Jasiukajtis
397*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f22,%f2		! (1_1) dtmp0 = y_hi * y_hi;
398*25c28e83SPiotr Jasiukajtis	fsubd	%f56,%f22,%f56		! (1_1) y_lo = y - y_hi;
399*25c28e83SPiotr Jasiukajtis
400*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f10,%f34		! (2_1) y *= dnorm;
401*25c28e83SPiotr Jasiukajtis	fabsd	%f24,%f54		! (1_0) y = fabs(y);
402*25c28e83SPiotr Jasiukajtis
403*25c28e83SPiotr Jasiukajtis	fabsd	%f26,%f36		! (1_0) x = fabs(x);
404*25c28e83SPiotr Jasiukajtis
405*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f58,%f10		! (1_1) dtmp1 *= x_lo;
406*25c28e83SPiotr Jasiukajtis	fcmped	%fcc0,%f30,%f44		! (3_1) dmax ? y
407*25c28e83SPiotr Jasiukajtis
408*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f56,%f26		! (1_1) dtmp2 *= y_lo;
409*25c28e83SPiotr Jasiukajtis
410*25c28e83SPiotr Jasiukajtis	fmovdg	%fcc0,%f30,%f44		! (3_1) if ( dmax < y ) dmax = y;
411*25c28e83SPiotr Jasiukajtis
412*25c28e83SPiotr Jasiukajtis	faddd	%f50,D2ON28,%f58	! (2_1) x_hi = x + D2ON28;
413*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f36,%g1	! (1_0) c0 = vis_fcmple32(DC1,x);
414*25c28e83SPiotr Jasiukajtis
415*25c28e83SPiotr Jasiukajtis	faddd	%f34,D2ON28,%f22	! (2_1) y_hi = y + D2ON28;
416*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f54,%g5	! (1_0) c2 = vis_fcmple32(DC1,y);
417*25c28e83SPiotr Jasiukajtis
418*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f2,%f24		! (1_1) res += dtmp0;
419*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f36,%o5	! (1_0) c1 = vis_fcmpgt32(DC2,x);
420*25c28e83SPiotr Jasiukajtis
421*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f26,%f28		! (1_1) dtmp1 += dtmp2;
422*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f54,%o1	! (1_0) c3 = vis_fcmpgt32(DC2,y);
423*25c28e83SPiotr Jasiukajtis
424*25c28e83SPiotr Jasiukajtis	fand	%f44,DC0,%f14		! (3_1) dmax = vis_fand(dmax,DC0);
425*25c28e83SPiotr Jasiukajtis
426*25c28e83SPiotr Jasiukajtis	or	%g1,%g5,%g1		! (1_0) c0 |= c2;
427*25c28e83SPiotr Jasiukajtis	fsubd	%f58,D2ON28,%f44	! (2_1) x_hi -= D2ON28;
428*25c28e83SPiotr Jasiukajtis
429*25c28e83SPiotr Jasiukajtis	andcc	%g1,2,%g0		! (1_0) c0 & 2
430*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update6		! (1_0) if ( (c0 & 2) != 0 )
431*25c28e83SPiotr Jasiukajtis	fsubd	%f22,D2ON28,%f58	! (2_1) y_hi -= D2ON28;
432*25c28e83SPiotr Jasiukajtis.cont6:
433*25c28e83SPiotr Jasiukajtis	and	%o5,%o1,%o5		! (1_0) c1 &= c3;
434*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f28,%f26		! (1_1) res += dtmp1;
435*25c28e83SPiotr Jasiukajtis
436*25c28e83SPiotr Jasiukajtis	add	%l2,stridey,%i3		! py += stridey
437*25c28e83SPiotr Jasiukajtis	andcc	%o5,2,%g0		! (1_0) c1 & 2
438*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update7		! (1_0) if ( (c1 & 2) != 0 )
439*25c28e83SPiotr Jasiukajtis	fmovd	%f20,%f4		! (0_0) dmax = x;
440*25c28e83SPiotr Jasiukajtis.cont7:
441*25c28e83SPiotr Jasiukajtis	fpsub32	DC1,%f14,%f10		! (3_1) dnorm = vis_fpsub32(DC1,dmax);
442*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f28		! (2_0) ((float*)&y)[0] = ((float*)py)[0];
443*25c28e83SPiotr Jasiukajtis
444*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f44,%f2		! (2_1) res = x_hi * x_hi;
445*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f29	! (2_0) ((float*)&y)[1] = ((float*)py)[1];
446*25c28e83SPiotr Jasiukajtis	add	%l7,stridex,%o1		! px += stridex
447*25c28e83SPiotr Jasiukajtis	faddd	%f34,%f58,%f60		! (2_1) dtmp2 = y + y_hi;
448*25c28e83SPiotr Jasiukajtis
449*25c28e83SPiotr Jasiukajtis	fsqrtd	%f26,%f24		! (1_1) res = sqrt(res);
450*25c28e83SPiotr Jasiukajtis	lda	[%o1]%asi,%f26		! (2_0) ((float*)&x)[0] = ((float*)px)[0];
451*25c28e83SPiotr Jasiukajtis	faddd	%f50,%f44,%f56		! (2_1) dtmp1 = x + x_hi;
452*25c28e83SPiotr Jasiukajtis
453*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f10,%f6		! (3_1) x *= dnorm;
454*25c28e83SPiotr Jasiukajtis	fsubd	%f50,%f44,%f18		! (2_1) x_lo = x - x_hi;
455*25c28e83SPiotr Jasiukajtis	lda	[%o1+4]%asi,%f27	! (2_0) ((float*)&x)[1] = ((float*)px)[1];
456*25c28e83SPiotr Jasiukajtis
457*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f58,%f44		! (2_1) dtmp0 = y_hi * y_hi;
458*25c28e83SPiotr Jasiukajtis	fsubd	%f34,%f58,%f22		! (2_1) y_lo = y - y_hi;
459*25c28e83SPiotr Jasiukajtis
460*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f10,%f58		! (3_1) y *= dnorm;
461*25c28e83SPiotr Jasiukajtis	fabsd	%f28,%f34		! (2_0) y = fabs(y);
462*25c28e83SPiotr Jasiukajtis
463*25c28e83SPiotr Jasiukajtis	fabsd	%f26,%f50		! (2_0) x = fabs(x);
464*25c28e83SPiotr Jasiukajtis
465*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f18,%f10		! (2_1) dtmp1 *= x_lo;
466*25c28e83SPiotr Jasiukajtis	fcmped	%fcc1,%f40,%f4		! (0_0) dmax ? y
467*25c28e83SPiotr Jasiukajtis
468*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f22,%f12		! (2_1) dtmp2 *= y_lo;
469*25c28e83SPiotr Jasiukajtis
470*25c28e83SPiotr Jasiukajtis	fmovdg	%fcc1,%f40,%f4		! (0_0) if ( dmax < y ) dmax = y;
471*25c28e83SPiotr Jasiukajtis
472*25c28e83SPiotr Jasiukajtis	faddd	%f6,D2ON28,%f56		! (3_1) x_hi = x + D2ON28;
473*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f50,%o3	! (2_0) c0 = vis_fcmple32(DC1,x);
474*25c28e83SPiotr Jasiukajtis
475*25c28e83SPiotr Jasiukajtis	faddd	%f58,D2ON28,%f28	! (3_1) y_hi = y + D2ON28;
476*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f34,%o0	! (2_0) c2 = vis_fcmple32(DC1,y);
477*25c28e83SPiotr Jasiukajtis
478*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f44,%f30		! (2_1) res += dtmp0;
479*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f50,%o4	! (2_0) c1 = vis_fcmpgt32(DC2,x);
480*25c28e83SPiotr Jasiukajtis
481*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f12,%f26		! (2_1) dtmp1 += dtmp2;
482*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f34,%o5	! (2_0) c3 = vis_fcmpgt32(DC2,y);
483*25c28e83SPiotr Jasiukajtis
484*25c28e83SPiotr Jasiukajtis	fand	%f4,DC0,%f16		! (0_0) dmax = vis_fand(dmax,DC0);
485*25c28e83SPiotr Jasiukajtis
486*25c28e83SPiotr Jasiukajtis	or	%o3,%o0,%o3		! (2_0) c0 |= c2;
487*25c28e83SPiotr Jasiukajtis	fsubd	%f56,D2ON28,%f18	! (3_1) x_hi -= D2ON28;
488*25c28e83SPiotr Jasiukajtis
489*25c28e83SPiotr Jasiukajtis	andcc	%o3,2,%g0		! (2_0) c0 & 2
490*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update8		! (2_0) if ( (c0 & 2) != 0 )
491*25c28e83SPiotr Jasiukajtis	fsubd	%f28,D2ON28,%f4		! (3_1) y_hi -= D2ON28;
492*25c28e83SPiotr Jasiukajtis.cont8:
493*25c28e83SPiotr Jasiukajtis	and	%o4,%o5,%o4		! (2_0) c1 &= c3;
494*25c28e83SPiotr Jasiukajtis	faddd	%f30,%f26,%f12		! (2_1) res += dtmp1;
495*25c28e83SPiotr Jasiukajtis
496*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%l4		! py += stridey
497*25c28e83SPiotr Jasiukajtis	andcc	%o4,2,%g0		! (2_0) c1 & 2
498*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update9		! (2_0) if ( (c1 & 2) != 0 )
499*25c28e83SPiotr Jasiukajtis	fmovd	%f36,%f56		! (1_0) dmax = x;
500*25c28e83SPiotr Jasiukajtis.cont9:
501*25c28e83SPiotr Jasiukajtis	lda	[%l4]%asi,%f30		! (3_0) ((float*)&y)[0] = ((float*)py)[0];
502*25c28e83SPiotr Jasiukajtis	add	%o1,stridex,%l2		! px += stridex
503*25c28e83SPiotr Jasiukajtis	fpsub32	DC1,%f16,%f44		! (0_0) dnorm = vis_fpsub32(DC1,dmax);
504*25c28e83SPiotr Jasiukajtis
505*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f18,%f60		! (3_1) res = x_hi * x_hi;
506*25c28e83SPiotr Jasiukajtis	lda	[%l4+4]%asi,%f31	! (3_0) ((float*)&y)[1] = ((float*)py)[1];
507*25c28e83SPiotr Jasiukajtis	faddd	%f58,%f4,%f32		! (3_1) dtmp2 = y + y_hi;
508*25c28e83SPiotr Jasiukajtis
509*25c28e83SPiotr Jasiukajtis	fsqrtd	%f12,%f12		! (2_1) res = sqrt(res);
510*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f18,%f28		! (3_1) dtmp1 = x + x_hi;
511*25c28e83SPiotr Jasiukajtis
512*25c28e83SPiotr Jasiukajtis	cmp	counter,4
513*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.tail
514*25c28e83SPiotr Jasiukajtis	nop
515*25c28e83SPiotr Jasiukajtis
516*25c28e83SPiotr Jasiukajtis	ba	.main_loop
517*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
518*25c28e83SPiotr Jasiukajtis
519*25c28e83SPiotr Jasiukajtis	.align	16
520*25c28e83SPiotr Jasiukajtis.main_loop:
521*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f44,%f2		! (0_1) x *= dnorm;
522*25c28e83SPiotr Jasiukajtis	fsubd	%f6,%f18,%f20		! (3_2) x_lo = x - x_hi;
523*25c28e83SPiotr Jasiukajtis	lda	[%l2]%asi,%f18		! (3_1) ((float*)&x)[0] = ((float*)px)[0];
524*25c28e83SPiotr Jasiukajtis
525*25c28e83SPiotr Jasiukajtis	fmuld	%f4,%f4,%f22		! (3_2) dtmp0 = y_hi * y_hi;
526*25c28e83SPiotr Jasiukajtis	lda	[%l2+4]%asi,%f19	! (3_1) ((float*)&x)[1] = ((float*)px)[1];
527*25c28e83SPiotr Jasiukajtis	fsubd	%f58,%f4,%f58		! (3_2) y_lo = y - y_hi;
528*25c28e83SPiotr Jasiukajtis
529*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f44,%f44		! (0_1) y *= dnorm;
530*25c28e83SPiotr Jasiukajtis	fabsd	%f30,%f30		! (3_1) y = fabs(y);
531*25c28e83SPiotr Jasiukajtis
532*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f24,%f10		! (1_2) res = dmax * res;
533*25c28e83SPiotr Jasiukajtis	fabsd	%f18,%f18		! (3_1) x = fabs(x);
534*25c28e83SPiotr Jasiukajtis	st	%f10,[%i5]		! (1_2) ((float*)pz)[0] = ((float*)&res)[0];
535*25c28e83SPiotr Jasiukajtis
536*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f20,%f28		! (3_2) dtmp1 *= x_lo;
537*25c28e83SPiotr Jasiukajtis	st	%f11,[%i5+4]		! (1_2) ((float*)pz)[1] = ((float*)&res)[1];
538*25c28e83SPiotr Jasiukajtis	fcmped	%fcc2,%f54,%f56		! (1_1) dmax ? y
539*25c28e83SPiotr Jasiukajtis
540*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f58,%f24		! (3_2) dtmp2 *= y_lo;
541*25c28e83SPiotr Jasiukajtis
542*25c28e83SPiotr Jasiukajtis	fmovdg	%fcc2,%f54,%f56		! (1_1) if ( dmax < y ) dmax = y;
543*25c28e83SPiotr Jasiukajtis
544*25c28e83SPiotr Jasiukajtis	faddd	%f2,D2ON28,%f10		! (0_1) x_hi = x + D2ON28;
545*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f18,%o3	! (3_1) c0 = vis_fcmple32(DC1,x);
546*25c28e83SPiotr Jasiukajtis
547*25c28e83SPiotr Jasiukajtis	faddd	%f44,D2ON28,%f20	! (0_1) y_hi = y + D2ON28;
548*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f30,%o0	! (3_1) c2 = vis_fcmple32(DC1,y);
549*25c28e83SPiotr Jasiukajtis
550*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f22,%f22		! (3_2) res += dtmp0;
551*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f18,%o4	! (3_1) c1 = vis_fcmpgt32(DC2,x);
552*25c28e83SPiotr Jasiukajtis
553*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f24,%f26		! (3_2) dtmp1 += dtmp2;
554*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f30,%o1	! (3_1) c3 = vis_fcmpgt32(DC2,y);
555*25c28e83SPiotr Jasiukajtis
556*25c28e83SPiotr Jasiukajtis	fand	%f56,DC0,%f38		! (1_1) dmax = vis_fand(dmax,DC0);
557*25c28e83SPiotr Jasiukajtis
558*25c28e83SPiotr Jasiukajtis	or	%o3,%o0,%o3		! (3_1) c0 |= c2;
559*25c28e83SPiotr Jasiukajtis	fsubd	%f10,D2ON28,%f58	! (0_1) x_hi -= D2ON28;
560*25c28e83SPiotr Jasiukajtis
561*25c28e83SPiotr Jasiukajtis	andcc	%o3,2,%g0		! (3_1) c0 & 2
562*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update10		! (3_1) if ( (c0 & 2) != 0 )
563*25c28e83SPiotr Jasiukajtis	fsubd	%f20,D2ON28,%f56	! (0_1) y_hi -= D2ON28;
564*25c28e83SPiotr Jasiukajtis.cont10:
565*25c28e83SPiotr Jasiukajtis	faddd	%f22,%f26,%f28		! (3_2) res += dtmp1;
566*25c28e83SPiotr Jasiukajtis	and	%o4,%o1,%o4		! (3_1) c1 &= c3;
567*25c28e83SPiotr Jasiukajtis
568*25c28e83SPiotr Jasiukajtis	add	%l4,stridey,%i3		! py += stridey
569*25c28e83SPiotr Jasiukajtis	andcc	%o4,2,%g0		! (3_1) c1 & 2
570*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update11		! (3_1) if ( (c1 & 2) != 0 )
571*25c28e83SPiotr Jasiukajtis	fmovd	%f50,%f32		! (2_1) dmax = x;
572*25c28e83SPiotr Jasiukajtis.cont11:
573*25c28e83SPiotr Jasiukajtis	fpsub32	DC1,%f38,%f10		! (1_1) dnorm = vis_fpsub32(DC1,dmax);
574*25c28e83SPiotr Jasiukajtis	add	%l2,stridex,%l1		! px += stridex
575*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f20		! (0_0) ((float*)&y)[0] = ((float*)py)[0];
576*25c28e83SPiotr Jasiukajtis
577*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f58,%f6		! (0_1) res = x_hi * x_hi;
578*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f21	! (0_0) ((float*)&y)[1] = ((float*)py)[1];
579*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%l6		! pz += stridez
580*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f56,%f60		! (0_1) dtmp2 = y + y_hi;
581*25c28e83SPiotr Jasiukajtis
582*25c28e83SPiotr Jasiukajtis	fsqrtd	%f28,%f4		! (3_2) res = sqrt(res);
583*25c28e83SPiotr Jasiukajtis	lda	[%l1]%asi,%f22		! (0_0) ((float*)&x)[0] = ((float*)px)[0];
584*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f58,%f24		! (0_1) dtmp1 = x + x_hi;
585*25c28e83SPiotr Jasiukajtis
586*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f10,%f36		! (1_1) x *= dnorm;
587*25c28e83SPiotr Jasiukajtis	fsubd	%f2,%f58,%f26		! (0_1) x_lo = x - x_hi;
588*25c28e83SPiotr Jasiukajtis	lda	[%l1+4]%asi,%f23	! (0_0) ((float*)&x)[1] = ((float*)px)[1];
589*25c28e83SPiotr Jasiukajtis
590*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f56,%f28		! (0_1) dtmp0 = y_hi * y_hi;
591*25c28e83SPiotr Jasiukajtis	fsubd	%f44,%f56,%f44		! (0_1) y_lo = y - y_hi;
592*25c28e83SPiotr Jasiukajtis
593*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f10,%f56		! (1_1) y *= dnorm;
594*25c28e83SPiotr Jasiukajtis	fabsd	%f20,%f40		! (0_0) y = fabs(y);
595*25c28e83SPiotr Jasiukajtis
596*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f12,%f12		! (2_2) res = dmax * res;
597*25c28e83SPiotr Jasiukajtis	fabsd	%f22,%f20		! (0_0) x = fabs(x);
598*25c28e83SPiotr Jasiukajtis	st	%f12,[%l6]		! (2_2) ((float*)pz)[0] = ((float*)&res)[0];
599*25c28e83SPiotr Jasiukajtis
600*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f26,%f10		! (0_1) dtmp1 *= x_lo;
601*25c28e83SPiotr Jasiukajtis	st	%f13,[%l6+4]		! (2_2) ((float*)pz)[1] = ((float*)&res)[1];
602*25c28e83SPiotr Jasiukajtis	fcmped	%fcc3,%f34,%f32		! (2_1) dmax ? y
603*25c28e83SPiotr Jasiukajtis
604*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f44,%f12		! (0_1) dtmp2 *= y_lo;
605*25c28e83SPiotr Jasiukajtis
606*25c28e83SPiotr Jasiukajtis	fmovdg	%fcc3,%f34,%f32		! (2_1) if ( dmax < y ) dmax = y;
607*25c28e83SPiotr Jasiukajtis
608*25c28e83SPiotr Jasiukajtis	faddd	%f36,D2ON28,%f58	! (1_1) x_hi = x + D2ON28;
609*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f20,%g5	! (0_0) c0 = vis_fcmple32(DC1,x);
610*25c28e83SPiotr Jasiukajtis
611*25c28e83SPiotr Jasiukajtis	faddd	%f56,D2ON28,%f22	! (1_1) y_hi = y + D2ON28;
612*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f40,%o2	! (0_0) c2 = vis_fcmple32(DC1,y);
613*25c28e83SPiotr Jasiukajtis
614*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f28,%f24		! (0_1) res += dtmp0;
615*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f20,%g1	! (0_0) c1 = vis_fcmpgt32(DC2,x);
616*25c28e83SPiotr Jasiukajtis
617*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f12,%f26		! (0_1) dtmp1 += dtmp2;
618*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f40,%o4	! (0_0) c3 = vis_fcmpgt32(DC2,y);
619*25c28e83SPiotr Jasiukajtis
620*25c28e83SPiotr Jasiukajtis	fand	%f32,DC0,%f52		! (2_1) dmax = vis_fand(dmax,DC0);
621*25c28e83SPiotr Jasiukajtis
622*25c28e83SPiotr Jasiukajtis	or	%g5,%o2,%g5		! (0_0) c0 |= c2;
623*25c28e83SPiotr Jasiukajtis	fsubd	%f58,D2ON28,%f58	! (1_1) x_hi -= D2ON28;
624*25c28e83SPiotr Jasiukajtis
625*25c28e83SPiotr Jasiukajtis	andcc	%g5,2,%g0		! (0_0) c0 & 2
626*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update12		! (0_0) if ( (c0 & 2) != 0 )
627*25c28e83SPiotr Jasiukajtis	fsubd	%f22,D2ON28,%f22	! (1_1) y_hi -= D2ON28;
628*25c28e83SPiotr Jasiukajtis.cont12:
629*25c28e83SPiotr Jasiukajtis	and	%g1,%o4,%g1		! (0_0) c1 &= c3;
630*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f26,%f12		! (0_1) res += dtmp1;
631*25c28e83SPiotr Jasiukajtis
632*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%l2		! py += stridey
633*25c28e83SPiotr Jasiukajtis	andcc	%g1,2,%g0		! (0_0) c1 & 2
634*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update13		! (0_0) if ( (c1 & 2) != 0 )
635*25c28e83SPiotr Jasiukajtis	fmovd	%f18,%f44		! (3_1) dmax = x;
636*25c28e83SPiotr Jasiukajtis.cont13:
637*25c28e83SPiotr Jasiukajtis	fpsub32	DC1,%f52,%f10		! (2_1) dnorm = vis_fpsub32(DC1,dmax);
638*25c28e83SPiotr Jasiukajtis	add	%l1,stridex,%l7		! px += stridex
639*25c28e83SPiotr Jasiukajtis	lda	[%l2]%asi,%f24		! (1_0) ((float*)&y)[0] = ((float*)py)[0];
640*25c28e83SPiotr Jasiukajtis
641*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f58,%f60		! (1_1) res = x_hi * x_hi;
642*25c28e83SPiotr Jasiukajtis	add	%l6,stridez,%i5		! pz += stridez
643*25c28e83SPiotr Jasiukajtis	lda	[%l2+4]%asi,%f25	! (1_0) ((float*)&y)[1] = ((float*)py)[1];
644*25c28e83SPiotr Jasiukajtis	faddd	%f56,%f22,%f28		! (1_1) dtmp2 = y + y_hi;
645*25c28e83SPiotr Jasiukajtis
646*25c28e83SPiotr Jasiukajtis	fsqrtd	%f12,%f12		! (0_1) res = sqrt(res);
647*25c28e83SPiotr Jasiukajtis	lda	[%l7]%asi,%f26		! (1_0) ((float*)&x)[0] = ((float*)px)[0];
648*25c28e83SPiotr Jasiukajtis	faddd	%f36,%f58,%f6		! (1_1) dtmp1 = x + x_hi;
649*25c28e83SPiotr Jasiukajtis
650*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f10,%f50		! (2_1) x *= dnorm;
651*25c28e83SPiotr Jasiukajtis	fsubd	%f36,%f58,%f58		! (1_1) x_lo = x - x_hi;
652*25c28e83SPiotr Jasiukajtis	lda	[%l7+4]%asi,%f27	! (1_0) ((float*)&x)[1] = ((float*)px)[1];
653*25c28e83SPiotr Jasiukajtis
654*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f22,%f2		! (1_1) dtmp0 = y_hi * y_hi;
655*25c28e83SPiotr Jasiukajtis	fsubd	%f56,%f22,%f56		! (1_1) y_lo = y - y_hi;
656*25c28e83SPiotr Jasiukajtis
657*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f10,%f34		! (2_1) y *= dnorm;
658*25c28e83SPiotr Jasiukajtis	fabsd	%f24,%f54		! (1_0) y = fabs(y);
659*25c28e83SPiotr Jasiukajtis
660*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f4,%f14		! (3_2) res = dmax * res;
661*25c28e83SPiotr Jasiukajtis	fabsd	%f26,%f36		! (1_0) x = fabs(x);
662*25c28e83SPiotr Jasiukajtis	st	%f14,[%i5]		! (3_2) ((float*)pz)[0] = ((float*)&res)[0];
663*25c28e83SPiotr Jasiukajtis
664*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f58,%f10		! (1_1) dtmp1 *= x_lo;
665*25c28e83SPiotr Jasiukajtis	st	%f15,[%i5+4]		! (3_2) ((float*)pz)[1] = ((float*)&res)[1];
666*25c28e83SPiotr Jasiukajtis	fcmped	%fcc0,%f30,%f44		! (3_1) dmax ? y
667*25c28e83SPiotr Jasiukajtis
668*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f56,%f26		! (1_1) dtmp2 *= y_lo;
669*25c28e83SPiotr Jasiukajtis
670*25c28e83SPiotr Jasiukajtis	fmovdg	%fcc0,%f30,%f44		! (3_1) if ( dmax < y ) dmax = y;
671*25c28e83SPiotr Jasiukajtis
672*25c28e83SPiotr Jasiukajtis	faddd	%f50,D2ON28,%f58	! (2_1) x_hi = x + D2ON28;
673*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f36,%g1	! (1_0) c0 = vis_fcmple32(DC1,x);
674*25c28e83SPiotr Jasiukajtis
675*25c28e83SPiotr Jasiukajtis	faddd	%f34,D2ON28,%f22	! (2_1) y_hi = y + D2ON28;
676*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f54,%g5	! (1_0) c2 = vis_fcmple32(DC1,y);
677*25c28e83SPiotr Jasiukajtis
678*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f2,%f24		! (1_1) res += dtmp0;
679*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f36,%o5	! (1_0) c1 = vis_fcmpgt32(DC2,x);
680*25c28e83SPiotr Jasiukajtis
681*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f26,%f28		! (1_1) dtmp1 += dtmp2;
682*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f54,%o1	! (1_0) c3 = vis_fcmpgt32(DC2,y);
683*25c28e83SPiotr Jasiukajtis
684*25c28e83SPiotr Jasiukajtis	fand	%f44,DC0,%f14		! (3_1) dmax = vis_fand(dmax,DC0);
685*25c28e83SPiotr Jasiukajtis
686*25c28e83SPiotr Jasiukajtis	or	%g1,%g5,%g1		! (1_0) c0 |= c2;
687*25c28e83SPiotr Jasiukajtis	fsubd	%f58,D2ON28,%f44	! (2_1) x_hi -= D2ON28;
688*25c28e83SPiotr Jasiukajtis
689*25c28e83SPiotr Jasiukajtis	andcc	%g1,2,%g0		! (1_0) c0 & 2
690*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update14		! (1_0) if ( (c0 & 2) != 0 )
691*25c28e83SPiotr Jasiukajtis	fsubd	%f22,D2ON28,%f58	! (2_1) y_hi -= D2ON28;
692*25c28e83SPiotr Jasiukajtis.cont14:
693*25c28e83SPiotr Jasiukajtis	and	%o5,%o1,%o5		! (1_0) c1 &= c3;
694*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f28,%f26		! (1_1) res += dtmp1;
695*25c28e83SPiotr Jasiukajtis
696*25c28e83SPiotr Jasiukajtis	add	%l2,stridey,%i3		! py += stridey
697*25c28e83SPiotr Jasiukajtis	andcc	%o5,2,%g0		! (1_0) c1 & 2
698*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update15		! (1_0) if ( (c1 & 2) != 0 )
699*25c28e83SPiotr Jasiukajtis	fmovd	%f20,%f4		! (0_0) dmax = x;
700*25c28e83SPiotr Jasiukajtis.cont15:
701*25c28e83SPiotr Jasiukajtis	fpsub32	DC1,%f14,%f10		! (3_1) dnorm = vis_fpsub32(DC1,dmax);
702*25c28e83SPiotr Jasiukajtis	add	%l7,stridex,%o1		! px += stridex
703*25c28e83SPiotr Jasiukajtis	lda	[%i3]%asi,%f28		! (2_0) ((float*)&y)[0] = ((float*)py)[0];
704*25c28e83SPiotr Jasiukajtis
705*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f44,%f2		! (2_1) res = x_hi * x_hi;
706*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%g5		! pz += stridez
707*25c28e83SPiotr Jasiukajtis	lda	[%i3+4]%asi,%f29	! (2_0) ((float*)&y)[1] = ((float*)py)[1];
708*25c28e83SPiotr Jasiukajtis	faddd	%f34,%f58,%f60		! (2_1) dtmp2 = y + y_hi;
709*25c28e83SPiotr Jasiukajtis
710*25c28e83SPiotr Jasiukajtis	fsqrtd	%f26,%f24		! (1_1) res = sqrt(res);
711*25c28e83SPiotr Jasiukajtis	lda	[%o1]%asi,%f26		! (2_0) ((float*)&x)[0] = ((float*)px)[0];
712*25c28e83SPiotr Jasiukajtis	faddd	%f50,%f44,%f56		! (2_1) dtmp1 = x + x_hi;
713*25c28e83SPiotr Jasiukajtis
714*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f10,%f6		! (3_1) x *= dnorm;
715*25c28e83SPiotr Jasiukajtis	fsubd	%f50,%f44,%f18		! (2_1) x_lo = x - x_hi;
716*25c28e83SPiotr Jasiukajtis	lda	[%o1+4]%asi,%f27	! (2_0) ((float*)&x)[1] = ((float*)px)[1];
717*25c28e83SPiotr Jasiukajtis
718*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f58,%f44		! (2_1) dtmp0 = y_hi * y_hi;
719*25c28e83SPiotr Jasiukajtis	fsubd	%f34,%f58,%f22		! (2_1) y_lo = y - y_hi;
720*25c28e83SPiotr Jasiukajtis
721*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f10,%f58		! (3_1) y *= dnorm;
722*25c28e83SPiotr Jasiukajtis	fabsd	%f28,%f34		! (2_0) y = fabs(y);
723*25c28e83SPiotr Jasiukajtis
724*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f12,%f16		! (0_1) res = dmax * res;
725*25c28e83SPiotr Jasiukajtis	fabsd	%f26,%f50		! (2_0) x = fabs(x);
726*25c28e83SPiotr Jasiukajtis	st	%f16,[%g5]		! (0_1) ((float*)pz)[0] = ((float*)&res)[0];
727*25c28e83SPiotr Jasiukajtis
728*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f18,%f10		! (2_1) dtmp1 *= x_lo;
729*25c28e83SPiotr Jasiukajtis	st	%f17,[%g5+4]		! (0_1) ((float*)pz)[1] = ((float*)&res)[1];
730*25c28e83SPiotr Jasiukajtis	fcmped	%fcc1,%f40,%f4		! (0_0) dmax ? y
731*25c28e83SPiotr Jasiukajtis
732*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f22,%f12		! (2_1) dtmp2 *= y_lo;
733*25c28e83SPiotr Jasiukajtis
734*25c28e83SPiotr Jasiukajtis	fmovdg	%fcc1,%f40,%f4		! (0_0) if ( dmax < y ) dmax = y;
735*25c28e83SPiotr Jasiukajtis
736*25c28e83SPiotr Jasiukajtis	faddd	%f6,D2ON28,%f56		! (3_1) x_hi = x + D2ON28;
737*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f50,%o3	! (2_0) c0 = vis_fcmple32(DC1,x);
738*25c28e83SPiotr Jasiukajtis
739*25c28e83SPiotr Jasiukajtis	faddd	%f58,D2ON28,%f28	! (3_1) y_hi = y + D2ON28;
740*25c28e83SPiotr Jasiukajtis	fcmple32	DC1,%f34,%o0	! (2_0) c2 = vis_fcmple32(DC1,y);
741*25c28e83SPiotr Jasiukajtis
742*25c28e83SPiotr Jasiukajtis	faddd	%f2,%f44,%f30		! (2_1) res += dtmp0;
743*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f50,%o4	! (2_0) c1 = vis_fcmpgt32(DC2,x);
744*25c28e83SPiotr Jasiukajtis
745*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f12,%f26		! (2_1) dtmp1 += dtmp2;
746*25c28e83SPiotr Jasiukajtis	fcmpgt32	DC2,%f34,%o5	! (2_0) c3 = vis_fcmpgt32(DC2,y);
747*25c28e83SPiotr Jasiukajtis
748*25c28e83SPiotr Jasiukajtis	fand	%f4,DC0,%f16		! (0_0) dmax = vis_fand(dmax,DC0);
749*25c28e83SPiotr Jasiukajtis
750*25c28e83SPiotr Jasiukajtis	or	%o3,%o0,%o3		! (2_0) c0 |= c2;
751*25c28e83SPiotr Jasiukajtis	fsubd	%f56,D2ON28,%f18	! (3_1) x_hi -= D2ON28;
752*25c28e83SPiotr Jasiukajtis
753*25c28e83SPiotr Jasiukajtis	andcc	%o3,2,%g0		! (2_0) c0 & 2
754*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update16		! (2_0) if ( (c0 & 2) != 0 )
755*25c28e83SPiotr Jasiukajtis	fsubd	%f28,D2ON28,%f4		! (3_1) y_hi -= D2ON28;
756*25c28e83SPiotr Jasiukajtis.cont16:
757*25c28e83SPiotr Jasiukajtis	and	%o4,%o5,%o4		! (2_0) c1 &= c3;
758*25c28e83SPiotr Jasiukajtis	faddd	%f30,%f26,%f12		! (2_1) res += dtmp1;
759*25c28e83SPiotr Jasiukajtis
760*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%l4		! py += stridey
761*25c28e83SPiotr Jasiukajtis	andcc	%o4,2,%g0		! (2_0) c1 & 2
762*25c28e83SPiotr Jasiukajtis	bnz,pn	%icc,.update17		! (2_0) if ( (c1 & 2) != 0 )
763*25c28e83SPiotr Jasiukajtis	fmovd	%f36,%f56		! (1_0) dmax = x;
764*25c28e83SPiotr Jasiukajtis.cont17:
765*25c28e83SPiotr Jasiukajtis	lda	[%l4]%asi,%f30		! (3_0) ((float*)&y)[0] = ((float*)py)[0];
766*25c28e83SPiotr Jasiukajtis	add	%o1,stridex,%l2		! px += stridex
767*25c28e83SPiotr Jasiukajtis	fpsub32	DC1,%f16,%f44		! (0_0) dnorm = vis_fpsub32(DC1,dmax);
768*25c28e83SPiotr Jasiukajtis
769*25c28e83SPiotr Jasiukajtis	fmuld	%f18,%f18,%f60		! (3_1) res = x_hi * x_hi;
770*25c28e83SPiotr Jasiukajtis	add	%g5,stridez,%i5		! pz += stridez
771*25c28e83SPiotr Jasiukajtis	lda	[%l4+4]%asi,%f31	! (3_0) ((float*)&y)[1] = ((float*)py)[1];
772*25c28e83SPiotr Jasiukajtis	faddd	%f58,%f4,%f32		! (3_1) dtmp2 = y + y_hi;
773*25c28e83SPiotr Jasiukajtis
774*25c28e83SPiotr Jasiukajtis	fsqrtd	%f12,%f12		! (2_1) res = sqrt(res);
775*25c28e83SPiotr Jasiukajtis	subcc	counter,4,counter	! counter -= 4;
776*25c28e83SPiotr Jasiukajtis	bpos,pt	%icc,.main_loop
777*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f18,%f28		! (3_1) dtmp1 = x + x_hi;
778*25c28e83SPiotr Jasiukajtis
779*25c28e83SPiotr Jasiukajtis	add	counter,4,counter
780*25c28e83SPiotr Jasiukajtis
/*
 * .tail: drain the software pipeline after the main loop has exited.
 * Three results can still be in flight: (1_2), (2_2) and (3_2).  counter
 * is decremented before each one, and control leaves for .begin as soon
 * as it goes negative.  .begin is outside this part of the file;
 * presumably it restarts with whatever the .updateN handlers deferred
 * in tmp_counter/tmp_px/tmp_py -- TODO confirm.
 */
781*25c28e83SPiotr Jasiukajtis.tail:
782*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
783*25c28e83SPiotr Jasiukajtis	bneg,a	.begin
784*25c28e83SPiotr Jasiukajtis	nop
785*25c28e83SPiotr Jasiukajtis
786*25c28e83SPiotr Jasiukajtis	fsubd	%f6,%f18,%f20		! (3_2) x_lo = x - x_hi;
787*25c28e83SPiotr Jasiukajtis
788*25c28e83SPiotr Jasiukajtis	fmuld	%f4,%f4,%f22		! (3_2) dtmp0 = y_hi * y_hi;
789*25c28e83SPiotr Jasiukajtis	fsubd	%f58,%f4,%f58		! (3_2) y_lo = y - y_hi;
790*25c28e83SPiotr Jasiukajtis
791*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f24,%f10		! (1_2) res = dmax * res;
792*25c28e83SPiotr Jasiukajtis	st	%f10,[%i5]		! (1_2) ((float*)pz)[0] = ((float*)&res)[0];
793*25c28e83SPiotr Jasiukajtis
794*25c28e83SPiotr Jasiukajtis	st	%f11,[%i5+4]		! (1_2) ((float*)pz)[1] = ((float*)&res)[1];
795*25c28e83SPiotr Jasiukajtis
/*
 * Annulled branch: the pz advance in the delay slot happens only when the
 * branch to .begin is taken.  On fall-through %i5 still addresses the
 * element just stored, and the next output address is formed in %l6.
 */
796*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
797*25c28e83SPiotr Jasiukajtis	bneg,a	.begin
798*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5
799*25c28e83SPiotr Jasiukajtis
800*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f20,%f28		! (3_2) dtmp1 *= x_lo;
801*25c28e83SPiotr Jasiukajtis
802*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f58,%f24		! (3_2) dtmp2 *= y_lo;
803*25c28e83SPiotr Jasiukajtis
804*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f22,%f22		! (3_2) res += dtmp0;
805*25c28e83SPiotr Jasiukajtis
806*25c28e83SPiotr Jasiukajtis	faddd	%f28,%f24,%f26		! (3_2) dtmp1 += dtmp2;
807*25c28e83SPiotr Jasiukajtis
808*25c28e83SPiotr Jasiukajtis	faddd	%f22,%f26,%f28		! (3_2) res += dtmp1;
809*25c28e83SPiotr Jasiukajtis
810*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%l6		! pz += stridez
811*25c28e83SPiotr Jasiukajtis
812*25c28e83SPiotr Jasiukajtis	fsqrtd	%f28,%f4		! (3_2) res = sqrt(res);
813*25c28e83SPiotr Jasiukajtis	add	%l2,stridex,%l1		! px += stridex
814*25c28e83SPiotr Jasiukajtis
815*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f12,%f12		! (2_2) res = dmax * res;
816*25c28e83SPiotr Jasiukajtis	st	%f12,[%l6]		! (2_2) ((float*)pz)[0] = ((float*)&res)[0];
817*25c28e83SPiotr Jasiukajtis
818*25c28e83SPiotr Jasiukajtis	st	%f13,[%l6+4]		! (2_2) ((float*)pz)[1] = ((float*)&res)[1];
819*25c28e83SPiotr Jasiukajtis
/*
 * Not annulled: %i5 = %l6 + stridez is computed on both paths, because the
 * fall-through store of the (3_2) result below uses %i5 as its address.
 */
820*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
821*25c28e83SPiotr Jasiukajtis	bneg	.begin
822*25c28e83SPiotr Jasiukajtis	add	%l6,stridez,%i5
823*25c28e83SPiotr Jasiukajtis
824*25c28e83SPiotr Jasiukajtis	fmuld	%f14,%f4,%f14		! (3_2) res = dmax * res;
825*25c28e83SPiotr Jasiukajtis	st	%f14,[%i5]		! (3_2) ((float*)pz)[0] = ((float*)&res)[0];
826*25c28e83SPiotr Jasiukajtis
827*25c28e83SPiotr Jasiukajtis	st	%f15,[%i5+4]		! (3_2) ((float*)pz)[1] = ((float*)&res)[1];
828*25c28e83SPiotr Jasiukajtis
829*25c28e83SPiotr Jasiukajtis	ba	.begin
830*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5
831*25c28e83SPiotr Jasiukajtis
832*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .spec0: scalar path that handles one (x, y) pair outside the vector loop.
 *   - j0 >= 0x7ff00000: go to the Inf/NaN checks at the first "1:" below.
 *   - |%o0 - %l4| >= 0x03600000: the result is simply x + y.
 *   - otherwise: scale both operands by DC2, form
 *       x_hi^2 + y_hi^2 + (x + x_hi)*(x - x_hi) + (y + y_hi)*(y - y_hi)
 *     take the square root and multiply the result by DC3.
 * Every exit stores one double at [%i5], steps %i1/%i3/%i5 by
 * stridex/stridey/stridez and returns to .begin1, decrementing counter in
 * the branch delay slot.
 * The code that loads %o0, %o2, %o4, %l4, %f24 and %f26 and defines DC2,
 * DC3 and D2ON28 is outside this part of the file; the register roles
 * used here are taken from the existing line comments.
 * The numeric labels 1: and 2: are reused; each "1f"/"2f" refers to the
 * next definition of that label further down.
 */
833*25c28e83SPiotr Jasiukajtis.spec0:
834*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%l1		! lx = ((int*)px)[1];
835*25c28e83SPiotr Jasiukajtis	cmp	%o2,%o4			! j0 ? 0x7ff00000
836*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,1f			! if ( j0 >= 0x7ff00000 )
837*25c28e83SPiotr Jasiukajtis	fabsd	%f26,%f26		! x = fabs(x);
838*25c28e83SPiotr Jasiukajtis
/*
 * NOTE(review): the comments at the Inf/NaN checks below name %o0 as hx
 * and %l4 as hy, so this subtraction is hx - hy rather than hy - hx.  The
 * sign does not matter: the next three integer instructions take the
 * absolute value.
 */
839*25c28e83SPiotr Jasiukajtis	sub	%o0,%l4,%o0		! diff = hy - hx;
840*25c28e83SPiotr Jasiukajtis	fabsd	%f24,%f24		! y = fabs(y);
841*25c28e83SPiotr Jasiukajtis
842*25c28e83SPiotr Jasiukajtis	sra	%o0,31,%l4		! j0 = diff >> 31;
843*25c28e83SPiotr Jasiukajtis
844*25c28e83SPiotr Jasiukajtis	xor	%o0,%l4,%o0		! diff ^ j0
845*25c28e83SPiotr Jasiukajtis
/* %l1 (lx) is only needed on the Inf/NaN path, so it is reused as a constant. */
846*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x03600000),%l1
847*25c28e83SPiotr Jasiukajtis	sub	%o0,%l4,%o0		! (diff ^ j0) - j0
848*25c28e83SPiotr Jasiukajtis
/* Annulled branch: x + y is computed only when the branch to 2: is taken. */
849*25c28e83SPiotr Jasiukajtis	cmp	%o0,%l1			! ((diff ^ j0) - j0) ? 0x03600000
850*25c28e83SPiotr Jasiukajtis	bge,a,pn	%icc,2f		! if ( ((diff ^ j0) - j0) >= 0x03600000 )
851*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f24,%f24		! *pz = x + y
852*25c28e83SPiotr Jasiukajtis
853*25c28e83SPiotr Jasiukajtis	fmuld	%f26,DC2,%f36		! (1_1) x *= dnorm;
854*25c28e83SPiotr Jasiukajtis
855*25c28e83SPiotr Jasiukajtis	fmuld	%f24,DC2,%f56		! (1_1) y *= dnorm;
856*25c28e83SPiotr Jasiukajtis
857*25c28e83SPiotr Jasiukajtis	faddd	%f36,D2ON28,%f58	! (1_1) x_hi = x + D2ON28;
858*25c28e83SPiotr Jasiukajtis
859*25c28e83SPiotr Jasiukajtis	faddd	%f56,D2ON28,%f22	! (1_1) y_hi = y + D2ON28;
860*25c28e83SPiotr Jasiukajtis
861*25c28e83SPiotr Jasiukajtis	fsubd	%f58,D2ON28,%f58	! (1_1) x_hi -= D2ON28;
862*25c28e83SPiotr Jasiukajtis
863*25c28e83SPiotr Jasiukajtis	fsubd	%f22,D2ON28,%f22	! (1_1) y_hi -= D2ON28;
864*25c28e83SPiotr Jasiukajtis
865*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f58,%f60		! (1_1) res = x_hi * x_hi;
866*25c28e83SPiotr Jasiukajtis	faddd	%f56,%f22,%f28		! (1_1) dtmp2 = y + y_hi;
867*25c28e83SPiotr Jasiukajtis
868*25c28e83SPiotr Jasiukajtis	faddd	%f36,%f58,%f6		! (1_1) dtmp1 = x + x_hi;
869*25c28e83SPiotr Jasiukajtis
870*25c28e83SPiotr Jasiukajtis	fsubd	%f36,%f58,%f58		! (1_1) x_lo = x - x_hi;
871*25c28e83SPiotr Jasiukajtis
872*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f22,%f2		! (1_1) dtmp0 = y_hi * y_hi;
873*25c28e83SPiotr Jasiukajtis	fsubd	%f56,%f22,%f56		! (1_1) y_lo = y - y_hi;
874*25c28e83SPiotr Jasiukajtis
875*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f58,%f10		! (1_1) dtmp1 *= x_lo;
876*25c28e83SPiotr Jasiukajtis
877*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f56,%f26		! (1_1) dtmp2 *= y_lo;
878*25c28e83SPiotr Jasiukajtis
879*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f2,%f24		! (1_1) res += dtmp0;
880*25c28e83SPiotr Jasiukajtis
881*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f26,%f28		! (1_1) dtmp1 += dtmp2;
882*25c28e83SPiotr Jasiukajtis
883*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f28,%f26		! (1_1) res += dtmp1;
884*25c28e83SPiotr Jasiukajtis
885*25c28e83SPiotr Jasiukajtis	fsqrtd	%f26,%f24		! (1_1) res = sqrt(res);
886*25c28e83SPiotr Jasiukajtis
887*25c28e83SPiotr Jasiukajtis	fmuld	DC3,%f24,%f24		! (1_2) res = dmax * res;
/* Common store for the x + y shortcut and the scaled computation. */
888*25c28e83SPiotr Jasiukajtis2:
889*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i3
890*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1
891*25c28e83SPiotr Jasiukajtis	st	%f24,[%i5]		! ((float*)pz)[0] = ((float*)&res)[0];
892*25c28e83SPiotr Jasiukajtis	st	%f25,[%i5+4]		! ((float*)pz)[1] = ((float*)&res)[1];
893*25c28e83SPiotr Jasiukajtis
894*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5
895*25c28e83SPiotr Jasiukajtis	ba	.begin1
896*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
897*25c28e83SPiotr Jasiukajtis
/*
 * Inf/NaN handling.  If either operand has high word 0x7ff00000 and a zero
 * low word, the fixed pattern DC0_HI:DC0_LO is stored (second "2:" below);
 * otherwise the product x * y is stored.  Presumably this propagates a NaN
 * operand -- TODO confirm that hx/hy are sign-stripped before this point.
 */
898*25c28e83SPiotr Jasiukajtis1:
899*25c28e83SPiotr Jasiukajtis	ld	[%i3+4],%l2		! ly = ((int*)py)[1];
900*25c28e83SPiotr Jasiukajtis	cmp	%o0,%o4			! hx ? 0x7ff00000
901*25c28e83SPiotr Jasiukajtis	bne,pn	%icc,1f			! if ( hx != 0x7ff00000 )
902*25c28e83SPiotr Jasiukajtis	fabsd	%f24,%f24		! y = fabs(y);
903*25c28e83SPiotr Jasiukajtis
904*25c28e83SPiotr Jasiukajtis	cmp	%l1,0			! lx ? 0
905*25c28e83SPiotr Jasiukajtis	be,pn	%icc,2f			! if ( lx == 0 )
906*25c28e83SPiotr Jasiukajtis	nop
907*25c28e83SPiotr Jasiukajtis1:
908*25c28e83SPiotr Jasiukajtis	cmp	%l4,%o4			! hy ? 0x7ff00000
909*25c28e83SPiotr Jasiukajtis	bne,pn	%icc,1f			! if ( hy != 0x7ff00000 )
910*25c28e83SPiotr Jasiukajtis	nop
911*25c28e83SPiotr Jasiukajtis
912*25c28e83SPiotr Jasiukajtis	cmp	%l2,0			! ly ? 0
913*25c28e83SPiotr Jasiukajtis	be,pn	%icc,2f			! if ( ly == 0 )
914*25c28e83SPiotr Jasiukajtis	nop
915*25c28e83SPiotr Jasiukajtis1:
916*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i3
917*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1
918*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f24,%f24		! res = x * y;
919*25c28e83SPiotr Jasiukajtis	st	%f24,[%i5]		! ((float*)pz)[0] = ((float*)&res)[0];
920*25c28e83SPiotr Jasiukajtis
921*25c28e83SPiotr Jasiukajtis	st	%f25,[%i5+4]		! ((float*)pz)[1] = ((float*)&res)[1];
922*25c28e83SPiotr Jasiukajtis
923*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5
924*25c28e83SPiotr Jasiukajtis	ba	.begin1
925*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
926*25c28e83SPiotr Jasiukajtis
/*
 * The result of the fcmpd below is not consumed in this block; presumably
 * it is issued only for its floating-point exception side effect when the
 * other operand is a NaN -- TODO confirm.
 */
927*25c28e83SPiotr Jasiukajtis2:
928*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1
929*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i3
930*25c28e83SPiotr Jasiukajtis	st	DC0_HI,[%i5]		! ((int*)pz)[0] = 0x7ff00000;
931*25c28e83SPiotr Jasiukajtis	st	DC0_LO,[%i5+4]		! ((int*)pz)[1] = 0;
932*25c28e83SPiotr Jasiukajtis	fcmpd	%f26,%f24		! x ? y
933*25c28e83SPiotr Jasiukajtis
934*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5
935*25c28e83SPiotr Jasiukajtis	ba	.begin1
936*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
937*25c28e83SPiotr Jasiukajtis
938*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .spec1: scalar path for one (x, y) pair, using the same hi/lo split as
 * the main loop but with fixed scale factors: both operands are multiplied
 * by DC3 first, and the square root is multiplied by DC2 at the end (the
 * mirror image of the DC2/DC3 use in .spec0).
 *   res = sqrt(x_hi^2 + y_hi^2 + (x + x_hi)*(x - x_hi) + (y + y_hi)*(y - y_hi))
 * The result is stored at [%i5], %i1/%i3/%i5 are stepped by their strides,
 * and control returns to .begin1 with counter decremented in the delay
 * slot.  The branch that selects this path and the values of DC2/DC3 are
 * outside this part of the file; presumably this is the counterpart of
 * .spec0 for the opposite magnitude extreme -- TODO confirm.
 */
939*25c28e83SPiotr Jasiukajtis.spec1:
940*25c28e83SPiotr Jasiukajtis	fmuld	%f26,DC3,%f36		! (1_1) x *= dnorm;
941*25c28e83SPiotr Jasiukajtis
942*25c28e83SPiotr Jasiukajtis	fmuld	%f24,DC3,%f56		! (1_1) y *= dnorm;
943*25c28e83SPiotr Jasiukajtis
944*25c28e83SPiotr Jasiukajtis	faddd	%f36,D2ON28,%f58	! (1_1) x_hi = x + D2ON28;
945*25c28e83SPiotr Jasiukajtis
946*25c28e83SPiotr Jasiukajtis	faddd	%f56,D2ON28,%f22	! (1_1) y_hi = y + D2ON28;
947*25c28e83SPiotr Jasiukajtis
948*25c28e83SPiotr Jasiukajtis	fsubd	%f58,D2ON28,%f58	! (1_1) x_hi -= D2ON28;
949*25c28e83SPiotr Jasiukajtis
950*25c28e83SPiotr Jasiukajtis	fsubd	%f22,D2ON28,%f22	! (1_1) y_hi -= D2ON28;
951*25c28e83SPiotr Jasiukajtis
952*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f58,%f60		! (1_1) res = x_hi * x_hi;
953*25c28e83SPiotr Jasiukajtis	faddd	%f56,%f22,%f28		! (1_1) dtmp2 = y + y_hi;
954*25c28e83SPiotr Jasiukajtis
955*25c28e83SPiotr Jasiukajtis	faddd	%f36,%f58,%f6		! (1_1) dtmp1 = x + x_hi;
956*25c28e83SPiotr Jasiukajtis
957*25c28e83SPiotr Jasiukajtis	fsubd	%f36,%f58,%f58		! (1_1) x_lo = x - x_hi;
958*25c28e83SPiotr Jasiukajtis
959*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f22,%f2		! (1_1) dtmp0 = y_hi * y_hi;
960*25c28e83SPiotr Jasiukajtis	fsubd	%f56,%f22,%f56		! (1_1) y_lo = y - y_hi;
961*25c28e83SPiotr Jasiukajtis
962*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f58,%f10		! (1_1) dtmp1 *= x_lo;
963*25c28e83SPiotr Jasiukajtis
964*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f56,%f26		! (1_1) dtmp2 *= y_lo;
965*25c28e83SPiotr Jasiukajtis
966*25c28e83SPiotr Jasiukajtis	faddd	%f60,%f2,%f24		! (1_1) res += dtmp0;
967*25c28e83SPiotr Jasiukajtis
968*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f26,%f28		! (1_1) dtmp1 += dtmp2;
969*25c28e83SPiotr Jasiukajtis
970*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f28,%f26		! (1_1) res += dtmp1;
971*25c28e83SPiotr Jasiukajtis
972*25c28e83SPiotr Jasiukajtis	fsqrtd	%f26,%f24		! (1_1) res = sqrt(res);
973*25c28e83SPiotr Jasiukajtis
974*25c28e83SPiotr Jasiukajtis	fmuld	DC2,%f24,%f24		! (1_2) res = dmax * res;
975*25c28e83SPiotr Jasiukajtis
976*25c28e83SPiotr Jasiukajtis	add	%i3,stridey,%i3
977*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1
978*25c28e83SPiotr Jasiukajtis	st	%f24,[%i5]		! ((float*)pz)[0] = ((float*)&res)[0];
979*25c28e83SPiotr Jasiukajtis
980*25c28e83SPiotr Jasiukajtis	st	%f25,[%i5+4]		! ((float*)pz)[1] = ((float*)&res)[1];
981*25c28e83SPiotr Jasiukajtis	add	%i5,stridez,%i5
982*25c28e83SPiotr Jasiukajtis	ba	.begin1
983*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
984*25c28e83SPiotr Jasiukajtis
985*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update0: out-of-line handler that resumes at .cont0.  The branch that
 * enters it is outside this part of the file; presumably it is the first
 * range check for the element one position ahead -- TODO confirm.
 * It zeroes %f50 and %f34, which the main loop uses for x and y of
 * pipeline slot 2.  The branch is not annulled, so the second fzero in the
 * delay slot runs on both paths.  If counter <= 1 nothing else changes.
 * Otherwise that slot's px/py (%o1, %i3) are saved in tmp_px/tmp_py,
 * tmp_counter = counter - 1, and counter is cut to 1 in the delay slot of
 * the final branch.
 */
986*25c28e83SPiotr Jasiukajtis.update0:
987*25c28e83SPiotr Jasiukajtis	fzero	%f50
988*25c28e83SPiotr Jasiukajtis	cmp	counter,1
989*25c28e83SPiotr Jasiukajtis	ble	.cont0
990*25c28e83SPiotr Jasiukajtis	fzero	%f34
991*25c28e83SPiotr Jasiukajtis
992*25c28e83SPiotr Jasiukajtis	mov	%o1,tmp_px
993*25c28e83SPiotr Jasiukajtis	mov	%i3,tmp_py
994*25c28e83SPiotr Jasiukajtis
995*25c28e83SPiotr Jasiukajtis	sub	counter,1,tmp_counter
996*25c28e83SPiotr Jasiukajtis	ba	.cont0
997*25c28e83SPiotr Jasiukajtis	mov	1,counter
998*25c28e83SPiotr Jasiukajtis
999*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update1: out-of-line handler that resumes at .cont1.  The branch that
 * enters it is outside this part of the file; presumably it is the second
 * range check for the element one position ahead -- TODO confirm.
 * It zeroes %f50 and %f34, which the main loop uses for x and y of
 * pipeline slot 2.  The branch is not annulled, so the second fzero in the
 * delay slot runs on both paths.  If counter <= 1 nothing else changes.
 * Otherwise that slot's px/py (%o1, %i3) are saved in tmp_px/tmp_py,
 * tmp_counter = counter - 1, and counter is cut to 1 in the delay slot of
 * the final branch.
 */
1000*25c28e83SPiotr Jasiukajtis.update1:
1001*25c28e83SPiotr Jasiukajtis	fzero	%f50
1002*25c28e83SPiotr Jasiukajtis	cmp	counter,1
1003*25c28e83SPiotr Jasiukajtis	ble	.cont1
1004*25c28e83SPiotr Jasiukajtis	fzero	%f34
1005*25c28e83SPiotr Jasiukajtis
1006*25c28e83SPiotr Jasiukajtis	mov	%o1,tmp_px
1007*25c28e83SPiotr Jasiukajtis	mov	%i3,tmp_py
1008*25c28e83SPiotr Jasiukajtis
1009*25c28e83SPiotr Jasiukajtis	sub	counter,1,tmp_counter
1010*25c28e83SPiotr Jasiukajtis	ba	.cont1
1011*25c28e83SPiotr Jasiukajtis	mov	1,counter
1012*25c28e83SPiotr Jasiukajtis
1013*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update2: out-of-line handler that resumes at .cont2 on both paths,
 * matching every other .updateN/.contN pair.  The branch that enters it is
 * outside this part of the file; presumably it is the first range check
 * for the element two positions ahead -- TODO confirm.
 * It zeroes %f18 and %f30, which the main loop uses for x and y of
 * pipeline slot 3.  The branch is not annulled, so the second fzero in the
 * delay slot runs on both paths.  If counter <= 2 nothing else changes.
 * Otherwise that slot's px/py (%l2, %l4) are saved in tmp_px/tmp_py,
 * tmp_counter = counter - 2, and counter is cut to 2 in the delay slot of
 * the final branch.
 */
1014*25c28e83SPiotr Jasiukajtis.update2:
1015*25c28e83SPiotr Jasiukajtis	fzero	%f18
1016*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1017*25c28e83SPiotr Jasiukajtis	ble	.cont2
1018*25c28e83SPiotr Jasiukajtis	fzero	%f30
1019*25c28e83SPiotr Jasiukajtis
1020*25c28e83SPiotr Jasiukajtis	mov	%l2,tmp_px
1021*25c28e83SPiotr Jasiukajtis	mov	%l4,tmp_py
1022*25c28e83SPiotr Jasiukajtis
1023*25c28e83SPiotr Jasiukajtis	sub	counter,2,tmp_counter
/* Return to .cont2, not .cont1, so the code before .cont2 is not run twice. */
1024*25c28e83SPiotr Jasiukajtis	ba	.cont2
1025*25c28e83SPiotr Jasiukajtis	mov	2,counter
1026*25c28e83SPiotr Jasiukajtis
1027*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update3: out-of-line handler that resumes at .cont3.  The branch that
 * enters it is outside this part of the file; presumably it is the second
 * range check for the element two positions ahead -- TODO confirm.
 * It zeroes %f18 and %f30, which the main loop uses for x and y of
 * pipeline slot 3.  The branch is not annulled, so the second fzero in the
 * delay slot runs on both paths.  If counter <= 2 nothing else changes.
 * Otherwise that slot's px/py (%l2, %l4) are saved in tmp_px/tmp_py,
 * tmp_counter = counter - 2, and counter is cut to 2 in the delay slot of
 * the final branch.
 */
1028*25c28e83SPiotr Jasiukajtis.update3:
1029*25c28e83SPiotr Jasiukajtis	fzero	%f18
1030*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1031*25c28e83SPiotr Jasiukajtis	ble	.cont3
1032*25c28e83SPiotr Jasiukajtis	fzero	%f30
1033*25c28e83SPiotr Jasiukajtis
1034*25c28e83SPiotr Jasiukajtis	mov	%l2,tmp_px
1035*25c28e83SPiotr Jasiukajtis	mov	%l4,tmp_py
1036*25c28e83SPiotr Jasiukajtis
1037*25c28e83SPiotr Jasiukajtis	sub	counter,2,tmp_counter
1038*25c28e83SPiotr Jasiukajtis	ba	.cont3
1039*25c28e83SPiotr Jasiukajtis	mov	2,counter
1040*25c28e83SPiotr Jasiukajtis
1041*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update4: out-of-line handler that resumes at .cont4.  The branch that
 * enters it is outside this part of the file; presumably it is the first
 * range check for the element three positions ahead -- TODO confirm.
 * It zeroes %f20 and %f40, which the main loop uses for x and y of
 * pipeline slot 0.  The branch is not annulled, so the second fzero in the
 * delay slot runs on both paths.  If counter <= 3 nothing else changes.
 * Otherwise that slot's px/py (%l1, %i3) are saved in tmp_px/tmp_py,
 * tmp_counter = counter - 3, and counter is cut to 3 in the delay slot of
 * the final branch.
 */
1042*25c28e83SPiotr Jasiukajtis.update4:
1043*25c28e83SPiotr Jasiukajtis	fzero	%f20
1044*25c28e83SPiotr Jasiukajtis	cmp	counter,3
1045*25c28e83SPiotr Jasiukajtis	ble	.cont4
1046*25c28e83SPiotr Jasiukajtis	fzero	%f40
1047*25c28e83SPiotr Jasiukajtis
1048*25c28e83SPiotr Jasiukajtis	mov	%l1,tmp_px
1049*25c28e83SPiotr Jasiukajtis	mov	%i3,tmp_py
1050*25c28e83SPiotr Jasiukajtis
1051*25c28e83SPiotr Jasiukajtis	sub	counter,3,tmp_counter
1052*25c28e83SPiotr Jasiukajtis	ba	.cont4
1053*25c28e83SPiotr Jasiukajtis	mov	3,counter
1054*25c28e83SPiotr Jasiukajtis
1055*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update5: out-of-line handler that resumes at .cont5.  The branch that
 * enters it is outside this part of the file; presumably it is the second
 * range check for the element three positions ahead -- TODO confirm.
 * It zeroes %f20 and %f40, which the main loop uses for x and y of
 * pipeline slot 0.  The branch is not annulled, so the second fzero in the
 * delay slot runs on both paths.  If counter <= 3 nothing else changes.
 * Otherwise that slot's px/py (%l1, %i3) are saved in tmp_px/tmp_py,
 * tmp_counter = counter - 3, and counter is cut to 3 in the delay slot of
 * the final branch.
 */
1056*25c28e83SPiotr Jasiukajtis.update5:
1057*25c28e83SPiotr Jasiukajtis	fzero	%f20
1058*25c28e83SPiotr Jasiukajtis	cmp	counter,3
1059*25c28e83SPiotr Jasiukajtis	ble	.cont5
1060*25c28e83SPiotr Jasiukajtis	fzero	%f40
1061*25c28e83SPiotr Jasiukajtis
1062*25c28e83SPiotr Jasiukajtis	mov	%l1,tmp_px
1063*25c28e83SPiotr Jasiukajtis	mov	%i3,tmp_py
1064*25c28e83SPiotr Jasiukajtis
1065*25c28e83SPiotr Jasiukajtis	sub	counter,3,tmp_counter
1066*25c28e83SPiotr Jasiukajtis	ba	.cont5
1067*25c28e83SPiotr Jasiukajtis	mov	3,counter
1068*25c28e83SPiotr Jasiukajtis
1069*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update6: out-of-line handler that resumes at .cont6.  The branch that
 * enters it is outside this part of the file; presumably it is the first
 * range check for the element four positions ahead -- TODO confirm.
 * It zeroes %f36 and %f54, which the main loop uses for x and y of
 * pipeline slot 1.  The branch is not annulled, so the second fzero in the
 * delay slot runs on both paths.  If counter <= 4 nothing else changes.
 * Otherwise that slot's px/py (%l7, %l2) are saved in tmp_px/tmp_py,
 * tmp_counter = counter - 4, and counter is cut to 4 in the delay slot of
 * the final branch.
 */
1070*25c28e83SPiotr Jasiukajtis.update6:
1071*25c28e83SPiotr Jasiukajtis	fzero	%f36
1072*25c28e83SPiotr Jasiukajtis	cmp	counter,4
1073*25c28e83SPiotr Jasiukajtis	ble	.cont6
1074*25c28e83SPiotr Jasiukajtis	fzero	%f54
1075*25c28e83SPiotr Jasiukajtis
1076*25c28e83SPiotr Jasiukajtis	mov	%l7,tmp_px
1077*25c28e83SPiotr Jasiukajtis	mov	%l2,tmp_py
1078*25c28e83SPiotr Jasiukajtis
1079*25c28e83SPiotr Jasiukajtis	sub	counter,4,tmp_counter
1080*25c28e83SPiotr Jasiukajtis	ba	.cont6
1081*25c28e83SPiotr Jasiukajtis	mov	4,counter
1082*25c28e83SPiotr Jasiukajtis
1083*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update7: out-of-line handler that resumes at .cont7.  The branch that
 * enters it is outside this part of the file; presumably it is the second
 * range check for the element four positions ahead -- TODO confirm.
 * It zeroes %f36 and %f54, which the main loop uses for x and y of
 * pipeline slot 1.  The branch is not annulled, so the second fzero in the
 * delay slot runs on both paths.  If counter <= 4 nothing else changes.
 * Otherwise that slot's px/py (%l7, %l2) are saved in tmp_px/tmp_py,
 * tmp_counter = counter - 4, and counter is cut to 4 in the delay slot of
 * the final branch.
 */
1084*25c28e83SPiotr Jasiukajtis.update7:
1085*25c28e83SPiotr Jasiukajtis	fzero	%f36
1086*25c28e83SPiotr Jasiukajtis	cmp	counter,4
1087*25c28e83SPiotr Jasiukajtis	ble	.cont7
1088*25c28e83SPiotr Jasiukajtis	fzero	%f54
1089*25c28e83SPiotr Jasiukajtis
1090*25c28e83SPiotr Jasiukajtis	mov	%l7,tmp_px
1091*25c28e83SPiotr Jasiukajtis	mov	%l2,tmp_py
1092*25c28e83SPiotr Jasiukajtis
1093*25c28e83SPiotr Jasiukajtis	sub	counter,4,tmp_counter
1094*25c28e83SPiotr Jasiukajtis	ba	.cont7
1095*25c28e83SPiotr Jasiukajtis	mov	4,counter
1096*25c28e83SPiotr Jasiukajtis
1097*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update8: out-of-line handler that resumes at .cont8.  The branch that
 * enters it is outside this part of the file; presumably it is the first
 * range check for the element five positions ahead -- TODO confirm.
 * It zeroes %f50 and %f34, which the main loop uses for x and y of
 * pipeline slot 2.  The branch is not annulled, so the second fzero in the
 * delay slot runs on both paths.  If counter <= 5 nothing else changes.
 * Otherwise that slot's px/py (%o1, %i3) are saved in tmp_px/tmp_py,
 * tmp_counter = counter - 5, and counter is cut to 5 in the delay slot of
 * the final branch.
 */
1098*25c28e83SPiotr Jasiukajtis.update8:
1099*25c28e83SPiotr Jasiukajtis	fzero	%f50
1100*25c28e83SPiotr Jasiukajtis	cmp	counter,5
1101*25c28e83SPiotr Jasiukajtis	ble	.cont8
1102*25c28e83SPiotr Jasiukajtis	fzero	%f34
1103*25c28e83SPiotr Jasiukajtis
1104*25c28e83SPiotr Jasiukajtis	mov	%o1,tmp_px
1105*25c28e83SPiotr Jasiukajtis	mov	%i3,tmp_py
1106*25c28e83SPiotr Jasiukajtis
1107*25c28e83SPiotr Jasiukajtis	sub	counter,5,tmp_counter
1108*25c28e83SPiotr Jasiukajtis	ba	.cont8
1109*25c28e83SPiotr Jasiukajtis	mov	5,counter
1110*25c28e83SPiotr Jasiukajtis
1111*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update9: out-of-line handler that resumes at .cont9.  The branch that
 * enters it is outside this part of the file; presumably it is the second
 * range check for the element five positions ahead -- TODO confirm.
 * It zeroes %f50 and %f34, which the main loop uses for x and y of
 * pipeline slot 2.  The branch is not annulled, so the second fzero in the
 * delay slot runs on both paths.  If counter <= 5 nothing else changes.
 * Otherwise that slot's px/py (%o1, %i3) are saved in tmp_px/tmp_py,
 * tmp_counter = counter - 5, and counter is cut to 5 in the delay slot of
 * the final branch.
 */
1112*25c28e83SPiotr Jasiukajtis.update9:
1113*25c28e83SPiotr Jasiukajtis	fzero	%f50
1114*25c28e83SPiotr Jasiukajtis	cmp	counter,5
1115*25c28e83SPiotr Jasiukajtis	ble	.cont9
1116*25c28e83SPiotr Jasiukajtis	fzero	%f34
1117*25c28e83SPiotr Jasiukajtis
1118*25c28e83SPiotr Jasiukajtis	mov	%o1,tmp_px
1119*25c28e83SPiotr Jasiukajtis	mov	%i3,tmp_py
1120*25c28e83SPiotr Jasiukajtis
1121*25c28e83SPiotr Jasiukajtis	sub	counter,5,tmp_counter
1122*25c28e83SPiotr Jasiukajtis	ba	.cont9
1123*25c28e83SPiotr Jasiukajtis	mov	5,counter
1124*25c28e83SPiotr Jasiukajtis
1125*25c28e83SPiotr Jasiukajtis
1126*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update10: out-of-line handler for the main loop, resuming at .cont10.
 * It is entered when (c0 & 2) != 0 for pipeline slot 3, where
 * c0 = fcmple32(DC1,x) | fcmple32(DC1,y).
 * It zeroes %f18 and %f30, that slot's x and y.  The branch is not
 * annulled, so the second fzero in the delay slot runs on both paths.
 * If counter <= 2 nothing else changes.  Otherwise the slot's px/py
 * (%l2, %l4) are saved in tmp_px/tmp_py, tmp_counter = counter - 2, and
 * counter is cut to 2 in the delay slot of the final branch.
 * Presumably the saved state lets the offending element be reprocessed
 * from .begin through a special-case path -- TODO confirm.
 */
1127*25c28e83SPiotr Jasiukajtis.update10:
1128*25c28e83SPiotr Jasiukajtis	fzero	%f18
1129*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1130*25c28e83SPiotr Jasiukajtis	ble	.cont10
1131*25c28e83SPiotr Jasiukajtis	fzero	%f30
1132*25c28e83SPiotr Jasiukajtis
1133*25c28e83SPiotr Jasiukajtis	mov	%l2,tmp_px
1134*25c28e83SPiotr Jasiukajtis	mov	%l4,tmp_py
1135*25c28e83SPiotr Jasiukajtis
1136*25c28e83SPiotr Jasiukajtis	sub	counter,2,tmp_counter
1137*25c28e83SPiotr Jasiukajtis	ba	.cont10
1138*25c28e83SPiotr Jasiukajtis	mov	2,counter
1139*25c28e83SPiotr Jasiukajtis
1140*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update11: out-of-line handler for the main loop, resuming at .cont11.
 * It is entered when (c1 & 2) != 0 for pipeline slot 3, where
 * c1 = fcmpgt32(DC2,x) & fcmpgt32(DC2,y).
 * It zeroes %f18 and %f30, that slot's x and y.  The branch is not
 * annulled, so the second fzero in the delay slot runs on both paths.
 * If counter <= 2 nothing else changes.  Otherwise the slot's px/py
 * (%l2, %l4) are saved in tmp_px/tmp_py, tmp_counter = counter - 2, and
 * counter is cut to 2 in the delay slot of the final branch.
 * Presumably the saved state lets the offending element be reprocessed
 * from .begin through a special-case path -- TODO confirm.
 */
1141*25c28e83SPiotr Jasiukajtis.update11:
1142*25c28e83SPiotr Jasiukajtis	fzero	%f18
1143*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1144*25c28e83SPiotr Jasiukajtis	ble	.cont11
1145*25c28e83SPiotr Jasiukajtis	fzero	%f30
1146*25c28e83SPiotr Jasiukajtis
1147*25c28e83SPiotr Jasiukajtis	mov	%l2,tmp_px
1148*25c28e83SPiotr Jasiukajtis	mov	%l4,tmp_py
1149*25c28e83SPiotr Jasiukajtis
1150*25c28e83SPiotr Jasiukajtis	sub	counter,2,tmp_counter
1151*25c28e83SPiotr Jasiukajtis	ba	.cont11
1152*25c28e83SPiotr Jasiukajtis	mov	2,counter
1153*25c28e83SPiotr Jasiukajtis
1154*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update12: zero %f20 and %f40 and clamp counter to at most 3, then
 * continue at .cont12.  When counter exceeds 3, %l1 and %i3 are copied to
 * tmp_px and tmp_py and the excess (counter - 3) is stored in tmp_counter
 * before counter is set to 3.
 * NOTE(review): counter, tmp_px, tmp_py and tmp_counter are defined
 * outside this view; presumably they are register aliases and the tmp_*
 * values describe where a later pass resumes -- confirm against the code
 * around .cont12.
 */
1155*25c28e83SPiotr Jasiukajtis.update12:
1156*25c28e83SPiotr Jasiukajtis	fzero	%f20	/* %f20 = 0 */
1157*25c28e83SPiotr Jasiukajtis	cmp	counter,3	/* set condition codes from counter - 3 */
1158*25c28e83SPiotr Jasiukajtis	ble	.cont12	/* signed counter <= 3: no clamping needed */
1159*25c28e83SPiotr Jasiukajtis	fzero	%f40	/* delay slot, not annulled: %f40 = 0 on both paths */
1160*25c28e83SPiotr Jasiukajtis
1161*25c28e83SPiotr Jasiukajtis	mov	%l1,tmp_px	/* tmp_px = %l1 */
1162*25c28e83SPiotr Jasiukajtis	mov	%i3,tmp_py	/* tmp_py = %i3 */
1163*25c28e83SPiotr Jasiukajtis
1164*25c28e83SPiotr Jasiukajtis	sub	counter,3,tmp_counter	/* tmp_counter = counter - 3 */
1165*25c28e83SPiotr Jasiukajtis	ba	.cont12	/* unconditional return to .cont12 */
1166*25c28e83SPiotr Jasiukajtis	mov	3,counter	/* delay slot: counter = 3 */
1167*25c28e83SPiotr Jasiukajtis
1168*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update13: zero %f20 and %f40 and clamp counter to at most 3, then
 * continue at .cont13.  When counter exceeds 3, %l1 and %i3 are copied to
 * tmp_px and tmp_py and the excess (counter - 3) is stored in tmp_counter
 * before counter is set to 3.
 * NOTE(review): counter, tmp_px, tmp_py and tmp_counter are defined
 * outside this view; presumably they are register aliases and the tmp_*
 * values describe where a later pass resumes -- confirm against the code
 * around .cont13.
 */
1169*25c28e83SPiotr Jasiukajtis.update13:
1170*25c28e83SPiotr Jasiukajtis	fzero	%f20	/* %f20 = 0 */
1171*25c28e83SPiotr Jasiukajtis	cmp	counter,3	/* set condition codes from counter - 3 */
1172*25c28e83SPiotr Jasiukajtis	ble	.cont13	/* signed counter <= 3: no clamping needed */
1173*25c28e83SPiotr Jasiukajtis	fzero	%f40	/* delay slot, not annulled: %f40 = 0 on both paths */
1174*25c28e83SPiotr Jasiukajtis
1175*25c28e83SPiotr Jasiukajtis	mov	%l1,tmp_px	/* tmp_px = %l1 */
1176*25c28e83SPiotr Jasiukajtis	mov	%i3,tmp_py	/* tmp_py = %i3 */
1177*25c28e83SPiotr Jasiukajtis
1178*25c28e83SPiotr Jasiukajtis	sub	counter,3,tmp_counter	/* tmp_counter = counter - 3 */
1179*25c28e83SPiotr Jasiukajtis	ba	.cont13	/* unconditional return to .cont13 */
1180*25c28e83SPiotr Jasiukajtis	mov	3,counter	/* delay slot: counter = 3 */
1181*25c28e83SPiotr Jasiukajtis
1182*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update14: zero %f54 and %f36 and clamp counter to at most 4, then
 * continue at .cont14.  When counter exceeds 4, %l7 and %l2 are copied to
 * tmp_px and tmp_py and the excess (counter - 4) is stored in tmp_counter
 * before counter is set to 4.
 * NOTE(review): counter, tmp_px, tmp_py and tmp_counter are defined
 * outside this view; presumably they are register aliases and the tmp_*
 * values describe where a later pass resumes -- confirm against the code
 * around .cont14.
 */
1183*25c28e83SPiotr Jasiukajtis.update14:
1184*25c28e83SPiotr Jasiukajtis	fzero	%f54	/* %f54 = 0 */
1185*25c28e83SPiotr Jasiukajtis	cmp	counter,4	/* set condition codes from counter - 4 */
1186*25c28e83SPiotr Jasiukajtis	ble	.cont14	/* signed counter <= 4: no clamping needed */
1187*25c28e83SPiotr Jasiukajtis	fzero	%f36	/* delay slot, not annulled: %f36 = 0 on both paths */
1188*25c28e83SPiotr Jasiukajtis
1189*25c28e83SPiotr Jasiukajtis	mov	%l7,tmp_px	/* tmp_px = %l7 */
1190*25c28e83SPiotr Jasiukajtis	mov	%l2,tmp_py	/* tmp_py = %l2 */
1191*25c28e83SPiotr Jasiukajtis
1192*25c28e83SPiotr Jasiukajtis	sub	counter,4,tmp_counter	/* tmp_counter = counter - 4 */
1193*25c28e83SPiotr Jasiukajtis	ba	.cont14	/* unconditional return to .cont14 */
1194*25c28e83SPiotr Jasiukajtis	mov	4,counter	/* delay slot: counter = 4 */
1195*25c28e83SPiotr Jasiukajtis
1196*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update15: zero %f54 and %f36 and clamp counter to at most 4, then
 * continue at .cont15.  When counter exceeds 4, %l7 and %l2 are copied to
 * tmp_px and tmp_py and the excess (counter - 4) is stored in tmp_counter
 * before counter is set to 4.
 * NOTE(review): counter, tmp_px, tmp_py and tmp_counter are defined
 * outside this view; presumably they are register aliases and the tmp_*
 * values describe where a later pass resumes -- confirm against the code
 * around .cont15.
 */
1197*25c28e83SPiotr Jasiukajtis.update15:
1198*25c28e83SPiotr Jasiukajtis	fzero	%f54	/* %f54 = 0 */
1199*25c28e83SPiotr Jasiukajtis	cmp	counter,4	/* set condition codes from counter - 4 */
1200*25c28e83SPiotr Jasiukajtis	ble	.cont15	/* signed counter <= 4: no clamping needed */
1201*25c28e83SPiotr Jasiukajtis	fzero	%f36	/* delay slot, not annulled: %f36 = 0 on both paths */
1202*25c28e83SPiotr Jasiukajtis
1203*25c28e83SPiotr Jasiukajtis	mov	%l7,tmp_px	/* tmp_px = %l7 */
1204*25c28e83SPiotr Jasiukajtis	mov	%l2,tmp_py	/* tmp_py = %l2 */
1205*25c28e83SPiotr Jasiukajtis
1206*25c28e83SPiotr Jasiukajtis	sub	counter,4,tmp_counter	/* tmp_counter = counter - 4 */
1207*25c28e83SPiotr Jasiukajtis	ba	.cont15	/* unconditional return to .cont15 */
1208*25c28e83SPiotr Jasiukajtis	mov	4,counter	/* delay slot: counter = 4 */
1209*25c28e83SPiotr Jasiukajtis
1210*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update16: zero %f50 and %f34 and clamp counter to at most 5, then
 * continue at .cont16.  When counter exceeds 5, %o1 and %i3 are copied to
 * tmp_px and tmp_py and the excess (counter - 5) is stored in tmp_counter
 * before counter is set to 5.
 * NOTE(review): counter, tmp_px, tmp_py and tmp_counter are defined
 * outside this view; presumably they are register aliases and the tmp_*
 * values describe where a later pass resumes -- confirm against the code
 * around .cont16.
 */
1211*25c28e83SPiotr Jasiukajtis.update16:
1212*25c28e83SPiotr Jasiukajtis	fzero	%f50	/* %f50 = 0 */
1213*25c28e83SPiotr Jasiukajtis	cmp	counter,5	/* set condition codes from counter - 5 */
1214*25c28e83SPiotr Jasiukajtis	ble	.cont16	/* signed counter <= 5: no clamping needed */
1215*25c28e83SPiotr Jasiukajtis	fzero	%f34	/* delay slot, not annulled: %f34 = 0 on both paths */
1216*25c28e83SPiotr Jasiukajtis
1217*25c28e83SPiotr Jasiukajtis	mov	%o1,tmp_px	/* tmp_px = %o1 */
1218*25c28e83SPiotr Jasiukajtis	mov	%i3,tmp_py	/* tmp_py = %i3 */
1219*25c28e83SPiotr Jasiukajtis
1220*25c28e83SPiotr Jasiukajtis	sub	counter,5,tmp_counter	/* tmp_counter = counter - 5 */
1221*25c28e83SPiotr Jasiukajtis	ba	.cont16	/* unconditional return to .cont16 */
1222*25c28e83SPiotr Jasiukajtis	mov	5,counter	/* delay slot: counter = 5 */
1223*25c28e83SPiotr Jasiukajtis
1224*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update17: zero %f50 and %f34 and clamp counter to at most 5, then
 * continue at .cont17.  When counter exceeds 5, %o1 and %i3 are copied to
 * tmp_px and tmp_py and the excess (counter - 5) is stored in tmp_counter
 * before counter is set to 5.
 * NOTE(review): counter, tmp_px, tmp_py and tmp_counter are defined
 * outside this view; presumably they are register aliases and the tmp_*
 * values describe where a later pass resumes -- confirm against the code
 * around .cont17.
 */
1225*25c28e83SPiotr Jasiukajtis.update17:
1226*25c28e83SPiotr Jasiukajtis	fzero	%f50	/* %f50 = 0 */
1227*25c28e83SPiotr Jasiukajtis	cmp	counter,5	/* set condition codes from counter - 5 */
1228*25c28e83SPiotr Jasiukajtis	ble	.cont17	/* signed counter <= 5: no clamping needed */
1229*25c28e83SPiotr Jasiukajtis	fzero	%f34	/* delay slot, not annulled: %f34 = 0 on both paths */
1230*25c28e83SPiotr Jasiukajtis
1231*25c28e83SPiotr Jasiukajtis	mov	%o1,tmp_px	/* tmp_px = %o1 */
1232*25c28e83SPiotr Jasiukajtis	mov	%i3,tmp_py	/* tmp_py = %i3 */
1233*25c28e83SPiotr Jasiukajtis
1234*25c28e83SPiotr Jasiukajtis	sub	counter,5,tmp_counter	/* tmp_counter = counter - 5 */
1235*25c28e83SPiotr Jasiukajtis	ba	.cont17	/* unconditional return to .cont17 */
1236*25c28e83SPiotr Jasiukajtis	mov	5,counter	/* delay slot: counter = 5 */
1237*25c28e83SPiotr Jasiukajtis
1238*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .exit: return from __vhypot to its caller.
 */
1239*25c28e83SPiotr Jasiukajtis.exit:
1240*25c28e83SPiotr Jasiukajtis	ret	/* jump back to the caller */
1241*25c28e83SPiotr Jasiukajtis	restore	/* delay slot: restore the caller's register window */
1242*25c28e83SPiotr Jasiukajtis	SET_SIZE(__vhypot)	/* macro defined outside this view; presumably records the size of the __vhypot symbol -- confirm */
1243*25c28e83SPiotr Jasiukajtis
1244