1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vrhypot.S"
30
31#include "libm.h"
32
33	RO_DATA
34	.align	64
35
/*
 * .CONST_TBL - read-only constants used by __vrhypot.
 *
 * Layout:
 *   bytes 0 .. 511   128 32-bit words: reciprocal seed table.
 *                    Entry i (i = 0 .. 127) is the high 32-bit word of the
 *                    double 2**1023 divided by (1 + i/128); for example
 *                    entry 0 is 0x7fe00000 (2**1023), entry 32 is
 *                    0x7fd99999 (2**1023 over 1.25) and entry 64 is
 *                    0x7fd55555 (2**1023 over 1.5).
 *                    The code indexes the table with the byte offset
 *                    iarr = (high word of dres >> 11) & 0x1fc, i.e. the
 *                    seven leading mantissa bits of dres times 4, and then
 *                    subtracts the exponent field of dres (dres & DA1)
 *                    with fpsub32.  The result dd approximates 1/dres and
 *                    is refined by the dd *= (2 - dd*dres) steps listed
 *                    in the algorithm description.
 *   bytes 512 ..     nine 8-byte constants, each written as a
 *                    (high word, low word) pair.  Byte offsets relative
 *                    to TBL+TBL_SHIFT:
 *                       +0   D2ON36    2**36
 *                       +8   DA0       mask 0xffffff0000000000 (sign,
 *                                      exponent, top 12 mantissa bits)
 *                       +16  DA1       mask 0xfff0000000000000 (sign and
 *                                      exponent field)
 *                       +24  DONE      1.0
 *                       +32  DTWO      2.0
 *                       +40  D2ON1022  2**1022
 *                       +48  D2ONM52   2**-52
 *                       +56  D2ON51    2**51
 *                       +64  mask 0x0007ffffffffffff
 *                    The first five are loaded into FP registers at
 *                    function entry.  The last four appear only in the
 *                    small-argument branch of the algorithm description;
 *                    the instructions that load them are outside the part
 *                    of the file reviewed here.
 *
 * TBL_SHIFT (512) must stay equal to the size of the 128-word table.
 */
36.CONST_TBL:
37	.word	0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
38	.word	0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
39	.word	0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
40	.word	0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
41	.word	0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
42	.word	0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
43	.word	0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
44	.word	0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
45	.word	0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
46	.word	0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
47	.word	0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
48	.word	0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
49	.word	0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
50	.word	0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
51	.word	0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
52	.word	0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
53	.word	0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
54	.word	0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
55	.word	0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
56	.word	0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
57	.word	0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
58	.word	0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
59	.word	0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
60	.word	0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
61	.word	0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
62	.word	0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
63	.word	0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
64	.word	0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
65	.word	0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
66	.word	0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
67	.word	0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
68	.word	0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,
69
/* 8-byte constants; the first one sits at TBL+TBL_SHIFT. */
70	.word	0x42300000, 0		! D2ON36 = 2**36
71	.word	0xffffff00, 0		! DA0
72	.word	0xfff00000, 0		! DA1
73	.word	0x3ff00000, 0		! DONE = 1.0
74	.word	0x40000000, 0		! DTWO = 2.0
75	.word	0x7fd00000, 0		! D2ON1022
76	.word	0x3cb00000, 0		! D2ONM52
77	.word	0x43200000, 0		! D2ON51
78	.word	0x0007ffff, 0xffffffff	! 0x0007ffffffffffff
79
/*
 * Strides of the x, y and z vectors.  The entry code shifts each one left
 * by 3, so inside the loop they are byte distances between consecutive
 * 8-byte elements.  stridex and stridey come from %i2 and %i4; stridez is
 * loaded from the caller's frame.
 */
80#define stridex		%l2
81#define stridey		%l3
82#define stridez		%l5
83
/*
 * Byte offset from .CONST_TBL to the 8-byte constants that follow the
 * 128-word (512-byte) seed table.
 */
84#define TBL_SHIFT	512
85
/*
 * TBL     - address of .CONST_TBL, set up by PIC_SET at entry.
 * counter - element counter, reloaded from tmp_counter at .begin; the
 *           loop leaves through .exit when it is <= 0.
 */
86#define TBL		%l1
87#define counter		%l4
88
/*
 * Integer constants kept in registers and named after their values.
 * They are used to clear the sign bit of a high word (_0x7fffffff) and
 * to compare it against the Inf/NaN limit (_0x7ff00000) and the
 * smallest normal exponent (_0x00100000).
 */
89#define _0x7ff00000	%l0
90#define _0x00100000	%o5
91#define _0x7fffffff	%l6
92
/*
 * Double-precision constants preloaded at entry from TBL+TBL_SHIFT
 * (byte offsets 0, 8, 16, 24 and 32 for D2ON36, DA0, DA1, DONE, DTWO).
 */
93#define D2ON36		%f4
94#define DTWO		%f6
95#define DONE		%f8
96#define DA0		%f58
97#define DA1		%f56
98
/*
 * Sixteen 8-byte scratch slots below the frame pointer, addressed as
 * [%fp+dtmpN].  In the visible part of the loop they carry the scale
 * factors (scl0) of the elements in flight: written with stx from an
 * integer register and read back with ldd into an FP register.
 */
99#define dtmp0		STACK_BIAS-0x80
100#define dtmp1		STACK_BIAS-0x78
101#define dtmp2		STACK_BIAS-0x70
102#define dtmp3		STACK_BIAS-0x68
103#define dtmp4		STACK_BIAS-0x60
104#define dtmp5		STACK_BIAS-0x58
105#define dtmp6		STACK_BIAS-0x50
106#define dtmp7		STACK_BIAS-0x48
107#define dtmp8		STACK_BIAS-0x40
108#define dtmp9		STACK_BIAS-0x38
109#define dtmp10		STACK_BIAS-0x30
110#define dtmp11		STACK_BIAS-0x28
111#define dtmp12		STACK_BIAS-0x20
112#define dtmp13		STACK_BIAS-0x18
113#define dtmp14		STACK_BIAS-0x10
114#define dtmp15		STACK_BIAS-0x08
115
/*
 * ftmp0       - 4-byte slot used to move the high word of dres from an
 *               FP register to an integer register (st, then ld).
 * tmp_px      - saved x pointer, reloaded at .begin.
 * tmp_py      - saved y pointer, reloaded at .begin.
 * tmp_counter - saved element count; .begin loads it into counter and
 *               then stores zero back.
 */
116#define ftmp0		STACK_BIAS-0x100
117#define tmp_px		STACK_BIAS-0x98
118#define tmp_py		STACK_BIAS-0x90
119#define tmp_counter	STACK_BIAS-0x88
120
121! sizeof temp storage - must be a multiple of 16 for V9
/*
 * Extra frame bytes reserved by the save at entry (on top of
 * SA(MINFRAME)); 0x100 reaches down to the lowest slot above, ftmp0 at
 * STACK_BIAS-0x100.
 */
122#define tmps		0x100
123
124!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
125!      !!!!!   algorithm   !!!!!
126!  hx0 = *(int*)px;
127!  hy0 = *(int*)py;
128!
129!  ((float*)&x0)[0] = ((float*)px)[0];
130!  ((float*)&x0)[1] = ((float*)px)[1];
131!  ((float*)&y0)[0] = ((float*)py)[0];
132!  ((float*)&y0)[1] = ((float*)py)[1];
133!
134!  hx0 &= 0x7fffffff;
135!  hy0 &= 0x7fffffff;
136!
137!  diff0 = hy0 - hx0;
138!  j0 = diff0 >> 31;
139!  j0 &= diff0;
140!  j0 = hy0 - j0;
141!  j0 &= 0x7ff00000;
142!
143!  j0 = 0x7ff00000 - j0;
144!  ll = (long long)j0 << 32;
145!  *(long long*)&scl0 = ll;
146!
147!  if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 )
148!  {
149!    lx = ((int*)px)[1];
150!    ly = ((int*)py)[1];
151!
152!    if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0;
153!    else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0;
154!    else res0 = fabs(x0) * fabs(y0);
155!
156!    ((float*)pz)[0] = ((float*)&res0)[0];
157!    ((float*)pz)[1] = ((float*)&res0)[1];
158!
159!    px += stridex;
160!    py += stridey;
161!    pz += stridez;
162!    continue;
163!  }
164!  if ( hx0 <  0x00100000 && hy0 <  0x00100000 )
165!  {
166!    lx = ((int*)px)[1];
167!    ly = ((int*)py)[1];
168!    ii = hx0 | hy0;
169!    ii |= lx;
170!    ii |= ly;
171!    if ( ii == 0 )
172!    {
173!      res0 = 1.0 / 0.0;
174!      ((float*)pz)[0] = ((float*)&res0)[0];
175!      ((float*)pz)[1] = ((float*)&res0)[1];
176!
177!      px += stridex;
178!      py += stridey;
179!      pz += stridez;
180!      continue;
181!    }
182!    x0 = fabs(x0);
183!    y0 = fabs(y0);
184!    if ( hx0 < 0x00080000 )
185!    {
186!      x0 = *(long long*)&x0;
187!    }
188!    else
189!    {
190!      ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
191!      x0 = vis_fand(x0, dtmp0);
192!      x0 = *(long long*)&x0;
193!      x0 += D2ON51;
194!    }
195!    x0 *= D2ONM52;
196!    if ( hy0 < 0x00080000 )
197!    {
198!      y0 = *(long long*)&y0;
199!    }
200!    else
201!    {
202!      ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
203!      y0 = vis_fand(y0, dtmp0);
204!      y0 = *(long long*)&y0;
205!      y0 += D2ON51;
206!    }
207!    y0 *= D2ONM52;
208!    *(long long*)&scl0 = 0x7fd0000000000000ULL;
209!  }
210!  else
211!  {
212!    x0 *= scl0;
213!    y0 *= scl0;
214!  }
215!
216!  x_hi0 = x0 + D2ON36;
217!  y_hi0 = y0 + D2ON36;
218!  x_hi0 -= D2ON36;
219!  y_hi0 -= D2ON36;
220!  x_lo0 = x0 - x_hi0;
221!  y_lo0 = y0 - y_hi0;
222!  res0_hi = x_hi0 * x_hi0;
223!  dtmp0 = y_hi0 * y_hi0;
224!  res0_hi += dtmp0;
225!  res0_lo = x0 + x_hi0;
226!  res0_lo *= x_lo0;
227!  dtmp1 = y0 + y_hi0;
228!  dtmp1 *= y_lo0;
229!  res0_lo += dtmp1;
230!
231!  dres = res0_hi + res0_lo;
232!  dexp0 = vis_fand(dres,DA1);
233!  iarr = ((int*)&dres)[0];
234!
235!  iarr >>= 11;
236!  iarr &= 0x1fc;
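237!  dtmp0 = ((double*)((char*)dll1 + iarr))[0];
!  (dll1 is the table at .CONST_TBL; the code fetches only the 32-bit
!   word at this offset, into the high half of dtmp0)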
238!  dd = vis_fpsub32(dtmp0, dexp0);
239!
240!  dtmp0 = dd * dres;
241!  dtmp0 = DTWO - dtmp0;
242!  dd *= dtmp0;
243!  dtmp1 = dd * dres;
244!  dtmp1 = DTWO - dtmp1;
245!  dd *= dtmp1;
246!  dtmp2 = dd * dres;
247!  dtmp2 = DTWO - dtmp2;
248!  dres = dd * dtmp2;
249!
250!  res0 = vis_fand(dres,DA0);
251!
252!  dtmp0 = res0_hi * res0;
253!  dtmp0 = DONE - dtmp0;
254!  dtmp1 = res0_lo * res0;
255!  dtmp0 -= dtmp1;
256!  dtmp0 *= dres;
257!  res0 += dtmp0;
258!
259!  res0 = sqrt ( res0 );
260!
261!  res0 = scl0 * res0;
262!
263!  ((float*)pz)[0] = ((float*)&res0)[0];
264!  ((float*)pz)[1] = ((float*)&res0)[1];
265!
266!  px += stridex;
267!  py += stridey;
268!  pz += stridez;
269!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
270
271	ENTRY(__vrhypot)
272	save	%sp,-SA(MINFRAME)-tmps,%sp
273	PIC_SETUP(l7)
274	PIC_SET(l7,.CONST_TBL,l1)
275	wr	%g0,0x82,%asi
276
277#ifdef __sparcv9
278	ldx	[%fp+STACK_BIAS+176],stridez
279#else
280	ld	[%fp+STACK_BIAS+92],stridez
281#endif
282
283	sll	%i2,3,stridex
284	sethi	%hi(0x7ff00000),_0x7ff00000
285	st	%i0,[%fp+tmp_counter]
286
287	sll	%i4,3,stridey
288	sethi	%hi(0x00100000),_0x00100000
289	stx	%i1,[%fp+tmp_px]
290
291	sll	stridez,3,stridez
292	sethi	%hi(0x7ffffc00),_0x7fffffff
293	stx	%i3,[%fp+tmp_py]
294
295	ldd	[TBL+TBL_SHIFT],D2ON36
296	add	_0x7fffffff,1023,_0x7fffffff
297
298	ldd	[TBL+TBL_SHIFT+8],DA0
299
300	ldd	[TBL+TBL_SHIFT+16],DA1
301
302	ldd	[TBL+TBL_SHIFT+24],DONE
303
304	ldd	[TBL+TBL_SHIFT+32],DTWO
305
306.begin:
307	ld	[%fp+tmp_counter],counter
308	ldx	[%fp+tmp_px],%i4
309	ldx	[%fp+tmp_py],%i3
310	st	%g0,[%fp+tmp_counter]
311.begin1:
312	cmp	counter,0
313	ble,pn	%icc,.exit
314
315	lda	[%i4]0x82,%o1		! (7_0) hx0 = *(int*)px;
316	add	%i4,stridex,%i1
317
318	lda	[%i3]0x82,%o4		! (7_0) hy0 = *(int*)py;
319	add	%i3,stridey,%i0		! py += stridey
320
321	and	%o1,_0x7fffffff,%o7	! (7_0) hx0 &= 0x7fffffff;
322
323	cmp	%o7,_0x7ff00000		! (7_0) hx0 ? 0x7ff00000
324	bge,pn	%icc,.spec0		! (7_0) if ( hx0 >= 0x7ff00000 )
325	and	%o4,_0x7fffffff,%l7	! (7_0) hy0 &= 0x7fffffff;
326
327	cmp	%l7,_0x7ff00000		! (7_0) hy0 ? 0x7ff00000
328	bge,pn	%icc,.spec0		! (7_0) if ( hy0 >= 0x7ff00000 )
329	sub	%l7,%o7,%o1		! (7_0) diff0 = hy0 - hx0;
330
331	sra	%o1,31,%o3		! (7_0) j0 = diff0 >> 31;
332	cmp	%o7,_0x00100000		! (7_0) hx0 ? 0x00100000
333	bl,pn	%icc,.spec1		! (7_0) if ( hx0 < 0x00100000 )
334
335	and	%o1,%o3,%o1		! (7_0) j0 &= diff0;
336.cont_spec0:
337	sub	%l7,%o1,%o4		! (7_0) j0 = hy0 - j0;
338
339	and	%o4,%l0,%o4		! (7_0) j0 &= 0x7ff00000;
340
341	sub	%l0,%o4,%g1		! (7_0) j0 = 0x7ff00000 - j0;
342
343	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
344
345	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
346
347	stx	%g1,[%fp+dtmp0]		! (7_1) *(long long*)&scl0 = ll;
348.cont_spec1:
349	lda	[%i1]0x82,%o1		! (0_0) hx0 = *(int*)px;
350	mov	%i1,%i2
351
352	lda	[%i0]0x82,%o4		! (0_0) hy0 = *(int*)py;
353
354	and	%o1,_0x7fffffff,%o7	! (0_0) hx0 &= 0x7fffffff;
355	mov	%i0,%o0
356
357	cmp	%o7,_0x7ff00000		! (0_0) hx0 ? 0x7ff00000
358	bge,pn	%icc,.update0		! (0_0) if ( hx0 >= 0x7ff00000 )
359	and	%o4,_0x7fffffff,%l7	! (0_0) hy0 &= 0x7fffffff;
360
361	cmp	%l7,_0x7ff00000		! (0_0) hy0 ? 0x7ff00000
362	sub	%l7,%o7,%o1		! (0_0) diff0 = hy0 - hx0;
363	bge,pn	%icc,.update0		! (0_0) if ( hy0 >= 0x7ff00000 )
364	sra	%o1,31,%o3		! (0_0) j0 = diff0 >> 31;
365
366	cmp	%o7,_0x00100000		! (0_0) hx0 ? 0x00100000
367
368	and	%o1,%o3,%o1		! (0_0) j0 &= diff0;
369	bl,pn	%icc,.update1		! (0_0) if ( hx0 < 0x00100000 )
370	sub	%l7,%o1,%o4		! (0_0) j0 = hy0 - j0;
371.cont0:
372	and	%o4,%l0,%o4		! (0_0) j0 &= 0x7ff00000;
373
374	sub	%l0,%o4,%o4		! (0_0) j0 = 0x7ff00000 - j0;
375.cont1:
376	sllx	%o4,32,%o4		! (0_0) ll = (long long)j0 << 32;
377	stx	%o4,[%fp+dtmp1]		! (0_0) *(long long*)&scl0 = ll;
378
379	ldd	[%fp+dtmp15],%f62	! (7_1) *(long long*)&scl0 = ll;
380
381	lda	[%i4]%asi,%f10		! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
382
383	lda	[%i4+4]%asi,%f11	! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
384
385	lda	[%i3]%asi,%f12		! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
386
387	add	%i1,stridex,%i4		! px += stridex
388	lda	[%i3+4]%asi,%f13	! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
389
390	fmuld	%f10,%f62,%f10		! (7_1) x0 *= scl0;
391	add	%i4,stridex,%i1		! px += stridex
392
393	fmuld	%f12,%f62,%f60		! (7_1) y0 *= scl0;
394
395	lda	[%i4]0x82,%o1		! (1_0) hx0 = *(int*)px;
396
397	add	%i0,stridey,%i3		! py += stridey
398	faddd	%f10,D2ON36,%f46	! (7_1) x_hi0 = x0 + D2ON36;
399
400	lda	[%i3]0x82,%g1		! (1_0) hy0 = *(int*)py;
401	add	%i3,stridey,%i0		! py += stridey
402	faddd	%f60,D2ON36,%f50	! (7_1) y_hi0 = y0 + D2ON36;
403
404	and	%o1,_0x7fffffff,%o7	! (1_0) hx0 &= 0x7fffffff;
405
406	cmp	%o7,_0x7ff00000		! (1_0) hx0 ? 0x7ff00000
407	stx	%o4,[%fp+dtmp2]		! (0_0) *(long long*)&scl0 = ll;
408
409	and	%g1,_0x7fffffff,%l7	! (1_0) hy0 &= 0x7fffffff;
410	bge,pn	%icc,.update2		! (1_0) if ( hx0 >= 0x7ff00000 )
411	fsubd	%f46,D2ON36,%f20	! (7_1) x_hi0 -= D2ON36;
412
413	cmp	%l7,_0x7ff00000		! (1_0) hy0 ? 0x7ff00000
414	sub	%l7,%o7,%o1		! (1_0) diff0 = hy0 - hx0;
415	bge,pn	%icc,.update3		! (1_0) if ( hy0 >= 0x7ff00000 )
416	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;
417
418	sra	%o1,31,%o3		! (1_0) j0 = diff0 >> 31;
419
420	and	%o1,%o3,%o1		! (1_0) j0 &= diff0;
421
422	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
423	sub	%l7,%o1,%o4		! (1_0) j0 = hy0 - j0;
424	cmp	%o7,_0x00100000		! (1_0) hx0 ? 0x00100000
425	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
426
427	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
428	and	%o4,%l0,%o4		! (1_0) j0 &= 0x7ff00000;
429	bl,pn	%icc,.update4		! (1_0) if ( hx0 < 0x00100000 )
430	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
431
432	sub	%l0,%o4,%o4		! (1_0) j0 = 0x7ff00000 - j0;
433.cont4:
434	sllx	%o4,32,%o4		! (1_0) ll = (long long)j0 << 32;
435	stx	%o4,[%fp+dtmp3]		! (1_0) *(long long*)&scl0 = ll;
436	faddd	%f60,%f54,%f50		! (7_1) dtmp1 = y0 + y_hi0;
437
438	fsubd	%f60,%f54,%f12		! (7_1) y_lo0 = y0 - y_hi0;
439
440	fmuld	%f62,%f0,%f0		! (7_1) res0_lo *= x_lo0;
441	ldd	[%fp+dtmp1],%f62	! (0_0) *(long long*)&scl0 = ll;
442	faddd	%f2,%f46,%f44		! (7_1) res0_hi += dtmp0;
443
444	lda	[%i2]%asi,%f10		! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
445
446	lda	[%i2+4]%asi,%f11	! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
447
448	fmuld	%f50,%f12,%f26		! (7_1) dtmp1 *= y_lo0;
449	lda	[%o0]%asi,%f12		! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
450
451	lda	[%o0+4]%asi,%f13	! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
452
453	fmuld	%f10,%f62,%f10		! (0_0) x0 *= scl0;
454
455	fmuld	%f12,%f62,%f60		! (0_0) y0 *= scl0;
456	faddd	%f0,%f26,%f38		! (7_1) res0_lo += dtmp1;
457
458	lda	[%i1]0x82,%o1		! (2_0) hx0 = *(int*)px;
459	mov	%i1,%i2
460
461	faddd	%f10,D2ON36,%f46	! (0_0) x_hi0 = x0 + D2ON36;
462
463	lda	[%i0]0x82,%g1		! (2_0) hy0 = *(int*)py;
464	mov	%i0,%o0
465	faddd	%f60,D2ON36,%f12	! (0_0) y_hi0 = y0 + D2ON36;
466
467	faddd	%f44,%f38,%f14		! (7_1) dres = res0_hi + res0_lo;
468	and	%o1,_0x7fffffff,%o7	! (2_0) hx0 &= 0x7fffffff;
469
470	cmp	%o7,_0x7ff00000		! (2_0) hx0 ? 0x7ff00000
471	bge,pn	%icc,.update5		! (2_0) if ( hx0 >= 0x7ff00000 )
472	stx	%o4,[%fp+dtmp4]		! (1_0) *(long long*)&scl0 = ll;
473
474	and	%g1,_0x7fffffff,%l7	! (2_0) hx0 &= 0x7fffffff;
475	st	%f14,[%fp+ftmp0]	! (7_1) iarr = ((int*)&dres)[0];
476	fsubd	%f46,D2ON36,%f20	! (0_0) x_hi0 -= D2ON36;
477
478	sub	%l7,%o7,%o1		! (2_0) diff0 = hy0 - hx0;
479	cmp	%l7,_0x7ff00000		! (2_0) hy0 ? 0x7ff00000
480	bge,pn	%icc,.update6		! (2_0) if ( hy0 >= 0x7ff00000 )
481	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;
482
483	sra	%o1,31,%o3		! (2_0) j0 = diff0 >> 31;
484
485	and	%o1,%o3,%o1		! (2_0) j0 &= diff0;
486
487	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
488	cmp	%o7,_0x00100000		! (2_0) hx0 ? 0x00100000
489	sub	%l7,%o1,%o4		! (2_0) j0 = hy0 - j0;
490	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
491
492	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
493	and	%o4,%l0,%o4		! (2_0) j0 &= 0x7ff00000;
494	bl,pn	%icc,.update7		! (2_0) if ( hx0 < 0x00100000 )
495	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
496.cont7:
497	sub	%l0,%o4,%g1		! (2_0) j0 = 0x7ff00000 - j0;
498
499	sllx	%g1,32,%g1		! (2_0) ll = (long long)j0 << 32;
500.cont8:
501	stx	%g1,[%fp+dtmp5]		! (2_0) *(long long*)&scl0 = ll;
502	faddd	%f60,%f54,%f50		! (0_0) dtmp1 = y0 + y_hi0;
503
504	fsubd	%f60,%f54,%f12		! (0_0) y_lo0 = y0 - y_hi0;
505
506	fmuld	%f62,%f0,%f0		! (0_0) res0_lo *= x_lo0;
507	ldd	[%fp+dtmp3],%f62	! (1_0) *(long long*)&scl0 = ll;
508	faddd	%f2,%f46,%f32		! (0_0) res0_hi += dtmp0;
509
510	lda	[%i4]%asi,%f10		! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
511
512	lda	[%i4+4]%asi,%f11	! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
513
514	fmuld	%f50,%f12,%f28		! (0_0) dtmp1 *= y_lo0;
515	lda	[%i3]%asi,%f12		! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
516
517	add	%i1,stridex,%i4		! px += stridex
518	lda	[%i3+4]%asi,%f13	! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
519
520	ld	[%fp+ftmp0],%o2		! (7_1) iarr = ((int*)&dres)[0];
521	add	%i4,stridex,%i1		! px += stridex
522	fand	%f14,DA1,%f2		! (7_1) dexp0 = vis_fand(dres,DA1);
523
524	fmuld	%f10,%f62,%f10		! (1_0) x0 *= scl0;
525
526	fmuld	%f12,%f62,%f60		! (1_0) y0 *= scl0;
527	sra	%o2,11,%i3		! (7_1) iarr >>= 11;
528	faddd	%f0,%f28,%f36		! (0_0) res0_lo += dtmp1;
529
530	and	%i3,0x1fc,%i3		! (7_1) iarr &= 0x1fc;
531
532	add	%i3,TBL,%o4		! (7_1) (char*)dll1 + iarr
533	lda	[%i4]0x82,%o1		! (3_0) hx0 = *(int*)px;
534
535	add	%i0,stridey,%i3		! py += stridey
536	ld	[%o4],%f26		! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
537	faddd	%f10,D2ON36,%f46	! (1_0) x_hi0 = x0 + D2ON36;
538
539	lda	[%i3]0x82,%o4		! (3_0) hy0 = *(int*)py;
540	add	%i3,stridey,%i0		! py += stridey
541	faddd	%f60,D2ON36,%f12	! (1_0) y_hi0 = y0 + D2ON36;
542
543	faddd	%f32,%f36,%f22		! (0_0) dres = res0_hi + res0_lo;
544	and	%o1,_0x7fffffff,%o7	! (3_0) hx0 &= 0x7fffffff;
545
546	cmp	%o7,_0x7ff00000		! (3_0) hx0 ? 0x7ff00000
547	stx	%g1,[%fp+dtmp6]		! (2_0) *(long long*)&scl0 = ll;
548	bge,pn	%icc,.update9		! (3_0) if ( hx0 >= 0x7ff00000 )
549	fpsub32	%f26,%f2,%f26		! (7_1) dd = vis_fpsub32(dtmp0, dexp0);
550
551	and	%o4,_0x7fffffff,%l7	! (3_0) hy0 &= 0x7fffffff;
552	st	%f22,[%fp+ftmp0]	! (0_0) iarr = ((int*)&dres)[0];
553	fsubd	%f46,D2ON36,%f20	! (1_0) x_hi0 -= D2ON36;
554
555	sub	%l7,%o7,%o1		! (3_0) diff0 = hy0 - hx0;
556	cmp	%l7,_0x7ff00000		! (3_0) hy0 ? 0x7ff00000
557	bge,pn	%icc,.update10		! (3_0) if ( hy0 >= 0x7ff00000 )
558	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;
559
560	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres;
561	sra	%o1,31,%o3		! (3_0) j0 = diff0 >> 31;
562
563	and	%o1,%o3,%o1		! (3_0) j0 &= diff0;
564
565	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
566	cmp	%o7,_0x00100000		! (3_0) hx0 ? 0x00100000
567	sub	%l7,%o1,%o4		! (3_0) j0 = hy0 - j0;
568	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
569
570	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
571	and	%o4,%l0,%o4		! (3_0) j0 &= 0x7ff00000;
572	bl,pn	%icc,.update11		! (3_0) if ( hx0 < 0x00100000 )
573	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
574.cont11:
575	sub	%l0,%o4,%g1		! (3_0) j0 = 0x7ff00000 - j0;
576	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
577.cont12:
578	sllx	%g1,32,%g1		! (3_0) ll = (long long)j0 << 32;
579	stx	%g1,[%fp+dtmp7]		! (3_0) *(long long*)&scl0 = ll;
580	faddd	%f60,%f54,%f50		! (1_0) dtmp1 = y0 + y_hi0;
581
582	fsubd	%f60,%f54,%f12		! (1_0) y_lo0 = y0 - y_hi0
583
584	fmuld	%f62,%f0,%f0		! (1_0) res0_lo *= x_lo0;
585	ldd	[%fp+dtmp5],%f62	! (2_0) *(long long*)&scl0 = ll;
586	faddd	%f2,%f46,%f42		! (1_0) res0_hi += dtmp0;
587
588	lda	[%i2]%asi,%f10		! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
589	fmuld	%f26,%f20,%f54		! (7_1) dd *= dtmp0;
590
591	lda	[%i2+4]%asi,%f11	! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
592
593	fmuld	%f50,%f12,%f26		! (1_0) dtmp1 *= y_lo0;
594	lda	[%o0]%asi,%f12		! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
595
596	lda	[%o0+4]%asi,%f13	! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
597
598	fmuld	%f54,%f14,%f50		! (7_1) dtmp1 = dd * dres;
599	ld	[%fp+ftmp0],%o2		! (0_0) iarr = ((int*)&dres)[0];
600	fand	%f22,DA1,%f2		! (0_0) dexp0 = vis_fand(dres,DA1);
601
602	fmuld	%f10,%f62,%f10		! (2_0) x0 *= scl0;
603
604	fmuld	%f12,%f62,%f60		! (2_0) y0 *= scl0;
605	sra	%o2,11,%o4		! (0_0) iarr >>= 11;
606	faddd	%f0,%f26,%f34		! (1_0) res0_lo += dtmp1;
607
608	and	%o4,0x1fc,%o4		! (0_0) iarr &= 0x1fc;
609
610	add	%o4,TBL,%o4		! (0_0) (char*)dll1 + iarr
611	mov	%i1,%i2
612	lda	[%i1]0x82,%o1		! (4_0) hx0 = *(int*)px;
613	fsubd	DTWO,%f50,%f20		! (7_1) dtmp1 = DTWO - dtmp1;
614
615	ld	[%o4],%f28		! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
616	faddd	%f10,D2ON36,%f46	! (2_0) x_hi0 = x0 + D2ON36;
617
618	lda	[%i0]0x82,%o4		! (4_0) hy0 = *(int*)py;
619	mov	%i0,%o0
620	faddd	%f60,D2ON36,%f50	! (2_0) y_hi0 = y0 + D2ON36;
621
622	and	%o1,_0x7fffffff,%o7	! (4_0) hx0 &= 0x7fffffff;
623	faddd	%f42,%f34,%f18		! (1_0) dres = res0_hi + res0_lo;
624
625	fmuld	%f54,%f20,%f16		! (7_1) dd *= dtmp1;
626	cmp	%o7,_0x7ff00000		! (4_0) hx0 ? 0x7ff00000
627	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
628	fpsub32	%f28,%f2,%f28		! (0_0) dd = vis_fpsub32(dtmp0, dexp0);
629
630	and	%o4,_0x7fffffff,%l7	! (4_0) hy0 &= 0x7fffffff;
631	bge,pn	%icc,.update13		! (4_0) if ( hx0 >= 0x7ff00000 )
632	st	%f18,[%fp+ftmp0]	! (1_0) iarr = ((int*)&dres)[0];
633	fsubd	%f46,D2ON36,%f20	! (2_0) x_hi0 -= D2ON36;
634
635	sub	%l7,%o7,%o1		! (4_0) diff0 = hy0 - hx0;
636	cmp	%l7,_0x7ff00000		! (4_0) hy0 ? 0x7ff00000
637	bge,pn	%icc,.update14		! (4_0) if ( hy0 >= 0x7ff00000 )
638	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;
639
640	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
641	sra	%o1,31,%o3		! (4_0) j0 = diff0 >> 31;
642
643	and	%o1,%o3,%o1		! (4_0) j0 &= diff0;
644
645	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
646	sub	%l7,%o1,%o4		! (4_0) j0 = hy0 - j0;
647	cmp	%o7,_0x00100000		! (4_0) hx0 ? 0x00100000
648	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
649
650	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
651	and	%o4,%l0,%o4		! (4_0) j0 &= 0x7ff00000;
652	bl,pn	%icc,.update15		! (4_0) if ( hx0 < 0x00100000 )
653	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
654.cont15:
655	sub	%l0,%o4,%g1		! (4_0) j0 = 0x7ff00000 - j0;
656	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
657.cont16:
658	fmuld	%f16,%f14,%f14		! (7_1) dtmp2 = dd * dres;
659	sllx	%g1,32,%g1		! (4_0) ll = (long long)j0 << 32;
660	stx	%g1,[%fp+dtmp9]		! (4_0) *(long long*)&scl0 = ll;
661	faddd	%f60,%f54,%f50		! (2_0) dtmp1 = y0 + y_hi0;
662
663	fsubd	%f60,%f54,%f12		! (2_0) y_lo0 = y0 - y_hi0;
664
665	fmuld	%f62,%f0,%f0		! (2_0) res0_lo *= x_lo0;
666	ldd	[%fp+dtmp7],%f62	! (3_0) *(long long*)&scl0 = ll;
667	faddd	%f2,%f46,%f30		! (2_0) res0_hi += dtmp0;
668
669	lda	[%i4]%asi,%f10		! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
670	fmuld	%f28,%f20,%f54		! (0_0) dd *= dtmp0;
671
672	lda	[%i4+4]%asi,%f11	! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
673
674	fmuld	%f50,%f12,%f28		! (2_0) dtmp1 *= y_lo0;
675	lda	[%i3]%asi,%f12		! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
676	fsubd	DTWO,%f14,%f20		! (7_1) dtmp2 = DTWO - dtmp2;
677
678	lda	[%i3+4]%asi,%f13	! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
679	add	%i1,stridex,%i4		! px += stridex
680
681	fmuld	%f54,%f22,%f50		! (0_0) dtmp1 = dd * dres;
682	ld	[%fp+ftmp0],%o2		! (1_0) iarr = ((int*)&dres)[0];
683	add	%i4,stridex,%i1		! px += stridex
684	fand	%f18,DA1,%f2		! (1_0) dexp0 = vis_fand(dres,DA1);
685
686	fmuld	%f10,%f62,%f10		! (3_0) x0 *= scl0;
687
688	fmuld	%f12,%f62,%f60		! (3_0) y0 *= scl0;
689	sra	%o2,11,%i3		! (1_0) iarr >>= 11;
690	faddd	%f0,%f28,%f40		! (2_0) res0_lo += dtmp1;
691
692	and	%i3,0x1fc,%i3		! (1_0) iarr &= 0x1fc;
693	fmuld	%f16,%f20,%f28		! (7_1) dres = dd * dtmp2;
694
695	add	%i3,TBL,%o4		! (1_0) (char*)dll1 + iarr
696	lda	[%i4]0x82,%o1		! (5_0) hx0 = *(int*)px;
697	fsubd	DTWO,%f50,%f20		! (0_0) dtmp1 = DTWO - dtmp1;
698
699	add	%i0,stridey,%i3		! py += stridey
700	ld	[%o4],%f26		! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
701	faddd	%f10,D2ON36,%f46	! (3_0) x_hi0 = x0 + D2ON36;
702
703	lda	[%i3]0x82,%o4		! (5_0) hy0 = *(int*)py;
704	add	%i3,stridey,%i0		! py += stridey
705	faddd	%f60,D2ON36,%f50	! (3_0) y_hi0 = y0 + D2ON36;
706
707	and	%o1,_0x7fffffff,%o7	! (5_0) hx0 &= 0x7fffffff;
708	faddd	%f30,%f40,%f14		! (2_0) dres = res0_hi + res0_lo;
709
710	fmuld	%f54,%f20,%f24		! (0_0) dd *= dtmp1;
711	cmp	%o7,_0x7ff00000		! (5_0) hx0 ? 0x7ff00000
712	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
713	fpsub32	%f26,%f2,%f26		! (1_0) dd = vis_fpsub32(dtmp0, dexp0);
714
715	and	%o4,_0x7fffffff,%l7	! (5_0) hy0 &= 0x7fffffff;
716	st	%f14,[%fp+ftmp0]	! (2_0) iarr = ((int*)&dres)[0];
717	bge,pn	%icc,.update17		! (5_0) if ( hx0 >= 0x7ff00000 )
718	fsubd	%f46,D2ON36,%f20	! (3_0) x_hi0 -= D2ON36;
719
720	sub	%l7,%o7,%o1		! (5_0) diff0 = hy0 - hx0;
721	cmp	%l7,_0x7ff00000		! (5_0) hy0 ? 0x7ff00000
722	bge,pn	%icc,.update18		! (5_0) if ( hy0 >= 0x7ff00000 )
723	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;
724
725	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
726	sra	%o1,31,%o3		! (5_0) j0 = diff0 >> 31;
727
728	and	%o1,%o3,%o1		! (5_0) j0 &= diff0;
729	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
730
731	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
732	sub	%l7,%o1,%o4		! (5_0) j0 = hy0 - j0;
733	cmp	%o7,_0x00100000		! (5_0) hx0 ? 0x00100000
734	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
735
736	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
737	and	%o4,%l0,%o4		! (5_0) j0 &= 0x7ff00000;
738	bl,pn	%icc,.update19		! (5_0) if ( hx0 < 0x00100000 )
739	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
740.cont19a:
741	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
742	sub	%l0,%o4,%g1		! (5_0) j0 = 0x7ff00000 - j0;
743	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
744.cont19b:
745	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
746	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
747	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
748	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
749
750	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
751	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
752.cont20:
753	fmuld	%f62,%f0,%f0		! (3_0) res0_lo *= x_lo0;
754	ldd	[%fp+dtmp9],%f62	! (4_0) *(long long*)&scl0 = ll;
755	faddd	%f2,%f46,%f44		! (3_0) res0_hi += dtmp0;
756
757	fsubd	DONE,%f10,%f60		! (7_1) dtmp0 = DONE - dtmp0;
758	lda	[%i2]%asi,%f10		! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
759	fmuld	%f26,%f20,%f54		! (1_0) dd *= dtmp0;
760
761	lda	[%i2+4]%asi,%f11	! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
762
763	fmuld	%f50,%f12,%f26		! (3_0) dtmp1 *= y_lo0;
764	lda	[%o0]%asi,%f12		! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
765	fsubd	DTWO,%f22,%f20		! (0_0) dtmp2 = DTWO - dtmp2;
766
767	lda	[%o0+4]%asi,%f13	! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
768
769	fmuld	%f54,%f18,%f50		! (1_0) dtmp1 = dd * dres;
770	ld	[%fp+ftmp0],%o2		! (2_0) iarr = ((int*)&dres)[0];
771	fand	%f14,DA1,%f2		! (2_0) dexp0 = vis_fand(dres,DA1);
772
773	fmuld	%f10,%f62,%f10		! (4_0) x0 *= scl0;
774	fsubd	%f60,%f38,%f46		! (7_1) dtmp0 -= dtmp1;
775
776	fmuld	%f12,%f62,%f60		! (4_0) y0 *= scl0;
777	sra	%o2,11,%o4		! (2_0) iarr >>= 11;
778	faddd	%f0,%f26,%f38		! (3_0) res0_lo += dtmp1;
779
780	and	%o4,0x1fc,%o4		! (2_0) iarr &= 0x1fc;
781	fmuld	%f24,%f20,%f26		! (0_0) dres = dd * dtmp2;
782
783	add	%o4,TBL,%o4		! (2_0) (char*)dll1 + iarr
784	mov	%i1,%i2
785	lda	[%i1]0x82,%o1		! (6_0) hx0 = *(int*)px;
786	fsubd	DTWO,%f50,%f52		! (1_0) dtmp1 = DTWO - dtmp1;
787
788	fmuld	%f46,%f28,%f28		! (7_1) dtmp0 *= dres;
789	ld	[%o4],%f20		! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
790	faddd	%f10,D2ON36,%f46	! (4_0) x_hi0 = x0 + D2ON36;
791
792	lda	[%i0]0x82,%o4		! (6_0) hy0 = *(int*)py;
793	mov	%i0,%o0
794	faddd	%f60,D2ON36,%f50	! (4_0) y_hi0 = y0 + D2ON36;
795
796	and	%o1,_0x7fffffff,%o7	! (6_0) hx0 &= 0x7fffffff;
797	faddd	%f44,%f38,%f22		! (3_0) dres = res0_hi + res0_lo;
798
799	fmuld	%f54,%f52,%f16		! (1_0) dd *= dtmp1;
800	cmp	%o7,_0x7ff00000		! (6_0) hx0 ? 0x7ff00000
801	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
802	fpsub32	%f20,%f2,%f52		! (2_0) dd = vis_fpsub32(dtmp0, dexp0);
803
804	and	%o4,_0x7fffffff,%l7	! (6_0) hy0 &= 0x7fffffff;
805	st	%f22,[%fp+ftmp0]	! (3_0) iarr = ((int*)&dres)[0];
806	bge,pn	%icc,.update21		! (6_0) if ( hx0 >= 0x7ff00000 )
807	fsubd	%f46,D2ON36,%f46	! (4_0) x_hi0 -= D2ON36;
808
809	sub	%l7,%o7,%o1		! (6_0) diff0 = hy0 - hx0;
810	cmp	%l7,_0x7ff00000		! (6_0) hy0 ? 0x7ff00000
811	bge,pn	%icc,.update22		! (6_0) if ( hy0 >= 0x7ff00000 )
812	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36;
813
814	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
815	sra	%o1,31,%o3		! (6_0) j0 = diff0 >> 31;
816	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
817
818	and	%o1,%o3,%o1		! (6_0) j0 &= diff0;
819	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
820
821	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
822	sub	%l7,%o1,%o4		! (6_0) j0 = hy0 - j0;
823	cmp	%o7,_0x00100000		! (6_0) hx0 ? 0x00100000
824	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
825
826	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
827	and	%o4,%l0,%o4		! (6_0) j0 &= 0x7ff00000;
828	bl,pn	%icc,.update23		! (6_0) if ( hx0 < 0x00100000 )
829	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
830.cont23a:
831	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
832	sub	%l0,%o4,%g1		! (6_0) j0 = 0x7ff00000 - j0;
833	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
834.cont23b:
835	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
836	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
837	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
838	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
839
840	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
841	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
842.cont24:
843	fmuld	%f62,%f2,%f2		! (4_0) res0_lo *= x_lo0;
844	ldd	[%fp+dtmp11],%f62	! (5_0) *(long long*)&scl0 = ll;
845	faddd	%f0,%f20,%f32		! (4_0) res0_hi += dtmp0;
846
847	lda	[%i4]%asi,%f0		! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
848	fmuld	%f52,%f10,%f10		! (2_0) dd *= dtmp0;
849
850	lda	[%i4+4]%asi,%f1		! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
851	fsubd	DONE,%f50,%f52		! (0_0) dtmp0 = DONE - dtmp0;
852
853	fmuld	%f46,%f60,%f46		! (4_0) dtmp1 *= y_lo0;
854	lda	[%i3]%asi,%f12		! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
855	fsubd	DTWO,%f18,%f18		! (1_0) dtmp2 = DTWO - dtmp2;
856
857	add	%i1,stridex,%i4		! px += stridex
858	lda	[%i3+4]%asi,%f13	! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
859
860	fmuld	%f10,%f14,%f50		! (2_0) dtmp1 = dd * dres;
861	add	%i4,stridex,%i1		! px += stridex
862	ld	[%fp+ftmp0],%o2		! (3_0) iarr = ((int*)&dres)[0];
863	fand	%f22,DA1,%f54		! (3_0) dexp0 = vis_fand(dres,DA1);
864
865	fmuld	%f0,%f62,%f60		! (5_0) x0 *= scl0;
866	fsubd	%f52,%f36,%f20		! (0_0) dtmp0 -= dtmp1;
867
868	fmuld	%f12,%f62,%f52		! (5_0) y0 *= scl0;
869	sra	%o2,11,%i3		! (3_0) iarr >>= 11;
870	faddd	%f2,%f46,%f36		! (4_0) res0_lo += dtmp1;
871
872	and	%i3,0x1fc,%i3		! (3_0) iarr &= 0x1fc;
873	fmuld	%f16,%f18,%f16		! (1_0) dres = dd * dtmp2;
874
875	fsqrtd	%f48,%f18		! (7_1) res0 = sqrt ( res0 );
876	add	%i3,TBL,%o4		! (3_0) (char*)dll1 + iarr
877	lda	[%i4]0x82,%o1		! (7_0) hx0 = *(int*)px;
878	fsubd	DTWO,%f50,%f46		! (2_0) dtmp1 = DTWO - dtmp1;
879
880	fmuld	%f20,%f26,%f48		! (0_0) dtmp0 *= dres;
881	add	%i0,stridey,%i3		! py += stridey
882	ld	[%o4],%f20		! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
883	faddd	%f60,D2ON36,%f50	! (5_0) x_hi0 = x0 + D2ON36;
884
885	lda	[%i3]0x82,%o4		! (7_0) hy0 = *(int*)py;
886	add	%i3,stridey,%i0		! py += stridey
887	faddd	%f52,D2ON36,%f12	! (5_0) y_hi0 = y0 + D2ON36;
888
889	and	%o1,_0x7fffffff,%o7	! (7_0) hx0 &= 0x7fffffff;
890	faddd	%f32,%f36,%f24		! (4_0) dres = res0_hi + res0_lo;
891
892	fmuld	%f10,%f46,%f26		! (2_0) dd *= dtmp1;
893	cmp	%o7,_0x7ff00000		! (7_0) hx0 ? 0x7ff00000
894	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
895	fpsub32	%f20,%f54,%f10		! (3_0) dd = vis_fpsub32(dtmp0, dexp0);
896
897	and	%o4,_0x7fffffff,%l7	! (7_0) hy0 &= 0x7fffffff;
898	st	%f24,[%fp+ftmp0]	! (4_0) iarr = ((int*)&dres)[0];
899	bge,pn	%icc,.update25		! (7_0) if ( hx0 >= 0x7ff00000 )
900	fsubd	%f50,D2ON36,%f20	! (5_0) x_hi0 -= D2ON36;
901
902	sub	%l7,%o7,%o1		! (7_0) diff0 = hy0 - hx0;
903	cmp	%l7,_0x7ff00000		! (7_0) hy0 ? 0x7ff00000
904	bge,pn	%icc,.update26		! (7_0) if ( hy0 >= 0x7ff00000 )
905	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;
906
907	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
908	sra	%o1,31,%o3		! (7_0) j0 = diff0 >> 31;
909	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;
910
911	and	%o1,%o3,%o1		! (7_0) j0 &= diff0;
912	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);
913
914	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
915	sub	%l7,%o1,%o4		! (7_0) j0 = hy0 - j0;
916	cmp	%o7,_0x00100000		! (7_0) hx0 ? 0x00100000
917	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
918
919	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
920	and	%o4,%l0,%o4		! (7_0) j0 &= 0x7ff00000;
921	bl,pn	%icc,.update27		! (7_0) if ( hx0 < 0x00100000 )
922	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
923.cont27a:
924	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
925	sub	%l0,%o4,%g1		! (7_0) j0 = 0x7ff00000 - j0;
926	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
927.cont27b:
928	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
929	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
930	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
931	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;
932
933	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
934	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
935.cont28:
936	fmuld	%f62,%f2,%f2		! (5_0) res0_lo *= x_lo0;
937	ldd	[%fp+dtmp13],%f62	! (6_0) *(long long*)&scl0 = ll;
938	faddd	%f0,%f46,%f42		! (5_0) res0_hi += dtmp0;
939
940	fmuld	%f10,%f20,%f52		! (3_0) dd *= dtmp0;
941	lda	[%i2]%asi,%f10		! (6_0) ((float*)&x0)[0] = ((float*)px)[0];
942
943	lda	[%i2+4]%asi,%f11	! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
944	fsubd	DONE,%f60,%f60		! (1_0) dtmp0 = DONE - dtmp0;
945
946	fmuld	%f50,%f54,%f46		! (5_0) dtmp1 *= y_lo0;
947	lda	[%o0]%asi,%f12		! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
948	fsubd	DTWO,%f14,%f14		! (2_0) dtmp2 = DTWO - dtmp2;
949
950	lda	[%o0+4]%asi,%f13	! (6_0) ((float*)&y0)[1] = ((float*)py)[1];
951
952	fmuld	%f52,%f22,%f50		! (3_0) dtmp1 = dd * dres;
953	ld	[%fp+ftmp0],%o2		! (4_0) iarr = ((int*)&dres)[0];
954	fand	%f24,DA1,%f54		! (4_0) dexp0 = vis_fand(dres,DA1);
955
956	fmuld	%f10,%f62,%f10		! (6_0) x0 *= scl0;
957	ldd	[%fp+dtmp0],%f0		! (7_1) *(long long*)&scl0 = ll;
958	fsubd	%f60,%f34,%f20		! (1_0) dtmp0 -= dtmp1;
959
960	fmuld	%f12,%f62,%f60		! (6_0) y0 *= scl0;
961	sra	%o2,11,%o4		! (4_0) iarr >>= 11;
962	faddd	%f2,%f46,%f34		! (5_0) res0_lo += dtmp1;
963
964	and	%o4,0x1fc,%o4		! (4_0) iarr &= 0x1fc;
965	fmuld	%f26,%f14,%f26		! (2_0) dres = dd * dtmp2;
966
	! Guard for the unrolled loop: .main_loop is only entered when at
	! least 8 are left in counter (presumably remaining elements - confirm
	! against the routine's prologue, which is outside this view).
	cmp	counter,8		! counter ? 8
	bl,pn	%icc,.tail		! if ( counter < 8 ) goto .tail
	nop				! (delay slot)

	ba	.main_loop		! enter the unrolled loop
	sub	counter,8,counter	! (delay slot) counter -= 8

	.align	16
.main_loop:
976	fsqrtd	%f48,%f14		! (0_1) res0 = sqrt ( res0 );
977	add	%o4,TBL,%o4		! (4_1) (char*)dll1 + iarr
978	lda	[%i1]0x82,%o1		! (0_0) hx0 = *(int*)px;
979	fsubd	DTWO,%f50,%f46		! (3_1) dtmp1 = DTWO - dtmp1;
980
981	fmuld	%f20,%f16,%f48		! (1_1) dtmp0 *= dres;
982	mov	%i1,%i2
983	ld	[%o4],%f20		! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
984	faddd	%f10,D2ON36,%f50	! (6_1) x_hi0 = x0 + D2ON36;
985
986	nop
987	mov	%i0,%o0
988	lda	[%i0]0x82,%o4		! (0_0) hy0 = *(int*)py;
989	faddd	%f60,D2ON36,%f2		! (6_1) y_hi0 = y0 + D2ON36;
990
991	faddd	%f42,%f34,%f16		! (5_1) dres = res0_hi + res0_lo;
992	and	%o1,_0x7fffffff,%o7	! (0_0) hx0 &= 0x7fffffff;
993	st	%f16,[%fp+ftmp0]	! (5_1) iarr = ((int*)&dres)[0];
994	fmuld	%f0,%f18,%f0		! (7_2) res0 = scl0 * res0;
995
996	fmuld	%f52,%f46,%f18		! (3_1) dd *= dtmp1;
997	cmp	%o7,_0x7ff00000		! (0_0) hx0 ? 0x7ff00000
998	st	%f0,[%i5]		! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
999	fpsub32	%f20,%f54,%f54		! (4_1) dd = vis_fpsub32(dtmp0, dexp0);
1000
1001	and	%o4,_0x7fffffff,%l7	! (0_0) hy0 &= 0x7fffffff;
1002	st	%f1,[%i5+4]		! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
1003	bge,pn	%icc,.update29		! (0_0) if ( hx0 >= 0x7ff00000 )
1004	fsubd	%f50,D2ON36,%f20	! (6_1) x_hi0 -= D2ON36;
1005
1006	cmp	%l7,_0x7ff00000		! (0_0) hy0 ? 0x7ff00000
1007	sub	%l7,%o7,%o1		! (0_0) diff0 = hy0 - hx0;
1008	bge,pn	%icc,.update30		! (0_0) if ( hy0 >= 0x7ff00000 )
1009	fsubd	%f2,D2ON36,%f2		! (6_1) y_hi0 -= D2ON36;
1010
1011	fmuld	%f54,%f24,%f50		! (4_1) dtmp0 = dd * dres;
1012	sra	%o1,31,%o3		! (0_0) j0 = diff0 >> 31;
1013	stx	%g1,[%fp+dtmp0]		! (7_1) *(long long*)&scl0 = ll;
1014	faddd	%f28,%f48,%f52		! (1_1) res0 += dtmp0;
1015
1016	and	%o1,%o3,%o1		! (0_0) j0 &= diff0;
1017	cmp	%o7,_0x00100000		! (0_0) hx0 ? 0x00100000
1018	bl,pn	%icc,.update31		! (0_0) if ( hx0 < 0x00100000 )
1019	fand	%f26,DA0,%f48		! (2_1) res0 = vis_fand(dres,DA0);
1020.cont31:
1021	fmuld	%f20,%f20,%f0		! (6_1) res0_hi = x_hi0 * x_hi0;
1022	sub	%l7,%o1,%o4		! (0_0) j0 = hy0 - j0;
1023	nop
1024	fsubd	%f10,%f20,%f28		! (6_1) x_lo0 = x0 - x_hi0;
1025
1026	fmuld	%f2,%f2,%f46		! (6_1) dtmp0 = y_hi0 * y_hi0;
1027	add	%i5,stridez,%i5		! pz += stridez
1028	and	%o4,%l0,%o4		! (0_0) j0 &= 0x7ff00000;
1029	faddd	%f10,%f20,%f62		! (6_1) res0_lo = x0 + x_hi0;
1030
1031	fmuld	%f18,%f22,%f22		! (3_1) dtmp2 = dd * dres;
1032	sub	%l0,%o4,%o4		! (0_0) j0 = 0x7ff00000 - j0;
1033	nop
1034	fsubd	DTWO,%f50,%f20		! (4_1) dtmp0 = DTWO - dtmp0;
1035.cont32:
1036	fmuld	%f30,%f48,%f12		! (2_1) dtmp0 = res0_hi * res0;
1037	sllx	%o4,32,%o4		! (0_0) ll = (long long)j0 << 32;
1038	stx	%o4,[%fp+dtmp1]		! (0_0) *(long long*)&scl0 = ll;
1039	faddd	%f60,%f2,%f50		! (6_1) dtmp1 = y0 + y_hi0;
1040
	fmuld	%f40,%f48,%f40		! (2_1) dtmp1 = res0_lo * res0;
	nop
	bn,pn	%icc,.exit		! branch-never: not taken (presumably a scheduling filler, like the nops)
	fsubd	%f60,%f2,%f2		! (6_1) y_lo0 = y0 - y_hi0; (delay slot of a non-annulled bn: always executed)
1045
1046	fmuld	%f62,%f28,%f28		! (6_1) res0_lo *= x_lo0;
1047	nop
1048	ldd	[%fp+dtmp15],%f62	! (7_1) *(long long*)&scl0 = ll;
1049	faddd	%f0,%f46,%f30		! (6_1) res0_hi += dtmp0;
1050
1051	nop
1052	nop
1053	lda	[%i4]%asi,%f10		! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
1054	fmuld	%f54,%f20,%f54		! (4_1) dd *= dtmp0;
1055
1056	nop
1057	nop
1058	lda	[%i4+4]%asi,%f11	! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
1059	fsubd	DONE,%f12,%f60		! (2_1) dtmp0 = DONE - dtmp0;
1060
1061	fmuld	%f50,%f2,%f46		! (6_1) dtmp1 *= y_lo0;
1062	nop
1063	lda	[%i3]%asi,%f12		! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
1064	fsubd	DTWO,%f22,%f22		! (3_1) dtmp2 = DTWO - dtmp2;
1065
1066	add	%i1,stridex,%i4		! px += stridex
1067	nop
1068	lda	[%i3+4]%asi,%f13	! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
1069	bn,pn	%icc,.exit
1070
1071	fmuld	%f54,%f24,%f50		! (4_1) dtmp1 = dd * dres;
1072	add	%i4,stridex,%i1		! px += stridex
1073	ld	[%fp+ftmp0],%o2		! (5_1) iarr = ((int*)&dres)[0];
1074	fand	%f16,DA1,%f2		! (5_1) dexp0 = vis_fand(dres,DA1);
1075
1076	fmuld	%f10,%f62,%f10		! (7_1) x0 *= scl0;
1077	nop
1078	ldd	[%fp+dtmp2],%f0		! (0_1) *(long long*)&scl0 = ll;
1079	fsubd	%f60,%f40,%f20		! (2_1) dtmp0 -= dtmp1;
1080
1081	fmuld	%f12,%f62,%f60		! (7_1) y0 *= scl0;
1082	sra	%o2,11,%i3		! (5_1) iarr >>= 11;
1083	nop
1084	faddd	%f28,%f46,%f40		! (6_1) res0_lo += dtmp1;
1085
1086	and	%i3,0x1fc,%i3		! (5_1) iarr &= 0x1fc;
1087	nop
1088	bn,pn	%icc,.exit
1089	fmuld	%f18,%f22,%f28		! (3_1) dres = dd * dtmp2;
1090
1091	fsqrtd	%f52,%f22		! (1_1) res0 = sqrt ( res0 );
1092	lda	[%i4]0x82,%o1		! (1_0) hx0 = *(int*)px;
1093	add	%i3,TBL,%g1		! (5_1) (char*)dll1 + iarr
1094	fsubd	DTWO,%f50,%f62		! (4_1) dtmp1 = DTWO - dtmp1;
1095
1096	fmuld	%f20,%f26,%f52		! (2_1) dtmp0 *= dres;
1097	add	%i0,stridey,%i3		! py += stridey
1098	ld	[%g1],%f26		! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1099	faddd	%f10,D2ON36,%f46	! (7_1) x_hi0 = x0 + D2ON36;
1100
1101	nop
1102	add	%i3,stridey,%i0		! py += stridey
1103	lda	[%i3]0x82,%g1		! (1_0) hy0 = *(int*)py;
1104	faddd	%f60,D2ON36,%f50	! (7_1) y_hi0 = y0 + D2ON36;
1105
1106	faddd	%f30,%f40,%f18		! (6_1) dres = res0_hi + res0_lo;
1107	and	%o1,_0x7fffffff,%o7	! (1_0) hx0 &= 0x7fffffff;
1108	st	%f18,[%fp+ftmp0]	! (6_1) iarr = ((int*)&dres)[0];
1109	fmuld	%f0,%f14,%f0		! (0_1) res0 = scl0 * res0;
1110
1111	fmuld	%f54,%f62,%f14		! (4_1) dd *= dtmp1;
1112	cmp	%o7,_0x7ff00000		! (1_0) hx0 ? 0x7ff00000
1113	st	%f0,[%i5]		! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];
1114	fpsub32	%f26,%f2,%f26		! (5_1) dd = vis_fpsub32(dtmp0, dexp0);
1115
1116	and	%g1,_0x7fffffff,%l7	! (1_0) hy0 &= 0x7fffffff;
1117	nop
1118	bge,pn	%icc,.update33		! (1_0) if ( hx0 >= 0x7ff00000 )
1119	fsubd	%f46,D2ON36,%f20	! (7_1) x_hi0 -= D2ON36;
1120
1121	cmp	%l7,_0x7ff00000		! (1_0) hy0 ? 0x7ff00000
1122	sub	%l7,%o7,%o1		! (1_0) diff0 = hy0 - hx0;
1123	st	%f1,[%i5+4]		! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
1124	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;
1125
1126	fmuld	%f26,%f16,%f50		! (5_1) dtmp0 = dd * dres;
1127	sra	%o1,31,%o3		! (1_0) j0 = diff0 >> 31;
1128	bge,pn	%icc,.update34		! (1_0) if ( hy0 >= 0x7ff00000 )
1129	faddd	%f48,%f52,%f52		! (2_1) res0 += dtmp0;
1130
1131	and	%o1,%o3,%o1		! (1_0) j0 &= diff0;
1132	add	%i5,stridez,%i5		! pz += stridez
1133	stx	%o4,[%fp+dtmp2]		! (0_0) *(long long*)&scl0 = ll;
1134	fand	%f28,DA0,%f48		! (3_1) res0 = vis_fand(dres,DA0);
1135
1136	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
1137	sub	%l7,%o1,%o4		! (1_0) j0 = hy0 - j0;
1138	cmp	%o7,_0x00100000		! (1_0) hx0 ? 0x00100000
1139	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
1140
1141	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
1142	and	%o4,%l0,%o4		! (1_0) j0 &= 0x7ff00000;
1143	bl,pn	%icc,.update35		! (1_0) if ( hx0 < 0x00100000 )
1144	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
1145.cont35a:
1146	fmuld	%f44,%f48,%f10		! (3_1) dtmp0 = res0_hi * res0;
1147	nop
1148	sub	%l0,%o4,%o4		! (1_0) j0 = 0x7ff00000 - j0;
1149	fsubd	DTWO,%f50,%f20		! (5_1) dtmp0 = DTWO - dtmp0;
1150.cont35b:
1151	fmuld	%f14,%f24,%f24		! (4_1) dtmp2 = dd * dres;
1152	sllx	%o4,32,%o4		! (1_0) ll = (long long)j0 << 32;
1153	stx	%o4,[%fp+dtmp3]		! (1_0) *(long long*)&scl0 = ll;
1154	faddd	%f60,%f54,%f50		! (7_1) dtmp1 = y0 + y_hi0;
1155
1156	fmuld	%f38,%f48,%f38		! (3_1) dtmp1 = res0_lo * res0;
1157	nop
1158	nop
1159	fsubd	%f60,%f54,%f12		! (7_1) y_lo0 = y0 - y_hi0;
1160.cont36:
1161	fmuld	%f62,%f0,%f0		! (7_1) res0_lo *= x_lo0;
1162	nop
1163	ldd	[%fp+dtmp1],%f62	! (0_0) *(long long*)&scl0 = ll;
1164	faddd	%f2,%f46,%f44		! (7_1) res0_hi += dtmp0;
1165
1166	fsubd	DONE,%f10,%f60		! (3_1) dtmp0 = DONE - dtmp0;
1167	nop
1168	lda	[%i2]%asi,%f10		! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
1169	fmuld	%f26,%f20,%f54		! (5_1) dd *= dtmp0;
1170
1171	nop
1172	nop
1173	lda	[%i2+4]%asi,%f11	! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
1174	bn,pn	%icc,.exit
1175
1176	fmuld	%f50,%f12,%f26		! (7_1) dtmp1 *= y_lo0;
1177	nop
1178	lda	[%o0]%asi,%f12		! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
1179	fsubd	DTWO,%f24,%f24		! (4_1) dtmp2 = DTWO - dtmp2;
1180
1181	nop
1182	nop
1183	lda	[%o0+4]%asi,%f13	! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
1184	bn,pn	%icc,.exit
1185
1186	fmuld	%f54,%f16,%f46		! (5_1) dtmp1 = dd * dres;
1187	nop
1188	ld	[%fp+ftmp0],%o2		! (6_1) iarr = ((int*)&dres)[0];
1189	fand	%f18,DA1,%f2		! (6_1) dexp0 = vis_fand(dres,DA1);
1190
1191	fmuld	%f10,%f62,%f10		! (0_0) x0 *= scl0;
1192	nop
1193	ldd	[%fp+dtmp4],%f50	! (1_1) *(long long*)&scl0 = ll;
1194	fsubd	%f60,%f38,%f20		! (3_1) dtmp0 -= dtmp1;
1195
1196	fmuld	%f12,%f62,%f60		! (0_0) y0 *= scl0;
1197	sra	%o2,11,%g1		! (6_1) iarr >>= 11;
1198	nop
1199	faddd	%f0,%f26,%f38		! (7_1) res0_lo += dtmp1;
1200
1201	nop
1202	and	%g1,0x1fc,%g1		! (6_1) iarr &= 0x1fc;
1203	bn,pn	%icc,.exit
1204	fmuld	%f14,%f24,%f26		! (4_1) dres = dd * dtmp2;
1205
1206	fsqrtd	%f52,%f24		! (2_1) res0 = sqrt ( res0 );
1207	lda	[%i1]0x82,%o1		! (2_0) hx0 = *(int*)px;
1208	add	%g1,TBL,%g1		! (6_1) (char*)dll1 + iarr
1209	fsubd	DTWO,%f46,%f62		! (5_1) dtmp1 = DTWO - dtmp1;
1210
1211	fmuld	%f20,%f28,%f52		! (3_1) dtmp0 *= dres;
1212	mov	%i1,%i2
1213	ld	[%g1],%f28		! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1214	faddd	%f10,D2ON36,%f46	! (0_0) x_hi0 = x0 + D2ON36;
1215
1216	nop
1217	mov	%i0,%o0
1218	lda	[%i0]0x82,%g1		! (2_0) hy0 = *(int*)py;
1219	faddd	%f60,D2ON36,%f12	! (0_0) y_hi0 = y0 + D2ON36;
1220
1221	faddd	%f44,%f38,%f14		! (7_1) dres = res0_hi + res0_lo;
1222	and	%o1,_0x7fffffff,%o7	! (2_0) hx0 &= 0x7fffffff;
1223	st	%f14,[%fp+ftmp0]	! (7_1) iarr = ((int*)&dres)[0];
1224	fmuld	%f50,%f22,%f0		! (1_1) res0 = scl0 * res0;
1225
1226	fmuld	%f54,%f62,%f22		! (5_1) dd *= dtmp1;
1227	cmp	%o7,_0x7ff00000		! (2_0) hx0 ? 0x7ff00000
1228	st	%f0,[%i5]		! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
1229	fpsub32	%f28,%f2,%f28		! (6_1) dd = vis_fpsub32(dtmp0, dexp0);
1230
	and	%g1,_0x7fffffff,%l7	! (2_0) hy0 &= 0x7fffffff; (%g1 = hy0 loaded from py; icc untouched)
	nop
	bge,pn	%icc,.update37		! (2_0) if ( hx0 >= 0x7ff00000 )
	fsubd	%f46,D2ON36,%f20	! (0_0) x_hi0 -= D2ON36;
1235
1236	sub	%l7,%o7,%o1		! (2_0) diff0 = hy0 - hx0;
1237	cmp	%l7,_0x7ff00000		! (2_0) hy0 ? 0x7ff00000
1238	st	%f1,[%i5+4]		! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
1239	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;
1240
1241	fmuld	%f28,%f18,%f50		! (6_1) dtmp0 = dd * dres;
1242	sra	%o1,31,%o3		! (2_0) j0 = diff0 >> 31;
1243	bge,pn	%icc,.update38		! (2_0) if ( hy0 >= 0x7ff00000 )
1244	faddd	%f48,%f52,%f52		! (3_1) res0 += dtmp0;
1245
1246	and	%o1,%o3,%o1		! (2_0) j0 &= diff0;
1247	add	%i5,stridez,%i5		! pz += stridez
1248	stx	%o4,[%fp+dtmp4]		! (1_0) *(long long*)&scl0 = ll;
1249	fand	%f26,DA0,%f48		! (4_1) res0 = vis_fand(dres,DA0);
1250
1251	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
1252	cmp	%o7,_0x00100000		! (2_0) hx0 ? 0x00100000
1253	sub	%l7,%o1,%o4		! (2_0) j0 = hy0 - j0;
1254	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
1255
1256	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
1257	and	%o4,%l0,%o4		! (2_0) j0 &= 0x7ff00000;
1258	bl,pn	%icc,.update39		! (2_0) if ( hx0 < 0x00100000 )
1259	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
1260.cont39a:
1261	fmuld	%f32,%f48,%f10		! (4_1) dtmp0 = res0_hi * res0;
1262	sub	%l0,%o4,%g1		! (2_0) j0 = 0x7ff00000 - j0;
1263	nop
1264	fsubd	DTWO,%f50,%f20		! (6_1) dtmp0 = DTWO - dtmp0;
1265.cont39b:
1266	fmuld	%f22,%f16,%f16		! (5_1) dtmp2 = dd * dres;
1267	sllx	%g1,32,%g1		! (2_0) ll = (long long)j0 << 32;
1268	stx	%g1,[%fp+dtmp5]		! (2_0) *(long long*)&scl0 = ll;
1269	faddd	%f60,%f54,%f50		! (0_0) dtmp1 = y0 + y_hi0;
1270
1271	fmuld	%f36,%f48,%f36		! (4_1) dtmp1 = res0_lo * res0;
1272	nop
1273	nop
1274	fsubd	%f60,%f54,%f12		! (0_0) y_lo0 = y0 - y_hi0;
1275.cont40:
1276	fmuld	%f62,%f0,%f0		! (0_0) res0_lo *= x_lo0;
1277	nop
1278	ldd	[%fp+dtmp3],%f62	! (1_0) *(long long*)&scl0 = ll;
1279	faddd	%f2,%f46,%f32		! (0_0) res0_hi += dtmp0;
1280
1281	fsubd	DONE,%f10,%f60		! (4_1) dtmp0 = DONE - dtmp0;
1282	nop
1283	lda	[%i4]%asi,%f10		! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
1284	fmuld	%f28,%f20,%f54		! (6_1) dd *= dtmp0;
1285
1286	nop
1287	nop
1288	lda	[%i4+4]%asi,%f11	! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
1289	bn,pn	%icc,.exit
1290
1291	fmuld	%f50,%f12,%f28		! (0_0) dtmp1 *= y_lo0;
1292	nop
1293	lda	[%i3]%asi,%f12		! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
1294	fsubd	DTWO,%f16,%f16		! (5_1) dtmp2 = DTWO - dtmp2;
1295
1296	add	%i1,stridex,%i4		! px += stridex
1297	nop
1298	lda	[%i3+4]%asi,%f13	! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
1299	bn,pn	%icc,.exit
1300
1301	fmuld	%f54,%f18,%f46		! (6_1) dtmp1 = dd * dres;
1302	add	%i4,stridex,%i1		! px += stridex
1303	ld	[%fp+ftmp0],%o2		! (7_1) iarr = ((int*)&dres)[0];
1304	fand	%f14,DA1,%f2		! (7_1) dexp0 = vis_fand(dres,DA1);
1305
1306	fmuld	%f10,%f62,%f10		! (1_0) x0 *= scl0;
1307	nop
1308	ldd	[%fp+dtmp6],%f50	! (2_1) *(long long*)&scl0 = ll;
1309	fsubd	%f60,%f36,%f20		! (4_1) dtmp0 -= dtmp1;
1310
1311	fmuld	%f12,%f62,%f60		! (1_0) y0 *= scl0;
1312	sra	%o2,11,%i3		! (7_1) iarr >>= 11;
1313	nop
1314	faddd	%f0,%f28,%f36		! (0_0) res0_lo += dtmp1;
1315
1316	and	%i3,0x1fc,%i3		! (7_1) iarr &= 0x1fc;
1317	nop
1318	bn,pn	%icc,.exit
1319	fmuld	%f22,%f16,%f28		! (5_1) dres = dd * dtmp2;
1320
1321	fsqrtd	%f52,%f16		! (3_1) res0 = sqrt ( res0 );
1322	add	%i3,TBL,%o4		! (7_1) (char*)dll1 + iarr
1323	lda	[%i4]0x82,%o1		! (3_0) hx0 = *(int*)px;
1324	fsubd	DTWO,%f46,%f62		! (6_1) dtmp1 = DTWO - dtmp1;
1325
1326	fmuld	%f20,%f26,%f52		! (4_1) dtmp0 *= dres;
1327	add	%i0,stridey,%i3		! py += stridey
1328	ld	[%o4],%f26		! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1329	faddd	%f10,D2ON36,%f46	! (1_0) x_hi0 = x0 + D2ON36;
1330
1331	nop
1332	add	%i3,stridey,%i0		! py += stridey
1333	lda	[%i3]0x82,%o4		! (3_0) hy0 = *(int*)py;
1334	faddd	%f60,D2ON36,%f12	! (1_0) y_hi0 = y0 + D2ON36;
1335
1336	faddd	%f32,%f36,%f22		! (0_0) dres = res0_hi + res0_lo;
1337	and	%o1,_0x7fffffff,%o7	! (3_0) hx0 &= 0x7fffffff;
1338	st	%f22,[%fp+ftmp0]	! (0_0) iarr = ((int*)&dres)[0];
1339	fmuld	%f50,%f24,%f0		! (2_1) res0 = scl0 * res0;
1340
1341	fmuld	%f54,%f62,%f24		! (6_1) dd *= dtmp1;
1342	cmp	%o7,_0x7ff00000		! (3_0) hx0 ? 0x7ff00000
1343	st	%f0,[%i5]		! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
1344	fpsub32	%f26,%f2,%f26		! (7_1) dd = vis_fpsub32(dtmp0, dexp0);
1345
1346	and	%o4,_0x7fffffff,%l7	! (3_0) hy0 &= 0x7fffffff;
1347	nop
1348	bge,pn	%icc,.update41		! (3_0) if ( hx0 >= 0x7ff00000 )
1349	fsubd	%f46,D2ON36,%f20	! (1_0) x_hi0 -= D2ON36;
1350
1351	sub	%l7,%o7,%o1		! (3_0) diff0 = hy0 - hx0;
1352	cmp	%l7,_0x7ff00000		! (3_0) hy0 ? 0x7ff00000
1353	st	%f1,[%i5+4]		! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
1354	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;
1355
1356	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres;
1357	sra	%o1,31,%o3		! (3_0) j0 = diff0 >> 31;
1358	bge,pn	%icc,.update42		! (3_0) if ( hy0 >= 0x7ff00000 )
1359	faddd	%f48,%f52,%f52		! (4_1) res0 += dtmp0;
1360
1361	and	%o1,%o3,%o1		! (3_0) j0 &= diff0;
1362	add	%i5,stridez,%i5		! pz += stridez
1363	stx	%g1,[%fp+dtmp6]		! (2_0) *(long long*)&scl0 = ll;
1364	fand	%f28,DA0,%f48		! (5_1) res0 = vis_fand(dres,DA0);
1365
1366	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
1367	cmp	%o7,_0x00100000		! (3_0) hx0 ? 0x00100000
1368	sub	%l7,%o1,%o4		! (3_0) j0 = hy0 - j0;
1369	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
1370
1371	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
1372	and	%o4,%l0,%o4		! (3_0) j0 &= 0x7ff00000;
1373	bl,pn	%icc,.update43		! (3_0) if ( hx0 < 0x00100000 )
1374	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
1375.cont43a:
1376	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
1377	nop
1378	sub	%l0,%o4,%g1		! (3_0) j0 = 0x7ff00000 - j0;
1379	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
1380.cont43b:
1381	fmuld	%f24,%f18,%f18		! (6_1) dtmp2 = dd * dres;
1382	sllx	%g1,32,%g1		! (3_0) ll = (long long)j0 << 32;
1383	stx	%g1,[%fp+dtmp7]		! (3_0) *(long long*)&scl0 = ll;
1384	faddd	%f60,%f54,%f50		! (1_0) dtmp1 = y0 + y_hi0;
1385
	fmuld	%f34,%f48,%f34		! (5_1) dtmp1 = res0_lo * res0;
	nop
	nop
	fsubd	%f60,%f54,%f12		! (1_0) y_lo0 = y0 - y_hi0;
1390.cont44:
1391	fmuld	%f62,%f0,%f0		! (1_0) res0_lo *= x_lo0;
1392	nop
1393	ldd	[%fp+dtmp5],%f62	! (2_0) *(long long*)&scl0 = ll;
1394	faddd	%f2,%f46,%f42		! (1_0) res0_hi += dtmp0;
1395
1396	fsubd	DONE,%f10,%f60		! (5_1) dtmp0 = DONE - dtmp0;
1397	nop
1398	lda	[%i2]%asi,%f10		! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
1399	fmuld	%f26,%f20,%f54		! (7_1) dd *= dtmp0;
1400
1401	nop
1402	nop
1403	lda	[%i2+4]%asi,%f11	! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
1404	bn,pn	%icc,.exit
1405
1406	fmuld	%f50,%f12,%f26		! (1_0) dtmp1 *= y_lo0;
1407	nop
1408	lda	[%o0]%asi,%f12		! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
1409	fsubd	DTWO,%f18,%f20		! (6_1) dtmp2 = DTWO - dtmp2;
1410
1411	nop
1412	nop
1413	lda	[%o0+4]%asi,%f13	! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
1414	bn,pn	%icc,.exit
1415
1416	fmuld	%f54,%f14,%f50		! (7_1) dtmp1 = dd * dres;
1417	nop
1418	ld	[%fp+ftmp0],%o2		! (0_0) iarr = ((int*)&dres)[0];
1419	fand	%f22,DA1,%f2		! (0_0) dexp0 = vis_fand(dres,DA1);
1420
1421	fmuld	%f10,%f62,%f10		! (2_0) x0 *= scl0;
1422	nop
1423	ldd	[%fp+dtmp8],%f18	! (3_1) *(long long*)&scl0 = ll;
1424	fsubd	%f60,%f34,%f46		! (5_1) dtmp0 -= dtmp1;
1425
1426	fmuld	%f12,%f62,%f60		! (2_0) y0 *= scl0;
1427	sra	%o2,11,%o4		! (0_0) iarr >>= 11;
1428	nop
1429	faddd	%f0,%f26,%f34		! (1_0) res0_lo += dtmp1;
1430
1431	and	%o4,0x1fc,%o4		! (0_0) iarr &= 0x1fc;
1432	nop
1433	bn,pn	%icc,.exit
1434	fmuld	%f24,%f20,%f26		! (6_1) dres = dd * dtmp2;
1435
1436	fsqrtd	%f52,%f24		! (4_1) res0 = sqrt ( res0 );
1437	add	%o4,TBL,%o4		! (0_0) (char*)dll1 + iarr
1438	lda	[%i1]0x82,%o1		! (4_0) hx0 = *(int*)px;
1439	fsubd	DTWO,%f50,%f20		! (7_1) dtmp1 = DTWO - dtmp1;
1440
	fmuld	%f46,%f28,%f52		! (5_1) dtmp0 *= dres;
	mov	%i1,%i2			! %i2 = px address just read for (4_0) hx0
	ld	[%o4],%f28		! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
	faddd	%f10,D2ON36,%f46	! (2_0) x_hi0 = x0 + D2ON36;
1445
1446	nop
1447	mov	%i0,%o0
1448	lda	[%i0]0x82,%o4		! (4_0) hy0 = *(int*)py;
1449	faddd	%f60,D2ON36,%f50	! (2_0) y_hi0 = y0 + D2ON36;
1450
1451	fmuld	%f18,%f16,%f0		! (3_1) res0 = scl0 * res0;
1452	nop
1453	and	%o1,_0x7fffffff,%o7	! (4_0) hx0 &= 0x7fffffff;
1454	faddd	%f42,%f34,%f18		! (1_0) dres = res0_hi + res0_lo;
1455
1456	fmuld	%f54,%f20,%f16		! (7_1) dd *= dtmp1;
1457	cmp	%o7,_0x7ff00000		! (4_0) hx0 ? 0x7ff00000
1458	st	%f18,[%fp+ftmp0]	! (1_0) iarr = ((int*)&dres)[0];
1459	fpsub32	%f28,%f2,%f28		! (0_0) dd = vis_fpsub32(dtmp0, dexp0);
1460
1461	and	%o4,_0x7fffffff,%l7	! (4_0) hy0 &= 0x7fffffff;
1462	st	%f0,[%i5]		! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
1463	bge,pn	%icc,.update45		! (4_0) if ( hx0 >= 0x7ff00000 )
1464	fsubd	%f46,D2ON36,%f20	! (2_0) x_hi0 -= D2ON36;
1465
1466	sub	%l7,%o7,%o1		! (4_0) diff0 = hy0 - hx0;
1467	cmp	%l7,_0x7ff00000		! (4_0) hy0 ? 0x7ff00000
1468	bge,pn	%icc,.update46		! (4_0) if ( hy0 >= 0x7ff00000 )
1469	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;
1470
1471	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
1472	sra	%o1,31,%o3		! (4_0) j0 = diff0 >> 31;
1473	st	%f1,[%i5+4]		! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
1474	faddd	%f48,%f52,%f52		! (5_1) res0 += dtmp0;
1475
1476	and	%o1,%o3,%o1		! (4_0) j0 &= diff0;
1477	cmp	%o7,_0x00100000		! (4_0) hx0 ? 0x00100000
1478	bl,pn	%icc,.update47		! (4_0) if ( hx0 < 0x00100000 )
1479	fand	%f26,DA0,%f48		! (6_1) res0 = vis_fand(dres,DA0);
1480.cont47a:
1481	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
1482	sub	%l7,%o1,%o4		! (4_0) j0 = hy0 - j0;
1483	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
1484	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
1485
1486	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
1487	and	%o4,%l0,%o4		! (4_0) j0 &= 0x7ff00000;
1488	add	%i5,stridez,%i5		! pz += stridez
1489	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
1490
1491	fmuld	%f30,%f48,%f10		! (6_1) dtmp0 = res0_hi * res0;
1492	nop
1493	sub	%l0,%o4,%g1		! (4_0) j0 = 0x7ff00000 - j0;
1494	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
1495.cont47b:
1496	fmuld	%f16,%f14,%f14		! (7_1) dtmp2 = dd * dres;
1497	sllx	%g1,32,%g1		! (4_0) ll = (long long)j0 << 32;
1498	stx	%g1,[%fp+dtmp9]		! (4_0) *(long long*)&scl0 = ll;
1499	faddd	%f60,%f54,%f50		! (2_0) dtmp1 = y0 + y_hi0;
1500
1501	fmuld	%f40,%f48,%f40		! (6_1) dtmp1 = res0_lo * res0;
1502	nop
1503	nop
1504	fsubd	%f60,%f54,%f12		! (2_0) y_lo0 = y0 - y_hi0;
1505.cont48:
1506	fmuld	%f62,%f0,%f0		! (2_0) res0_lo *= x_lo0;
1507	nop
1508	ldd	[%fp+dtmp7],%f62	! (3_0) *(long long*)&scl0 = ll;
1509	faddd	%f2,%f46,%f30		! (2_0) res0_hi += dtmp0;
1510
1511	fsubd	DONE,%f10,%f60		! (6_1) dtmp0 = DONE - dtmp0;
1512	nop
1513	lda	[%i4]%asi,%f10		! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
1514	fmuld	%f28,%f20,%f54		! (0_0) dd *= dtmp0;
1515
1516	nop
1517	nop
1518	lda	[%i4+4]%asi,%f11	! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
1519	bn,pn	%icc,.exit
1520
1521	fmuld	%f50,%f12,%f28		! (2_0) dtmp1 *= y_lo0;
1522	nop
1523	lda	[%i3]%asi,%f12		! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
1524	fsubd	DTWO,%f14,%f20		! (7_1) dtmp2 = DTWO - dtmp2;
1525
1526	lda	[%i3+4]%asi,%f13	! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
1527	add	%i1,stridex,%i4		! px += stridex
1528	nop
1529	bn,pn	%icc,.exit
1530
1531	fmuld	%f54,%f22,%f50		! (0_0) dtmp1 = dd * dres;
1532	add	%i4,stridex,%i1		! px += stridex
1533	ld	[%fp+ftmp0],%o2		! (1_0) iarr = ((int*)&dres)[0];
1534	fand	%f18,DA1,%f2		! (1_0) dexp0 = vis_fand(dres,DA1);
1535
1536	fmuld	%f10,%f62,%f10		! (3_0) x0 *= scl0;
1537	nop
1538	ldd	[%fp+dtmp10],%f14	! (4_1) *(long long*)&scl0 = ll;
1539	fsubd	%f60,%f40,%f46		! (6_1) dtmp0 -= dtmp1;
1540
1541	fmuld	%f12,%f62,%f60		! (3_0) y0 *= scl0;
1542	sra	%o2,11,%i3		! (1_0) iarr >>= 11;
1543	nop
1544	faddd	%f0,%f28,%f40		! (2_0) res0_lo += dtmp1;
1545
1546	and	%i3,0x1fc,%i3		! (1_0) iarr &= 0x1fc;
1547	nop
1548	bn,pn	%icc,.exit
1549	fmuld	%f16,%f20,%f28		! (7_1) dres = dd * dtmp2;
1550
1551	fsqrtd	%f52,%f16		! (5_1) res0 = sqrt ( res0 );
1552	add	%i3,TBL,%o4		! (1_0) (char*)dll1 + iarr
1553	lda	[%i4]0x82,%o1		! (5_0) hx0 = *(int*)px;
1554	fsubd	DTWO,%f50,%f20		! (0_0) dtmp1 = DTWO - dtmp1;
1555
1556	fmuld	%f46,%f26,%f52		! (6_1) dtmp0 *= dres;
1557	add	%i0,stridey,%i3		! py += stridey
1558	ld	[%o4],%f26		! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1559	faddd	%f10,D2ON36,%f46	! (3_0) x_hi0 = x0 + D2ON36;
1560
1561	nop
1562	add	%i3,stridey,%i0		! py += stridey
1563	lda	[%i3]0x82,%o4		! (5_0) hy0 = *(int*)py;
1564	faddd	%f60,D2ON36,%f50	! (3_0) y_hi0 = y0 + D2ON36;
1565
1566	fmuld	%f14,%f24,%f0		! (4_1) res0 = scl0 * res0;
1567	and	%o1,_0x7fffffff,%o7	! (5_0) hx0 &= 0x7fffffff;
1568	nop
1569	faddd	%f30,%f40,%f14		! (2_0) dres = res0_hi + res0_lo;
1570
1571	fmuld	%f54,%f20,%f24		! (0_0) dd *= dtmp1;
1572	cmp	%o7,_0x7ff00000		! (5_0) hx0 ? 0x7ff00000
1573	st	%f14,[%fp+ftmp0]	! (2_0) iarr = ((int*)&dres)[0];
1574	fpsub32	%f26,%f2,%f26		! (1_0) dd = vis_fpsub32(dtmp0, dexp0);
1575
1576	and	%o4,_0x7fffffff,%l7	! (5_0) hy0 &= 0x7fffffff;
1577	st	%f0,[%i5]		! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
1578	bge,pn	%icc,.update49		! (5_0) if ( hx0 >= 0x7ff00000 )
1579	fsubd	%f46,D2ON36,%f20	! (3_0) x_hi0 -= D2ON36;
1580
1581	sub	%l7,%o7,%o1		! (5_0) diff0 = hy0 - hx0;
1582	cmp	%l7,_0x7ff00000		! (5_0) hy0 ? 0x7ff00000
1583	bge,pn	%icc,.update50		! (5_0) if ( hy0 >= 0x7ff00000 )
1584	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;
1585
1586	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
1587	sra	%o1,31,%o3		! (5_0) j0 = diff0 >> 31;
1588	st	%f1,[%i5+4]		! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
1589	faddd	%f48,%f52,%f52		! (6_1) res0 += dtmp0;
1590
1591	and	%o1,%o3,%o1		! (5_0) j0 &= diff0;
1592	cmp	%o7,_0x00100000		! (5_0) hx0 ? 0x00100000
1593	bl,pn	%icc,.update51		! (5_0) if ( hx0 < 0x00100000 )
1594	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
1595.cont51a:
1596	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
1597	sub	%l7,%o1,%o4		! (5_0) j0 = hy0 - j0;
1598	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
1599	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
1600
1601	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
1602	and	%o4,%l0,%o4		! (5_0) j0 &= 0x7ff00000;
1603	add	%i5,stridez,%i5		! pz += stridez
1604	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
1605
1606	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
1607	sub	%l0,%o4,%g1		! (5_0) j0 = 0x7ff00000 - j0;
1608	nop
1609	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
1610.cont51b:
1611	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
1612	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
1613	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
1614	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
1615
1616	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
1617	nop
1618	nop
1619	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
1620.cont52:
1621	fmuld	%f62,%f0,%f0		! (3_0) res0_lo *= x_lo0;
1622	nop
1623	ldd	[%fp+dtmp9],%f62	! (4_0) *(long long*)&scl0 = ll;
1624	faddd	%f2,%f46,%f44		! (3_0) res0_hi += dtmp0;
1625
1626	fsubd	DONE,%f10,%f60		! (7_1) dtmp0 = DONE - dtmp0;
1627	nop
1628	lda	[%i2]%asi,%f10		! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
1629	fmuld	%f26,%f20,%f54		! (1_0) dd *= dtmp0;
1630
1631	nop
1632	nop
1633	lda	[%i2+4]%asi,%f11	! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
1634	bn,pn	%icc,.exit
1635
1636	fmuld	%f50,%f12,%f26		! (3_0) dtmp1 *= y_lo0;
1637	nop
1638	lda	[%o0]%asi,%f12		! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
1639	fsubd	DTWO,%f22,%f20		! (0_0) dtmp2 = DTWO - dtmp2;
1640
1641	nop
1642	nop
1643	lda	[%o0+4]%asi,%f13	! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
1644	bn,pn	%icc,.exit
1645
1646	fmuld	%f54,%f18,%f50		! (1_0) dtmp1 = dd * dres;
1647	nop
1648	ld	[%fp+ftmp0],%o2		! (2_0) iarr = ((int*)&dres)[0];
1649	fand	%f14,DA1,%f2		! (2_0) dexp0 = vis_fand(dres,DA1);
1650
1651	fmuld	%f10,%f62,%f10		! (4_0) x0 *= scl0;
1652	nop
1653	ldd	[%fp+dtmp12],%f22	! (5_1) *(long long*)&scl0 = ll;
1654	fsubd	%f60,%f38,%f46		! (7_1) dtmp0 -= dtmp1;
1655
1656	fmuld	%f12,%f62,%f60		! (4_0) y0 *= scl0;
1657	sra	%o2,11,%o4		! (2_0) iarr >>= 11;
1658	nop
1659	faddd	%f0,%f26,%f38		! (3_0) res0_lo += dtmp1;
1660
1661	and	%o4,0x1fc,%o4		! (2_0) iarr &= 0x1fc;
1662	nop
1663	bn,pn	%icc,.exit
1664	fmuld	%f24,%f20,%f26		! (0_0) dres = dd * dtmp2;
1665
1666	fsqrtd	%f52,%f24		! (6_1) res0 = sqrt ( res0 );
1667	add	%o4,TBL,%o4		! (2_0) (char*)dll1 + iarr
1668	lda	[%i1]0x82,%o1		! (6_0) hx0 = *(int*)px;
1669	fsubd	DTWO,%f50,%f52		! (1_0) dtmp1 = DTWO - dtmp1;
1670
1671	fmuld	%f46,%f28,%f28		! (7_1) dtmp0 *= dres;
1672	mov	%i1,%i2
1673	ld	[%o4],%f20		! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1674	faddd	%f10,D2ON36,%f46	! (4_0) x_hi0 = x0 + D2ON36;
1675
1676	nop
1677	mov	%i0,%o0
1678	lda	[%i0]0x82,%o4		! (6_0) hy0 = *(int*)py;
1679	faddd	%f60,D2ON36,%f50	! (4_0) y_hi0 = y0 + D2ON36;
1680
1681	fmuld	%f22,%f16,%f0		! (5_1) res0 = scl0 * res0;
1682	and	%o1,_0x7fffffff,%o7	! (6_0) hx0 &= 0x7fffffff;
1683	nop
1684	faddd	%f44,%f38,%f22		! (3_0) dres = res0_hi + res0_lo;
1685
1686	fmuld	%f54,%f52,%f16		! (1_0) dd *= dtmp1;
1687	cmp	%o7,_0x7ff00000		! (6_0) hx0 ? 0x7ff00000
1688	st	%f22,[%fp+ftmp0]	! (3_0) iarr = ((int*)&dres)[0];
1689	fpsub32	%f20,%f2,%f52		! (2_0) dd = vis_fpsub32(dtmp0, dexp0);
1690
1691	and	%o4,_0x7fffffff,%l7	! (6_0) hy0 &= 0x7fffffff;
1692	st	%f0,[%i5]		! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
1693	bge,pn	%icc,.update53		! (6_0) if ( hx0 >= 0x7ff00000 )
1694	fsubd	%f46,D2ON36,%f46	! (4_0) x_hi0 -= D2ON36;
1695
1696	sub	%l7,%o7,%o1		! (6_0) diff0 = hy0 - hx0;
1697	cmp	%l7,_0x7ff00000		! (6_0) hy0 ? 0x7ff00000
1698	bge,pn	%icc,.update54		! (6_0) if ( hy0 >= 0x7ff00000 )
1699	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36;
1700
1701	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
1702	sra	%o1,31,%o3		! (6_0) j0 = diff0 >> 31;
1703	st	%f1,[%i5+4]		! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
1704	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
1705
1706	and	%o1,%o3,%o1		! (6_0) j0 &= diff0;
1707	cmp	%o7,_0x00100000		! (6_0) hx0 ? 0x00100000
1708	bl,pn	%icc,.update55		! (6_0) if ( hx0 < 0x00100000 )
1709	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
1710.cont55a:
1711	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
1712	sub	%l7,%o1,%o4		! (6_0) j0 = hy0 - j0;
1713	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
1714	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
1715
1716	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
1717	and	%o4,%l0,%o4		! (6_0) j0 &= 0x7ff00000;
1718	add	%i5,stridez,%i5		! pz += stridez
1719	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
1720
1721	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
1722	sub	%l0,%o4,%g1		! (6_0) j0 = 0x7ff00000 - j0;
1723	nop
1724	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
1725.cont55b:
1726	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
1727	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
1728	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
1729	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
1730
1731	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
1732	nop
1733	nop
1734	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
1735.cont56:
1736	fmuld	%f62,%f2,%f2		! (4_0) res0_lo *= x_lo0;
1737	nop
1738	ldd	[%fp+dtmp11],%f62	! (5_0) *(long long*)&scl0 = ll;
1739	faddd	%f0,%f20,%f32		! (4_0) res0_hi += dtmp0;
1740
1741	lda	[%i4]%asi,%f0		! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
1742	nop
1743	nop
1744	fmuld	%f52,%f10,%f10		! (2_0) dd *= dtmp0;
1745
1746	lda	[%i4+4]%asi,%f1		! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
1747	nop
1748	nop
1749	fsubd	DONE,%f50,%f52		! (0_0) dtmp0 = DONE - dtmp0;
1750
1751	fmuld	%f46,%f60,%f46		! (4_0) dtmp1 *= y_lo0;
1752	nop
1753	lda	[%i3]%asi,%f12		! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
1754	fsubd	DTWO,%f18,%f18		! (1_0) dtmp2 = DTWO - dtmp2;
1755
1756	nop
1757	add	%i1,stridex,%i4		! px += stridex
1758	lda	[%i3+4]%asi,%f13	! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
1759	bn,pn	%icc,.exit
1760
1761	fmuld	%f10,%f14,%f50		! (2_0) dtmp1 = dd * dres;
1762	add	%i4,stridex,%i1		! px += stridex
1763	ld	[%fp+ftmp0],%o2		! (3_0) iarr = ((int*)&dres)[0];
1764	fand	%f22,DA1,%f54		! (3_0) dexp0 = vis_fand(dres,DA1);
1765
1766	fmuld	%f0,%f62,%f60		! (5_0) x0 *= scl0;
1767	nop
1768	ldd	[%fp+dtmp14],%f0	! (6_1) *(long long*)&scl0 = ll;
1769	fsubd	%f52,%f36,%f20		! (0_0) dtmp0 -= dtmp1;
1770
1771	fmuld	%f12,%f62,%f52		! (5_0) y0 *= scl0;
1772	sra	%o2,11,%i3		! (3_0) iarr >>= 11;
1773	nop
1774	faddd	%f2,%f46,%f36		! (4_0) res0_lo += dtmp1;
1775
1776	and	%i3,0x1fc,%i3		! (3_0) iarr &= 0x1fc;
1777	nop
1778	bn,pn	%icc,.exit
1779	fmuld	%f16,%f18,%f16		! (1_0) dres = dd * dtmp2;
1780
1781	fsqrtd	%f48,%f18		! (7_1) res0 = sqrt ( res0 );
1782	add	%i3,TBL,%o4		! (3_0) (char*)dll1 + iarr
1783	lda	[%i4]0x82,%o1		! (7_0) hx0 = *(int*)px;
1784	fsubd	DTWO,%f50,%f46		! (2_0) dtmp1 = DTWO - dtmp1;
1785
1786	fmuld	%f20,%f26,%f48		! (0_0) dtmp0 *= dres;
1787	add	%i0,stridey,%i3		! py += stridey
1788	ld	[%o4],%f20		! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1789	faddd	%f60,D2ON36,%f50	! (5_0) x_hi0 = x0 + D2ON36;
1790
1791	nop
1792	add	%i3,stridey,%i0		! py += stridey
1793	lda	[%i3]0x82,%o4		! (7_0) hy0 = *(int*)py;
1794	faddd	%f52,D2ON36,%f12	! (5_0) y_hi0 = y0 + D2ON36;
1795
1796	fmuld	%f0,%f24,%f2		! (6_1) res0 = scl0 * res0;
1797	and	%o1,_0x7fffffff,%o7	! (7_0) hx0 &= 0x7fffffff;
1798	nop
1799	faddd	%f32,%f36,%f24		! (4_0) dres = res0_hi + res0_lo;
1800
1801	fmuld	%f10,%f46,%f26		! (2_0) dd *= dtmp1;
1802	cmp	%o7,_0x7ff00000		! (7_0) hx0 ? 0x7ff00000
1803	st	%f24,[%fp+ftmp0]	! (4_0) iarr = ((int*)&dres)[0];
1804	fpsub32	%f20,%f54,%f10		! (3_0) dd = vis_fpsub32(dtmp0, dexp0);
1805
1806	and	%o4,_0x7fffffff,%l7	! (7_0) hy0 &= 0x7fffffff;
1807	st	%f2,[%i5]		! (6_1) ((float*)pz)[0] = ((float*)&res0)[0];
1808	bge,pn	%icc,.update57		! (7_0) if ( hx0 >= 0x7ff00000 )
1809	fsubd	%f50,D2ON36,%f20	! (5_0) x_hi0 -= D2ON36;
1810
1811	sub	%l7,%o7,%o1		! (7_0) diff0 = hy0 - hx0;
1812	cmp	%l7,_0x7ff00000		! (7_0) hy0 ? 0x7ff00000
1813	bge,pn	%icc,.update58		! (7_0) if ( hy0 >= 0x7ff00000 )
1814	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;
1815
1816	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
1817	sra	%o1,31,%o3		! (7_0) j0 = diff0 >> 31;
1818	st	%f3,[%i5+4]		! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
1819	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;
1820
1821	and	%o1,%o3,%o1		! (7_0) j0 &= diff0;
1822	cmp	%o7,_0x00100000		! (7_0) hx0 ? 0x00100000
1823	bl,pn	%icc,.update59		! (7_0) if ( hx0 < 0x00100000 )
1824	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);
1825.cont59a:
1826	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
1827	sub	%l7,%o1,%o4		! (7_0) j0 = hy0 - j0;
1828	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
1829	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
1830
1831	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
1832	and	%o4,%l0,%o4		! (7_0) j0 &= 0x7ff00000;
1833	add	%i5,stridez,%i5		! pz += stridez
1834	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
1835
1836	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
1837	sub	%l0,%o4,%g1		! (7_0) j0 = 0x7ff00000 - j0;
1838	nop
1839	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
1840.cont59b:
1841	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
1842	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
1843	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
1844	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;
1845
1846	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
1847	nop
1848	nop
1849	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
1850.cont60:
1851	fmuld	%f62,%f2,%f2		! (5_0) res0_lo *= x_lo0;
1852	nop
1853	ldd	[%fp+dtmp13],%f62	! (6_0) *(long long*)&scl0 = ll;
1854	faddd	%f0,%f46,%f42		! (5_0) res0_hi += dtmp0;
1855
1856	fmuld	%f10,%f20,%f52		! (3_0) dd *= dtmp0;
1857	nop
1858	lda	[%i2]%asi,%f10		! (6_0) ((float*)&x0)[0] = ((float*)px)[0];
1859	bn,pn	%icc,.exit
1860
1861	lda	[%i2+4]%asi,%f11	! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
1862	nop
1863	nop
1864	fsubd	DONE,%f60,%f60		! (1_0) dtmp0 = DONE - dtmp0;
1865
1866	fmuld	%f50,%f54,%f46		! (5_0) dtmp1 *= y_lo0;
1867	nop
1868	lda	[%o0]%asi,%f12		! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
1869	fsubd	DTWO,%f14,%f14		! (2_0) dtmp2 = DTWO - dtmp2;
1870
1871	nop
1872	nop
1873	lda	[%o0+4]%asi,%f13	! (6_0) ((float*)&y0)[1] = ((float*)py)[1];
1874	bn,pn	%icc,.exit
1875
1876	fmuld	%f52,%f22,%f50		! (3_0) dtmp1 = dd * dres;
1877	nop
1878	ld	[%fp+ftmp0],%o2		! (4_0) iarr = ((int*)&dres)[0];
1879	fand	%f24,DA1,%f54		! (4_0) dexp0 = vis_fand(dres,DA1);
1880
1881	fmuld	%f10,%f62,%f10		! (6_0) x0 *= scl0;
1882	nop
1883	ldd	[%fp+dtmp0],%f0		! (7_1) *(long long*)&scl0 = ll;
1884	fsubd	%f60,%f34,%f20		! (1_0) dtmp0 -= dtmp1;
1885
1886	fmuld	%f12,%f62,%f60		! (6_0) y0 *= scl0;
1887	sra	%o2,11,%o4		! (4_0) iarr >>= 11;
1888	nop
1889	faddd	%f2,%f46,%f34		! (5_0) res0_lo += dtmp1;
1890
1891	and	%o4,0x1fc,%o4		! (4_0) iarr &= 0x1fc;
1892	subcc	counter,8,counter	! counter -= 8;
1893	bpos,pt	%icc,.main_loop
1894	fmuld	%f26,%f14,%f26		! (2_0) dres = dd * dtmp2;
1895
1896	add	counter,8,counter
1897
! .tail: drain the software pipeline after the main loop has exited.
! Every stage below decrements counter and leaves through .begin as soon as
! it goes negative; otherwise it finishes one more in-flight result
! (res0 = scl0 * sqrt(res0)), stores its two 32-bit halves through pz and
! advances pz by stridez. The (n_m) tags in the trailing comments appear to
! name the pipeline slot each instruction belongs to.
1898.tail:
1899	subcc	counter,1,counter
1900	bneg	.begin
1901	nop
1902
1903	fsqrtd	%f48,%f14		! (0_1) res0 = sqrt ( res0 );
1904	add	%o4,TBL,%o4		! (4_1) (char*)dll1 + iarr
1905	fsubd	DTWO,%f50,%f46		! (3_1) dtmp1 = DTWO - dtmp1;
1906
1907	fmuld	%f20,%f16,%f48		! (1_1) dtmp0 *= dres;
1908	ld	[%o4],%f20		! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1909
1910	fmuld	%f0,%f18,%f0		! (7_2) res0 = scl0 * res0;
1911	st	%f0,[%i5]		! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
1912	faddd	%f42,%f34,%f16		! (5_1) dres = res0_hi + res0_lo;
1913
1914	subcc	counter,1,counter
1915	st	%f1,[%i5+4]		! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
1916	bneg	.begin
1917	add	%i5,stridez,%i5		! pz += stridez
1918
1919	fmuld	%f52,%f46,%f18		! (3_1) dd *= dtmp1;
1920	st	%f16,[%fp+ftmp0]	! (5_1) iarr = ((int*)&dres)[0];
1921	fpsub32	%f20,%f54,%f54		! (4_1) dd = vis_fpsub32(dtmp0, dexp0);
1922
1923	fmuld	%f54,%f24,%f50		! (4_1) dtmp0 = dd * dres;
1924	faddd	%f28,%f48,%f52		! (1_1) res0 += dtmp0;
1925
1926
1927	fand	%f26,DA0,%f48		! (2_1) res0 = vis_fand(dres,DA0);
1928
1929	fmuld	%f18,%f22,%f22		! (3_1) dtmp2 = dd * dres;
1930	fsubd	DTWO,%f50,%f20		! (4_1) dtmp0 = DTWO - dtmp0;
1931
1932	fmuld	%f30,%f48,%f12		! (2_1) dtmp0 = res0_hi * res0;
1933
1934	fmuld	%f40,%f48,%f40		! (2_1) dtmp1 = res0_lo * res0;
1935
1936	fmuld	%f54,%f20,%f54		! (4_1) dd *= dtmp0;
1937
1938	fsubd	DONE,%f12,%f60		! (2_1) dtmp0 = DONE - dtmp0;
1939
1940	fsubd	DTWO,%f22,%f22		! (3_1) dtmp2 = DTWO - dtmp2;
1941
1942	fmuld	%f54,%f24,%f50		! (4_1) dtmp1 = dd * dres;
1943	ld	[%fp+ftmp0],%o2		! (5_1) iarr = ((int*)&dres)[0];
1944	fand	%f16,DA1,%f2		! (5_1) dexp0 = vis_fand(dres,DA1);
1945
1946	ldd	[%fp+dtmp2],%f0		! (0_1) *(long long*)&scl0 = ll;
1947	fsubd	%f60,%f40,%f20		! (2_1) dtmp0 -= dtmp1;
1948
1949	sra	%o2,11,%i3		! (5_1) iarr >>= 11;
1950
1951	and	%i3,0x1fc,%i3		! (5_1) iarr &= 0x1fc;
1952	fmuld	%f18,%f22,%f28		! (3_1) dres = dd * dtmp2;
1953
1954	fsqrtd	%f52,%f22		! (1_1) res0 = sqrt ( res0 );
1955	add	%i3,TBL,%g1		! (5_1) (char*)dll1 + iarr
1956	fsubd	DTWO,%f50,%f62		! (4_1) dtmp1 = DTWO - dtmp1;
1957
1958	fmuld	%f20,%f26,%f52		! (2_1) dtmp0 *= dres;
1959	ld	[%g1],%f26		! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1960
1961	fmuld	%f0,%f14,%f0		! (0_1) res0 = scl0 * res0;
1962
1963	fmuld	%f54,%f62,%f14		! (4_1) dd *= dtmp1;
1964	fpsub32	%f26,%f2,%f26		! (5_1) dd = vis_fpsub32(dtmp0, dexp0);
1965
1966	st	%f0,[%i5]		! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];
1967
1968	fmuld	%f26,%f16,%f50		! (5_1) dtmp0 = dd * dres;
1969	st	%f1,[%i5+4]		! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
1970	faddd	%f48,%f52,%f52		! (2_1) res0 += dtmp0;
1971
1972	subcc	counter,1,counter
1973	bneg	.begin
1974	add	%i5,stridez,%i5		! pz += stridez
1975
1976	fand	%f28,DA0,%f48		! (3_1) res0 = vis_fand(dres,DA0);
1977
1978	fmuld	%f44,%f48,%f10		! (3_1) dtmp0 = res0_hi * res0;
1979	fsubd	DTWO,%f50,%f20		! (5_1) dtmp0 = DTWO - dtmp0;
1980
1981	fmuld	%f14,%f24,%f24		! (4_1) dtmp2 = dd * dres;
1982
1983	fmuld	%f38,%f48,%f38		! (3_1) dtmp1 = res0_lo * res0;
1984
1985	fsubd	DONE,%f10,%f60		! (3_1) dtmp0 = DONE - dtmp0;
1986	fmuld	%f26,%f20,%f54		! (5_1) dd *= dtmp0;
1987
1988	fsubd	DTWO,%f24,%f24		! (4_1) dtmp2 = DTWO - dtmp2;
1989
1990	fmuld	%f54,%f16,%f46		! (5_1) dtmp1 = dd * dres;
1991
1992	ldd	[%fp+dtmp4],%f50	! (1_1) *(long long*)&scl0 = ll;
1993	fsubd	%f60,%f38,%f20		! (3_1) dtmp0 -= dtmp1;
1994
1995	fmuld	%f14,%f24,%f26		! (4_1) dres = dd * dtmp2;
1996
1997	fsqrtd	%f52,%f24		! (2_1) res0 = sqrt ( res0 );
1998	fsubd	DTWO,%f46,%f62		! (5_1) dtmp1 = DTWO - dtmp1;
1999
2000	fmuld	%f20,%f28,%f52		! (3_1) dtmp0 *= dres;
2001
2002	fmuld	%f50,%f22,%f0		! (1_1) res0 = scl0 * res0;
2003
2004	fmuld	%f54,%f62,%f22		! (5_1) dd *= dtmp1;
2005
2006	st	%f0,[%i5]		! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
2007
2008	subcc	counter,1,counter
2009	st	%f1,[%i5+4]		! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
2010	bneg	.begin
2011	add	%i5,stridez,%i5		! pz += stridez
2012
2013	faddd	%f48,%f52,%f52		! (3_1) res0 += dtmp0;
2014
2015	fand	%f26,DA0,%f48		! (4_1) res0 = vis_fand(dres,DA0);
2016
2017	fmuld	%f32,%f48,%f10		! (4_1) dtmp0 = res0_hi * res0;
2018
2019	fmuld	%f22,%f16,%f16		! (5_1) dtmp2 = dd * dres;
2020
2021	fmuld	%f36,%f48,%f36		! (4_1) dtmp1 = res0_lo * res0;
2022
2023	fsubd	DONE,%f10,%f60		! (4_1) dtmp0 = DONE - dtmp0;
2024
2025	fsubd	DTWO,%f16,%f16		! (5_1) dtmp2 = DTWO - dtmp2;
2026
2027	ldd	[%fp+dtmp6],%f50	! (2_1) *(long long*)&scl0 = ll;
2028	fsubd	%f60,%f36,%f20		! (4_1) dtmp0 -= dtmp1;
2029
2030	fmuld	%f22,%f16,%f28		! (5_1) dres = dd * dtmp2;
2031
2032	fsqrtd	%f52,%f16		! (3_1) res0 = sqrt ( res0 );
2033
2034	fmuld	%f20,%f26,%f52		! (4_1) dtmp0 *= dres;
2035
2036	fmuld	%f50,%f24,%f0		! (2_1) res0 = scl0 * res0;
2037
2038	st	%f0,[%i5]		! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
2039
2040	st	%f1,[%i5+4]		! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
2041	faddd	%f48,%f52,%f52		! (4_1) res0 += dtmp0;
2042
2043	subcc	counter,1,counter
2044	bneg	.begin
2045	add	%i5,stridez,%i5		! pz += stridez
2046
2047	fand	%f28,DA0,%f48		! (5_1) res0 = vis_fand(dres,DA0);
2048
2049	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
2050
2051	fmuld	%f34,%f48,%f34		! (5_1) dtmp1 = res0_lo * res0;
2052
2053	fsubd	DONE,%f10,%f60		! (5_1) dtmp0 = DONE - dtmp0;
2054
2055	ldd	[%fp+dtmp8],%f18	! (3_1) *(long long*)&scl0 = ll;
2056	fsubd	%f60,%f34,%f46		! (5_1) dtmp0 -= dtmp1;
2057
2058	fsqrtd	%f52,%f24		! (4_1) res0 = sqrt ( res0 );
2059
2060	fmuld	%f46,%f28,%f52		! (5_1) dtmp0 *= dres;
2061
2062	fmuld	%f18,%f16,%f0		! (3_1) res0 = scl0 * res0;
2063	st	%f0,[%i5]		! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
2064	st	%f1,[%i5+4]		! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
2065	faddd	%f48,%f52,%f52		! (5_1) res0 += dtmp0;
2066
2067	subcc	counter,1,counter
2068	bneg	.begin
2069	add	%i5,stridez,%i5		! pz += stridez
2070
2071	ldd	[%fp+dtmp10],%f14	! (4_1) *(long long*)&scl0 = ll;
2072
2073	fsqrtd	%f52,%f16		! (5_1) res0 = sqrt ( res0 );
2074
2075	fmuld	%f14,%f24,%f0		! (4_1) res0 = scl0 * res0;
2076	st	%f0,[%i5]		! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
2077	st	%f1,[%i5+4]		! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
2078
2079	subcc	counter,1,counter
2080	bneg	.begin
2081	add	%i5,stridez,%i5		! pz += stridez
2082
2083	ldd	[%fp+dtmp12],%f22	! (5_1) *(long long*)&scl0 = ll;
2084
2085	fmuld	%f22,%f16,%f0		! (5_1) res0 = scl0 * res0;
2086	st	%f0,[%i5]		! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
2087	st	%f1,[%i5+4]		! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
2088
2089	ba	.begin			! last in-flight result stored; leave the drain
2090	add	%i5,stridez,%i5		! pz += stridez (delay slot of the ba)
2091
2092	.align	16
! .spec0: slow path for one operand pair whose high words were flagged by
! the caller (branch site not shown here; presumably hx0 or hy0 is at or
! above 0x7ff00000, i.e. Inf or NaN).
!   In:  %o7 = hx0, %l7 = hy0 (sign bit already cleared),
!        %i4 = px, %i3 = py, %i5 = pz.
!   - If x is exactly infinity (hx0 == 0x7ff00000 and lx == 0) or y is
!     exactly infinity, the result stored is +0.0 (label 3).
!   - Otherwise the result stored is fabs(x0) * fabs(y0) (label 2).
!   In both cases px, py and pz are advanced by their strides, counter is
!   decremented and control returns to .begin1.
! Clobbers %i2, %o2, %f0-%f3 and the integer condition codes.
2093.spec0:
2094	cmp	%o7,_0x7ff00000		! hx0 ? 0x7ff00000
2095	bne	1f			! if ( hx0 != 0x7ff00000 )
2096	ld	[%i4+4],%i2		! lx = ((int*)px)[1];
2097
2098	cmp	%i2,0			! lx ? 0
2099	be	3f			! if ( lx == 0 )
2100	nop
21011:
2102	cmp	%l7,_0x7ff00000		! hy0 ? 0x7ff00000
2103	bne	2f			! if ( hy0 != 0x7ff00000 )
2104	ld	[%i3+4],%o2		! ly = ((int*)py)[1];
2105
2106	cmp	%o2,0			! ly ? 0
2107	be	3f			! if ( ly == 0 ); the ld below is its delay slot
21082:
2109	ld	[%i4],%f0		! ((float*)&x0)[0] = ((float*)px)[0];
2110	ld	[%i4+4],%f1		! ((float*)&x0)[1] = ((float*)px)[1];
2111
2112	ld	[%i3],%f2		! ((float*)&y0)[0] = ((float*)py)[0];
2113	add	%i4,stridex,%i4		! px += stridex
2114	ld	[%i3+4],%f3		! ((float*)&y0)[1] = ((float*)py)[1];
2115
2116	fabsd	%f0,%f0
2117
2118	fabsd	%f2,%f2
2119
2120	fmuld	%f0,%f2,%f0		! res0 = fabs(x0) * fabs(y0);
2121	add	%i3,stridey,%i3		! py += stridey;
2122	st	%f0,[%i5]		! ((float*)pz)[0] = ((float*)&res0)[0];
2123
2124	st	%f1,[%i5+4]		! ((float*)pz)[1] = ((float*)&res0)[1];
2125	add	%i5,stridez,%i5		! pz += stridez
2126	ba	.begin1
2127	sub	counter,1,counter
21283:
2129	add	%i4,stridex,%i4		! px += stridex
2130	add	%i3,stridey,%i3		! py += stridey
2131	st	%g0,[%i5]		! ((int*)pz)[0] = 0;
2132
! Both halves of the current result must be written before pz moves on;
! storing the low word after the increment would hit the next element.
2133	st	%g0,[%i5+4]		! ((int*)pz)[1] = 0;
2134	add	%i5,stridez,%i5		! pz += stridez;
2135	ba	.begin1
2136	sub	counter,1,counter
2137
2138	.align	16
! .spec1: slow path for very small operands (branch site not shown here;
! presumably entered when hx0 < 0x00100000).
!   - If hy0 >= 0x00100000 the pair is handed back at .cont_spec0.
!   - If every bit of |x| and |y| is zero, 1.0 / 0.0 is stored as the
!     result, the pointers are advanced, counter is decremented and control
!     returns to .begin1.
!   - Otherwise x and y are loaded, made non-negative and converted with
!     fxtod (bit pattern treated as a 64-bit integer). When the high word is
!     at least 0x00080000 the value is first masked with
!     0x0007ffffffffffff and D2ON51 is added after the conversion. The
!     converted values are written to dtmp2/dtmp3 on the stack, %i4/%i3 are
!     pointed at those slots, D2ONM52 and D2ON1022 are stored to dtmp15 and
!     dtmp0, and control continues at .cont_spec1.
2139.spec1:
2140	and	%o1,%o3,%o1		! (7_0) j0 &= diff0;
2141
2142	cmp	%l7,_0x00100000		! (7_0) hy0 ? 0x00100000
2143	bge,pn	%icc,.cont_spec0	! (7_0) if ( hy0 < 0x00100000 )
2144
2145	ld	[%i4+4],%i2		! lx = ((int*)px)[1];
2146	or	%o7,%l7,%g5		! ii = hx0 | hy0;
2147	fzero	%f0
2148
2149	ld	[%i3+4],%o2		! ly = ((int*)py)[1];
2150	or	%i2,%g5,%g5		! ii |= lx;
2151
2152	orcc	%o2,%g5,%g5		! ii |= ly;
2153	bnz,a,pn	%icc,1f		! if ( ii != 0 )
2154	sethi	%hi(0x00080000),%i2	! annulled slot: %i2 = 0x00080000 only when taken
2155
2156	fdivd	DONE,%f0,%f0		! res0 = 1.0 / 0.0;
2157
2158	st	%f0,[%i5]		! ((float*)pz)[0] = ((float*)&res0)[0];
2159
2160	add	%i4,stridex,%i4		! px += stridex;
2161	add	%i3,stridey,%i3		! py += stridey;
2162	st	%f1,[%i5+4]		! ((float*)pz)[1] = ((float*)&res0)[1];
2163
2164	add	%i5,stridez,%i5		! pz += stridez;
2165	ba	.begin1
2166	sub	counter,1,counter
21671:
2168	ld	[%i4],%f0		! ((float*)&x0)[0] = ((float*)px)[0];
2169
2170	ld	[%i4+4],%f1		! ((float*)&x0)[1] = ((float*)px)[1];
2171
2172	ld	[%i3],%f2		! ((float*)&y0)[0] = ((float*)py)[0];
2173
2174	fabsd	%f0,%f0			! x0 = fabs(x0);
2175	ld	[%i3+4],%f3		! ((float*)&y0)[1] = ((float*)py)[1];
2176
2177	ldd	[TBL+TBL_SHIFT+64],%f12	! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
2178	add	%fp,dtmp2,%i4		! %i4 now addresses the dtmp2 stack slot
2179	add	%fp,dtmp3,%i3		! %i3 now addresses the dtmp3 stack slot
2180
2181	fabsd	%f2,%f2			! y0 = fabs(y0);
2182	ldd	[TBL+TBL_SHIFT+56],%f10	! D2ON51
2183
2184	ldx	[TBL+TBL_SHIFT+48],%g5	! D2ONM52
2185	cmp	%o7,%i2			! hx0 ? 0x00080000
2186	bl,a	1f			! if ( hx0 < 0x00080000 )
2187	fxtod	%f0,%f0			! x0 = *(long long*)&x0;
2188
2189	fand	%f0,%f12,%f0		! x0 = vis_fand(x0, dtmp0);
2190	fxtod	%f0,%f0			! x0 = *(long long*)&x0;
2191	faddd	%f0,%f10,%f0		! x0 += D2ON51;
21921:
2193	std	%f0,[%i4]		! converted x0 -> dtmp2
2194
2195	ldx	[TBL+TBL_SHIFT+40],%g1	! D2ON1022
2196	cmp	%l7,%i2			! hy0 ? 0x00080000
2197	bl,a	1f			! if ( hy0 < 0x00080000 )
2198	fxtod	%f2,%f2			! y0 = *(long long*)&y0;
2199
2200	fand	%f2,%f12,%f2		! y0 = vis_fand(y0, dtmp0);
2201	fxtod	%f2,%f2			! y0 = *(long long*)&y0;
2202	faddd	%f2,%f10,%f2		! y0 += D2ON51;
22031:
2204	std	%f2,[%i3]		! converted y0 -> dtmp3
2205
2206	stx	%g5,[%fp+dtmp15]	! D2ONM52
2207
2208	ba	.cont_spec1
2209	stx	%g1,[%fp+dtmp0]		! D2ON1022
2210
2211	.align	16
! .update0: out-of-line fixup for the pipelined loop (branch site not shown
! here). When more than one element remains, the remainder (counter - 1)
! and the operand pointers held in %i2/%o0 are parked in tmp_counter,
! tmp_px and tmp_py, and counter is clamped to 1. Both pointers are then
! aimed at TBL+TBL_SHIFT+24 (presumably a harmless stand-in operand - TODO
! confirm against the table layout) and %o4 is set to 0x3ff00000 before
! rejoining at .cont1.
2212.update0:
2213	cmp	counter,1
2214	ble	1f
2215	nop
2216
2217	sub	counter,1,counter
2218	st	counter,[%fp+tmp_counter]
2219
2220	stx	%i2,[%fp+tmp_px]
2221
2222	stx	%o0,[%fp+tmp_py]
2223
2224	mov	1,counter
22251:
2226	sethi	%hi(0x3ff00000),%o4
2227	add	TBL,TBL_SHIFT+24,%i2
2228	ba	.cont1
2229	add	TBL,TBL_SHIFT+24,%o0
2230
2231	.align	16
! .update1: conditional variant of the counter-clamping fixup. If
! hy0 >= 0x00100000 the normal path is resumed at .cont0. Otherwise, when
! more than one element remains, counter - 1 and the pointers in %i2/%o0
! are parked in tmp_counter/tmp_px/tmp_py and counter is clamped to 1; the
! pointers are then aimed at TBL+TBL_SHIFT+24 (presumably a harmless
! stand-in operand - TODO confirm), %o4 is set to 0x3ff00000 and control
! rejoins at .cont1.
2232.update1:
2233	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2234	bge,pn	%icc,.cont0		! (0_0) if ( hy0 < 0x00100000 )
2235
2236	cmp	counter,1		! sits in the delay slot of the bge above
2237	ble,a	1f
2238	nop
2239
2240	sub	counter,1,counter
2241	st	counter,[%fp+tmp_counter]
2242
2243	stx	%i2,[%fp+tmp_px]
2244
2245	mov	1,counter
2246	stx	%o0,[%fp+tmp_py]
22471:
2248	sethi	%hi(0x3ff00000),%o4
2249	add	TBL,TBL_SHIFT+24,%i2
2250	ba	.cont1
2251	add	TBL,TBL_SHIFT+24,%o0
2252
2253	.align	16
! .update2: out-of-line fixup (branch site not shown here). When more than
! two elements remain, counter - 2 and the pointers in %i4/%i3 are parked in
! tmp_counter/tmp_px/tmp_py and counter is clamped to 2. The floating-point
! instructions after 1: carry stage (7_1) work, apparently the instructions
! that lie between the branch site and .cont4 in the main loop. The pointers
! are then aimed at TBL+TBL_SHIFT+24 (presumably a harmless stand-in
! operand - TODO confirm) and %o4 is set to 0x3ff00000 before rejoining at
! .cont4.
2254.update2:
2255	cmp	counter,2
2256	ble	1f
2257	nop
2258
2259	sub	counter,2,counter
2260	st	counter,[%fp+tmp_counter]
2261
2262	stx	%i4,[%fp+tmp_px]
2263
2264	stx	%i3,[%fp+tmp_py]
2265
2266	mov	2,counter
22671:
2268	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;
2269
2270	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
2271	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
2272
2273	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
2274	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
2275
2276	sethi	%hi(0x3ff00000),%o4
2277	add	TBL,TBL_SHIFT+24,%i4
2278	ba	.cont4
2279	add	TBL,TBL_SHIFT+24,%i3
2280
2281	.align	16
! .update3: same fixup as .update2 but without the y_hi0 subtraction
! (presumably because the branch site had already executed it - TODO
! confirm). When more than two elements remain, counter - 2 and the pointers
! in %i4/%i3 are parked in tmp_counter/tmp_px/tmp_py and counter is clamped
! to 2. After the stage (7_1) floating-point work the pointers are aimed at
! TBL+TBL_SHIFT+24, %o4 is set to 0x3ff00000 and control rejoins at .cont4.
2282.update3:
2283	cmp	counter,2
2284	ble	1f
2285	nop
2286
2287	sub	counter,2,counter
2288	st	counter,[%fp+tmp_counter]
2289
2290	stx	%i4,[%fp+tmp_px]
2291
2292	stx	%i3,[%fp+tmp_py]
2293
2294	mov	2,counter
22951:
2296	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
2297	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
2298
2299	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
2300	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
2301
2302	sethi	%hi(0x3ff00000),%o4
2303	add	TBL,TBL_SHIFT+24,%i4
2304	ba	.cont4
2305	add	TBL,TBL_SHIFT+24,%i3
2306
2307	.align	16
! .update4: conditional fixup. If hy0 >= 0x00100000 the normal path is
! resumed at .cont4, with the annulled delay slot computing
! j0 = 0x7ff00000 - j0 only on that taken branch. Otherwise, when more than
! two elements remain, counter - 2 and the pointers in %i4/%i3 are parked in
! tmp_counter/tmp_px/tmp_py and counter is clamped to 2; the pointers are
! then aimed at TBL+TBL_SHIFT+24 (presumably a harmless stand-in operand -
! TODO confirm), %o4 is set to 0x3ff00000 and control rejoins at .cont4.
2308.update4:
2309	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2310	bge,a,pn	%icc,.cont4	! (0_0) if ( hy0 < 0x00100000 )
2311	sub	%l0,%o4,%o4		! (1_0) j0 = 0x7ff00000 - j0;
2312
2313	cmp	counter,2
2314	ble,a	1f
2315	nop
2316
2317	sub	counter,2,counter
2318	st	counter,[%fp+tmp_counter]
2319
2320	stx	%i4,[%fp+tmp_px]
2321
2322	mov	2,counter
2323	stx	%i3,[%fp+tmp_py]
23241:
2325	sethi	%hi(0x3ff00000),%o4
2326	add	TBL,TBL_SHIFT+24,%i4
2327	ba	.cont4
2328	add	TBL,TBL_SHIFT+24,%i3
2329
2330	.align	16
! .update5: out-of-line fixup (branch site not shown here). When more than
! three elements remain, counter - 3 and the pointers in %i2/%o0 are parked
! in tmp_counter/tmp_px/tmp_py and counter is clamped to 3. The instructions
! after 1: carry stage (7_1)/(0_0) work, apparently what lies between the
! branch site and .cont8. %g1 is loaded with 0x3ff00000 << 32 (the bit
! pattern of the double 1.0), the pointers are aimed at TBL+TBL_SHIFT+24
! (presumably a harmless stand-in operand - TODO confirm) and control
! rejoins at .cont8.
2331.update5:
2332	cmp	counter,3
2333	ble	1f
2334	nop
2335
2336	sub	counter,3,counter
2337	st	counter,[%fp+tmp_counter]
2338
2339	stx	%i2,[%fp+tmp_px]
2340
2341	stx	%o0,[%fp+tmp_py]
2342
2343	mov	3,counter
23441:
2345	st	%f14,[%fp+ftmp0]	! (7_1) iarr = ((int*)&dres)[0];
2346	fsubd	%f46,D2ON36,%f20	! (0_0) x_hi0 -= D2ON36;
2347
2348	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;
2349
2350	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
2351	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
2352
2353	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
2354	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
2355
2356	sethi	%hi(0x3ff00000),%g1
2357	add	TBL,TBL_SHIFT+24,%i2
2358
2359	sllx	%g1,32,%g1
2360	ba	.cont8
2361	add	TBL,TBL_SHIFT+24,%o0
2362
2363	.align	16
! .update6: same fixup as .update5 minus the ftmp0 store and the two
! D2ON36 subtractions (presumably already done at the branch site - TODO
! confirm). When more than three elements remain, counter - 3 and the
! pointers in %i2/%o0 are parked in tmp_counter/tmp_px/tmp_py and counter is
! clamped to 3. After the stage (0_0) floating-point work, %g1 is loaded
! with 0x3ff00000 << 32 (bit pattern of 1.0), the pointers are aimed at
! TBL+TBL_SHIFT+24 and control rejoins at .cont8.
2364.update6:
2365	cmp	counter,3
2366	ble	1f
2367	nop
2368
2369	sub	counter,3,counter
2370	st	counter,[%fp+tmp_counter]
2371
2372	stx	%i2,[%fp+tmp_px]
2373
2374	stx	%o0,[%fp+tmp_py]
2375
2376	mov	3,counter
23771:
2378	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
2379	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
2380
2381	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
2382	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
2383
2384	sethi	%hi(0x3ff00000),%g1
2385	add	TBL,TBL_SHIFT+24,%i2
2386
2387	sllx	%g1,32,%g1
2388	ba	.cont8
2389	add	TBL,TBL_SHIFT+24,%o0
2390
2391	.align	16
! .update7: conditional fixup. If hy0 >= 0x00100000 the normal path is
! resumed at .cont7. Otherwise, when more than three elements remain,
! counter - 3 and the pointers in %i2/%o0 are parked in
! tmp_counter/tmp_px/tmp_py and counter is clamped to 3; %g1 is loaded with
! 0x3ff00000 << 32 (bit pattern of 1.0), the pointers are aimed at
! TBL+TBL_SHIFT+24 (presumably a harmless stand-in operand - TODO confirm)
! and control rejoins at .cont8.
2392.update7:
2393	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2394	bge,pn	%icc,.cont7		! (0_0) if ( hy0 < 0x00100000 )
2395
2396	cmp	counter,3		! sits in the delay slot of the bge above
2397	ble,a	1f
2398	nop
2399
2400	sub	counter,3,counter
2401	st	counter,[%fp+tmp_counter]
2402
2403	stx	%i2,[%fp+tmp_px]
2404
2405	mov	3,counter
2406	stx	%o0,[%fp+tmp_py]
24071:
2408	sethi	%hi(0x3ff00000),%g1
2409	add	TBL,TBL_SHIFT+24,%i2
2410
2411	sllx	%g1,32,%g1
2412	ba	.cont8
2413	add	TBL,TBL_SHIFT+24,%o0
2414
2415	.align	16
! .update9: out-of-line fixup (branch site not shown here). When more than
! four elements remain, counter - 4 and the pointers in %i4/%i3 are parked
! in tmp_counter/tmp_px/tmp_py and counter is clamped to 4. The instructions
! after 1: carry stage (0_0)/(1_0)/(7_1) work, apparently what lies between
! the branch site and .cont12. %g1 is set to 0x3ff00000, the pointers are
! aimed at TBL+TBL_SHIFT+24 (presumably a harmless stand-in operand - TODO
! confirm) and control rejoins at .cont12.
2416.update9:
2417	cmp	counter,4
2418	ble	1f
2419	nop
2420
2421	sub	counter,4,counter
2422	st	counter,[%fp+tmp_counter]
2423
2424	stx	%i4,[%fp+tmp_px]
2425
2426	stx	%i3,[%fp+tmp_py]
2427
2428	mov	4,counter
24291:
2430	st	%f22,[%fp+ftmp0]	! (0_0) iarr = ((int*)&dres)[0];
2431	fsubd	%f46,D2ON36,%f20	! (1_0) x_hi0 -= D2ON36;
2432
2433	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;
2434
2435	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres;
2436
2437
2438	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
2439	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
2440
2441	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
2442	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
2443
2444	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
2445
2446	sethi	%hi(0x3ff00000),%g1
2447	add	TBL,TBL_SHIFT+24,%i4
2448	ba	.cont12
2449	add	TBL,TBL_SHIFT+24,%i3
2450
2451	.align	16
! .update10: same fixup as .update9 minus the ftmp0 store and the two
! D2ON36 subtractions (presumably already done at the branch site - TODO
! confirm). When more than four elements remain, counter - 4 and the
! pointers in %i4/%i3 are parked in tmp_counter/tmp_px/tmp_py and counter is
! clamped to 4. After the stage (7_1)/(1_0) floating-point work, %g1 is set
! to 0x3ff00000, the pointers are aimed at TBL+TBL_SHIFT+24 and control
! rejoins at .cont12.
2452.update10:
2453	cmp	counter,4
2454	ble	1f
2455	nop
2456
2457	sub	counter,4,counter
2458	st	counter,[%fp+tmp_counter]
2459
2460	stx	%i4,[%fp+tmp_px]
2461
2462	stx	%i3,[%fp+tmp_py]
2463
2464	mov	4,counter
24651:
2466	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres;
2467
2468
2469	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
2470	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
2471
2472	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
2473	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
2474
2475	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
2476
2477	sethi	%hi(0x3ff00000),%g1
2478	add	TBL,TBL_SHIFT+24,%i4
2479	ba	.cont12
2480	add	TBL,TBL_SHIFT+24,%i3
2481
2482	.align	16
! .update11: conditional fixup. If hy0 >= 0x00100000 the normal path is
! resumed at .cont11. Otherwise, when more than four elements remain,
! counter - 4 and the pointers in %i4/%i3 are parked in
! tmp_counter/tmp_px/tmp_py and counter is clamped to 4; %g1 is set to
! 0x3ff00000, the stage (7_1) DTWO - dtmp0 step is performed, the pointers
! are aimed at TBL+TBL_SHIFT+24 (presumably a harmless stand-in operand -
! TODO confirm) and control rejoins at .cont12.
2483.update11:
2484	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2485	bge,pn	%icc,.cont11		! (0_0) if ( hy0 < 0x00100000 )
2486
2487	cmp	counter,4		! sits in the delay slot of the bge above
2488	ble,a	1f
2489	nop
2490
2491	sub	counter,4,counter
2492	st	counter,[%fp+tmp_counter]
2493
2494	stx	%i4,[%fp+tmp_px]
2495
2496	mov	4,counter
2497	stx	%i3,[%fp+tmp_py]
24981:
2499	sethi	%hi(0x3ff00000),%g1
2500	add	TBL,TBL_SHIFT+24,%i4
2501
2502	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
2503	ba	.cont12
2504	add	TBL,TBL_SHIFT+24,%i3
2505
2506	.align	16
! .update13: out-of-line fixup (branch site not shown here). When more than
! five elements remain, counter - 5 and the pointers in %i2/%o0 are parked
! in tmp_counter/tmp_px/tmp_py and counter is clamped to 5. The instructions
! after 1: carry stage (2_0)/(0_0) work, apparently what lies between the
! branch site and .cont16. %g1 is set to 0x3ff00000, the pointers are aimed
! at TBL+TBL_SHIFT+24 (presumably a harmless stand-in operand - TODO
! confirm) and control rejoins at .cont16.
2507.update13:
2508	cmp	counter,5
2509	ble	1f
2510	nop
2511
2512	sub	counter,5,counter
2513	st	counter,[%fp+tmp_counter]
2514
2515	stx	%i2,[%fp+tmp_px]
2516
2517	stx	%o0,[%fp+tmp_py]
2518
2519	mov	5,counter
25201:
2521	fsubd	%f46,D2ON36,%f20	! (2_0) x_hi0 -= D2ON36;
2522
2523	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;
2524
2525	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
2526
2527	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
2528	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
2529
2530	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
2531	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
2532
2533	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
2534
2535	sethi	%hi(0x3ff00000),%g1
2536	add	TBL,TBL_SHIFT+24,%i2
2537	ba	.cont16
2538	add	TBL,TBL_SHIFT+24,%o0
2539
2540	.align	16
! .update14: same fixup as .update13 minus the two D2ON36 subtractions
! (presumably already done at the branch site - TODO confirm). When more
! than five elements remain, counter - 5 and the pointers in %i2/%o0 are
! parked in tmp_counter/tmp_px/tmp_py and counter is clamped to 5. After the
! stage (0_0)/(2_0) floating-point work, %g1 is set to 0x3ff00000, the
! pointers are aimed at TBL+TBL_SHIFT+24 and control rejoins at .cont16.
2541.update14:
2542	cmp	counter,5
2543	ble	1f
2544	nop
2545
2546	sub	counter,5,counter
2547	st	counter,[%fp+tmp_counter]
2548
2549	stx	%i2,[%fp+tmp_px]
2550
2551	stx	%o0,[%fp+tmp_py]
2552
2553	mov	5,counter
25541:
2555	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
2556
2557	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
2558	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
2559
2560	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
2561	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
2562
2563	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
2564
2565	sethi	%hi(0x3ff00000),%g1
2566	add	TBL,TBL_SHIFT+24,%i2
2567	ba	.cont16
2568	add	TBL,TBL_SHIFT+24,%o0
2569
2570	.align	16
! .update15: conditional fixup. If hy0 >= 0x00100000 the normal path is
! resumed at .cont15. Otherwise, when more than five elements remain,
! counter - 5 and the pointers in %i2/%o0 are parked in
! tmp_counter/tmp_px/tmp_py and counter is clamped to 5; %g1 is set to
! 0x3ff00000, the stage (0_0) DTWO - dtmp0 step is performed, the pointers
! are aimed at TBL+TBL_SHIFT+24 (presumably a harmless stand-in operand -
! TODO confirm) and control rejoins at .cont16.
2571.update15:
2572	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2573	bge,pn	%icc,.cont15		! (0_0) if ( hy0 < 0x00100000 )
2574
2575	cmp	counter,5		! sits in the delay slot of the bge above
2576	ble,a	1f
2577	nop
2578
2579	sub	counter,5,counter
2580	st	counter,[%fp+tmp_counter]
2581
2582	stx	%i2,[%fp+tmp_px]
2583
2584	mov	5,counter
2585	stx	%o0,[%fp+tmp_py]
25861:
2587	sethi	%hi(0x3ff00000),%g1
2588	add	TBL,TBL_SHIFT+24,%i2
2589
2590	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
2591	ba	.cont16
2592	add	TBL,TBL_SHIFT+24,%o0
2593
2594	.align	16
! .update17: out-of-line fixup (branch site not shown here). When more than
! six elements remain, counter - 6 and the pointers in %i4/%i3 are parked in
! tmp_counter/tmp_px/tmp_py and counter is clamped to 6. The instructions
! after 1: carry stage (3_0)/(1_0)/(7_1)/(0_0) work, apparently what lies
! between the branch site and .cont20. The scale slot dtmp11 receives
! 0x3ff00000 << 32 (bit pattern of 1.0), the pointers are aimed at
! TBL+TBL_SHIFT+24 (presumably a harmless stand-in operand - TODO confirm)
! and control rejoins at .cont20.
2595.update17:
2596	cmp	counter,6
2597	ble	1f
2598	nop
2599
2600	sub	counter,6,counter
2601	st	counter,[%fp+tmp_counter]
2602
2603	stx	%i4,[%fp+tmp_px]
2604
2605	stx	%i3,[%fp+tmp_py]
2606
2607	mov	6,counter
26081:
2609	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;
2610
2611	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
2612
2613	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
2614
2615	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
2616	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
2617
2618	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
2619	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
2620
2621	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
2622	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
2623
2624	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
2625	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
2626
2627	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
2628	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
2629
2630	sethi	%hi(0x3ff00000),%g1
2631	add	TBL,TBL_SHIFT+24,%i4
2632
2633	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
2634	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
2635	ba	.cont20
2636	add	TBL,TBL_SHIFT+24,%i3
2637
2638	.align	16
! .update18: same fixup as .update17 minus the y_hi0 subtraction
! (presumably already done at the branch site - TODO confirm). When more
! than six elements remain, counter - 6 and the pointers in %i4/%i3 are
! parked in tmp_counter/tmp_px/tmp_py and counter is clamped to 6. After the
! floating-point work below, dtmp11 receives 0x3ff00000 << 32 (bit pattern
! of 1.0), the pointers are aimed at TBL+TBL_SHIFT+24 and control rejoins at
! .cont20.
2639.update18:
2640	cmp	counter,6
2641	ble	1f
2642	nop
2643
2644	sub	counter,6,counter
2645	st	counter,[%fp+tmp_counter]
2646
2647	stx	%i4,[%fp+tmp_px]
2648
2649	stx	%i3,[%fp+tmp_py]
2650
2651	mov	6,counter
26521:
2653	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
2654
2655	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
2656
2657	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
2658	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
2659
2660	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
2661	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
2662
2663	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
2664	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
2665
2666	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
2667	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
2668
2669	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
2670	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
2671
2672	sethi	%hi(0x3ff00000),%g1
2673	add	TBL,TBL_SHIFT+24,%i4
2674
2675	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
2676	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
2677	ba	.cont20
2678	add	TBL,TBL_SHIFT+24,%i3
2679
2680	.align	16
! .update19: conditional fixup. If hy0 >= 0x00100000 the normal path is
! resumed at .cont19a. Otherwise, when more than six elements remain,
! counter - 6 and the pointers in %i4/%i3 are parked in
! tmp_counter/tmp_px/tmp_py and counter is clamped to 6; two pending
! floating-point steps are performed, %g1 is set to 0x3ff00000, the pointers
! are aimed at TBL+TBL_SHIFT+24 (presumably a harmless stand-in operand -
! TODO confirm) and control rejoins at .cont19b.
2681.update19:
2682	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2683	bge,pn	%icc,.cont19a		! (0_0) if ( hy0 < 0x00100000 )
2684
2685	cmp	counter,6		! sits in the delay slot of the bge above
2686	ble,a	1f
2687	nop
2688
2689	sub	counter,6,counter
2690	st	counter,[%fp+tmp_counter]
2691
2692	stx	%i4,[%fp+tmp_px]
2693
2694	mov	6,counter
2695	stx	%i3,[%fp+tmp_py]
26961:
2697	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
2698	sethi	%hi(0x3ff00000),%g1
2699	add	TBL,TBL_SHIFT+24,%i4
2700	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
2701
2702	ba	.cont19b
2703	add	TBL,TBL_SHIFT+24,%i3
2704
2705	.align	16
! .update21: out-of-line fixup (branch site not shown here). When more than
! seven elements remain, counter - 7 and the pointers in %i2/%o0 are parked
! in tmp_counter/tmp_px/tmp_py and counter is clamped to 7. The instructions
! after 1: carry stage (4_0)/(2_0)/(7_1)/(0_0)/(1_0) work, apparently what
! lies between the branch site and .cont24. The scale slot dtmp13 receives
! 0x3ff00000 << 32 (bit pattern of 1.0), the pointers are aimed at
! TBL+TBL_SHIFT+24 (presumably a harmless stand-in operand - TODO confirm)
! and control rejoins at .cont24.
2706.update21:
2707	cmp	counter,7
2708	ble	1f
2709	nop
2710
2711	sub	counter,7,counter
2712	st	counter,[%fp+tmp_counter]
2713
2714	stx	%i2,[%fp+tmp_px]
2715
2716	stx	%o0,[%fp+tmp_py]
2717
2718	mov	7,counter
27191:
2720	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36;
2721
2722	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
2723	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
2724
2725	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
2726
2727	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
2728	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
2729
2730	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
2731	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
2732
2733	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
2734	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
2735
2736	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
2737	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
2738
2739	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
2740	sethi	%hi(0x3ff00000),%g1
2741	add	TBL,TBL_SHIFT+24,%i2
2742	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
2743
2744	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
2745	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
2746	ba	.cont24
2747	add	TBL,TBL_SHIFT+24,%o0
2748
2749	.align	16
! .update22: same fixup as .update21 minus the y_hi0 subtraction
! (presumably already done at the branch site - TODO confirm). When more
! than seven elements remain, counter - 7 and the pointers in %i2/%o0 are
! parked in tmp_counter/tmp_px/tmp_py and counter is clamped to 7. After the
! floating-point work below, dtmp13 receives 0x3ff00000 << 32 (bit pattern
! of 1.0), the pointers are aimed at TBL+TBL_SHIFT+24 and control rejoins at
! .cont24.
2750.update22:
2751	cmp	counter,7
2752	ble	1f
2753	nop
2754
2755	sub	counter,7,counter
2756	st	counter,[%fp+tmp_counter]
2757
2758	stx	%i2,[%fp+tmp_px]
2759
2760	stx	%o0,[%fp+tmp_py]
2761
2762	mov	7,counter
27631:
2764	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
2765	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
2766
2767	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
2768
2769	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
2770	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
2771
2772	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
2773	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
2774
2775	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
2776	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
2777
2778	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
2779	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
2780
2781	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
2782	sethi	%hi(0x3ff00000),%g1
2783	add	TBL,TBL_SHIFT+24,%i2
2784	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
2785
2786	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
2787	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
2788	ba	.cont24
2789	add	TBL,TBL_SHIFT+24,%o0
2790
2791	.align	16
! .update23: conditional fixup. If hy0 >= 0x00100000 the normal path is
! resumed at .cont23a. Otherwise, when more than seven elements remain,
! counter - 7 and the pointers in %i2/%o0 are parked in
! tmp_counter/tmp_px/tmp_py and counter is clamped to 7; two pending
! floating-point steps are performed, %g1 is set to 0x3ff00000, the pointers
! are aimed at TBL+TBL_SHIFT+24 (presumably a harmless stand-in operand -
! TODO confirm) and control rejoins at .cont23b.
2792.update23:
2793	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2794	bge,pn	%icc,.cont23a		! (0_0) if ( hy0 < 0x00100000 )
2795
2796	cmp	counter,7		! sits in the delay slot of the bge above
2797	ble,a	1f
2798	nop
2799
2800	sub	counter,7,counter
2801	st	counter,[%fp+tmp_counter]
2802
2803	stx	%i2,[%fp+tmp_px]
2804
2805	mov	7,counter
2806	stx	%o0,[%fp+tmp_py]
28071:
2808	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
2809	sethi	%hi(0x3ff00000),%g1
2810	add	TBL,TBL_SHIFT+24,%i2
2811	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
2812
2813	ba	.cont23b
2814	add	TBL,TBL_SHIFT+24,%o0
2815
2816	.align	16
! .update25: out-of-line fixup (branch site not shown here). When more than
! eight elements remain, counter - 8 and the pointers in %i4/%i3 are parked
! in tmp_counter/tmp_px/tmp_py and counter is clamped to 8. The instructions
! after 1: carry stage (5_0)/(3_0)/(0_0)/(1_0)/(2_0) work; the sequence
! mirrors the one that follows the hx0 range check in the main loop, so it
! apparently replays what the jump to .cont28 skips. The scale slot dtmp15
! receives 0x3ff00000 << 32 (bit pattern of 1.0), the pointers are aimed at
! TBL+TBL_SHIFT+24 (presumably a harmless stand-in operand - TODO confirm)
! and control rejoins at .cont28.
2817.update25:
2818	cmp	counter,8
2819	ble	1f
2820	nop
2821
2822	sub	counter,8,counter
2823	st	counter,[%fp+tmp_counter]
2824
2825	stx	%i4,[%fp+tmp_px]
2826
2827	stx	%i3,[%fp+tmp_py]
2828
2829	mov	8,counter
28301:
2831	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;
2832
2833	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
2834	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;
2835
2836	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);
2837
2838	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
2839	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
2840
2841	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
2842	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
2843
2844	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
2845	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
2846
2847	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
2848	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;
2849
2850	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
2851	sethi	%hi(0x3ff00000),%g1
2852	add	TBL,TBL_SHIFT+24,%i4
2853	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
2854
2855	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
2856	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
2857	ba	.cont28
2858	add	TBL,TBL_SHIFT+24,%i3
2859
2860	.align	16
! .update26: same fixup as .update25 minus the y_hi0 subtraction
! (presumably already done at the branch site - TODO confirm). When more
! than eight elements remain, counter - 8 and the pointers in %i4/%i3 are
! parked in tmp_counter/tmp_px/tmp_py and counter is clamped to 8. After the
! floating-point work below, dtmp15 receives 0x3ff00000 << 32 (bit pattern
! of 1.0), the pointers are aimed at TBL+TBL_SHIFT+24 and control rejoins at
! .cont28.
2861.update26:
2862	cmp	counter,8
2863	ble	1f
2864	nop
2865
2866	sub	counter,8,counter
2867	st	counter,[%fp+tmp_counter]
2868
2869	stx	%i4,[%fp+tmp_px]
2870
2871	stx	%i3,[%fp+tmp_py]
2872
2873	mov	8,counter
28741:
2875	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
2876	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;
2877
2878	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);
2879
2880	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
2881	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
2882
2883	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
2884	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
2885
2886	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
2887	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
2888
2889	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
2890	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;
2891
2892	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
2893	sethi	%hi(0x3ff00000),%g1
2894	add	TBL,TBL_SHIFT+24,%i4
2895	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
2896
2897	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
2898	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
2899	ba	.cont28
2900	add	TBL,TBL_SHIFT+24,%i3
2901
2902	.align	16
/*
 * .update27 -- fix-up stub with two exits, .cont27a and .cont27b.
 *
 * If %l7 (hy0 per the inline comment) >= _0x00100000, control goes to
 * .cont27a; only the cmp in the delay slot has run by then.
 * Otherwise, when counter > 8 the surplus counter - 8 is written to
 * tmp_counter, %i4/%i3 are written to tmp_px/tmp_py and counter becomes 8.
 * The stub then issues the FP operations annotated below, loads
 * 0x3ff00000 into %g1, sets %i4/%i3 to TBL+TBL_SHIFT+24 and branches to
 * .cont27b.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
2903.update27:
2904	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
2905	bge,pn	%icc,.cont27a		! (0_0) if ( hy0 < 0x00100000 )
2906
	/* delay slot of the bge above (no annul bit): runs on both paths */
2907	cmp	counter,8
2908	ble,a	1f
2909	nop
2910
2911	sub	counter,8,counter
2912	st	counter,[%fp+tmp_counter]
2913
2914	stx	%i4,[%fp+tmp_px]
2915
2916	mov	8,counter
2917	stx	%i3,[%fp+tmp_py]
29181:
	/* the add after the ba is in its delay slot and runs before .cont27b */
2919	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
2920	sethi	%hi(0x3ff00000),%g1
2921	add	TBL,TBL_SHIFT+24,%i4
2922	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
2923
2924	ba	.cont27b
2925	add	TBL,TBL_SHIFT+24,%i3
2926
2927	.align	16
/*
 * .update29 -- fix-up stub that ends by branching to .cont32.
 *
 * If counter > 1 (signed compare), the surplus counter - 1 is written to
 * tmp_counter, %i2 and %o0 are written to tmp_px and tmp_py, and counter
 * is clamped to 1.  If counter <= 1 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
2928.update29:
2929	cmp	counter,1
2930	ble	1f
2931	nop
2932
2933	sub	counter,1,counter
2934	st	counter,[%fp+tmp_counter]
2935
2936	stx	%i2,[%fp+tmp_px]
2937
2938	stx	%o0,[%fp+tmp_py]
2939
2940	mov	1,counter
29411:
	/*
	 * Common tail: issue the FP operations annotated below, store the
	 * incoming %g1 to dtmp0, advance %i5 by stridez, load 0x3ff00000
	 * into %o4 and set %i2/%o0 to TBL+TBL_SHIFT+24.  The final add is in
	 * the delay slot of the ba, so it executes before .cont32.
	 */
2942	fsubd	%f2,D2ON36,%f2		! (6_1) y_hi0 -= D2ON36;
2943
2944	fmuld	%f54,%f24,%f50		! (4_1) dtmp0 = dd * dres;
2945	stx	%g1,[%fp+dtmp0]		! (7_1) *(long long*)&scl0 = ll;
2946	faddd	%f28,%f48,%f52		! (1_1) res0 += dtmp0;
2947
2948	fand	%f26,DA0,%f48		! (2_1) res0 = vis_fand(dres,DA0);
2949
2950	fmuld	%f20,%f20,%f0		! (6_1) res0_hi = x_hi0 * x_hi0;
2951	fsubd	%f10,%f20,%f28		! (6_1) x_lo0 = x0 - x_hi0;
2952
2953	fmuld	%f2,%f2,%f46		! (6_1) dtmp0 = y_hi0 * y_hi0;
2954	add	%i5,stridez,%i5		! pz += stridez
2955	faddd	%f10,%f20,%f62		! (6_1) res0_lo = x0 + x_hi0;
2956
2957	fmuld	%f18,%f22,%f22		! (3_1) dtmp2 = dd * dres;
2958	sethi	%hi(0x3ff00000),%o4
2959	add	TBL,TBL_SHIFT+24,%i2
2960	fsubd	DTWO,%f50,%f20		! (4_1) dtmp0 = DTWO - dtmp0;
2961
2962	ba	.cont32
2963	add	TBL,TBL_SHIFT+24,%o0
2964
2965	.align	16
/*
 * .update30 -- fix-up stub that ends by branching to .cont32.
 *
 * If counter > 1 (signed compare), the surplus counter - 1 is written to
 * tmp_counter, %i2 and %o0 are written to tmp_px and tmp_py, and counter
 * is clamped to 1.  If counter <= 1 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
2966.update30:
2967	cmp	counter,1
2968	ble	1f
2969	nop
2970
2971	sub	counter,1,counter
2972	st	counter,[%fp+tmp_counter]
2973
2974	stx	%i2,[%fp+tmp_px]
2975
2976	stx	%o0,[%fp+tmp_py]
2977
2978	mov	1,counter
29791:
	/*
	 * Common tail: issue the FP operations annotated below, store the
	 * incoming %g1 to dtmp0, advance %i5 by stridez, load 0x3ff00000
	 * into %o4 and set %i2/%o0 to TBL+TBL_SHIFT+24.  The final add is in
	 * the delay slot of the ba, so it executes before .cont32.
	 */
2980	fmuld	%f54,%f24,%f50		! (4_1) dtmp0 = dd * dres;
2981	stx	%g1,[%fp+dtmp0]		! (7_1) *(long long*)&scl0 = ll;
2982	faddd	%f28,%f48,%f52		! (1_1) res0 += dtmp0;
2983
2984	fand	%f26,DA0,%f48		! (2_1) res0 = vis_fand(dres,DA0);
2985
2986	fmuld	%f20,%f20,%f0		! (6_1) res0_hi = x_hi0 * x_hi0;
2987	fsubd	%f10,%f20,%f28		! (6_1) x_lo0 = x0 - x_hi0;
2988
2989	fmuld	%f2,%f2,%f46		! (6_1) dtmp0 = y_hi0 * y_hi0;
2990	add	%i5,stridez,%i5		! pz += stridez
2991	faddd	%f10,%f20,%f62		! (6_1) res0_lo = x0 + x_hi0;
2992
2993	fmuld	%f18,%f22,%f22		! (3_1) dtmp2 = dd * dres;
2994	sethi	%hi(0x3ff00000),%o4
2995	add	TBL,TBL_SHIFT+24,%i2
2996	fsubd	DTWO,%f50,%f20		! (4_1) dtmp0 = DTWO - dtmp0;
2997
2998	ba	.cont32
2999	add	TBL,TBL_SHIFT+24,%o0
3000
3001	.align	16
/*
 * .update31 -- fix-up stub with two exits, .cont31 and .cont32.
 *
 * If %l7 (hy0 per the inline comment) >= _0x00100000, control goes to
 * .cont31; only the cmp in the delay slot has run by then.
 * Otherwise, when counter > 1 the surplus counter - 1 is written to
 * tmp_counter, %i2/%o0 are written to tmp_px/tmp_py and counter becomes 1.
 * The stub then issues the FP operations annotated below, advances %i5 by
 * stridez, loads 0x3ff00000 into %o4, sets %i2/%o0 to TBL+TBL_SHIFT+24
 * and branches to .cont32.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3002.update31:
3003	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3004	bge,pn	%icc,.cont31		! (0_0) if ( hy0 < 0x00100000 )
3005
	/* delay slot of the bge above (no annul bit): runs on both paths */
3006	cmp	counter,1
3007	ble,a	1f
3008	nop
3009
3010	sub	counter,1,counter
3011	st	counter,[%fp+tmp_counter]
3012
3013	stx	%i2,[%fp+tmp_px]
3014
3015	mov	1,counter
3016	stx	%o0,[%fp+tmp_py]
30171:
	/* the add after the ba is in its delay slot and runs before .cont32 */
3018	fmuld	%f20,%f20,%f0		! (6_1) res0_hi = x_hi0 * x_hi0;
3019	fsubd	%f10,%f20,%f28		! (6_1) x_lo0 = x0 - x_hi0;
3020
3021	fmuld	%f2,%f2,%f46		! (6_1) dtmp0 = y_hi0 * y_hi0;
3022	add	%i5,stridez,%i5		! pz += stridez
3023	faddd	%f10,%f20,%f62		! (6_1) res0_lo = x0 + x_hi0;
3024
3025	fmuld	%f18,%f22,%f22		! (3_1) dtmp2 = dd * dres;
3026	sethi	%hi(0x3ff00000),%o4
3027	add	TBL,TBL_SHIFT+24,%i2
3028	fsubd	DTWO,%f50,%f20		! (4_1) dtmp0 = DTWO - dtmp0;
3029
3030	ba	.cont32
3031	add	TBL,TBL_SHIFT+24,%o0
3032
3033	.align	16
/*
 * .update33 -- fix-up stub that ends by branching to .cont36.
 *
 * If counter > 2 (signed compare), the surplus counter - 2 is written to
 * tmp_counter, %i4 and %i3 are written to tmp_px and tmp_py, and counter
 * is clamped to 2.  If counter <= 2 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3034.update33:
3035	cmp	counter,2
3036	ble	1f
3037	nop
3038
3039	sub	counter,2,counter
3040	st	counter,[%fp+tmp_counter]
3041
3042	stx	%i4,[%fp+tmp_px]
3043
3044	stx	%i3,[%fp+tmp_py]
3045
3046	mov	2,counter
30471:
	/*
	 * Common tail: store %f1 at [%i5+4] before %i5 is advanced by
	 * stridez, issue the FP operations annotated below, store the
	 * incoming %o4 to dtmp2, then build 0x3ff0000000000000 (bit pattern
	 * of the double 1.0) in %o4 with sethi/sllx and store it to dtmp3.
	 * %i4/%i3 are set to TBL+TBL_SHIFT+24; the final add is in the delay
	 * slot of the ba, so it executes before .cont36.
	 */
3048	st	%f1,[%i5+4]		! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
3049	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;
3050
3051	fmuld	%f26,%f16,%f50		! (5_1) dtmp0 = dd * dres;
3052	faddd	%f48,%f52,%f52		! (2_1) res0 += dtmp0;
3053
3054	add	%i5,stridez,%i5		! pz += stridez
3055	stx	%o4,[%fp+dtmp2]		! (0_0) *(long long*)&scl0 = ll;
3056	fand	%f28,DA0,%f48		! (3_1) res0 = vis_fand(dres,DA0);
3057
3058	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
3059	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
3060
3061	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
3062	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
3063
3064	fmuld	%f44,%f48,%f10		! (3_1) dtmp0 = res0_hi * res0;
3065	fsubd	DTWO,%f50,%f20		! (5_1) dtmp0 = DTWO - dtmp0;
3066
3067	fmuld	%f14,%f24,%f24		! (4_1) dtmp2 = dd * dres;
3068	faddd	%f60,%f54,%f50		! (7_1) dtmp1 = y0 + y_hi0;
3069
3070	fmuld	%f38,%f48,%f38		! (3_1) dtmp1 = res0_lo * res0;
3071	sethi	%hi(0x3ff00000),%o4
3072	add	TBL,TBL_SHIFT+24,%i4
3073	fsubd	%f60,%f54,%f12		! (7_1) y_lo0 = y0 - y_hi0;
3074
3075	sllx	%o4,32,%o4		! (1_0) ll = (long long)j0 << 32;
3076	stx	%o4,[%fp+dtmp3]		! (1_0) *(long long*)&scl0 = ll;
3077	ba	.cont36
3078	add	TBL,TBL_SHIFT+24,%i3
3079
3080	.align	16
/*
 * .update34 -- fix-up stub that ends by branching to .cont36.
 *
 * If counter > 2 (signed compare), the surplus counter - 2 is written to
 * tmp_counter, %i4 and %i3 are written to tmp_px and tmp_py, and counter
 * is clamped to 2.  If counter <= 2 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3081.update34:
3082	cmp	counter,2
3083	ble	1f
3084	nop
3085
3086	sub	counter,2,counter
3087	st	counter,[%fp+tmp_counter]
3088
3089	stx	%i4,[%fp+tmp_px]
3090
3091	stx	%i3,[%fp+tmp_py]
3092
3093	mov	2,counter
30941:
	/*
	 * Common tail: advance %i5 by stridez, store the incoming %o4 to
	 * dtmp2, issue the FP operations annotated below, then build
	 * 0x3ff0000000000000 (bit pattern of the double 1.0) in %o4 with
	 * sethi/sllx and store it to dtmp3.  %i4/%i3 are set to
	 * TBL+TBL_SHIFT+24; the final add is in the delay slot of the ba,
	 * so it executes before .cont36.
	 */
3095	add	%i5,stridez,%i5		! pz += stridez
3096	stx	%o4,[%fp+dtmp2]		! (0_0) *(long long*)&scl0 = ll;
3097	fand	%f28,DA0,%f48		! (3_1) res0 = vis_fand(dres,DA0);
3098
3099	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
3100	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
3101
3102	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
3103	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
3104
3105	fmuld	%f44,%f48,%f10		! (3_1) dtmp0 = res0_hi * res0;
3106	fsubd	DTWO,%f50,%f20		! (5_1) dtmp0 = DTWO - dtmp0;
3107
3108	fmuld	%f14,%f24,%f24		! (4_1) dtmp2 = dd * dres;
3109	faddd	%f60,%f54,%f50		! (7_1) dtmp1 = y0 + y_hi0;
3110
3111	fmuld	%f38,%f48,%f38		! (3_1) dtmp1 = res0_lo * res0;
3112	sethi	%hi(0x3ff00000),%o4
3113	add	TBL,TBL_SHIFT+24,%i4
3114	fsubd	%f60,%f54,%f12		! (7_1) y_lo0 = y0 - y_hi0;
3115
3116	sllx	%o4,32,%o4		! (1_0) ll = (long long)j0 << 32;
3117	stx	%o4,[%fp+dtmp3]		! (1_0) *(long long*)&scl0 = ll;
3118	ba	.cont36
3119	add	TBL,TBL_SHIFT+24,%i3
3120
3121	.align	16
/*
 * .update35 -- fix-up stub with two exits, .cont35a and .cont35b.
 *
 * If %l7 (hy0 per the inline comment) >= _0x00100000, control goes to
 * .cont35a; only the cmp in the delay slot has run by then.
 * Otherwise, when counter > 2 the surplus counter - 2 is written to
 * tmp_counter, %i4/%i3 are written to tmp_px/tmp_py and counter becomes 2.
 * The stub then issues the FP operations annotated below, loads
 * 0x3ff00000 into %o4, sets %i4/%i3 to TBL+TBL_SHIFT+24 and branches to
 * .cont35b.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3122.update35:
3123	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3124	bge,pn	%icc,.cont35a		! (0_0) if ( hy0 < 0x00100000 )
3125
	/* delay slot of the bge above (no annul bit): runs on both paths */
3126	cmp	counter,2
3127	ble,a	1f
3128	nop
3129
3130	sub	counter,2,counter
3131	st	counter,[%fp+tmp_counter]
3132
3133	stx	%i4,[%fp+tmp_px]
3134
3135	mov	2,counter
3136	stx	%i3,[%fp+tmp_py]
31371:
	/* the add after the ba is in its delay slot and runs before .cont35b */
3138	fmuld	%f44,%f48,%f10		! (3_1) dtmp0 = res0_hi * res0;
3139	sethi	%hi(0x3ff00000),%o4
3140	add	TBL,TBL_SHIFT+24,%i4
3141	fsubd	DTWO,%f50,%f20		! (5_1) dtmp0 = DTWO - dtmp0;
3142
3143	ba	.cont35b
3144	add	TBL,TBL_SHIFT+24,%i3
3145
3146	.align	16
/*
 * .update37 -- fix-up stub that ends by branching to .cont40.
 *
 * If counter > 3 (signed compare), the surplus counter - 3 is written to
 * tmp_counter, %i2 and %o0 are written to tmp_px and tmp_py, and counter
 * is clamped to 3.  If counter <= 3 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3147.update37:
3148	cmp	counter,3
3149	ble	1f
3150	nop
3151
3152	sub	counter,3,counter
3153	st	counter,[%fp+tmp_counter]
3154
3155	stx	%i2,[%fp+tmp_px]
3156
3157	stx	%o0,[%fp+tmp_py]
3158
3159	mov	3,counter
31601:
	/*
	 * Common tail: store %f1 at [%i5+4] before %i5 is advanced by
	 * stridez, issue the FP operations annotated below, store the
	 * incoming %o4 to dtmp4, then build 0x3ff0000000000000 (bit pattern
	 * of the double 1.0) in %g1 with sethi/sllx and store it to dtmp5.
	 * %i2/%o0 are set to TBL+TBL_SHIFT+24; the final add is in the delay
	 * slot of the ba, so it executes before .cont40.
	 */
3161	st	%f1,[%i5+4]		! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
3162	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;
3163
3164	fmuld	%f28,%f18,%f50		! (6_1) dtmp0 = dd * dres;
3165	faddd	%f48,%f52,%f52		! (3_1) res0 += dtmp0;
3166
3167	add	%i5,stridez,%i5		! pz += stridez
3168	stx	%o4,[%fp+dtmp4]		! (1_0) *(long long*)&scl0 = ll;
3169	fand	%f26,DA0,%f48		! (4_1) res0 = vis_fand(dres,DA0);
3170
3171	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
3172	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
3173
3174	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
3175	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
3176
3177	fmuld	%f32,%f48,%f10		! (4_1) dtmp0 = res0_hi * res0;
3178	fsubd	DTWO,%f50,%f20		! (6_1) dtmp0 = DTWO - dtmp0;
3179
3180	fmuld	%f22,%f16,%f16		! (5_1) dtmp2 = dd * dres;
3181	faddd	%f60,%f54,%f50		! (0_0) dtmp1 = y0 + y_hi0;
3182
3183	fmuld	%f36,%f48,%f36		! (4_1) dtmp1 = res0_lo * res0;
3184	sethi	%hi(0x3ff00000),%g1
3185	add	TBL,TBL_SHIFT+24,%i2
3186	fsubd	%f60,%f54,%f12		! (0_0) y_lo0 = y0 - y_hi0;
3187
3188	sllx	%g1,32,%g1		! (2_0) ll = (long long)j0 << 32;
3189	stx	%g1,[%fp+dtmp5]		! (2_0) *(long long*)&scl0 = ll;
3190	ba	.cont40
3191	add	TBL,TBL_SHIFT+24,%o0
3192
3193	.align	16
/*
 * .update38 -- fix-up stub that ends by branching to .cont40.
 *
 * If counter > 3 (signed compare), the surplus counter - 3 is written to
 * tmp_counter, %i2 and %o0 are written to tmp_px and tmp_py, and counter
 * is clamped to 3.  If counter <= 3 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3194.update38:
3195	cmp	counter,3
3196	ble	1f
3197	nop
3198
3199	sub	counter,3,counter
3200	st	counter,[%fp+tmp_counter]
3201
3202	stx	%i2,[%fp+tmp_px]
3203
3204	stx	%o0,[%fp+tmp_py]
3205
3206	mov	3,counter
32071:
	/*
	 * Common tail: advance %i5 by stridez, store the incoming %o4 to
	 * dtmp4, issue the FP operations annotated below, then build
	 * 0x3ff0000000000000 (bit pattern of the double 1.0) in %g1 with
	 * sethi/sllx and store it to dtmp5.  %i2/%o0 are set to
	 * TBL+TBL_SHIFT+24; the final add is in the delay slot of the ba,
	 * so it executes before .cont40.
	 */
3208	add	%i5,stridez,%i5		! pz += stridez
3209	stx	%o4,[%fp+dtmp4]		! (1_0) *(long long*)&scl0 = ll;
3210	fand	%f26,DA0,%f48		! (4_1) res0 = vis_fand(dres,DA0);
3211
3212	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
3213	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
3214
3215	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
3216	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
3217
3218	fmuld	%f32,%f48,%f10		! (4_1) dtmp0 = res0_hi * res0;
3219	fsubd	DTWO,%f50,%f20		! (6_1) dtmp0 = DTWO - dtmp0;
3220
3221	fmuld	%f22,%f16,%f16		! (5_1) dtmp2 = dd * dres;
3222	faddd	%f60,%f54,%f50		! (0_0) dtmp1 = y0 + y_hi0;
3223
3224	fmuld	%f36,%f48,%f36		! (4_1) dtmp1 = res0_lo * res0;
3225	sethi	%hi(0x3ff00000),%g1
3226	add	TBL,TBL_SHIFT+24,%i2
3227	fsubd	%f60,%f54,%f12		! (0_0) y_lo0 = y0 - y_hi0;
3228
3229	sllx	%g1,32,%g1		! (2_0) ll = (long long)j0 << 32;
3230	stx	%g1,[%fp+dtmp5]		! (2_0) *(long long*)&scl0 = ll;
3231	ba	.cont40
3232	add	TBL,TBL_SHIFT+24,%o0
3233
3234	.align	16
/*
 * .update39 -- fix-up stub with two exits, .cont39a and .cont39b.
 *
 * If %l7 (hy0 per the inline comment) >= _0x00100000, control goes to
 * .cont39a; only the cmp in the delay slot has run by then.
 * Otherwise, when counter > 3 the surplus counter - 3 is written to
 * tmp_counter, %i2/%o0 are written to tmp_px/tmp_py and counter becomes 3.
 * The stub then issues the FP operations annotated below, loads
 * 0x3ff00000 into %g1, sets %i2/%o0 to TBL+TBL_SHIFT+24 and branches to
 * .cont39b.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3235.update39:
3236	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3237	bge,pn	%icc,.cont39a		! (0_0) if ( hy0 < 0x00100000 )
3238
	/* delay slot of the bge above (no annul bit): runs on both paths */
3239	cmp	counter,3
3240	ble,a	1f
3241	nop
3242
3243	sub	counter,3,counter
3244	st	counter,[%fp+tmp_counter]
3245
3246	stx	%i2,[%fp+tmp_px]
3247
3248	mov	3,counter
3249	stx	%o0,[%fp+tmp_py]
32501:
	/* the add after the ba is in its delay slot and runs before .cont39b */
3251	fmuld	%f32,%f48,%f10		! (4_1) dtmp0 = res0_hi * res0;
3252	sethi	%hi(0x3ff00000),%g1
3253	add	TBL,TBL_SHIFT+24,%i2
3254	fsubd	DTWO,%f50,%f20		! (6_1) dtmp0 = DTWO - dtmp0;
3255
3256	ba	.cont39b
3257	add	TBL,TBL_SHIFT+24,%o0
3258
3259	.align	16
/*
 * .update41 -- fix-up stub that ends by branching to .cont44.
 *
 * If counter > 4 (signed compare), the surplus counter - 4 is written to
 * tmp_counter, %i4 and %i3 are written to tmp_px and tmp_py, and counter
 * is clamped to 4.  If counter <= 4 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3260.update41:
3261	cmp	counter,4
3262	ble	1f
3263	nop
3264
3265	sub	counter,4,counter
3266	st	counter,[%fp+tmp_counter]
3267
3268	stx	%i4,[%fp+tmp_px]
3269
3270	stx	%i3,[%fp+tmp_py]
3271
3272	mov	4,counter
32731:
	/*
	 * Common tail: store %f1 at [%i5+4] before %i5 is advanced by
	 * stridez, issue the FP operations annotated below, store the
	 * incoming %g1 to dtmp6, then build 0x3ff0000000000000 (bit pattern
	 * of the double 1.0) in %g1 with sethi/sllx and store it to dtmp7.
	 * %i4/%i3 are set to TBL+TBL_SHIFT+24; the final add is in the delay
	 * slot of the ba, so it executes before .cont44.
	 */
3274	st	%f1,[%i5+4]		! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
3275	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;
3276
3277	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres;
3278	faddd	%f48,%f52,%f52		! (4_1) res0 += dtmp0;
3279
3280	add	%i5,stridez,%i5		! pz += stridez
3281	stx	%g1,[%fp+dtmp6]		! (2_0) *(long long*)&scl0 = ll;
3282	fand	%f28,DA0,%f48		! (5_1) res0 = vis_fand(dres,DA0);
3283
3284	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
3285	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
3286
3287	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
3288	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
3289
3290	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
3291	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
3292
3293	fmuld	%f24,%f18,%f18		! (6_1) dtmp2 = dd * dres;
3294	faddd	%f60,%f54,%f50		! (1_0) dtmp1 = y0 + y_hi0;
3295
3296	fmuld	%f34,%f48,%f34		! (5_1) dtmp1 = res0_lo * res0;
3297	sethi	%hi(0x3ff00000),%g1
3298	add	TBL,TBL_SHIFT+24,%i4
3299	fsubd	%f60,%f54,%f12		! (1_0) y_lo0 = y0 - y_hi0
3300
3301	sllx	%g1,32,%g1		! (3_0) ll = (long long)j0 << 32;
3302	stx	%g1,[%fp+dtmp7]		! (3_0) *(long long*)&scl0 = ll;
3303	ba	.cont44
3304	add	TBL,TBL_SHIFT+24,%i3
3305
3306	.align	16
/*
 * .update42 -- fix-up stub that ends by branching to .cont44.
 *
 * If counter > 4 (signed compare), the surplus counter - 4 is written to
 * tmp_counter, %i4 and %i3 are written to tmp_px and tmp_py, and counter
 * is clamped to 4.  If counter <= 4 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3307.update42:
3308	cmp	counter,4
3309	ble	1f
3310	nop
3311
3312	sub	counter,4,counter
3313	st	counter,[%fp+tmp_counter]
3314
3315	stx	%i4,[%fp+tmp_px]
3316
3317	stx	%i3,[%fp+tmp_py]
3318
3319	mov	4,counter
33201:
	/*
	 * Common tail: advance %i5 by stridez, store the incoming %g1 to
	 * dtmp6, issue the FP operations annotated below, then build
	 * 0x3ff0000000000000 (bit pattern of the double 1.0) in %g1 with
	 * sethi/sllx and store it to dtmp7.  %i4/%i3 are set to
	 * TBL+TBL_SHIFT+24; the final add is in the delay slot of the ba,
	 * so it executes before .cont44.
	 */
3321	add	%i5,stridez,%i5		! pz += stridez
3322	stx	%g1,[%fp+dtmp6]		! (2_0) *(long long*)&scl0 = ll;
3323	fand	%f28,DA0,%f48		! (5_1) res0 = vis_fand(dres,DA0);
3324
3325	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
3326	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
3327
3328	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
3329	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
3330
3331	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
3332	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
3333
3334	fmuld	%f24,%f18,%f18		! (6_1) dtmp2 = dd * dres;
3335	faddd	%f60,%f54,%f50		! (1_0) dtmp1 = y0 + y_hi0;
3336
3337	fmuld	%f34,%f48,%f34		! (5_1) dtmp1 = res0_lo * res0;
3338	sethi	%hi(0x3ff00000),%g1
3339	add	TBL,TBL_SHIFT+24,%i4
3340	fsubd	%f60,%f54,%f12		! (1_0) y_lo0 = y0 - y_hi0
3341
3342	sllx	%g1,32,%g1		! (3_0) ll = (long long)j0 << 32;
3343	stx	%g1,[%fp+dtmp7]		! (3_0) *(long long*)&scl0 = ll;
3344	ba	.cont44
3345	add	TBL,TBL_SHIFT+24,%i3
3346
3347	.align	16
/*
 * .update43 -- fix-up stub with two exits, .cont43a and .cont43b.
 *
 * If %l7 (hy0 per the inline comment) >= _0x00100000, control goes to
 * .cont43a; only the cmp in the delay slot has run by then.
 * Otherwise, when counter > 4 the surplus counter - 4 is written to
 * tmp_counter, %i4/%i3 are written to tmp_px/tmp_py and counter becomes 4.
 * The stub then issues the FP operations annotated below, loads
 * 0x3ff00000 into %g1, sets %i4/%i3 to TBL+TBL_SHIFT+24 and branches to
 * .cont43b.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3348.update43:
3349	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3350	bge,pn	%icc,.cont43a		! (0_0) if ( hy0 < 0x00100000 )
3351
	/* delay slot of the bge above (no annul bit): runs on both paths */
3352	cmp	counter,4
3353	ble,a	1f
3354	nop
3355
3356	sub	counter,4,counter
3357	st	counter,[%fp+tmp_counter]
3358
3359	stx	%i4,[%fp+tmp_px]
3360
3361	mov	4,counter
3362	stx	%i3,[%fp+tmp_py]
33631:
	/* the add after the ba is in its delay slot and runs before .cont43b */
3364	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
3365	sethi	%hi(0x3ff00000),%g1
3366	add	TBL,TBL_SHIFT+24,%i4
3367	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
3368
3369	ba	.cont43b
3370	add	TBL,TBL_SHIFT+24,%i3
3371
3372	.align	16
/*
 * .update45 -- fix-up stub that ends by branching to .cont48.
 *
 * If counter > 5 (signed compare), the surplus counter - 5 is written to
 * tmp_counter, %i2 and %o0 are written to tmp_px and tmp_py, and counter
 * is clamped to 5.  If counter <= 5 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3373.update45:
3374	cmp	counter,5
3375	ble	1f
3376	nop
3377
3378	sub	counter,5,counter
3379	st	counter,[%fp+tmp_counter]
3380
3381	stx	%i2,[%fp+tmp_px]
3382
3383	stx	%o0,[%fp+tmp_py]
3384
3385	mov	5,counter
33861:
	/*
	 * Common tail: issue the FP operations annotated below, store %f1
	 * at [%i5+4] before %i5 is advanced by stridez, store the incoming
	 * %g1 to dtmp8, then build 0x3ff0000000000000 (bit pattern of the
	 * double 1.0) in %g1 with sethi/sllx and store it to dtmp9.
	 * %i2/%o0 are set to TBL+TBL_SHIFT+24; the final add is in the delay
	 * slot of the ba, so it executes before .cont48.
	 */
3387	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;
3388
3389	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
3390	st	%f1,[%i5+4]		! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
3391	faddd	%f48,%f52,%f52		! (5_1) res0 += dtmp0;
3392
3393	fand	%f26,DA0,%f48		! (6_1) res0 = vis_fand(dres,DA0);
3394
3395	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
3396	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
3397	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
3398
3399	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
3400	add	%i5,stridez,%i5		! pz += stridez
3401	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
3402
3403	fmuld	%f30,%f48,%f10		! (6_1) dtmp0 = res0_hi * res0;
3404	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
3405
3406	fmuld	%f16,%f14,%f14		! (7_1) dtmp2 = dd * dres;
3407	faddd	%f60,%f54,%f50		! (2_0) dtmp1 = y0 + y_hi0;
3408
3409	fmuld	%f40,%f48,%f40		! (6_1) dtmp1 = res0_lo * res0;
3410	sethi	%hi(0x3ff00000),%g1
3411	add	TBL,TBL_SHIFT+24,%i2
3412	fsubd	%f60,%f54,%f12		! (2_0) y_lo0 = y0 - y_hi0;
3413
3414	sllx	%g1,32,%g1		! (4_0) ll = (long long)j0 << 32;
3415	stx	%g1,[%fp+dtmp9]		! (4_0) *(long long*)&scl0 = ll;
3416	ba	.cont48
3417	add	TBL,TBL_SHIFT+24,%o0
3418
3419	.align	16
/*
 * .update46 -- fix-up stub that ends by branching to .cont48.
 *
 * If counter > 5 (signed compare), the surplus counter - 5 is written to
 * tmp_counter, %i2 and %o0 are written to tmp_px and tmp_py, and counter
 * is clamped to 5.  If counter <= 5 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3420.update46:
3421	cmp	counter,5
3422	ble	1f
3423	nop
3424
3425	sub	counter,5,counter
3426	st	counter,[%fp+tmp_counter]
3427
3428	stx	%i2,[%fp+tmp_px]
3429
3430	stx	%o0,[%fp+tmp_py]
3431
3432	mov	5,counter
34331:
	/*
	 * Common tail: issue the FP operations annotated below, store %f1
	 * at [%i5+4] before %i5 is advanced by stridez, store the incoming
	 * %g1 to dtmp8, then build 0x3ff0000000000000 (bit pattern of the
	 * double 1.0) in %g1 with sethi/sllx and store it to dtmp9.
	 * %i2/%o0 are set to TBL+TBL_SHIFT+24; the final add is in the delay
	 * slot of the ba, so it executes before .cont48.
	 */
3434	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
3435	st	%f1,[%i5+4]		! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
3436	faddd	%f48,%f52,%f52		! (5_1) res0 += dtmp0;
3437
3438	fand	%f26,DA0,%f48		! (6_1) res0 = vis_fand(dres,DA0);
3439
3440	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
3441	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
3442	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
3443
3444	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
3445	add	%i5,stridez,%i5		! pz += stridez
3446	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
3447
3448	fmuld	%f30,%f48,%f10		! (6_1) dtmp0 = res0_hi * res0;
3449	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
3450
3451	fmuld	%f16,%f14,%f14		! (7_1) dtmp2 = dd * dres;
3452	faddd	%f60,%f54,%f50		! (2_0) dtmp1 = y0 + y_hi0;
3453
3454	fmuld	%f40,%f48,%f40		! (6_1) dtmp1 = res0_lo * res0;
3455	sethi	%hi(0x3ff00000),%g1
3456	add	TBL,TBL_SHIFT+24,%i2
3457	fsubd	%f60,%f54,%f12		! (2_0) y_lo0 = y0 - y_hi0;
3458
3459	sllx	%g1,32,%g1		! (4_0) ll = (long long)j0 << 32;
3460	stx	%g1,[%fp+dtmp9]		! (4_0) *(long long*)&scl0 = ll;
3461	ba	.cont48
3462	add	TBL,TBL_SHIFT+24,%o0
3463
3464	.align	16
/*
 * .update47 -- fix-up stub with two exits, .cont47a and .cont47b.
 *
 * If %l7 (hy0 per the inline comment) >= _0x00100000, control goes to
 * .cont47a; only the cmp in the delay slot has run by then.
 * Otherwise, when counter > 5 the surplus counter - 5 is written to
 * tmp_counter, %i2/%o0 are written to tmp_px/tmp_py and counter becomes 5.
 * The stub then issues the FP operations annotated below, stores the
 * incoming %g1 to dtmp8, advances %i5 by stridez, loads 0x3ff00000 into
 * %g1, sets %i2/%o0 to TBL+TBL_SHIFT+24 and branches to .cont47b.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3465.update47:
3466	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3467	bge,pn	%icc,.cont47a		! (0_0) if ( hy0 < 0x00100000 )
3468
	/* delay slot of the bge above (no annul bit): runs on both paths */
3469	cmp	counter,5
3470	ble,a	1f
3471	nop
3472
3473	sub	counter,5,counter
3474	st	counter,[%fp+tmp_counter]
3475
3476	stx	%i2,[%fp+tmp_px]
3477
3478	mov	5,counter
3479	stx	%o0,[%fp+tmp_py]
34801:
	/* the add after the ba is in its delay slot and runs before .cont47b */
3481	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
3482	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
3483	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
3484
3485	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
3486	add	%i5,stridez,%i5		! pz += stridez
3487	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
3488
3489	fmuld	%f30,%f48,%f10		! (6_1) dtmp0 = res0_hi * res0;
3490	sethi	%hi(0x3ff00000),%g1
3491	add	TBL,TBL_SHIFT+24,%i2
3492	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
3493
3494	ba	.cont47b
3495	add	TBL,TBL_SHIFT+24,%o0
3496
3497	.align	16
/*
 * .update49 -- fix-up stub that ends by branching to .cont52.
 *
 * If counter > 6 (signed compare), the surplus counter - 6 is written to
 * tmp_counter, %i4 and %i3 are written to tmp_px and tmp_py, and counter
 * is clamped to 6.  If counter <= 6 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3498.update49:
3499	cmp	counter,6
3500	ble	1f
3501	nop
3502
3503	sub	counter,6,counter
3504	st	counter,[%fp+tmp_counter]
3505
3506	stx	%i4,[%fp+tmp_px]
3507
3508	stx	%i3,[%fp+tmp_py]
3509
3510	mov	6,counter
35111:
	/*
	 * Common tail: issue the FP operations annotated below, store %f1
	 * at [%i5+4] before %i5 is advanced by stridez, store the incoming
	 * %g1 to dtmp10, then build 0x3ff0000000000000 (bit pattern of the
	 * double 1.0) in %g1 with sethi/sllx and store it to dtmp11.
	 * %i4/%i3 are set to TBL+TBL_SHIFT+24; the final add is in the delay
	 * slot of the ba, so it executes before .cont52.
	 */
3512	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;
3513
3514	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
3515	st	%f1,[%i5+4]		! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
3516	faddd	%f48,%f52,%f52		! (6_1) res0 += dtmp0;
3517
3518	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
3519
3520	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
3521	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
3522	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
3523
3524	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
3525	add	%i5,stridez,%i5		! pz += stridez
3526	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
3527
3528	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
3529	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
3530
3531	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
3532	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
3533
3534	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
3535	sethi	%hi(0x3ff00000),%g1
3536	add	TBL,TBL_SHIFT+24,%i4
3537	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
3538
3539	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
3540	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
3541	ba	.cont52
3542	add	TBL,TBL_SHIFT+24,%i3
3543
3544	.align	16
/*
 * .update50 -- fix-up stub that ends by branching to .cont52.
 *
 * If counter > 6 (signed compare), the surplus counter - 6 is written to
 * tmp_counter, %i4 and %i3 are written to tmp_px and tmp_py, and counter
 * is clamped to 6.  If counter <= 6 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3545.update50:
3546	cmp	counter,6
3547	ble	1f
3548	nop
3549
3550	sub	counter,6,counter
3551	st	counter,[%fp+tmp_counter]
3552
3553	stx	%i4,[%fp+tmp_px]
3554
3555	stx	%i3,[%fp+tmp_py]
3556
3557	mov	6,counter
35581:
	/*
	 * Common tail: issue the FP operations annotated below, store %f1
	 * at [%i5+4] before %i5 is advanced by stridez, store the incoming
	 * %g1 to dtmp10, then build 0x3ff0000000000000 (bit pattern of the
	 * double 1.0) in %g1 with sethi/sllx and store it to dtmp11.
	 * %i4/%i3 are set to TBL+TBL_SHIFT+24; the final add is in the delay
	 * slot of the ba, so it executes before .cont52.
	 */
3559	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
3560	st	%f1,[%i5+4]		! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
3561	faddd	%f48,%f52,%f52		! (6_1) res0 += dtmp0;
3562
3563	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
3564
3565	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
3566	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
3567	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
3568
3569	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
3570	add	%i5,stridez,%i5		! pz += stridez
3571	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
3572
3573	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
3574	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
3575
3576	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
3577	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
3578
3579	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
3580	sethi	%hi(0x3ff00000),%g1
3581	add	TBL,TBL_SHIFT+24,%i4
3582	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
3583
3584	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
3585	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
3586	ba	.cont52
3587	add	TBL,TBL_SHIFT+24,%i3
3588
3589	.align	16
/*
 * .update51 -- fix-up stub with two exits, .cont51a and .cont51b.
 *
 * If %l7 (hy0 per the inline comment) >= _0x00100000, control goes to
 * .cont51a; only the cmp in the delay slot has run by then.
 * Otherwise, when counter > 6 the surplus counter - 6 is written to
 * tmp_counter, %i4/%i3 are written to tmp_px/tmp_py and counter becomes 6.
 * The stub then issues the FP operations annotated below, stores the
 * incoming %g1 to dtmp10, advances %i5 by stridez, loads 0x3ff00000 into
 * %g1, sets %i4/%i3 to TBL+TBL_SHIFT+24 and branches to .cont51b.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3590.update51:
3591	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3592	bge,pn	%icc,.cont51a		! (0_0) if ( hy0 < 0x00100000 )
3593
	/* delay slot of the bge above (no annul bit): runs on both paths */
3594	cmp	counter,6
3595	ble,a	1f
3596	nop
3597
3598	sub	counter,6,counter
3599	st	counter,[%fp+tmp_counter]
3600
3601	stx	%i4,[%fp+tmp_px]
3602
3603	mov	6,counter
3604	stx	%i3,[%fp+tmp_py]
36051:
	/* the add after the ba is in its delay slot and runs before .cont51b */
3606	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
3607	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
3608	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
3609
3610	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
3611	add	%i5,stridez,%i5		! pz += stridez
3612	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
3613
3614	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
3615	sethi	%hi(0x3ff00000),%g1
3616	add	TBL,TBL_SHIFT+24,%i4
3617	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
3618
3619	ba	.cont51b
3620	add	TBL,TBL_SHIFT+24,%i3
3621
3622	.align	16
/*
 * .update53 -- fix-up stub that ends by branching to .cont56.
 *
 * If counter > 7 (signed compare), the surplus counter - 7 is written to
 * tmp_counter, %i2 and %o0 are written to tmp_px and tmp_py, and counter
 * is clamped to 7.  If counter <= 7 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3623.update53:
3624	cmp	counter,7
3625	ble	1f
3626	nop
3627
3628	sub	counter,7,counter
3629	st	counter,[%fp+tmp_counter]
3630
3631	stx	%i2,[%fp+tmp_px]
3632
3633	stx	%o0,[%fp+tmp_py]
3634
3635	mov	7,counter
36361:
	/*
	 * Common tail: issue the FP operations annotated below, store %f1
	 * at [%i5+4] before %i5 is advanced by stridez, store the incoming
	 * %g1 to dtmp12, then build 0x3ff0000000000000 (bit pattern of the
	 * double 1.0) in %g1 with sethi/sllx and store it to dtmp13.
	 * %i2/%o0 are set to TBL+TBL_SHIFT+24; the final add is in the delay
	 * slot of the ba, so it executes before .cont56.
	 */
3637	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36;
3638
3639	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
3640	st	%f1,[%i5+4]		! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
3641	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
3642
3643	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
3644
3645	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
3646	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
3647	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
3648
3649	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
3650	add	%i5,stridez,%i5		! pz += stridez
3651	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
3652
3653	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
3654	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
3655
3656	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
3657	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
3658
3659	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
3660	sethi	%hi(0x3ff00000),%g1
3661	add	TBL,TBL_SHIFT+24,%i2
3662	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
3663
3664	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
3665	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
3666	ba	.cont56
3667	add	TBL,TBL_SHIFT+24,%o0
3668
3669	.align	16
/*
 * .update54 -- fix-up stub that ends by branching to .cont56.
 *
 * If counter > 7 (signed compare), the surplus counter - 7 is written to
 * tmp_counter, %i2 and %o0 are written to tmp_px and tmp_py, and counter
 * is clamped to 7.  If counter <= 7 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3670.update54:
3671	cmp	counter,7
3672	ble	1f
3673	nop
3674
3675	sub	counter,7,counter
3676	st	counter,[%fp+tmp_counter]
3677
3678	stx	%i2,[%fp+tmp_px]
3679
3680	stx	%o0,[%fp+tmp_py]
3681
3682	mov	7,counter
36831:
	/*
	 * Common tail: issue the FP operations annotated below, store %f1
	 * at [%i5+4] before %i5 is advanced by stridez, store the incoming
	 * %g1 to dtmp12, then build 0x3ff0000000000000 (bit pattern of the
	 * double 1.0) in %g1 with sethi/sllx and store it to dtmp13.
	 * %i2/%o0 are set to TBL+TBL_SHIFT+24; the final add is in the delay
	 * slot of the ba, so it executes before .cont56.
	 */
3684	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
3685	st	%f1,[%i5+4]		! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
3686	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
3687
3688	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
3689
3690	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
3691	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
3692	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
3693
3694	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
3695	add	%i5,stridez,%i5		! pz += stridez
3696	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
3697
3698	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
3699	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
3700
3701	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
3702	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
3703
3704	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
3705	sethi	%hi(0x3ff00000),%g1
3706	add	TBL,TBL_SHIFT+24,%i2
3707	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
3708
3709	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
3710	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
3711	ba	.cont56
3712	add	TBL,TBL_SHIFT+24,%o0
3713
3714	.align	16
/*
 * .update55 -- fix-up stub with two exits, .cont55a and .cont55b.
 *
 * If %l7 (hy0 per the inline comment) >= _0x00100000, control goes to
 * .cont55a; only the cmp in the delay slot has run by then.
 * Otherwise, when counter > 7 the surplus counter - 7 is written to
 * tmp_counter, %i2/%o0 are written to tmp_px/tmp_py and counter becomes 7.
 * The stub then issues the FP operations annotated below, stores the
 * incoming %g1 to dtmp12, advances %i5 by stridez, loads 0x3ff00000 into
 * %g1, sets %i2/%o0 to TBL+TBL_SHIFT+24 and branches to .cont55b.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3715.update55:
3716	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3717	bge,pn	%icc,.cont55a		! (0_0) if ( hy0 < 0x00100000 )
3718
	/* delay slot of the bge above (no annul bit): runs on both paths */
3719	cmp	counter,7
3720	ble,a	1f
3721	nop
3722
3723	sub	counter,7,counter
3724	st	counter,[%fp+tmp_counter]
3725
3726	stx	%i2,[%fp+tmp_px]
3727
3728	mov	7,counter
3729	stx	%o0,[%fp+tmp_py]
37301:
	/* the add after the ba is in its delay slot and runs before .cont55b */
3731	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
3732	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
3733	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
3734
3735	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
3736	add	%i5,stridez,%i5		! pz += stridez
3737	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
3738
3739	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
3740	sethi	%hi(0x3ff00000),%g1
3741	add	TBL,TBL_SHIFT+24,%i2
3742	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
3743
3744	ba	.cont55b
3745	add	TBL,TBL_SHIFT+24,%o0
3746
3747	.align	16
/*
 * .update57 -- fix-up stub that ends by branching to .cont60.
 *
 * If counter > 8 (signed compare), the surplus counter - 8 is written to
 * tmp_counter, %i4 and %i3 are written to tmp_px and tmp_py, and counter
 * is clamped to 8.  If counter <= 8 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3748.update57:
3749	cmp	counter,8
3750	ble	1f
3751	nop
3752
3753	sub	counter,8,counter
3754	st	counter,[%fp+tmp_counter]
3755
3756	stx	%i4,[%fp+tmp_px]
3757
3758	stx	%i3,[%fp+tmp_py]
3759
3760	mov	8,counter
37611:
	/*
	 * Common tail: issue the FP operations annotated below, store %f3
	 * at [%i5+4] before %i5 is advanced by stridez, store the incoming
	 * %g1 to dtmp14, then build 0x3ff0000000000000 (bit pattern of the
	 * double 1.0) in %g1 with sethi/sllx and store it to dtmp15.
	 * %i4/%i3 are set to TBL+TBL_SHIFT+24; the final add is in the delay
	 * slot of the ba, so it executes before .cont60.
	 */
3762	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;
3763
3764	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
3765	st	%f3,[%i5+4]		! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
3766	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;
3767
3768	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);
3769
3770	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
3771	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
3772	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
3773
3774	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
3775	add	%i5,stridez,%i5		! pz += stridez
3776	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
3777
3778	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
3779	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
3780
3781	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
3782	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;
3783
3784	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
3785	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
3786
3787	sethi	%hi(0x3ff00000),%g1
3788	add	TBL,TBL_SHIFT+24,%i4
3789
3790	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
3791	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
3792	ba	.cont60
3793	add	TBL,TBL_SHIFT+24,%i3
3794
3795	.align	16
/*
 * .update58 -- fix-up stub that ends by branching to .cont60.
 *
 * If counter > 8 (signed compare), the surplus counter - 8 is written to
 * tmp_counter, %i4 and %i3 are written to tmp_px and tmp_py, and counter
 * is clamped to 8.  If counter <= 8 nothing is saved.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3796.update58:
3797	cmp	counter,8
3798	ble	1f
3799	nop
3800
3801	sub	counter,8,counter
3802	st	counter,[%fp+tmp_counter]
3803
3804	stx	%i4,[%fp+tmp_px]
3805
3806	stx	%i3,[%fp+tmp_py]
3807
3808	mov	8,counter
38091:
	/*
	 * Common tail: issue the FP operations annotated below, store %f3
	 * at [%i5+4] before %i5 is advanced by stridez, store the incoming
	 * %g1 to dtmp14, then build 0x3ff0000000000000 (bit pattern of the
	 * double 1.0) in %g1 with sethi/sllx and store it to dtmp15.
	 * %i4/%i3 are set to TBL+TBL_SHIFT+24; the final add is in the delay
	 * slot of the ba, so it executes before .cont60.
	 */
3810	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
3811	st	%f3,[%i5+4]		! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
3812	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;
3813
3814	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);
3815
3816	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
3817	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
3818	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
3819
3820	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
3821	add	%i5,stridez,%i5		! pz += stridez
3822	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
3823
3824	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
3825	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
3826
3827	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
3828	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;
3829
3830	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
3831	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
3832
3833	sethi	%hi(0x3ff00000),%g1
3834	add	TBL,TBL_SHIFT+24,%i4
3835
3836	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
3837	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
3838	ba	.cont60
3839	add	TBL,TBL_SHIFT+24,%i3
3840
3841	.align	16
/*
 * .update59 -- fix-up stub with two exits, .cont59a and .cont59b.
 *
 * If %l7 (hy0 per the inline comment) >= _0x00100000, control goes to
 * .cont59a; only the cmp in the delay slot has run by then.
 * Otherwise, when counter > 8 the surplus counter - 8 is written to
 * tmp_counter, %i4/%i3 are written to tmp_px/tmp_py and counter becomes 8.
 * The stub then issues the FP operations annotated below, stores the
 * incoming %g1 to dtmp14, advances %i5 by stridez, loads 0x3ff00000 into
 * %g1, sets %i4/%i3 to TBL+TBL_SHIFT+24 and branches to .cont59b.
 *
 * NOTE(review): the branch site and the consumer of tmp_counter/tmp_px/
 * tmp_py are not part of this stub -- confirm the restart protocol there.
 */
3842.update59:
3843	cmp	%l7,_0x00100000		! (0_0) hy0 ? 0x00100000
3844	bge,pn	%icc,.cont59a		! (0_0) if ( hy0 < 0x00100000 )
3845
	/* delay slot of the bge above (no annul bit): runs on both paths */
3846	cmp	counter,8
3847	ble,a	1f
3848	nop
3849
3850	sub	counter,8,counter
3851	st	counter,[%fp+tmp_counter]
3852
3853	stx	%i4,[%fp+tmp_px]
3854
3855	mov	8,counter
3856	stx	%i3,[%fp+tmp_py]
38571:
	/* the add after the ba is in its delay slot and runs before .cont59b */
3858	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
3859	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
3860	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;
3861
3862	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
3863	add	%i5,stridez,%i5		! pz += stridez
3864	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
3865
3866	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
3867	sethi	%hi(0x3ff00000),%g1
3868	add	TBL,TBL_SHIFT+24,%i4
3869	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
3870
3871	ba	.cont59b
3872	add	TBL,TBL_SHIFT+24,%i3
3873
3874	.align	16
/*
 * .exit -- function epilogue.  The restore sits in the delay slot of the
 * ret, so the caller's register window is restored as control returns.
 * SET_SIZE closes the __vrhypot symbol; the macro is defined outside this
 * file section (presumably via libm.h -- confirm).
 */
3875.exit:
3876	ret
3877	restore
3878	SET_SIZE(__vrhypot)
3879
3880