1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vrhypot.S"
30
31#include "libm.h"
32
33	RO_DATA
34	.align	64
35
36.CONST_TBL:
! One-over (reciprocal) seed table: 128 32-bit entries.
! The main loop indexes it with iarr = ((hi32(dres) >> 11) & 0x1fc),
! i.e. the 7 mantissa bits just below the exponent field select a
! 4-byte-aligned entry, which seeds the Newton-Raphson reciprocal
! iteration (dd = vis_fpsub32(dtmp0, dexp0); three dd *= (2 - dd*dres)
! refinement steps follow).
! NOTE(review): entry i appears to be the high word of the double
! 2**1023/(1 + i/128), i = 0..127 (low mantissa bits dropped; the NR
! iteration absorbs the truncation) -- confirm against the original
! C sources.
	.word	0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
	.word	0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
	.word	0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
	.word	0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
	.word	0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
	.word	0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
	.word	0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
	.word	0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
	.word	0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
	.word	0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
	.word	0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
	.word	0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
	.word	0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
	.word	0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
	.word	0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
	.word	0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
	.word	0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
	.word	0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
	.word	0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
	.word	0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
	.word	0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
	.word	0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
	.word	0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
	.word	0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
	.word	0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
	.word	0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
	.word	0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
	.word	0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
	.word	0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
	.word	0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
	.word	0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
	.word	0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,
69
! Named double-precision constants, loaded via TBL+TBL_SHIFT (=512,
! right past the 128*4-byte table above).  Each is a {hi, lo} word pair.
	.word	0x42300000, 0		! D2ON36 = 2**36
	.word	0xffffff00, 0		! DA0: AND mask, keeps sign+exp+12 msb of mantissa
	.word	0xfff00000, 0		! DA1: AND mask, isolates sign+exponent field
	.word	0x3ff00000, 0		! DONE = 1.0
	.word	0x40000000, 0		! DTWO = 2.0
	.word	0x7fd00000, 0		! D2ON1022 = 2**1022
	.word	0x3cb00000, 0		! D2ONM52 = 2**-52
	.word	0x43200000, 0		! D2ON51 = 2**51
	.word	0x0007ffff, 0xffffffff	! 0x0007ffffffffffff: low-mantissa mask (subnormal path)
79
! Register assignments.
! stridex/y/z: argument strides in BYTES -- the entry code scales the
! incoming element strides by 8 (sll ...,3,...) = sizeof(double).
80#define stridex		%l2
81#define stridey		%l3
82#define stridez		%l5
83
! Byte offset from .CONST_TBL to the double constants that follow the
! 128-entry x 4-byte reciprocal seed table (128 * 4 = 512).
84#define TBL_SHIFT	512
85
! TBL: PIC base pointer to .CONST_TBL (set up via PIC_SET).
! counter: number of elements remaining in the current run.
86#define TBL		%l1
87#define counter		%l4
88
! Frequently compared integer constants, kept in registers:
! infinity/NaN threshold, subnormal threshold, and the abs-value mask.
89#define _0x7ff00000	%l0
90#define _0x00100000	%o5
91#define _0x7fffffff	%l6
92
! Double constants preloaded from the table into FP registers.
93#define D2ON36		%f4
94#define DTWO		%f6
95#define DONE		%f8
96#define DA0		%f58
97#define DA1		%f56
98
! Per-iteration 8-byte stack slots (frame-pointer relative) used to
! spill the scale factor scl0 for each pipeline stage
! ("*(long long*)&scl0 = ll" in the algorithm sketch).
99#define dtmp0		STACK_BIAS-0x80
100#define dtmp1		STACK_BIAS-0x78
101#define dtmp2		STACK_BIAS-0x70
102#define dtmp3		STACK_BIAS-0x68
103#define dtmp4		STACK_BIAS-0x60
104#define dtmp5		STACK_BIAS-0x58
105#define dtmp6		STACK_BIAS-0x50
106#define dtmp7		STACK_BIAS-0x48
107#define dtmp8		STACK_BIAS-0x40
108#define dtmp9		STACK_BIAS-0x38
109#define dtmp10		STACK_BIAS-0x30
110#define dtmp11		STACK_BIAS-0x28
111#define dtmp12		STACK_BIAS-0x20
112#define dtmp13		STACK_BIAS-0x18
113#define dtmp14		STACK_BIAS-0x10
114#define dtmp15		STACK_BIAS-0x08
115
! ftmp0: scratch slot for bouncing the high word of dres from an FP
! register to an integer register (st %fN / ld).
! tmp_px/tmp_py/tmp_counter: saved argument pointers and loop counter,
! reloaded at .begin when the loop restarts after a special case.
116#define ftmp0		STACK_BIAS-0x100
117#define tmp_px		STACK_BIAS-0x98
118#define tmp_py		STACK_BIAS-0x90
119#define tmp_counter	STACK_BIAS-0x88
120
121! sizeof temp storage - must be a multiple of 16 for V9
122#define tmps		0x100
123
124!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
125!      !!!!!   algorithm   !!!!!
126!  hx0 = *(int*)px;
127!  hy0 = *(int*)py;
128!
129!  ((float*)&x0)[0] = ((float*)px)[0];
130!  ((float*)&x0)[1] = ((float*)px)[1];
131!  ((float*)&y0)[0] = ((float*)py)[0];
132!  ((float*)&y0)[1] = ((float*)py)[1];
133!
134!  hx0 &= 0x7fffffff;
135!  hy0 &= 0x7fffffff;
136!
137!  diff0 = hy0 - hx0;
138!  j0 = diff0 >> 31;
139!  j0 &= diff0;
140!  j0 = hy0 - j0;
141!  j0 &= 0x7ff00000;
142!
143!  j0 = 0x7ff00000 - j0;
144!  ll = (long long)j0 << 32;
145!  *(long long*)&scl0 = ll;
146!
147!  if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 )
148!  {
149!    lx = ((int*)px)[1];
150!    ly = ((int*)py)[1];
151!
152!    if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0;
153!    else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0;
154!    else res0 = fabs(x0) * fabs(y0);
155!
156!    ((float*)pz)[0] = ((float*)&res0)[0];
157!    ((float*)pz)[1] = ((float*)&res0)[1];
158!
159!    px += stridex;
160!    py += stridey;
161!    pz += stridez;
162!    continue;
163!  }
164!  if ( hx0 <  0x00100000 && hy0 <  0x00100000 )
165!  {
166!    lx = ((int*)px)[1];
167!    ly = ((int*)py)[1];
168!    ii = hx0 | hy0;
169!    ii |= lx;
170!    ii |= ly;
171!    if ( ii == 0 )
172!    {
173!      res0 = 1.0 / 0.0;
174!      ((float*)pz)[0] = ((float*)&res0)[0];
175!      ((float*)pz)[1] = ((float*)&res0)[1];
176!
177!      px += stridex;
178!      py += stridey;
179!      pz += stridez;
180!      continue;
181!    }
182!    x0 = fabs(x0);
183!    y0 = fabs(y0);
184!    if ( hx0 < 0x00080000 )
185!    {
186!      x0 = *(long long*)&x0;
187!    }
188!    else
189!    {
190!      ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
191!      x0 = vis_fand(x0, dtmp0);
192!      x0 = *(long long*)&x0;
193!      x0 += D2ON51;
194!    }
195!    x0 *= D2ONM52;
196!    if ( hy0 < 0x00080000 )
197!    {
198!      y0 = *(long long*)&y0;
199!    }
200!    else
201!    {
202!      ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
203!      y0 = vis_fand(y0, dtmp0);
204!      y0 = *(long long*)&y0;
205!      y0 += D2ON51;
206!    }
207!    y0 *= D2ONM52;
208!    *(long long*)&scl0 = 0x7fd0000000000000ULL;
209!  }
210!  else
211!  {
212!    x0 *= scl0;
213!    y0 *= scl0;
214!  }
215!
216!  x_hi0 = x0 + D2ON36;
217!  y_hi0 = y0 + D2ON36;
218!  x_hi0 -= D2ON36;
219!  y_hi0 -= D2ON36;
220!  x_lo0 = x0 - x_hi0;
221!  y_lo0 = y0 - y_hi0;
222!  res0_hi = x_hi0 * x_hi0;
223!  dtmp0 = y_hi0 * y_hi0;
224!  res0_hi += dtmp0;
225!  res0_lo = x0 + x_hi0;
226!  res0_lo *= x_lo0;
227!  dtmp1 = y0 + y_hi0;
228!  dtmp1 *= y_lo0;
229!  res0_lo += dtmp1;
230!
231!  dres = res0_hi + res0_lo;
232!  dexp0 = vis_fand(dres,DA1);
233!  iarr = ((int*)&dres)[0];
234!
235!  iarr >>= 11;
236!  iarr &= 0x1fc;
237!  dtmp0 = ((double*)((char*)dll1 + iarr))[0];   /* dll1 == .CONST_TBL */
238!  dd = vis_fpsub32(dtmp0, dexp0);
239!
240!  dtmp0 = dd * dres;
241!  dtmp0 = DTWO - dtmp0;
242!  dd *= dtmp0;
243!  dtmp1 = dd * dres;
244!  dtmp1 = DTWO - dtmp1;
245!  dd *= dtmp1;
246!  dtmp2 = dd * dres;
247!  dtmp2 = DTWO - dtmp2;
248!  dres = dd * dtmp2;
249!
250!  res0 = vis_fand(dres,DA0);
251!
252!  dtmp0 = res0_hi * res0;
253!  dtmp0 = DONE - dtmp0;
254!  dtmp1 = res0_lo * res0;
255!  dtmp0 -= dtmp1;
256!  dtmp0 *= dres;
257!  res0 += dtmp0;
258!
259!  res0 = sqrt ( res0 );
260!
261!  res0 = scl0 * res0;
262!
263!  ((float*)pz)[0] = ((float*)&res0)[0];
264!  ((float*)pz)[1] = ((float*)&res0)[1];
265!
266!  px += stridex;
267!  py += stridey;
268!  pz += stridez;
269!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
270
271	ENTRY(__vrhypot)
272	save	%sp,-SA(MINFRAME)-tmps,%sp
273	PIC_SETUP(l7)
274	PIC_SET(l7,.CONST_TBL,l1)
275	wr	%g0,0x82,%asi
276
277#ifdef __sparcv9
278	ldx	[%fp+STACK_BIAS+176],stridez
279#else
280	ld	[%fp+STACK_BIAS+92],stridez
281#endif
282
283	sll	%i2,3,stridex
284	sethi	%hi(0x7ff00000),_0x7ff00000
285	st	%i0,[%fp+tmp_counter]
286
287	sll	%i4,3,stridey
288	sethi	%hi(0x00100000),_0x00100000
289	stx	%i1,[%fp+tmp_px]
290
291	sll	stridez,3,stridez
292	sethi	%hi(0x7ffffc00),_0x7fffffff
293	stx	%i3,[%fp+tmp_py]
294
295	ldd	[TBL+TBL_SHIFT],D2ON36
296	add	_0x7fffffff,1023,_0x7fffffff
297
298	ldd	[TBL+TBL_SHIFT+8],DA0
299
300	ldd	[TBL+TBL_SHIFT+16],DA1
301
302	ldd	[TBL+TBL_SHIFT+24],DONE
303
304	ldd	[TBL+TBL_SHIFT+32],DTWO
305
306.begin:
307	ld	[%fp+tmp_counter],counter
308	ldx	[%fp+tmp_px],%i4
309	ldx	[%fp+tmp_py],%i3
310	st	%g0,[%fp+tmp_counter]
311.begin1:
312	cmp	counter,0
313	ble,pn	%icc,.exit
314
315	lda	[%i4]0x82,%o1		! (7_0) hx0 = *(int*)px;
316	add	%i4,stridex,%i1
317
318	lda	[%i3]0x82,%o4		! (7_0) hy0 = *(int*)py;
319	add	%i3,stridey,%i0		! py += stridey
320
321	and	%o1,_0x7fffffff,%o7	! (7_0) hx0 &= 0x7fffffff;
322
323	cmp	%o7,_0x7ff00000		! (7_0) hx0 ? 0x7ff00000
324	bge,pn	%icc,.spec0		! (7_0) if ( hx0 >= 0x7ff00000 )
325	and	%o4,_0x7fffffff,%l7	! (7_0) hy0 &= 0x7fffffff;
326
327	cmp	%l7,_0x7ff00000		! (7_0) hy0 ? 0x7ff00000
328	bge,pn	%icc,.spec0		! (7_0) if ( hy0 >= 0x7ff00000 )
329	sub	%l7,%o7,%o1		! (7_0) diff0 = hy0 - hx0;
330
331	sra	%o1,31,%o3		! (7_0) j0 = diff0 >> 31;
332	cmp	%o7,_0x00100000		! (7_0) hx0 ? 0x00100000
333	bl,pn	%icc,.spec1		! (7_0) if ( hx0 < 0x00100000 )
334
335	and	%o1,%o3,%o1		! (7_0) j0 &= diff0;
336.cont_spec0:
337	sub	%l7,%o1,%o4		! (7_0) j0 = hy0 - j0;
338
339	and	%o4,%l0,%o4		! (7_0) j0 &= 0x7ff00000;
340
341	sub	%l0,%o4,%g1		! (7_0) j0 = 0x7ff00000 - j0;
342
343	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
344
345	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
346
347	stx	%g1,[%fp+dtmp0]		! (7_1) *(long long*)&scl0 = ll;
348.cont_spec1:
349	lda	[%i1]0x82,%o1		! (0_0) hx0 = *(int*)px;
350	mov	%i1,%i2
351
352	lda	[%i0]0x82,%o4		! (0_0) hy0 = *(int*)py;
353
354	and	%o1,_0x7fffffff,%o7	! (0_0) hx0 &= 0x7fffffff;
355	mov	%i0,%o0
356
357	cmp	%o7,_0x7ff00000		! (0_0) hx0 ? 0x7ff00000
358	bge,pn	%icc,.update0		! (0_0) if ( hx0 >= 0x7ff00000 )
359	and	%o4,_0x7fffffff,%l7	! (0_0) hy0 &= 0x7fffffff;
360
361	cmp	%l7,_0x7ff00000		! (0_0) hy0 ? 0x7ff00000
362	sub	%l7,%o7,%o1		! (0_0) diff0 = hy0 - hx0;
363	bge,pn	%icc,.update0		! (0_0) if ( hy0 >= 0x7ff00000 )
364	sra	%o1,31,%o3		! (0_0) j0 = diff0 >> 31;
365
366	cmp	%o7,_0x00100000		! (0_0) hx0 ? 0x00100000
367
368	and	%o1,%o3,%o1		! (0_0) j0 &= diff0;
369	bl,pn	%icc,.update1		! (0_0) if ( hx0 < 0x00100000 )
370	sub	%l7,%o1,%o4		! (0_0) j0 = hy0 - j0;
371.cont0:
372	and	%o4,%l0,%o4		! (0_0) j0 &= 0x7ff00000;
373
374	sub	%l0,%o4,%o4		! (0_0) j0 = 0x7ff00000 - j0;
375.cont1:
376	sllx	%o4,32,%o4		! (0_0) ll = (long long)j0 << 32;
377	stx	%o4,[%fp+dtmp1]		! (0_0) *(long long*)&scl0 = ll;
378
379	ldd	[%fp+dtmp15],%f62	! (7_1) *(long long*)&scl0 = ll;
380
381	lda	[%i4]%asi,%f10		! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
382
383	lda	[%i4+4]%asi,%f11	! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
384
385	lda	[%i3]%asi,%f12		! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
386
387	add	%i1,stridex,%i4		! px += stridex
388	lda	[%i3+4]%asi,%f13	! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
389
390	fmuld	%f10,%f62,%f10		! (7_1) x0 *= scl0;
391	add	%i4,stridex,%i1		! px += stridex
392
393	fmuld	%f12,%f62,%f60		! (7_1) y0 *= scl0;
394
395	lda	[%i4]0x82,%o1		! (1_0) hx0 = *(int*)px;
396
397	add	%i0,stridey,%i3		! py += stridey
398	faddd	%f10,D2ON36,%f46	! (7_1) x_hi0 = x0 + D2ON36;
399
400	lda	[%i3]0x82,%g1		! (1_0) hy0 = *(int*)py;
401	add	%i3,stridey,%i0		! py += stridey
402	faddd	%f60,D2ON36,%f50	! (7_1) y_hi0 = y0 + D2ON36;
403
404	and	%o1,_0x7fffffff,%o7	! (1_0) hx0 &= 0x7fffffff;
405
406	cmp	%o7,_0x7ff00000		! (1_0) hx0 ? 0x7ff00000
407	stx	%o4,[%fp+dtmp2]		! (0_0) *(long long*)&scl0 = ll;
408
409	and	%g1,_0x7fffffff,%l7	! (1_0) hy0 &= 0x7fffffff;
410	bge,pn	%icc,.update2		! (1_0) if ( hx0 >= 0x7ff00000 )
411	fsubd	%f46,D2ON36,%f20	! (7_1) x_hi0 -= D2ON36;
412
413	cmp	%l7,_0x7ff00000		! (1_0) hy0 ? 0x7ff00000
414	sub	%l7,%o7,%o1		! (1_0) diff0 = hy0 - hx0;
415	bge,pn	%icc,.update3		! (1_0) if ( hy0 >= 0x7ff00000 )
416	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;
417
418	sra	%o1,31,%o3		! (1_0) j0 = diff0 >> 31;
419
420	and	%o1,%o3,%o1		! (1_0) j0 &= diff0;
421
422	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
423	sub	%l7,%o1,%o4		! (1_0) j0 = hy0 - j0;
424	cmp	%o7,_0x00100000		! (1_0) hx0 ? 0x00100000
425	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;
426
427	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
428	and	%o4,%l0,%o4		! (1_0) j0 &= 0x7ff00000;
429	bl,pn	%icc,.update4		! (1_0) if ( hx0 < 0x00100000 )
430	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
431
432	sub	%l0,%o4,%o4		! (1_0) j0 = 0x7ff00000 - j0;
433.cont4:
434	sllx	%o4,32,%o4		! (1_0) ll = (long long)j0 << 32;
435	stx	%o4,[%fp+dtmp3]		! (1_0) *(long long*)&scl0 = ll;
436	faddd	%f60,%f54,%f50		! (7_1) dtmp1 = y0 + y_hi0;
437
438	fsubd	%f60,%f54,%f12		! (7_1) y_lo0 = y0 - y_hi0;
439
440	fmuld	%f62,%f0,%f0		! (7_1) res0_lo *= x_lo0;
441	ldd	[%fp+dtmp1],%f62	! (0_0) *(long long*)&scl0 = ll;
442	faddd	%f2,%f46,%f44		! (7_1) res0_hi += dtmp0;
443
444	lda	[%i2]%asi,%f10		! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
445
446	lda	[%i2+4]%asi,%f11	! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
447
448	fmuld	%f50,%f12,%f26		! (7_1) dtmp1 *= y_lo0;
449	lda	[%o0]%asi,%f12		! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
450
451	lda	[%o0+4]%asi,%f13	! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
452
453	fmuld	%f10,%f62,%f10		! (0_0) x0 *= scl0;
454
455	fmuld	%f12,%f62,%f60		! (0_0) y0 *= scl0;
456	faddd	%f0,%f26,%f38		! (7_1) res0_lo += dtmp1;
457
458	lda	[%i1]0x82,%o1		! (2_0) hx0 = *(int*)px;
459	mov	%i1,%i2
460
461	faddd	%f10,D2ON36,%f46	! (0_0) x_hi0 = x0 + D2ON36;
462
463	lda	[%i0]0x82,%g1		! (2_0) hy0 = *(int*)py;
464	mov	%i0,%o0
465	faddd	%f60,D2ON36,%f12	! (0_0) y_hi0 = y0 + D2ON36;
466
467	faddd	%f44,%f38,%f14		! (7_1) dres = res0_hi + res0_lo;
468	and	%o1,_0x7fffffff,%o7	! (2_0) hx0 &= 0x7fffffff;
469
470	cmp	%o7,_0x7ff00000		! (2_0) hx0 ? 0x7ff00000
471	bge,pn	%icc,.update5		! (2_0) if ( hx0 >= 0x7ff00000 )
472	stx	%o4,[%fp+dtmp4]		! (1_0) *(long long*)&scl0 = ll;
473
474	and	%g1,_0x7fffffff,%l7	! (2_0) hx0 &= 0x7fffffff;
475	st	%f14,[%fp+ftmp0]	! (7_1) iarr = ((int*)&dres)[0];
476	fsubd	%f46,D2ON36,%f20	! (0_0) x_hi0 -= D2ON36;
477
478	sub	%l7,%o7,%o1		! (2_0) diff0 = hy0 - hx0;
479	cmp	%l7,_0x7ff00000		! (2_0) hy0 ? 0x7ff00000
480	bge,pn	%icc,.update6		! (2_0) if ( hy0 >= 0x7ff00000 )
481	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;
482
483	sra	%o1,31,%o3		! (2_0) j0 = diff0 >> 31;
484
485	and	%o1,%o3,%o1		! (2_0) j0 &= diff0;
486
487	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
488	cmp	%o7,_0x00100000		! (2_0) hx0 ? 0x00100000
489	sub	%l7,%o1,%o4		! (2_0) j0 = hy0 - j0;
490	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;
491
492	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
493	and	%o4,%l0,%o4		! (2_0) j0 &= 0x7ff00000;
494	bl,pn	%icc,.update7		! (2_0) if ( hx0 < 0x00100000 )
495	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
496.cont7:
497	sub	%l0,%o4,%g1		! (2_0) j0 = 0x7ff00000 - j0;
498
499	sllx	%g1,32,%g1		! (2_0) ll = (long long)j0 << 32;
500.cont8:
501	stx	%g1,[%fp+dtmp5]		! (2_0) *(long long*)&scl0 = ll;
502	faddd	%f60,%f54,%f50		! (0_0) dtmp1 = y0 + y_hi0;
503
504	fsubd	%f60,%f54,%f12		! (0_0) y_lo0 = y0 - y_hi0;
505
506	fmuld	%f62,%f0,%f0		! (0_0) res0_lo *= x_lo0;
507	ldd	[%fp+dtmp3],%f62	! (1_0) *(long long*)&scl0 = ll;
508	faddd	%f2,%f46,%f32		! (0_0) res0_hi += dtmp0;
509
510	lda	[%i4]%asi,%f10		! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
511
512	lda	[%i4+4]%asi,%f11	! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
513
514	fmuld	%f50,%f12,%f28		! (0_0) dtmp1 *= y_lo0;
515	lda	[%i3]%asi,%f12		! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
516
517	add	%i1,stridex,%i4		! px += stridex
518	lda	[%i3+4]%asi,%f13	! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
519
520	ld	[%fp+ftmp0],%o2		! (7_1) iarr = ((int*)&dres)[0];
521	add	%i4,stridex,%i1		! px += stridex
522	fand	%f14,DA1,%f2		! (7_1) dexp0 = vis_fand(dres,DA1);
523
524	fmuld	%f10,%f62,%f10		! (1_0) x0 *= scl0;
525
526	fmuld	%f12,%f62,%f60		! (1_0) y0 *= scl0;
527	sra	%o2,11,%i3		! (7_1) iarr >>= 11;
528	faddd	%f0,%f28,%f36		! (0_0) res0_lo += dtmp1;
529
530	and	%i3,0x1fc,%i3		! (7_1) iarr &= 0x1fc;
531
532	add	%i3,TBL,%o4		! (7_1) (char*)dll1 + iarr
533	lda	[%i4]0x82,%o1		! (3_0) hx0 = *(int*)px;
534
535	add	%i0,stridey,%i3		! py += stridey
536	ld	[%o4],%f26		! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
537	faddd	%f10,D2ON36,%f46	! (1_0) x_hi0 = x0 + D2ON36;
538
539	lda	[%i3]0x82,%o4		! (3_0) hy0 = *(int*)py;
540	add	%i3,stridey,%i0		! py += stridey
541	faddd	%f60,D2ON36,%f12	! (1_0) y_hi0 = y0 + D2ON36;
542
543	faddd	%f32,%f36,%f22		! (0_0) dres = res0_hi + res0_lo;
544	and	%o1,_0x7fffffff,%o7	! (3_0) hx0 &= 0x7fffffff;
545
546	cmp	%o7,_0x7ff00000		! (3_0) hx0 ? 0x7ff00000
547	stx	%g1,[%fp+dtmp6]		! (2_0) *(long long*)&scl0 = ll;
548	bge,pn	%icc,.update9		! (3_0) if ( hx0 >= 0x7ff00000 )
549	fpsub32	%f26,%f2,%f26		! (7_1) dd = vis_fpsub32(dtmp0, dexp0);
550
551	and	%o4,_0x7fffffff,%l7	! (3_0) hy0 &= 0x7fffffff;
552	st	%f22,[%fp+ftmp0]	! (0_0) iarr = ((int*)&dres)[0];
553	fsubd	%f46,D2ON36,%f20	! (1_0) x_hi0 -= D2ON36;
554
555	sub	%l7,%o7,%o1		! (3_0) diff0 = hy0 - hx0;
556	cmp	%l7,_0x7ff00000		! (3_0) hy0 ? 0x7ff00000
557	bge,pn	%icc,.update10		! (3_0) if ( hy0 >= 0x7ff00000 )
558	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;
559
560	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres;
561	sra	%o1,31,%o3		! (3_0) j0 = diff0 >> 31;
562
563	and	%o1,%o3,%o1		! (3_0) j0 &= diff0;
564
565	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
566	cmp	%o7,_0x00100000		! (3_0) hx0 ? 0x00100000
567	sub	%l7,%o1,%o4		! (3_0) j0 = hy0 - j0;
568	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;
569
570	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
571	and	%o4,%l0,%o4		! (3_0) j0 &= 0x7ff00000;
572	bl,pn	%icc,.update11		! (3_0) if ( hx0 < 0x00100000 )
573	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
574.cont11:
575	sub	%l0,%o4,%g1		! (3_0) j0 = 0x7ff00000 - j0;
576	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
577.cont12:
578	sllx	%g1,32,%g1		! (3_0) ll = (long long)j0 << 32;
579	stx	%g1,[%fp+dtmp7]		! (3_0) *(long long*)&scl0 = ll;
580	faddd	%f60,%f54,%f50		! (1_0) dtmp1 = y0 + y_hi0;
581
582	fsubd	%f60,%f54,%f12		! (1_0) y_lo0 = y0 - y_hi0
583
584	fmuld	%f62,%f0,%f0		! (1_0) res0_lo *= x_lo0;
585	ldd	[%fp+dtmp5],%f62	! (2_0) *(long long*)&scl0 = ll;
586	faddd	%f2,%f46,%f42		! (1_0) res0_hi += dtmp0;
587
588	lda	[%i2]%asi,%f10		! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
589	fmuld	%f26,%f20,%f54		! (7_1) dd *= dtmp0;
590
591	lda	[%i2+4]%asi,%f11	! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
592
593	fmuld	%f50,%f12,%f26		! (1_0) dtmp1 *= y_lo0;
594	lda	[%o0]%asi,%f12		! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
595
596	lda	[%o0+4]%asi,%f13	! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
597
598	fmuld	%f54,%f14,%f50		! (7_1) dtmp1 = dd * dres;
599	ld	[%fp+ftmp0],%o2		! (0_0) iarr = ((int*)&dres)[0];
600	fand	%f22,DA1,%f2		! (0_0) dexp0 = vis_fand(dres,DA1);
601
602	fmuld	%f10,%f62,%f10		! (2_0) x0 *= scl0;
603
604	fmuld	%f12,%f62,%f60		! (2_0) y0 *= scl0;
605	sra	%o2,11,%o4		! (0_0) iarr >>= 11;
606	faddd	%f0,%f26,%f34		! (1_0) res0_lo += dtmp1;
607
608	and	%o4,0x1fc,%o4		! (0_0) iarr &= 0x1fc;
609
610	add	%o4,TBL,%o4		! (0_0) (char*)dll1 + iarr
611	mov	%i1,%i2
612	lda	[%i1]0x82,%o1		! (4_0) hx0 = *(int*)px;
613	fsubd	DTWO,%f50,%f20		! (7_1) dtmp1 = DTWO - dtmp1;
614
615	ld	[%o4],%f28		! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
616	faddd	%f10,D2ON36,%f46	! (2_0) x_hi0 = x0 + D2ON36;
617
618	lda	[%i0]0x82,%o4		! (4_0) hy0 = *(int*)py;
619	mov	%i0,%o0
620	faddd	%f60,D2ON36,%f50	! (2_0) y_hi0 = y0 + D2ON36;
621
622	and	%o1,_0x7fffffff,%o7	! (4_0) hx0 &= 0x7fffffff;
623	faddd	%f42,%f34,%f18		! (1_0) dres = res0_hi + res0_lo;
624
625	fmuld	%f54,%f20,%f16		! (7_1) dd *= dtmp1;
626	cmp	%o7,_0x7ff00000		! (4_0) hx0 ? 0x7ff00000
627	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
628	fpsub32	%f28,%f2,%f28		! (0_0) dd = vis_fpsub32(dtmp0, dexp0);
629
630	and	%o4,_0x7fffffff,%l7	! (4_0) hy0 &= 0x7fffffff;
631	bge,pn	%icc,.update13		! (4_0) if ( hx0 >= 0x7ff00000 )
632	st	%f18,[%fp+ftmp0]	! (1_0) iarr = ((int*)&dres)[0];
633	fsubd	%f46,D2ON36,%f20	! (2_0) x_hi0 -= D2ON36;
634
635	sub	%l7,%o7,%o1		! (4_0) diff0 = hy0 - hx0;
636	cmp	%l7,_0x7ff00000		! (4_0) hy0 ? 0x7ff00000
637	bge,pn	%icc,.update14		! (4_0) if ( hy0 >= 0x7ff00000 )
638	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;
639
640	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
641	sra	%o1,31,%o3		! (4_0) j0 = diff0 >> 31;
642
643	and	%o1,%o3,%o1		! (4_0) j0 &= diff0;
644
645	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
646	sub	%l7,%o1,%o4		! (4_0) j0 = hy0 - j0;
647	cmp	%o7,_0x00100000		! (4_0) hx0 ? 0x00100000
648	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;
649
650	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
651	and	%o4,%l0,%o4		! (4_0) j0 &= 0x7ff00000;
652	bl,pn	%icc,.update15		! (4_0) if ( hx0 < 0x00100000 )
653	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;
654.cont15:
655	sub	%l0,%o4,%g1		! (4_0) j0 = 0x7ff00000 - j0;
656	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
657.cont16:
658	fmuld	%f16,%f14,%f14		! (7_1) dtmp2 = dd * dres;
659	sllx	%g1,32,%g1		! (4_0) ll = (long long)j0 << 32;
660	stx	%g1,[%fp+dtmp9]		! (4_0) *(long long*)&scl0 = ll;
661	faddd	%f60,%f54,%f50		! (2_0) dtmp1 = y0 + y_hi0;
662
663	fsubd	%f60,%f54,%f12		! (2_0) y_lo0 = y0 - y_hi0;
664
665	fmuld	%f62,%f0,%f0		! (2_0) res0_lo *= x_lo0;
666	ldd	[%fp+dtmp7],%f62	! (3_0) *(long long*)&scl0 = ll;
667	faddd	%f2,%f46,%f30		! (2_0) res0_hi += dtmp0;
668
669	lda	[%i4]%asi,%f10		! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
670	fmuld	%f28,%f20,%f54		! (0_0) dd *= dtmp0;
671
672	lda	[%i4+4]%asi,%f11	! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
673
674	fmuld	%f50,%f12,%f28		! (2_0) dtmp1 *= y_lo0;
675	lda	[%i3]%asi,%f12		! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
676	fsubd	DTWO,%f14,%f20		! (7_1) dtmp2 = DTWO - dtmp2;
677
678	lda	[%i3+4]%asi,%f13	! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
679	add	%i1,stridex,%i4		! px += stridex
680
681	fmuld	%f54,%f22,%f50		! (0_0) dtmp1 = dd * dres;
682	ld	[%fp+ftmp0],%o2		! (1_0) iarr = ((int*)&dres)[0];
683	add	%i4,stridex,%i1		! px += stridex
684	fand	%f18,DA1,%f2		! (1_0) dexp0 = vis_fand(dres,DA1);
685
686	fmuld	%f10,%f62,%f10		! (3_0) x0 *= scl0;
687
688	fmuld	%f12,%f62,%f60		! (3_0) y0 *= scl0;
689	sra	%o2,11,%i3		! (1_0) iarr >>= 11;
690	faddd	%f0,%f28,%f40		! (2_0) res0_lo += dtmp1;
691
692	and	%i3,0x1fc,%i3		! (1_0) iarr &= 0x1fc;
693	fmuld	%f16,%f20,%f28		! (7_1) dres = dd * dtmp2;
694
695	add	%i3,TBL,%o4		! (1_0) (char*)dll1 + iarr
696	lda	[%i4]0x82,%o1		! (5_0) hx0 = *(int*)px;
697	fsubd	DTWO,%f50,%f20		! (0_0) dtmp1 = DTWO - dtmp1;
698
699	add	%i0,stridey,%i3		! py += stridey
700	ld	[%o4],%f26		! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
701	faddd	%f10,D2ON36,%f46	! (3_0) x_hi0 = x0 + D2ON36;
702
703	lda	[%i3]0x82,%o4		! (5_0) hy0 = *(int*)py;
704	add	%i3,stridey,%i0		! py += stridey
705	faddd	%f60,D2ON36,%f50	! (3_0) y_hi0 = y0 + D2ON36;
706
707	and	%o1,_0x7fffffff,%o7	! (5_0) hx0 &= 0x7fffffff;
708	faddd	%f30,%f40,%f14		! (2_0) dres = res0_hi + res0_lo;
709
710	fmuld	%f54,%f20,%f24		! (0_0) dd *= dtmp1;
711	cmp	%o7,_0x7ff00000		! (5_0) hx0 ? 0x7ff00000
712	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
713	fpsub32	%f26,%f2,%f26		! (1_0) dd = vis_fpsub32(dtmp0, dexp0);
714
715	and	%o4,_0x7fffffff,%l7	! (5_0) hy0 &= 0x7fffffff;
716	st	%f14,[%fp+ftmp0]	! (2_0) iarr = ((int*)&dres)[0];
717	bge,pn	%icc,.update17		! (5_0) if ( hx0 >= 0x7ff00000 )
718	fsubd	%f46,D2ON36,%f20	! (3_0) x_hi0 -= D2ON36;
719
720	sub	%l7,%o7,%o1		! (5_0) diff0 = hy0 - hx0;
721	cmp	%l7,_0x7ff00000		! (5_0) hy0 ? 0x7ff00000
722	bge,pn	%icc,.update18		! (5_0) if ( hy0 >= 0x7ff00000 )
723	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;
724
725	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
726	sra	%o1,31,%o3		! (5_0) j0 = diff0 >> 31;
727
728	and	%o1,%o3,%o1		! (5_0) j0 &= diff0;
729	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
730
731	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
732	sub	%l7,%o1,%o4		! (5_0) j0 = hy0 - j0;
733	cmp	%o7,_0x00100000		! (5_0) hx0 ? 0x00100000
734	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;
735
736	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
737	and	%o4,%l0,%o4		! (5_0) j0 &= 0x7ff00000;
738	bl,pn	%icc,.update19		! (5_0) if ( hx0 < 0x00100000 )
739	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;
740.cont19a:
741	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
742	sub	%l0,%o4,%g1		! (5_0) j0 = 0x7ff00000 - j0;
743	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
744.cont19b:
745	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
746	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
747	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
748	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;
749
750	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
751	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
752.cont20:
753	fmuld	%f62,%f0,%f0		! (3_0) res0_lo *= x_lo0;
754	ldd	[%fp+dtmp9],%f62	! (4_0) *(long long*)&scl0 = ll;
755	faddd	%f2,%f46,%f44		! (3_0) res0_hi += dtmp0;
756
757	fsubd	DONE,%f10,%f60		! (7_1) dtmp0 = DONE - dtmp0;
758	lda	[%i2]%asi,%f10		! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
759	fmuld	%f26,%f20,%f54		! (1_0) dd *= dtmp0;
760
761	lda	[%i2+4]%asi,%f11	! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
762
763	fmuld	%f50,%f12,%f26		! (3_0) dtmp1 *= y_lo0;
764	lda	[%o0]%asi,%f12		! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
765	fsubd	DTWO,%f22,%f20		! (0_0) dtmp2 = DTWO - dtmp2;
766
767	lda	[%o0+4]%asi,%f13	! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
768
769	fmuld	%f54,%f18,%f50		! (1_0) dtmp1 = dd * dres;
770	ld	[%fp+ftmp0],%o2		! (2_0) iarr = ((int*)&dres)[0];
771	fand	%f14,DA1,%f2		! (2_0) dexp0 = vis_fand(dres,DA1);
772
773	fmuld	%f10,%f62,%f10		! (4_0) x0 *= scl0;
774	fsubd	%f60,%f38,%f46		! (7_1) dtmp0 -= dtmp1;
775
776	fmuld	%f12,%f62,%f60		! (4_0) y0 *= scl0;
777	sra	%o2,11,%o4		! (2_0) iarr >>= 11;
778	faddd	%f0,%f26,%f38		! (3_0) res0_lo += dtmp1;
779
780	and	%o4,0x1fc,%o4		! (2_0) iarr &= 0x1fc;
781	fmuld	%f24,%f20,%f26		! (0_0) dres = dd * dtmp2;
782
783	add	%o4,TBL,%o4		! (2_0) (char*)dll1 + iarr
784	mov	%i1,%i2
785	lda	[%i1]0x82,%o1		! (6_0) hx0 = *(int*)px;
786	fsubd	DTWO,%f50,%f52		! (1_0) dtmp1 = DTWO - dtmp1;
787
788	fmuld	%f46,%f28,%f28		! (7_1) dtmp0 *= dres;
789	ld	[%o4],%f20		! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
790	faddd	%f10,D2ON36,%f46	! (4_0) x_hi0 = x0 + D2ON36;
791
792	lda	[%i0]0x82,%o4		! (6_0) hy0 = *(int*)py;
793	mov	%i0,%o0
794	faddd	%f60,D2ON36,%f50	! (4_0) y_hi0 = y0 + D2ON36;
795
796	and	%o1,_0x7fffffff,%o7	! (6_0) hx0 &= 0x7fffffff;
797	faddd	%f44,%f38,%f22		! (3_0) dres = res0_hi + res0_lo;
798
799	fmuld	%f54,%f52,%f16		! (1_0) dd *= dtmp1;
800	cmp	%o7,_0x7ff00000		! (6_0) hx0 ? 0x7ff00000
801	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
802	fpsub32	%f20,%f2,%f52		! (2_0) dd = vis_fpsub32(dtmp0, dexp0);
803
804	and	%o4,_0x7fffffff,%l7	! (6_0) hy0 &= 0x7fffffff;
805	st	%f22,[%fp+ftmp0]	! (3_0) iarr = ((int*)&dres)[0];
806	bge,pn	%icc,.update21		! (6_0) if ( hx0 >= 0x7ff00000 )
807	fsubd	%f46,D2ON36,%f46	! (4_0) x_hi0 -= D2ON36;
808
809	sub	%l7,%o7,%o1		! (6_0) diff0 = hy0 - hx0;
810	cmp	%l7,_0x7ff00000		! (6_0) hy0 ? 0x7ff00000
811	bge,pn	%icc,.update22		! (6_0) if ( hy0 >= 0x7ff00000 )
812	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36;
813
814	fmuld	%f52,%f14,%f50		! (2_0) dtmp0 = dd * dres;
815	sra	%o1,31,%o3		! (6_0) j0 = diff0 >> 31;
816	faddd	%f48,%f28,%f48		! (7_1) res0 += dtmp0;
817
818	and	%o1,%o3,%o1		! (6_0) j0 &= diff0;
819	fand	%f26,DA0,%f28		! (0_0) res0 = vis_fand(dres,DA0);
820
821	fmuld	%f46,%f46,%f0		! (4_0) res0_hi = x_hi0 * x_hi0;
822	sub	%l7,%o1,%o4		! (6_0) j0 = hy0 - j0;
823	cmp	%o7,_0x00100000		! (6_0) hx0 ? 0x00100000
824	fsubd	%f10,%f46,%f2		! (4_0) x_lo0 = x0 - x_hi0;
825
826	fmuld	%f54,%f54,%f20		! (4_0) dtmp0 = y_hi0 * y_hi0;
827	and	%o4,%l0,%o4		! (6_0) j0 &= 0x7ff00000;
828	bl,pn	%icc,.update23		! (6_0) if ( hx0 < 0x00100000 )
829	faddd	%f10,%f46,%f62		! (4_0) res0_lo = x0 + x_hi0;
830.cont23a:
831	fmuld	%f16,%f18,%f18		! (1_0) dtmp2 = dd * dres;
832	sub	%l0,%o4,%g1		! (6_0) j0 = 0x7ff00000 - j0;
833	fsubd	DTWO,%f50,%f10		! (2_0) dtmp0 = DTWO - dtmp0;
834.cont23b:
835	fmuld	%f32,%f28,%f50		! (0_0) dtmp0 = res0_hi * res0;
836	sllx	%g1,32,%g1		! (6_0) ll = (long long)j0 << 32;
837	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
838	faddd	%f60,%f54,%f46		! (4_0) dtmp1 = y0 + y_hi0;
839
840	fmuld	%f36,%f28,%f36		! (0_0) dtmp1 = res0_lo * res0;
841	fsubd	%f60,%f54,%f60		! (4_0) y_lo0 = y0 - y_hi0;
842.cont24:
843	fmuld	%f62,%f2,%f2		! (4_0) res0_lo *= x_lo0;
844	ldd	[%fp+dtmp11],%f62	! (5_0) *(long long*)&scl0 = ll;
845	faddd	%f0,%f20,%f32		! (4_0) res0_hi += dtmp0;
846
847	lda	[%i4]%asi,%f0		! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
848	fmuld	%f52,%f10,%f10		! (2_0) dd *= dtmp0;
849
850	lda	[%i4+4]%asi,%f1		! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
851	fsubd	DONE,%f50,%f52		! (0_0) dtmp0 = DONE - dtmp0;
852
853	fmuld	%f46,%f60,%f46		! (4_0) dtmp1 *= y_lo0;
854	lda	[%i3]%asi,%f12		! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
855	fsubd	DTWO,%f18,%f18		! (1_0) dtmp2 = DTWO - dtmp2;
856
857	add	%i1,stridex,%i4		! px += stridex
858	lda	[%i3+4]%asi,%f13	! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
859
860	fmuld	%f10,%f14,%f50		! (2_0) dtmp1 = dd * dres;
861	add	%i4,stridex,%i1		! px += stridex
862	ld	[%fp+ftmp0],%o2		! (3_0) iarr = ((int*)&dres)[0];
! -------------------------------------------------------------------------
! Pipeline warm-up: last pre-loop stages before entering .main_loop.
! Eight hypot() computations are kept in flight simultaneously; each
! instruction's "(N_M)" tag names element slot N (0-7) at pipeline age M.
! Per slot: x/y are scaled by scl0 (exponent ll built from j0), split into
! hi/lo halves via the D2ON36 trick, squared and summed (res0_hi/res0_lo),
! a reciprocal seed is fetched from TBL and refined by Newton iterations
! (dtmp0/dtmp1/dtmp2), then fsqrtd finishes res0 = sqrt(x*x + y*y).
! NOTE(review): lda ...0x82 loads appear to use a non-faulting ASI for the
! speculative next-element reads - confirm against the V9 ASI assignments.
! -------------------------------------------------------------------------
	fand	%f22,DA1,%f54		! (3_0) dexp0 = vis_fand(dres,DA1);

	fmuld	%f0,%f62,%f60		! (5_0) x0 *= scl0;
	fsubd	%f52,%f36,%f20		! (0_0) dtmp0 -= dtmp1;

	fmuld	%f12,%f62,%f52		! (5_0) y0 *= scl0;
	sra	%o2,11,%i3		! (3_0) iarr >>= 11;
	faddd	%f2,%f46,%f36		! (4_0) res0_lo += dtmp1;

	and	%i3,0x1fc,%i3		! (3_0) iarr &= 0x1fc;
	fmuld	%f16,%f18,%f16		! (1_0) dres = dd * dtmp2;

	fsqrtd	%f48,%f18		! (7_1) res0 = sqrt ( res0 );
	add	%i3,TBL,%o4		! (3_0) (char*)dll1 + iarr
	lda	[%i4]0x82,%o1		! (7_0) hx0 = *(int*)px;
	fsubd	DTWO,%f50,%f46		! (2_0) dtmp1 = DTWO - dtmp1;

	fmuld	%f20,%f26,%f48		! (0_0) dtmp0 *= dres;
	add	%i0,stridey,%i3		! py += stridey
	ld	[%o4],%f20		! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
	faddd	%f60,D2ON36,%f50	! (5_0) x_hi0 = x0 + D2ON36;

	lda	[%i3]0x82,%o4		! (7_0) hy0 = *(int*)py;
	add	%i3,stridey,%i0		! py += stridey
	faddd	%f52,D2ON36,%f12	! (5_0) y_hi0 = y0 + D2ON36;

	and	%o1,_0x7fffffff,%o7	! (7_0) hx0 &= 0x7fffffff;
	faddd	%f32,%f36,%f24		! (4_0) dres = res0_hi + res0_lo;

	fmuld	%f10,%f46,%f26		! (2_0) dd *= dtmp1;
	cmp	%o7,_0x7ff00000		! (7_0) hx0 ? 0x7ff00000
	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
	fpsub32	%f20,%f54,%f10		! (3_0) dd = vis_fpsub32(dtmp0, dexp0);

	and	%o4,_0x7fffffff,%l7	! (7_0) hy0 &= 0x7fffffff;
	st	%f24,[%fp+ftmp0]	! (4_0) iarr = ((int*)&dres)[0];
	bge,pn	%icc,.update25		! (7_0) if ( hx0 >= 0x7ff00000 )
	fsubd	%f50,D2ON36,%f20	! (5_0) x_hi0 -= D2ON36;

	sub	%l7,%o7,%o1		! (7_0) diff0 = hy0 - hx0;
	cmp	%l7,_0x7ff00000		! (7_0) hy0 ? 0x7ff00000
	bge,pn	%icc,.update26		! (7_0) if ( hy0 >= 0x7ff00000 )
	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;

	fmuld	%f10,%f22,%f50		! (3_0) dtmp0 = dd * dres;
	sra	%o1,31,%o3		! (7_0) j0 = diff0 >> 31;
	faddd	%f28,%f48,%f48		! (0_0) res0 += dtmp0;

	and	%o1,%o3,%o1		! (7_0) j0 &= diff0;
	fand	%f16,DA0,%f28		! (1_0) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0		! (5_0) res0_hi = x_hi0 * x_hi0;
	sub	%l7,%o1,%o4		! (7_0) j0 = hy0 - j0;
	cmp	%o7,_0x00100000		! (7_0) hx0 ? 0x00100000
	fsubd	%f60,%f20,%f2		! (5_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (5_0) dtmp0 = y_hi0 * y_hi0;
	and	%o4,%l0,%o4		! (7_0) j0 &= 0x7ff00000;
	bl,pn	%icc,.update27		! (7_0) if ( hx0 < 0x00100000 )
	faddd	%f60,%f20,%f62		! (5_0) res0_lo = x0 + x_hi0;
.cont27a:
	fmuld	%f26,%f14,%f14		! (2_0) dtmp2 = dd * dres;
	sub	%l0,%o4,%g1		! (7_0) j0 = 0x7ff00000 - j0;
	fsubd	DTWO,%f50,%f20		! (3_0) dtmp0 = DTWO - dtmp0;
.cont27b:
	fmuld	%f42,%f28,%f60		! (1_0) dtmp0 = res0_hi * res0;
	sllx	%g1,32,%g1		! (7_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
	faddd	%f52,%f54,%f50		! (5_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f28,%f34		! (1_0) dtmp1 = res0_lo * res0;
	fsubd	%f52,%f54,%f54		! (5_0) y_lo0 = y0 - y_hi0;
.cont28:
	fmuld	%f62,%f2,%f2		! (5_0) res0_lo *= x_lo0;
	ldd	[%fp+dtmp13],%f62	! (6_0) *(long long*)&scl0 = ll;
	faddd	%f0,%f46,%f42		! (5_0) res0_hi += dtmp0;

	fmuld	%f10,%f20,%f52		! (3_0) dd *= dtmp0;
	lda	[%i2]%asi,%f10		! (6_0) ((float*)&x0)[0] = ((float*)px)[0];

	lda	[%i2+4]%asi,%f11	! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
	fsubd	DONE,%f60,%f60		! (1_0) dtmp0 = DONE - dtmp0;

	fmuld	%f50,%f54,%f46		! (5_0) dtmp1 *= y_lo0;
	lda	[%o0]%asi,%f12		! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
	fsubd	DTWO,%f14,%f14		! (2_0) dtmp2 = DTWO - dtmp2;

	lda	[%o0+4]%asi,%f13	! (6_0) ((float*)&y0)[1] = ((float*)py)[1];

	fmuld	%f52,%f22,%f50		! (3_0) dtmp1 = dd * dres;
	ld	[%fp+ftmp0],%o2		! (4_0) iarr = ((int*)&dres)[0];
	fand	%f24,DA1,%f54		! (4_0) dexp0 = vis_fand(dres,DA1);

	fmuld	%f10,%f62,%f10		! (6_0) x0 *= scl0;
	ldd	[%fp+dtmp0],%f0		! (7_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f34,%f20		! (1_0) dtmp0 -= dtmp1;

	fmuld	%f12,%f62,%f60		! (6_0) y0 *= scl0;
	sra	%o2,11,%o4		! (4_0) iarr >>= 11;
	faddd	%f2,%f46,%f34		! (5_0) res0_lo += dtmp1;

	and	%o4,0x1fc,%o4		! (4_0) iarr &= 0x1fc;
	fmuld	%f26,%f14,%f26		! (2_0) dres = dd * dtmp2;

	! Fewer than 8 elements remaining: finish the in-flight pipeline
	! stages in .tail instead of entering the unrolled main loop.
	cmp	counter,8
	bl,pn	%icc,.tail
	nop

	ba	.main_loop
	sub	counter,8,counter	! consume 8 elements per main-loop trip
973
! -------------------------------------------------------------------------
! Main 8-way software-pipelined loop, first section: starts element slots
! (0_0)/(1_0) of the current trip while completing slots 2..7 of the
! previous trip and storing two finished results to pz.  "(N_M)" tags name
! element slot N and pipeline age M.  The bn,pn (branch-never) slots are
! never taken; they and the nops pad the instruction groups for the
! hand-tuned issue schedule.  Instructions below are unchanged.
! -------------------------------------------------------------------------
	.align	16
.main_loop:
	fsqrtd	%f48,%f14		! (0_1) res0 = sqrt ( res0 );
	add	%o4,TBL,%o4		! (4_1) (char*)dll1 + iarr
	lda	[%i1]0x82,%o1		! (0_0) hx0 = *(int*)px;
	fsubd	DTWO,%f50,%f46		! (3_1) dtmp1 = DTWO - dtmp1;

	fmuld	%f20,%f16,%f48		! (1_1) dtmp0 *= dres;
	mov	%i1,%i2
	ld	[%o4],%f20		! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
	faddd	%f10,D2ON36,%f50	! (6_1) x_hi0 = x0 + D2ON36;

	nop
	mov	%i0,%o0
	lda	[%i0]0x82,%o4		! (0_0) hy0 = *(int*)py;
	faddd	%f60,D2ON36,%f2		! (6_1) y_hi0 = y0 + D2ON36;

	faddd	%f42,%f34,%f16		! (5_1) dres = res0_hi + res0_lo;
	and	%o1,_0x7fffffff,%o7	! (0_0) hx0 &= 0x7fffffff;
	st	%f16,[%fp+ftmp0]	! (5_1) iarr = ((int*)&dres)[0];
	fmuld	%f0,%f18,%f0		! (7_2) res0 = scl0 * res0;

	fmuld	%f52,%f46,%f18		! (3_1) dd *= dtmp1;
	cmp	%o7,_0x7ff00000		! (0_0) hx0 ? 0x7ff00000
	st	%f0,[%i5]		! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
	fpsub32	%f20,%f54,%f54		! (4_1) dd = vis_fpsub32(dtmp0, dexp0);

	and	%o4,_0x7fffffff,%l7	! (0_0) hy0 &= 0x7fffffff;
	st	%f1,[%i5+4]		! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
	bge,pn	%icc,.update29		! (0_0) if ( hx0 >= 0x7ff00000 )
	fsubd	%f50,D2ON36,%f20	! (6_1) x_hi0 -= D2ON36;

	cmp	%l7,_0x7ff00000		! (0_0) hy0 ? 0x7ff00000
	sub	%l7,%o7,%o1		! (0_0) diff0 = hy0 - hx0;
	bge,pn	%icc,.update30		! (0_0) if ( hy0 >= 0x7ff00000 )
	fsubd	%f2,D2ON36,%f2		! (6_1) y_hi0 -= D2ON36;

	fmuld	%f54,%f24,%f50		! (4_1) dtmp0 = dd * dres;
	sra	%o1,31,%o3		! (0_0) j0 = diff0 >> 31;
	stx	%g1,[%fp+dtmp0]		! (7_1) *(long long*)&scl0 = ll;
	faddd	%f28,%f48,%f52		! (1_1) res0 += dtmp0;

	and	%o1,%o3,%o1		! (0_0) j0 &= diff0;
	cmp	%o7,_0x00100000		! (0_0) hx0 ? 0x00100000
	bl,pn	%icc,.update31		! (0_0) if ( hx0 < 0x00100000 )
	fand	%f26,DA0,%f48		! (2_1) res0 = vis_fand(dres,DA0);
.cont31:
	fmuld	%f20,%f20,%f0		! (6_1) res0_hi = x_hi0 * x_hi0;
	sub	%l7,%o1,%o4		! (0_0) j0 = hy0 - j0;
	nop
	fsubd	%f10,%f20,%f28		! (6_1) x_lo0 = x0 - x_hi0;

	fmuld	%f2,%f2,%f46		! (6_1) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5		! pz += stridez
	and	%o4,%l0,%o4		! (0_0) j0 &= 0x7ff00000;
	faddd	%f10,%f20,%f62		! (6_1) res0_lo = x0 + x_hi0;

	fmuld	%f18,%f22,%f22		! (3_1) dtmp2 = dd * dres;
	sub	%l0,%o4,%o4		! (0_0) j0 = 0x7ff00000 - j0;
	nop
	fsubd	DTWO,%f50,%f20		! (4_1) dtmp0 = DTWO - dtmp0;
.cont32:
	fmuld	%f30,%f48,%f12		! (2_1) dtmp0 = res0_hi * res0;
	sllx	%o4,32,%o4		! (0_0) ll = (long long)j0 << 32;
	stx	%o4,[%fp+dtmp1]		! (0_0) *(long long*)&scl0 = ll;
	faddd	%f60,%f2,%f50		! (6_1) dtmp1 = y0 + y_hi0;

	fmuld	%f40,%f48,%f40		! (2_1) dtmp1 = res0_lo * res0;
	nop
	bn,pn	%icc,.exit
	fsubd	%f60,%f2,%f2		! (6_1) y_lo0 = y0 - y_hi0;

	fmuld	%f62,%f28,%f28		! (6_1) res0_lo *= x_lo0;
	nop
	ldd	[%fp+dtmp15],%f62	! (7_1) *(long long*)&scl0 = ll;
	faddd	%f0,%f46,%f30		! (6_1) res0_hi += dtmp0;

	nop
	nop
	lda	[%i4]%asi,%f10		! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
	fmuld	%f54,%f20,%f54		! (4_1) dd *= dtmp0;

	nop
	nop
	lda	[%i4+4]%asi,%f11	! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
	fsubd	DONE,%f12,%f60		! (2_1) dtmp0 = DONE - dtmp0;

	fmuld	%f50,%f2,%f46		! (6_1) dtmp1 *= y_lo0;
	nop
	lda	[%i3]%asi,%f12		! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
	fsubd	DTWO,%f22,%f22		! (3_1) dtmp2 = DTWO - dtmp2;

	add	%i1,stridex,%i4		! px += stridex
	nop
	lda	[%i3+4]%asi,%f13	! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
	bn,pn	%icc,.exit

	fmuld	%f54,%f24,%f50		! (4_1) dtmp1 = dd * dres;
	add	%i4,stridex,%i1		! px += stridex
	ld	[%fp+ftmp0],%o2		! (5_1) iarr = ((int*)&dres)[0];
	fand	%f16,DA1,%f2		! (5_1) dexp0 = vis_fand(dres,DA1);

	fmuld	%f10,%f62,%f10		! (7_1) x0 *= scl0;
	nop
	ldd	[%fp+dtmp2],%f0		! (0_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f40,%f20		! (2_1) dtmp0 -= dtmp1;

	fmuld	%f12,%f62,%f60		! (7_1) y0 *= scl0;
	sra	%o2,11,%i3		! (5_1) iarr >>= 11;
	nop
	faddd	%f28,%f46,%f40		! (6_1) res0_lo += dtmp1;

	and	%i3,0x1fc,%i3		! (5_1) iarr &= 0x1fc;
	nop
	bn,pn	%icc,.exit
	fmuld	%f18,%f22,%f28		! (3_1) dres = dd * dtmp2;

	fsqrtd	%f52,%f22		! (1_1) res0 = sqrt ( res0 );
	lda	[%i4]0x82,%o1		! (1_0) hx0 = *(int*)px;
	add	%i3,TBL,%g1		! (5_1) (char*)dll1 + iarr
	fsubd	DTWO,%f50,%f62		! (4_1) dtmp1 = DTWO - dtmp1;

	fmuld	%f20,%f26,%f52		! (2_1) dtmp0 *= dres;
	add	%i0,stridey,%i3		! py += stridey
	ld	[%g1],%f26		! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
	faddd	%f10,D2ON36,%f46	! (7_1) x_hi0 = x0 + D2ON36;

	nop
	add	%i3,stridey,%i0		! py += stridey
	lda	[%i3]0x82,%g1		! (1_0) hy0 = *(int*)py;
	faddd	%f60,D2ON36,%f50	! (7_1) y_hi0 = y0 + D2ON36;

	faddd	%f30,%f40,%f18		! (6_1) dres = res0_hi + res0_lo;
	and	%o1,_0x7fffffff,%o7	! (1_0) hx0 &= 0x7fffffff;
	st	%f18,[%fp+ftmp0]	! (6_1) iarr = ((int*)&dres)[0];
	fmuld	%f0,%f14,%f0		! (0_1) res0 = scl0 * res0;

	fmuld	%f54,%f62,%f14		! (4_1) dd *= dtmp1;
	cmp	%o7,_0x7ff00000		! (1_0) hx0 ? 0x7ff00000
	st	%f0,[%i5]		! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];
	fpsub32	%f26,%f2,%f26		! (5_1) dd = vis_fpsub32(dtmp0, dexp0);

	and	%g1,_0x7fffffff,%l7	! (1_0) hy0 &= 0x7fffffff;
	nop
	bge,pn	%icc,.update33		! (1_0) if ( hx0 >= 0x7ff00000 )
	fsubd	%f46,D2ON36,%f20	! (7_1) x_hi0 -= D2ON36;

	cmp	%l7,_0x7ff00000		! (1_0) hy0 ? 0x7ff00000
	sub	%l7,%o7,%o1		! (1_0) diff0 = hy0 - hx0;
	st	%f1,[%i5+4]		! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;

	fmuld	%f26,%f16,%f50		! (5_1) dtmp0 = dd * dres;
	sra	%o1,31,%o3		! (1_0) j0 = diff0 >> 31;
	bge,pn	%icc,.update34		! (1_0) if ( hy0 >= 0x7ff00000 )
	faddd	%f48,%f52,%f52		! (2_1) res0 += dtmp0;

	and	%o1,%o3,%o1		! (1_0) j0 &= diff0;
	add	%i5,stridez,%i5		! pz += stridez
	stx	%o4,[%fp+dtmp2]		! (0_0) *(long long*)&scl0 = ll;
	fand	%f28,DA0,%f48		! (3_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2		! (7_1) res0_hi = x_hi0 * x_hi0;
	sub	%l7,%o1,%o4		! (1_0) j0 = hy0 - j0;
	cmp	%o7,_0x00100000		! (1_0) hx0 ? 0x00100000
	fsubd	%f10,%f20,%f0		! (7_1) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (7_1) dtmp0 = y_hi0 * y_hi0;
	and	%o4,%l0,%o4		! (1_0) j0 &= 0x7ff00000;
	bl,pn	%icc,.update35		! (1_0) if ( hx0 < 0x00100000 )
	faddd	%f10,%f20,%f62		! (7_1) res0_lo = x0 + x_hi0;
.cont35a:
	fmuld	%f44,%f48,%f10		! (3_1) dtmp0 = res0_hi * res0;
	nop
	sub	%l0,%o4,%o4		! (1_0) j0 = 0x7ff00000 - j0;
	fsubd	DTWO,%f50,%f20		! (5_1) dtmp0 = DTWO - dtmp0;
.cont35b:
	fmuld	%f14,%f24,%f24		! (4_1) dtmp2 = dd * dres;
	sllx	%o4,32,%o4		! (1_0) ll = (long long)j0 << 32;
	stx	%o4,[%fp+dtmp3]		! (1_0) *(long long*)&scl0 = ll;
	faddd	%f60,%f54,%f50		! (7_1) dtmp1 = y0 + y_hi0;

	fmuld	%f38,%f48,%f38		! (3_1) dtmp1 = res0_lo * res0;
	nop
	nop
	fsubd	%f60,%f54,%f12		! (7_1) y_lo0 = y0 - y_hi0;
.cont36:
	fmuld	%f62,%f0,%f0		! (7_1) res0_lo *= x_lo0;
	nop
	ldd	[%fp+dtmp1],%f62	! (0_0) *(long long*)&scl0 = ll;
	faddd	%f2,%f46,%f44		! (7_1) res0_hi += dtmp0;

	fsubd	DONE,%f10,%f60		! (3_1) dtmp0 = DONE - dtmp0;
	nop
	lda	[%i2]%asi,%f10		! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
	fmuld	%f26,%f20,%f54		! (5_1) dd *= dtmp0;

	nop
	nop
	lda	[%i2+4]%asi,%f11	! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
	bn,pn	%icc,.exit

	fmuld	%f50,%f12,%f26		! (7_1) dtmp1 *= y_lo0;
	nop
	lda	[%o0]%asi,%f12		! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
	fsubd	DTWO,%f24,%f24		! (4_1) dtmp2 = DTWO - dtmp2;

	nop
	nop
	lda	[%o0+4]%asi,%f13	! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
	bn,pn	%icc,.exit

	fmuld	%f54,%f16,%f46		! (5_1) dtmp1 = dd * dres;
	nop
	ld	[%fp+ftmp0],%o2		! (6_1) iarr = ((int*)&dres)[0];
	fand	%f18,DA1,%f2		! (6_1) dexp0 = vis_fand(dres,DA1);

	fmuld	%f10,%f62,%f10		! (0_0) x0 *= scl0;
	nop
	ldd	[%fp+dtmp4],%f50	! (1_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f38,%f20		! (3_1) dtmp0 -= dtmp1;

	fmuld	%f12,%f62,%f60		! (0_0) y0 *= scl0;
	sra	%o2,11,%g1		! (6_1) iarr >>= 11;
	nop
	faddd	%f0,%f26,%f38		! (7_1) res0_lo += dtmp1;

	nop
	and	%g1,0x1fc,%g1		! (6_1) iarr &= 0x1fc;
	bn,pn	%icc,.exit
	fmuld	%f14,%f24,%f26		! (4_1) dres = dd * dtmp2;
1205
! -------------------------------------------------------------------------
! .main_loop continued: starts element slots (2_0)/(3_0) while completing
! previous-trip work and storing results for slots 1 and 2.  Instructions
! are unchanged; only comments were corrected below (the hy0-mask comment
! at .update37's group previously said "hx0").
! -------------------------------------------------------------------------
	fsqrtd	%f52,%f24		! (2_1) res0 = sqrt ( res0 );
	lda	[%i1]0x82,%o1		! (2_0) hx0 = *(int*)px;
	add	%g1,TBL,%g1		! (6_1) (char*)dll1 + iarr
	fsubd	DTWO,%f46,%f62		! (5_1) dtmp1 = DTWO - dtmp1;

	fmuld	%f20,%f28,%f52		! (3_1) dtmp0 *= dres;
	mov	%i1,%i2
	ld	[%g1],%f28		! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
	faddd	%f10,D2ON36,%f46	! (0_0) x_hi0 = x0 + D2ON36;

	nop
	mov	%i0,%o0
	lda	[%i0]0x82,%g1		! (2_0) hy0 = *(int*)py;
	faddd	%f60,D2ON36,%f12	! (0_0) y_hi0 = y0 + D2ON36;

	faddd	%f44,%f38,%f14		! (7_1) dres = res0_hi + res0_lo;
	and	%o1,_0x7fffffff,%o7	! (2_0) hx0 &= 0x7fffffff;
	st	%f14,[%fp+ftmp0]	! (7_1) iarr = ((int*)&dres)[0];
	fmuld	%f50,%f22,%f0		! (1_1) res0 = scl0 * res0;

	fmuld	%f54,%f62,%f22		! (5_1) dd *= dtmp1;
	cmp	%o7,_0x7ff00000		! (2_0) hx0 ? 0x7ff00000
	st	%f0,[%i5]		! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
	fpsub32	%f28,%f2,%f28		! (6_1) dd = vis_fpsub32(dtmp0, dexp0);

	and	%g1,_0x7fffffff,%l7	! (2_0) hy0 &= 0x7fffffff;
	nop
	bge,pn	%icc,.update37		! (2_0) if ( hx0 >= 0x7ff00000 )
	fsubd	%f46,D2ON36,%f20	! (0_0) x_hi0 -= D2ON36;

	sub	%l7,%o7,%o1		! (2_0) diff0 = hy0 - hx0;
	cmp	%l7,_0x7ff00000		! (2_0) hy0 ? 0x7ff00000
	st	%f1,[%i5+4]		! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;

	fmuld	%f28,%f18,%f50		! (6_1) dtmp0 = dd * dres;
	sra	%o1,31,%o3		! (2_0) j0 = diff0 >> 31;
	bge,pn	%icc,.update38		! (2_0) if ( hy0 >= 0x7ff00000 )
	faddd	%f48,%f52,%f52		! (3_1) res0 += dtmp0;

	and	%o1,%o3,%o1		! (2_0) j0 &= diff0;
	add	%i5,stridez,%i5		! pz += stridez
	stx	%o4,[%fp+dtmp4]		! (1_0) *(long long*)&scl0 = ll;
	fand	%f26,DA0,%f48		! (4_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2		! (0_0) res0_hi = x_hi0 * x_hi0;
	cmp	%o7,_0x00100000		! (2_0) hx0 ? 0x00100000
	sub	%l7,%o1,%o4		! (2_0) j0 = hy0 - j0;
	fsubd	%f10,%f20,%f0		! (0_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (0_0) dtmp0 = y_hi0 * y_hi0;
	and	%o4,%l0,%o4		! (2_0) j0 &= 0x7ff00000;
	bl,pn	%icc,.update39		! (2_0) if ( hx0 < 0x00100000 )
	faddd	%f10,%f20,%f62		! (0_0) res0_lo = x0 + x_hi0;
.cont39a:
	fmuld	%f32,%f48,%f10		! (4_1) dtmp0 = res0_hi * res0;
	sub	%l0,%o4,%g1		! (2_0) j0 = 0x7ff00000 - j0;
	nop
	fsubd	DTWO,%f50,%f20		! (6_1) dtmp0 = DTWO - dtmp0;
.cont39b:
	fmuld	%f22,%f16,%f16		! (5_1) dtmp2 = dd * dres;
	sllx	%g1,32,%g1		! (2_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp5]		! (2_0) *(long long*)&scl0 = ll;
	faddd	%f60,%f54,%f50		! (0_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f48,%f36		! (4_1) dtmp1 = res0_lo * res0;
	nop
	nop
	fsubd	%f60,%f54,%f12		! (0_0) y_lo0 = y0 - y_hi0;
.cont40:
	fmuld	%f62,%f0,%f0		! (0_0) res0_lo *= x_lo0;
	nop
	ldd	[%fp+dtmp3],%f62	! (1_0) *(long long*)&scl0 = ll;
	faddd	%f2,%f46,%f32		! (0_0) res0_hi += dtmp0;

	fsubd	DONE,%f10,%f60		! (4_1) dtmp0 = DONE - dtmp0;
	nop
	lda	[%i4]%asi,%f10		! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
	fmuld	%f28,%f20,%f54		! (6_1) dd *= dtmp0;

	nop
	nop
	lda	[%i4+4]%asi,%f11	! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
	bn,pn	%icc,.exit

	fmuld	%f50,%f12,%f28		! (0_0) dtmp1 *= y_lo0;
	nop
	lda	[%i3]%asi,%f12		! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
	fsubd	DTWO,%f16,%f16		! (5_1) dtmp2 = DTWO - dtmp2;

	add	%i1,stridex,%i4		! px += stridex
	nop
	lda	[%i3+4]%asi,%f13	! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
	bn,pn	%icc,.exit

	fmuld	%f54,%f18,%f46		! (6_1) dtmp1 = dd * dres;
	add	%i4,stridex,%i1		! px += stridex
	ld	[%fp+ftmp0],%o2		! (7_1) iarr = ((int*)&dres)[0];
	fand	%f14,DA1,%f2		! (7_1) dexp0 = vis_fand(dres,DA1);

	fmuld	%f10,%f62,%f10		! (1_0) x0 *= scl0;
	nop
	ldd	[%fp+dtmp6],%f50	! (2_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f36,%f20		! (4_1) dtmp0 -= dtmp1;

	fmuld	%f12,%f62,%f60		! (1_0) y0 *= scl0;
	sra	%o2,11,%i3		! (7_1) iarr >>= 11;
	nop
	faddd	%f0,%f28,%f36		! (0_0) res0_lo += dtmp1;

	and	%i3,0x1fc,%i3		! (7_1) iarr &= 0x1fc;
	nop
	bn,pn	%icc,.exit
	fmuld	%f22,%f16,%f28		! (5_1) dres = dd * dtmp2;

	fsqrtd	%f52,%f16		! (3_1) res0 = sqrt ( res0 );
	add	%i3,TBL,%o4		! (7_1) (char*)dll1 + iarr
	lda	[%i4]0x82,%o1		! (3_0) hx0 = *(int*)px;
	fsubd	DTWO,%f46,%f62		! (6_1) dtmp1 = DTWO - dtmp1;

	fmuld	%f20,%f26,%f52		! (4_1) dtmp0 *= dres;
	add	%i0,stridey,%i3		! py += stridey
	ld	[%o4],%f26		! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
	faddd	%f10,D2ON36,%f46	! (1_0) x_hi0 = x0 + D2ON36;

	nop
	add	%i3,stridey,%i0		! py += stridey
	lda	[%i3]0x82,%o4		! (3_0) hy0 = *(int*)py;
	faddd	%f60,D2ON36,%f12	! (1_0) y_hi0 = y0 + D2ON36;

	faddd	%f32,%f36,%f22		! (0_0) dres = res0_hi + res0_lo;
	and	%o1,_0x7fffffff,%o7	! (3_0) hx0 &= 0x7fffffff;
	st	%f22,[%fp+ftmp0]	! (0_0) iarr = ((int*)&dres)[0];
	fmuld	%f50,%f24,%f0		! (2_1) res0 = scl0 * res0;

	fmuld	%f54,%f62,%f24		! (6_1) dd *= dtmp1;
	cmp	%o7,_0x7ff00000		! (3_0) hx0 ? 0x7ff00000
	st	%f0,[%i5]		! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
	fpsub32	%f26,%f2,%f26		! (7_1) dd = vis_fpsub32(dtmp0, dexp0);

	and	%o4,_0x7fffffff,%l7	! (3_0) hy0 &= 0x7fffffff;
	nop
	bge,pn	%icc,.update41		! (3_0) if ( hx0 >= 0x7ff00000 )
	fsubd	%f46,D2ON36,%f20	! (1_0) x_hi0 -= D2ON36;

	sub	%l7,%o7,%o1		! (3_0) diff0 = hy0 - hx0;
	cmp	%l7,_0x7ff00000		! (3_0) hy0 ? 0x7ff00000
	st	%f1,[%i5+4]		! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;

	fmuld	%f26,%f14,%f50		! (7_1) dtmp0 = dd * dres;
	sra	%o1,31,%o3		! (3_0) j0 = diff0 >> 31;
	bge,pn	%icc,.update42		! (3_0) if ( hy0 >= 0x7ff00000 )
	faddd	%f48,%f52,%f52		! (4_1) res0 += dtmp0;

	and	%o1,%o3,%o1		! (3_0) j0 &= diff0;
	add	%i5,stridez,%i5		! pz += stridez
	stx	%g1,[%fp+dtmp6]		! (2_0) *(long long*)&scl0 = ll;
	fand	%f28,DA0,%f48		! (5_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2		! (1_0) res0_hi = x_hi0 * x_hi0;
	cmp	%o7,_0x00100000		! (3_0) hx0 ? 0x00100000
	sub	%l7,%o1,%o4		! (3_0) j0 = hy0 - j0;
	fsubd	%f10,%f20,%f0		! (1_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (1_0) dtmp0 = y_hi0 * y_hi0;
	and	%o4,%l0,%o4		! (3_0) j0 &= 0x7ff00000;
	bl,pn	%icc,.update43		! (3_0) if ( hx0 < 0x00100000 )
	faddd	%f10,%f20,%f62		! (1_0) res0_lo = x0 + x_hi0;
.cont43a:
	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;
	nop
	sub	%l0,%o4,%g1		! (3_0) j0 = 0x7ff00000 - j0;
	fsubd	DTWO,%f50,%f20		! (7_1) dtmp0 = DTWO - dtmp0;
.cont43b:
	fmuld	%f24,%f18,%f18		! (6_1) dtmp2 = dd * dres;
	sllx	%g1,32,%g1		! (3_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp7]		! (3_0) *(long long*)&scl0 = ll;
	faddd	%f60,%f54,%f50		! (1_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f48,%f34		! (5_1) dtmp1 = res0_lo * res0;
	nop
	nop
	fsubd	%f60,%f54,%f12		! (1_0) y_lo0 = y0 - y_hi0;
.cont44:
	fmuld	%f62,%f0,%f0		! (1_0) res0_lo *= x_lo0;
	nop
	ldd	[%fp+dtmp5],%f62	! (2_0) *(long long*)&scl0 = ll;
	faddd	%f2,%f46,%f42		! (1_0) res0_hi += dtmp0;

	fsubd	DONE,%f10,%f60		! (5_1) dtmp0 = DONE - dtmp0;
	nop
	lda	[%i2]%asi,%f10		! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
	fmuld	%f26,%f20,%f54		! (7_1) dd *= dtmp0;

	nop
	nop
	lda	[%i2+4]%asi,%f11	! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
	bn,pn	%icc,.exit

	fmuld	%f50,%f12,%f26		! (1_0) dtmp1 *= y_lo0;
	nop
	lda	[%o0]%asi,%f12		! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
	fsubd	DTWO,%f18,%f20		! (6_1) dtmp2 = DTWO - dtmp2;

	nop
	nop
	lda	[%o0+4]%asi,%f13	! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
	bn,pn	%icc,.exit

	fmuld	%f54,%f14,%f50		! (7_1) dtmp1 = dd * dres;
	nop
	ld	[%fp+ftmp0],%o2		! (0_0) iarr = ((int*)&dres)[0];
	fand	%f22,DA1,%f2		! (0_0) dexp0 = vis_fand(dres,DA1);

	fmuld	%f10,%f62,%f10		! (2_0) x0 *= scl0;
	nop
	ldd	[%fp+dtmp8],%f18	! (3_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f34,%f46		! (5_1) dtmp0 -= dtmp1;

	fmuld	%f12,%f62,%f60		! (2_0) y0 *= scl0;
	sra	%o2,11,%o4		! (0_0) iarr >>= 11;
	nop
	faddd	%f0,%f26,%f34		! (1_0) res0_lo += dtmp1;

	and	%o4,0x1fc,%o4		! (0_0) iarr &= 0x1fc;
	nop
	bn,pn	%icc,.exit
	fmuld	%f24,%f20,%f26		! (0_0) dres = dd * dtmp2;
1435
! -------------------------------------------------------------------------
! .main_loop continued: starts element slots (4_0)/(5_0), completes
! previous-trip slots and stores results for slots 3 and 4, then begins
! slot (6_0).  Instructions are unchanged; the first fmuld comment below
! was corrected (it is "dtmp0 *= dres", matching the other (N_1) stages -
! the old "dtmp0 -= dtmp1" text belonged to the preceding fsubd stage).
! -------------------------------------------------------------------------
	fsqrtd	%f52,%f24		! (4_1) res0 = sqrt ( res0 );
	add	%o4,TBL,%o4		! (0_0) (char*)dll1 + iarr
	lda	[%i1]0x82,%o1		! (4_0) hx0 = *(int*)px;
	fsubd	DTWO,%f50,%f20		! (7_1) dtmp1 = DTWO - dtmp1;

	fmuld	%f46,%f28,%f52		! (5_1) dtmp0 *= dres;
	mov	%i1,%i2
	ld	[%o4],%f28		! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
	faddd	%f10,D2ON36,%f46	! (2_0) x_hi0 = x0 + D2ON36;

	nop
	mov	%i0,%o0
	lda	[%i0]0x82,%o4		! (4_0) hy0 = *(int*)py;
	faddd	%f60,D2ON36,%f50	! (2_0) y_hi0 = y0 + D2ON36;

	fmuld	%f18,%f16,%f0		! (3_1) res0 = scl0 * res0;
	nop
	and	%o1,_0x7fffffff,%o7	! (4_0) hx0 &= 0x7fffffff;
	faddd	%f42,%f34,%f18		! (1_0) dres = res0_hi + res0_lo;

	fmuld	%f54,%f20,%f16		! (7_1) dd *= dtmp1;
	cmp	%o7,_0x7ff00000		! (4_0) hx0 ? 0x7ff00000
	st	%f18,[%fp+ftmp0]	! (1_0) iarr = ((int*)&dres)[0];
	fpsub32	%f28,%f2,%f28		! (0_0) dd = vis_fpsub32(dtmp0, dexp0);

	and	%o4,_0x7fffffff,%l7	! (4_0) hy0 &= 0x7fffffff;
	st	%f0,[%i5]		! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
	bge,pn	%icc,.update45		! (4_0) if ( hx0 >= 0x7ff00000 )
	fsubd	%f46,D2ON36,%f20	! (2_0) x_hi0 -= D2ON36;

	sub	%l7,%o7,%o1		! (4_0) diff0 = hy0 - hx0;
	cmp	%l7,_0x7ff00000		! (4_0) hy0 ? 0x7ff00000
	bge,pn	%icc,.update46		! (4_0) if ( hy0 >= 0x7ff00000 )
	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;

	fmuld	%f28,%f22,%f50		! (0_0) dtmp0 = dd * dres;
	sra	%o1,31,%o3		! (4_0) j0 = diff0 >> 31;
	st	%f1,[%i5+4]		! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52		! (5_1) res0 += dtmp0;

	and	%o1,%o3,%o1		! (4_0) j0 &= diff0;
	cmp	%o7,_0x00100000		! (4_0) hx0 ? 0x00100000
	bl,pn	%icc,.update47		! (4_0) if ( hx0 < 0x00100000 )
	fand	%f26,DA0,%f48		! (6_1) res0 = vis_fand(dres,DA0);
.cont47a:
	fmuld	%f20,%f20,%f2		! (2_0) res0_hi = x_hi0 * x_hi0;
	sub	%l7,%o1,%o4		! (4_0) j0 = hy0 - j0;
	stx	%g1,[%fp+dtmp8]		! (3_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0		! (2_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (2_0) dtmp0 = y_hi0 * y_hi0;
	and	%o4,%l0,%o4		! (4_0) j0 &= 0x7ff00000;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f10,%f20,%f62		! (2_0) res0_lo = x0 + x_hi0;

	fmuld	%f30,%f48,%f10		! (6_1) dtmp0 = res0_hi * res0;
	nop
	sub	%l0,%o4,%g1		! (4_0) j0 = 0x7ff00000 - j0;
	fsubd	DTWO,%f50,%f20		! (0_0) dtmp0 = DTWO - dtmp0;
.cont47b:
	fmuld	%f16,%f14,%f14		! (7_1) dtmp2 = dd * dres;
	sllx	%g1,32,%g1		! (4_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp9]		! (4_0) *(long long*)&scl0 = ll;
	faddd	%f60,%f54,%f50		! (2_0) dtmp1 = y0 + y_hi0;

	fmuld	%f40,%f48,%f40		! (6_1) dtmp1 = res0_lo * res0;
	nop
	nop
	fsubd	%f60,%f54,%f12		! (2_0) y_lo0 = y0 - y_hi0;
.cont48:
	fmuld	%f62,%f0,%f0		! (2_0) res0_lo *= x_lo0;
	nop
	ldd	[%fp+dtmp7],%f62	! (3_0) *(long long*)&scl0 = ll;
	faddd	%f2,%f46,%f30		! (2_0) res0_hi += dtmp0;

	fsubd	DONE,%f10,%f60		! (6_1) dtmp0 = DONE - dtmp0;
	nop
	lda	[%i4]%asi,%f10		! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
	fmuld	%f28,%f20,%f54		! (0_0) dd *= dtmp0;

	nop
	nop
	lda	[%i4+4]%asi,%f11	! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
	bn,pn	%icc,.exit

	fmuld	%f50,%f12,%f28		! (2_0) dtmp1 *= y_lo0;
	nop
	lda	[%i3]%asi,%f12		! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
	fsubd	DTWO,%f14,%f20		! (7_1) dtmp2 = DTWO - dtmp2;

	lda	[%i3+4]%asi,%f13	! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
	add	%i1,stridex,%i4		! px += stridex
	nop
	bn,pn	%icc,.exit

	fmuld	%f54,%f22,%f50		! (0_0) dtmp1 = dd * dres;
	add	%i4,stridex,%i1		! px += stridex
	ld	[%fp+ftmp0],%o2		! (1_0) iarr = ((int*)&dres)[0];
	fand	%f18,DA1,%f2		! (1_0) dexp0 = vis_fand(dres,DA1);

	fmuld	%f10,%f62,%f10		! (3_0) x0 *= scl0;
	nop
	ldd	[%fp+dtmp10],%f14	! (4_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f40,%f46		! (6_1) dtmp0 -= dtmp1;

	fmuld	%f12,%f62,%f60		! (3_0) y0 *= scl0;
	sra	%o2,11,%i3		! (1_0) iarr >>= 11;
	nop
	faddd	%f0,%f28,%f40		! (2_0) res0_lo += dtmp1;

	and	%i3,0x1fc,%i3		! (1_0) iarr &= 0x1fc;
	nop
	bn,pn	%icc,.exit
	fmuld	%f16,%f20,%f28		! (7_1) dres = dd * dtmp2;

	fsqrtd	%f52,%f16		! (5_1) res0 = sqrt ( res0 );
	add	%i3,TBL,%o4		! (1_0) (char*)dll1 + iarr
	lda	[%i4]0x82,%o1		! (5_0) hx0 = *(int*)px;
	fsubd	DTWO,%f50,%f20		! (0_0) dtmp1 = DTWO - dtmp1;

	fmuld	%f46,%f26,%f52		! (6_1) dtmp0 *= dres;
	add	%i0,stridey,%i3		! py += stridey
	ld	[%o4],%f26		! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
	faddd	%f10,D2ON36,%f46	! (3_0) x_hi0 = x0 + D2ON36;

	nop
	add	%i3,stridey,%i0		! py += stridey
	lda	[%i3]0x82,%o4		! (5_0) hy0 = *(int*)py;
	faddd	%f60,D2ON36,%f50	! (3_0) y_hi0 = y0 + D2ON36;

	fmuld	%f14,%f24,%f0		! (4_1) res0 = scl0 * res0;
	and	%o1,_0x7fffffff,%o7	! (5_0) hx0 &= 0x7fffffff;
	nop
	faddd	%f30,%f40,%f14		! (2_0) dres = res0_hi + res0_lo;

	fmuld	%f54,%f20,%f24		! (0_0) dd *= dtmp1;
	cmp	%o7,_0x7ff00000		! (5_0) hx0 ? 0x7ff00000
	st	%f14,[%fp+ftmp0]	! (2_0) iarr = ((int*)&dres)[0];
	fpsub32	%f26,%f2,%f26		! (1_0) dd = vis_fpsub32(dtmp0, dexp0);

	and	%o4,_0x7fffffff,%l7	! (5_0) hy0 &= 0x7fffffff;
	st	%f0,[%i5]		! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
	bge,pn	%icc,.update49		! (5_0) if ( hx0 >= 0x7ff00000 )
	fsubd	%f46,D2ON36,%f20	! (3_0) x_hi0 -= D2ON36;

	sub	%l7,%o7,%o1		! (5_0) diff0 = hy0 - hx0;
	cmp	%l7,_0x7ff00000		! (5_0) hy0 ? 0x7ff00000
	bge,pn	%icc,.update50		! (5_0) if ( hy0 >= 0x7ff00000 )
	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;

	fmuld	%f26,%f18,%f50		! (1_0) dtmp0 = dd * dres;
	sra	%o1,31,%o3		! (5_0) j0 = diff0 >> 31;
	st	%f1,[%i5+4]		! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52		! (6_1) res0 += dtmp0;

	and	%o1,%o3,%o1		! (5_0) j0 &= diff0;
	cmp	%o7,_0x00100000		! (5_0) hx0 ? 0x00100000
	bl,pn	%icc,.update51		! (5_0) if ( hx0 < 0x00100000 )
	fand	%f28,DA0,%f48		! (7_1) res0 = vis_fand(dres,DA0);
.cont51a:
	fmuld	%f20,%f20,%f2		! (3_0) res0_hi = x_hi0 * x_hi0;
	sub	%l7,%o1,%o4		! (5_0) j0 = hy0 - j0;
	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0		! (3_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46		! (3_0) dtmp0 = y_hi0 * y_hi0;
	and	%o4,%l0,%o4		! (5_0) j0 &= 0x7ff00000;
	add	%i5,stridez,%i5		! pz += stridez
	faddd	%f10,%f20,%f62		! (3_0) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10		! (7_1) dtmp0 = res0_hi * res0;
	sub	%l0,%o4,%g1		! (5_0) j0 = 0x7ff00000 - j0;
	nop
	fsubd	DTWO,%f50,%f20		! (1_0) dtmp0 = DTWO - dtmp0;
.cont51b:
	fmuld	%f24,%f22,%f22		! (0_0) dtmp2 = dd * dres;
	sllx	%g1,32,%g1		! (5_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
	faddd	%f60,%f54,%f50		! (3_0) dtmp1 = y0 + y_hi0;

	fmuld	%f38,%f48,%f38		! (7_1) dtmp1 = res0_lo * res0;
	nop
	nop
	fsubd	%f60,%f54,%f12		! (3_0) y_lo0 = y0 - y_hi0;
.cont52:
	fmuld	%f62,%f0,%f0		! (3_0) res0_lo *= x_lo0;
	nop
	ldd	[%fp+dtmp9],%f62	! (4_0) *(long long*)&scl0 = ll;
	faddd	%f2,%f46,%f44		! (3_0) res0_hi += dtmp0;

	fsubd	DONE,%f10,%f60		! (7_1) dtmp0 = DONE - dtmp0;
	nop
	lda	[%i2]%asi,%f10		! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
	fmuld	%f26,%f20,%f54		! (1_0) dd *= dtmp0;

	nop
	nop
	lda	[%i2+4]%asi,%f11	! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
	bn,pn	%icc,.exit

	fmuld	%f50,%f12,%f26		! (3_0) dtmp1 *= y_lo0;
	nop
	lda	[%o0]%asi,%f12		! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
	fsubd	DTWO,%f22,%f20		! (0_0) dtmp2 = DTWO - dtmp2;

	nop
	nop
	lda	[%o0+4]%asi,%f13	! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
	bn,pn	%icc,.exit

	fmuld	%f54,%f18,%f50		! (1_0) dtmp1 = dd * dres;
	nop
	ld	[%fp+ftmp0],%o2		! (2_0) iarr = ((int*)&dres)[0];
	fand	%f14,DA1,%f2		! (2_0) dexp0 = vis_fand(dres,DA1);

	fmuld	%f10,%f62,%f10		! (4_0) x0 *= scl0;
	nop
	ldd	[%fp+dtmp12],%f22	! (5_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f38,%f46		! (7_1) dtmp0 -= dtmp1;

	fmuld	%f12,%f62,%f60		! (4_0) y0 *= scl0;
	sra	%o2,11,%o4		! (2_0) iarr >>= 11;
	nop
	faddd	%f0,%f26,%f38		! (3_0) res0_lo += dtmp1;

	and	%o4,0x1fc,%o4		! (2_0) iarr &= 0x1fc;
	nop
	bn,pn	%icc,.exit
	fmuld	%f24,%f20,%f26		! (0_0) dres = dd * dtmp2;

	fsqrtd	%f52,%f24		! (6_1) res0 = sqrt ( res0 );
	add	%o4,TBL,%o4		! (2_0) (char*)dll1 + iarr
	lda	[%i1]0x82,%o1		! (6_0) hx0 = *(int*)px;
	fsubd	DTWO,%f50,%f52		! (1_0) dtmp1 = DTWO - dtmp1;

	fmuld	%f46,%f28,%f28		! (7_1) dtmp0 *= dres;
	mov	%i1,%i2
	ld	[%o4],%f20		! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
	faddd	%f10,D2ON36,%f46	! (4_0) x_hi0 = x0 + D2ON36;
1675
1676