1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vatan2f.S"
30
31#include "libm.h"
32
33	RO_DATA
34	.align	64
35.CONST_TBL:				! doubles stored as two 32-bit words, high word first; (+N) = byte offset from .CONST_TBL
36	.word	0xbff921fb, 0x54442d18	! -M_PI_2	(+0)  cadd: |x|<|y|,  x sign set,   y sign set
37	.word	0x3ff921fb, 0x54442d18	!  M_PI_2	(+8)  cadd: |x|<|y|,  x sign set,   y sign clear
38	.word	0xbff921fb, 0x54442d18	! -M_PI_2	(+16) cadd: |x|<|y|,  x sign clear, y sign set
39	.word	0x3ff921fb, 0x54442d18	!  M_PI_2	(+24) cadd: |x|<|y|,  x sign clear, y sign clear
40	.word	0xc00921fb, 0x54442d18	! -M_PI	(+32) cadd: |x|>=|y|, x sign set,   y sign set
41	.word	0x400921fb, 0x54442d18	!  M_PI	(+40) cadd: |x|>=|y|, x sign set,   y sign clear
42	.word	0x80000000, 0x00000000	! -0.0	(+48) cadd: |x|>=|y|, x sign clear, y sign set
43	.word	0x00000000, 0x00000000	!  0.0	(+56) cadd: |x|>=|y|, x sign clear, y sign clear; cadd_arr points here
44
45	.word	0xbff00000, 0x00000000	! -1.0	(+64) cmul when |x|<|y| (loaded as cmul_arr - 8)
46	.word	0x3ff00000, 0x00000000	!  1.0	(+72) cmul when |x|>=|y|; cmul_arr points here
47
48	.word	0x3fefffff, 0xfe79bf93	! K0 =  9.99999997160545464888e-01	(+80)  polynomial coefficients, loaded once into K0..K9
49	.word	0xbfd55552, 0xf0db4320	! K1 = -3.33332762919825514315e-01	(+88)
50	.word	0x3fc998f8, 0x2493d066	! K2 =  1.99980752811487135558e-01	(+96)
51	.word	0xbfc240b8, 0xd994abf9	! K3 = -1.42600160828209047720e-01	(+104)
52	.word	0x3fbbfc9e, 0x8c2b0243	! K4 =  1.09323415013030928421e-01	(+112)
53	.word	0xbfb56013, 0x64b1cac3	! K5 = -8.34972496830160174704e-02	(+120)
54	.word	0x3fad3ad7, 0x9f53e142	! K6 =  5.70895559303061900411e-02	(+128)
55	.word	0xbf9f148f, 0x2a829af1	! K7 = -3.03518647857811706139e-02	(+136)
56	.word	0x3f857a8c, 0x747ed314	! K8 =  1.04876492549493055747e-02	(+144)
57	.word	0xbf5bdf39, 0x729124b6	! K9 = -1.70117006406859722727e-03	(+152)
58
59	.word	0x3fe921fb, 0x54442d18	! M_PI_4	(+160) multiplier in the Inf/NaN path of the algorithm notes (res = signx0 * M_PI_4)
60	.word	0x36a00000, 0x00000000	! 2^(-149)	(+168) presumably C2ONM149 of the algorithm notes (denormal scaling) - TODO confirm
61
62#define counter		%o3	/* elements left in the current pass; reloaded from tmp_counter at .begin */
63#define stridex		%i4	/* x stride; prologue scales it to bytes (<< 2) */
64#define stridey		%i5	/* y stride in bytes; prologue derives it from incoming %i2 after copying incoming %i5 to %o4 */
65#define stridez		%l1	/* z stride in bytes; prologue derives it from the stack-passed argument (<< 2) */
66#define cmul_arr	%i0	/* .CONST_TBL+72 (1.0); the -1.0 entry sits 8 bytes below */
67#define cadd_arr	%i2	/* .CONST_TBL+56 (0.0); the other additive constants sit at negative offsets */
68#define _0x7fffffff	%l0	/* mask that clears the float sign bit */
69#define _0x7f800000	%l2	/* single-precision Inf bit pattern; abs values >= this are Inf/NaN */
70
71#define K0		%f42	/* K0..K9: double-precision polynomial coefficients, loaded from .CONST_TBL+80 onward */
72#define K1		%f44
73#define K2		%f46
74#define K3		%f48
75#define K4		%f50
76#define K5		%f52
77#define K6		%f54
78#define K7		%f56
79#define K8		%f58
80#define K9		%f60
81
82#define tmp_counter	STACK_BIAS-32	/* %fp-relative slot, accessed with 32-bit st/ld: saved element count */
83#define tmp_py		STACK_BIAS-24	/* %fp-relative slot, accessed with stx/ldx: saved y pointer */
84#define tmp_px		STACK_BIAS-16	/* %fp-relative slot, accessed with stx/ldx: saved x pointer */
85#define tmp_pz		STACK_BIAS-8	/* %fp-relative slot, accessed with stx/ldx: saved z (output) pointer */
86
87! sizeof temp storage - must be a multiple of 16 for V9 (0x20 covers the four 8-byte tmp_* slots above)
88#define tmps		0x20
89
90!--------------------------------------------------------------------
91!		!!!!!	vatan2f algorithm	!!!!!
92!	uy0 = *(int*)py;
93!	ux0 = *(int*)px;
94!	ay0 = uy0 & 0x7fffffff;
95!	ax0 = ux0 & 0x7fffffff;
96!	if ( ax0 >= 0x7f800000 || ay0 >= 0x7f800000 )
97!	{
98!		/* |X| or |Y| = Inf or NaN */
99!		if ( ax0 > 0x7f800000 || ay0 > 0x7f800000 )
100!		{
101!			ftmp0 = *(float*)&ax0 * *(float*)&ay0;
102!			*pz = ftmp0;
103!		}
104!		signx0 = (unsigned)ux0 >> 30;
105!		signx0 &= 2;
106!		signy0 = uy0 >> 31;
107!		if (ay0 == 0x7f800000)
108!			signx0 = (ax0 == 0x7f800000) ? signx0 + 1 : 2;
109!		else
110!			signx0 += signx0;
111!		res = signx0 * M_PI_4;
112!		signy0 <<= 3;
113!		dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0);
114!		res *= dtmp0;
115!		ftmp0 = (float) res;
116!		*pz = ftmp0;
117!		goto next;
118!	}
119!	if ( ax0 == 0 && ay0 == 0 )
120!	{
121!		signy0 = uy0 >> 28;
122!		signx0 = ux0 >> 27;
123!		ldiff0 = ax0 - ay0;
124!		ldiff0 >>= 31;
125!		signx0 &= -16;
126!		signy0 &= -8;
127!		ldiff0 <<= 5;
128!		signx0 += signy0;
129!		res = *(double*)((char*)(cadd_arr + 7) + ldiff0 + signx0 + signy0);
130!		ftmp0 = (float) res;
131!		*pz = ftmp0;
132!		goto next;
133!	}
134!	ldiff0 = ax0 - ay0;
135!	ldiff0 >>= 31;
136!	addrc0 = (char*)px - (char*)py;
137!	addrc0 &= ldiff0;
138!	fy0 = *(float*)((char*)py + addrc0);
139!	fx0 = *(float*)((char*)px - addrc0);
140!	itmp0 = *(int*)&fy0;
141!	if((itmp0 & 0x7fffffff) < 0x00800000)
142!	{
143!		itmp0 >>= 28;
144!		itmp0 &= -8;
145!		fy0 = fabsf(fy0);
146!		dtmp0 = (double) *(int*)&fy0;
147!		dtmp0 *= C2ONM149;
148!		dsign = *(double*)((char*)cmul_arr + itmp0);
149!		dtmp0 *= dsign;
150!		y0 = dtmp0;
151!	}
152!	else
153!		y0 = (double)fy0;
154!	itmp0 = *(int*)&fx0;
155!	if((itmp0 & 0x7fffffff) < 0x00800000)
156!	{
157!		itmp0 >>= 28;
158!		itmp0 &= -8;
159!		fx0 = fabsf(fx0);
160!		dtmp0 = (double) *(int*)&fx0;
161!		dtmp0 *= C2ONM149;
162!		dsign = *(double*)((char*)cmul_arr + itmp0);
163!		dtmp0 *= dsign;
164!		x0 = dtmp0;
165!	}
166!	else
167!		x0 = (double)fx0;
168!	px += stridex;
169!	py += stridey;
170!	x0 = y0 / x0;
171!	x20 = x0 * x0;
172!	dtmp0 = K9 * x20;
173!	dtmp0 += K8;
174!	dtmp0 *= x20;
175!	dtmp0 += K7;
176!	dtmp0 *= x20;
177!	dtmp0 += K6;
178!	dtmp0 *= x20;
179!	dtmp0 += K5;
180!	dtmp0 *= x20;
181!	dtmp0 += K4;
182!	dtmp0 *= x20;
183!	dtmp0 += K3;
184!	dtmp0 *= x20;
185!	dtmp0 += K2;
186!	dtmp0 *= x20;
187!	dtmp0 += K1;
188!	dtmp0 *= x20;
189!	dtmp0 += K0;
190!	x0 = dtmp0 * x0;
191!	signy0 = uy0 >> 28;
192!	signy0 &= -8;
193!	signx0 = ux0 >> 27;
194!	signx0 &= -16;
195!	ltmp0 = ldiff0 << 5;
196!	ltmp0 += (char*)cadd_arr;
197!	ltmp0 += signx0;
198!	cadd0 = *(double*)(ltmp0 + signy0);
199!	cmul0_ind = ldiff0 << 3;
200!	cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
201!	dtmp0 = cmul0 * x0;
202!	dtmp0 = cadd0 + dtmp0;
203!	ftmp0 = (float)dtmp0;
204!	*pz = ftmp0;
205!	pz += stridez;
206!
207!--------------------------------------------------------------------
208
209	ENTRY(__vatan2f)
210	save	%sp,-SA(MINFRAME)-tmps,%sp
211	PIC_SETUP(l7)
212	PIC_SET(l7,.CONST_TBL,g5)
213
214#ifdef __sparcv9
215	ldx	[%fp+STACK_BIAS+176],%l7
216#else
217	ld	[%fp+STACK_BIAS+92],%l7
218#endif
219
220	st	%i0,[%fp+tmp_counter]
221	sethi	%hi(0x7ffffc00),_0x7fffffff
222	add	_0x7fffffff,1023,_0x7fffffff
223	or	%g0,%i2,%o2
224	sll	%l7,2,stridez
225
226	sethi	%hi(0x7f800000),_0x7f800000
227	mov	%g5,%g1
228
229	or	%g0,stridey,%o4
230	add	%g1,56,cadd_arr
231
232	sll	%o2,2,stridey
233	add	%g1,72,cmul_arr
234
235	ldd	[%g1+80],K0
236	ldd	[%g1+80+8],K1
237	ldd	[%g1+80+16],K2
238	ldd	[%g1+80+24],K3
239	ldd	[%g1+80+32],K4
240	ldd	[%g1+80+40],K5
241	ldd	[%g1+80+48],K6
242	ldd	[%g1+80+56],K7
243	ldd	[%g1+80+64],K8
244	ldd	[%g1+80+72],K9
245
246	sll	stridex,2,stridex
247
248	stx	%i1,[%fp+tmp_py]
249	stx	%i3,[%fp+tmp_px]
250.begin:
251	ld	[%fp+tmp_counter],counter
252	ldx	[%fp+tmp_py],%i1
253	ldx	[%fp+tmp_px],%i3
254	st	%g0,[%fp+tmp_counter]
255.begin1:
256	subcc	counter,1,counter
257	bneg,pn	%icc,.exit
258	nop
259
260	lda	[%i1]0x82,%l4		! (0_0) uy0 = *(int*)py;
261
262	lda	[%i3]0x82,%l3		! (0_0) ux0 = *(int*)px;
263
264	and	%l4,_0x7fffffff,%l7	! (0_0) ay0 = uy0 & 0x7fffffff;
265
266	cmp	%l7,_0x7f800000
267	bge,pn	%icc,.spec0
268	and	%l3,_0x7fffffff,%l6	! (0_0) ax0 = ux0 & 0x7fffffff;
269
270	cmp	%l6,_0x7f800000
271	bge,pn	%icc,.spec0
272	sethi	%hi(0x00800000),%o5
273
274	cmp	%l6,%o5
275	bl,pn	%icc,.spec1
276	sub	%l6,%l7,%o2		! (0_0) ldiff0 = ax0 - ay0;
277
278	cmp	%l7,%o5
279	bl,pn	%icc,.spec1
280	nop
281
282	stx	%o4,[%fp+tmp_pz]
283	sra	%o2,31,%l7		! (0_0) ldiff0 >>= 31;
284	sub	%i3,%i1,%l6		! (0_0) addrc0 = (char*)px - (char*)py;
285
286	and	%l6,%l7,%o2		! (0_0) addrc0 &= ldiff0;
287
288	lda	[%i1+%o2]0x82,%f0	! (0_0) fy0 = *(float*)((char*)py + addrc0);
289	sub	%i3,%o2,%o4		! (0_0) (char*)px - addrc0
290
291	lda	[%o4]0x82,%f2		! (0_0) fx0 = *(float*)((char*)px - addrc0);
292	sll	%l7,5,%l6		! (0_0) ltmp0 = ldiff0 << 5;
293
294	sra	%l3,27,%o5		! (0_0) signx0 = ux0 >> 27;
295	add	%i1,stridey,%i1		! py += stridey
296
297	add	%i3,stridex,%i3		! px += stridex
298
299	lda	[%i1]0x82,%l3		! (1_0) uy0 = *(int*)py;
300	sra	%l4,28,%o4		! (0_0) signy0 = uy0 >> 28;
301
302	add	%l6,cadd_arr,%l6	! (0_0) ltmp0 += (char*)cadd_arr;
303
304	fstod	%f0,%f40		! (0_0) y0 = (double)fy0;
305
306	fstod	%f2,%f2			! (0_0) x0 = (double)fx0;
307
308.spec1_cont:
309	lda	[%i3]0x82,%l4		! (1_0) ux0 = *(int*)px;
310	and	%o5,-16,%o5		! (0_0) signx0 &= -16;
311
312	and	%o4,-8,%o4		! (0_0) signy0 &= -8;
313
314	fdivd	%f40,%f2,%f12		! (0_0) x0 = y0 / x0;
315
316	add	%l6,%o5,%o1		! (0_0) ltmp0 += signx0;
317
318	and	%l4,_0x7fffffff,%l6	! (1_0) ax0 = ux0 & 0x7fffffff;
319	sethi	%hi(0x00800000),%o5
320
321	cmp	%l6,%o5
322	bl,pn	%icc,.u0
323	and	%l3,_0x7fffffff,%g1	! (1_0) ay0 = uy0 & 0x7fffffff;
324.c0:
325	cmp	%g1,%o5
326	bl,pn	%icc,.u1
327	ldd	[%o1+%o4],%f34		! (0_0) cadd0 = *(double*)(ltmp0 + signy0);
328.c1:
329	cmp	%l6,_0x7f800000
330	bge,pn	%icc,.u2
331	sub	%l6,%g1,%o1		! (1_0) ldiff0 = ax0 - ay0;
332.c2:
333	cmp	%g1,_0x7f800000
334	bge,pn	%icc,.u3
335	nop
336.c3:
337	sra	%o1,31,%g1		! (1_0) ldiff0 >>= 31;
338	sub	%i3,%i1,%l6		! (1_0) addrc0 = (char*)px - (char*)py;
339
340	and	%l6,%g1,%o1		! (1_0) addrc0 &= ldiff0;
341
342	lda	[%i1+%o1]0x82,%f0	! (1_0) fy0 = *(float*)((char*)py + addrc0);
343	sub	%i3,%o1,%o4		! (1_0) (char*)px - addrc0;
344
345	lda	[%o4]0x82,%f2		! (1_0) fx0 = *(float*)((char*)px - addrc0);
346	sll	%g1,5,%l6		! (1_0) ltmp0 = ldiff0 << 5;
347
348	cmp	%o5,_0x7f800000		! (1_0) b0 ? 0x7f800000
349	bge,pn	%icc,.update0		! (1_0) if ( b0 > 0x7f800000 )
350	nop
351.cont0:
352	add	%i1,stridey,%i1		! py += stridey
353	fstod	%f0,%f40		! (1_0) y0 = (double)fy0;
354
355	sra	%l4,27,%o5		! (1_0) signx0 = ux0 >> 27;
356	add	%i3,stridex,%i3		! px += stridex
357
358	sra	%l3,28,%o4		! (1_0) signy0 = uy0 >> 28;
359	add	%l6,cadd_arr,%l6	! (1_0) ltmp0 += (char*)cadd_arr;
360	fstod	%f2,%f2			! (1_0) x0 = (double)fx0;
361.d0:
362	and	%o5,-16,%o5		! (1_0) signx0 &= -16;
363	and	%o4,-8,%o4		! (1_0) signy0 &= -8;
364
365	lda	[%i1]0x82,%l4		! (2_0) uy0 = *(int*)py;
366
367	lda	[%i3]0x82,%l3		! (2_0) ux0 = *(int*)px;
368	fdivd	%f40,%f2,%f10		! (1_0) x0 = y0 / x0;
369
370	fmuld	%f12,%f12,%f20		! (0_0) x20 = x0 * x0;
371
372	add	%l6,%o5,%o2		! (1_0) ltmp0 += signx0;
373
374	and	%l3,_0x7fffffff,%l6	! (2_0) ax0 = ux0 & 0x7fffffff;
375	sethi	%hi(0x00800000),%o5
376
377	cmp	%l6,%o5
378	bl,pn	%icc,.u4
379	and	%l4,_0x7fffffff,%g5	! (2_0) ay0 = uy0 & 0x7fffffff;
380.c4:
381	cmp	%g5,%o5
382	bl,pn	%icc,.u5
383	fmuld	K9,%f20,%f40		! (0_0) dtmp0 = K9 * x20;
384.c5:
385	cmp	%l6,_0x7f800000
386	bge,pn	%icc,.u6
387	ldd	[%o2+%o4],%f32		! (1_0) cadd0 = *(double*)(ltmp0 + signy0);
388.c6:
389	cmp	%g5,_0x7f800000
390	bge,pn	%icc,.u7
391	sub	%l6,%g5,%o2		! (2_0) ldiff0 = ax0 - ay0;
392.c7:
393	sra	%o2,31,%g5		! (2_0) ldiff0 >>= 31;
394	sub	%i3,%i1,%l6		! (2_0) addrc0 = (char*)px - (char*)py;
395
396	faddd	%f40,K8,%f40		! (0_0) dtmp0 += K8;
397	and	%l6,%g5,%o2		! (2_0) addrc0 &= ldiff0;
398
399	lda	[%i1+%o2]0x82,%f0	! (2_0) fy0 = *(float*)((char*)py + addrc0);
400	sub	%i3,%o2,%o4		! (2_0) (char*)px - addrc0;
401
402	lda	[%o4]0x82,%f2		! (2_0) fx0 = *(float*)((char*)px - addrc0);
403
404	cmp	%o5,_0x7f800000		! (2_0) b0 ? 0x7f800000
405	bge,pn	%icc,.update1		! (2_0) if ( b0 > 0x7f800000 )
406	nop
407.cont1:
408	fmuld	%f40,%f20,%f30		! (0_0) dtmp0 *= x20;
409	sll	%g5,5,%l6		! (2_0) ltmp0 = ldiff0 << 5;
410	add	%i1,stridey,%i1		! py += stridey
411	fstod	%f0,%f40		! (2_0) y0 = (double)fy0;
412
413	sra	%l3,27,%o5		! (2_0) signx0 = ux0 >> 27;
414	add	%i3,stridex,%i3		! px += stridex
415
416	fstod	%f2,%f2			! (2_0) x0 = (double)fx0;
417	sra	%l4,28,%o4		! (2_0) signy0 = uy0 >> 28;
418	add	%l6,cadd_arr,%l6	! (2_0) ltmp0 += (char*)cadd_arr;
419.d1:
420	lda	[%i1]0x82,%l3		! (3_0) uy0 = *(int*)py;
421	and	%o5,-16,%o5		! (2_0) signx0 &= -16;
422	faddd	%f30,K7,%f30		! (0_0) dtmp0 += K7;
423
424	lda	[%i3]0x82,%l4		! (3_0) ux0 = *(int*)px;
425
426	fdivd	%f40,%f2,%f8		! (2_0) x0 = y0 / x0;
427
428	fmuld	%f10,%f10,%f18		! (1_0) x20 = x0 * x0;
429
430	add	%l6,%o5,%o1		! (2_0) ltmp0 += signx0;
431	and	%o4,-8,%o4		! (2_0) signy0 &= -8;
432	fmuld	%f30,%f20,%f30		! (0_0) dtmp0 *= x20;
433
434	and	%l4,_0x7fffffff,%l6	! (3_0) ax0 = ux0 & 0x7fffffff;
435	sethi	%hi(0x00800000),%o5
436
437	cmp	%l6,%o5
438	bl,pn	%icc,.u8
439	and	%l3,_0x7fffffff,%o0	! (3_0) ay0 = uy0 & 0x7fffffff;
440.c8:
441	cmp	%o0,%o5
442	bl,pn	%icc,.u9
443	fmuld	K9,%f18,%f40		! (1_0) dtmp0 = K9 * x20;
444.c9:
445	cmp	%l6,_0x7f800000
446	bge,pn	%icc,.u10
447	faddd	%f30,K6,%f16		! (0_0) dtmp0 += K6;
448.c10:
449	cmp	%o0,_0x7f800000
450	bge,pn	%icc,.u11
451	ldd	[%o1+%o4],%f30		! (2_0) cadd0 = *(double*)(ltmp0 + signy0);
452.c11:
453	sub	%l6,%o0,%o1		! (3_0) ldiff0 = ax0 - ay0;
454
455	sra	%o1,31,%o0		! (3_0) ldiff0 >>= 31;
456	sub	%i3,%i1,%l6		! (3_0) addrc0 = (char*)px - (char*)py;
457
458	faddd	%f40,K8,%f40		! (1_0) dtmp0 += K8;
459	and	%l6,%o0,%o1		! (3_0) addrc0 &= ldiff0;
460	fmuld	%f16,%f20,%f16		! (0_0) dtmp0 *= x20;
461
462	lda	[%i1+%o1]0x82,%f0	! (3_0) fy0 = *(float*)((char*)py + addrc0);
463	sub	%i3,%o1,%o4		! (3_0) (char*)px - addrc0;
464
465	lda	[%o4]0x82,%f1		! (3_0) fx0 = *(float*)((char*)px - addrc0);
466
467	cmp	%o5,_0x7f800000		! (3_0) b0 ? 0x7f800000
468	bge,pn	%icc,.update2		! (3_0) if ( b0 > 0x7f800000 )
469	nop
470.cont2:
471	fmuld	%f40,%f18,%f28		! (1_0) dtmp0 *= x20;
472	sll	%o0,5,%l6		! (3_0) ltmp0 = ldiff0 << 5;
473	add	%i1,stridey,%i1		! py += stridey
474	fstod	%f0,%f40		! (3_0) y0 = (double)fy0;
475
476	faddd	%f16,K5,%f2		! (0_0) dtmp0 += K5;
477	sra	%l4,27,%o5		! (3_0) signx0 = ux0 >> 27;
478	add	%i3,stridex,%i3		! px += stridex
479
480	sra	%l3,28,%o4		! (3_0) signy0 = uy0 >> 28;
481	fstod	%f1,%f16		! (3_0) x0 = (double)fx0;
482.d2:
483	faddd	%f28,K7,%f28		! (1_0) dtmp0 += K7;
484	add	%l6,cadd_arr,%l6	! (3_0) ltmp0 += (char*)cadd_arr;
485	and	%o5,-16,%o5		! (3_0) signx0 &= -16;
486
487	lda	[%i1]0x82,%l4		! (4_0) uy0 = *(int*)py;
488	fmuld	%f2,%f20,%f2		! (0_0) dtmp0 *= x20;
489
490	lda	[%i3]0x82,%l3		! (4_0) ux0 = *(int*)px;
491	fdivd	%f40,%f16,%f6		! (3_0) x0 = y0 / x0;
492
493	and	%o4,-8,%o4		! (3_0) signy0 &= -8;
494	fmuld	%f8,%f8,%f16		! (2_0) x20 = x0 * x0;
495
496	add	%l6,%o5,%o2		! (3_0) ltmp0 += signx0;
497	fmuld	%f28,%f18,%f28		! (1_0) dtmp0 *= x20;
498
499	and	%l3,_0x7fffffff,%l6	! (4_0) ax0 = ux0 & 0x7fffffff;
500	sethi	%hi(0x00800000),%o5
501	faddd	%f2,K4,%f2		! (0_0) dtmp0 += K4;
502
503	cmp	%l6,%o5
504	bl,pn	%icc,.u12
505	and	%l4,_0x7fffffff,%l5	! (4_0) ay0 = uy0 & 0x7fffffff;
506.c12:
507	cmp	%l5,%o5
508	bl,pn	%icc,.u13
509	fmuld	K9,%f16,%f40		! (2_0) dtmp0 = K9 * x20;
510.c13:
511	cmp	%l6,_0x7f800000
512	bge,pn	%icc,.u14
513	faddd	%f28,K6,%f4		! (1_0) dtmp0 += K6;
514.c14:
515	ldd	[%o2+%o4],%f28		! (3_0) cadd0 = *(double*)(ltmp0 + signy0);
516	cmp	%l5,_0x7f800000
517	bge,pn	%icc,.u15
518	fmuld	%f2,%f20,%f24		! (0_0) dtmp0 *= x20;
519.c15:
520	sub	%l6,%l5,%o2		! (4_0) ldiff0 = ax0 - ay0;
521
522	sra	%o2,31,%l5		! (4_0) ldiff0 >>= 31;
523	sub	%i3,%i1,%l6		! (4_0) addrc0 = (char*)px - (char*)py;
524
525	faddd	%f40,K8,%f40		! (2_0) dtmp0 += K8;
526	and	%l6,%l5,%o2		! (4_0) addrc0 &= ldiff0;
527	fmuld	%f4,%f18,%f4		! (1_0) dtmp0 *= x20;
528
529	lda	[%i1+%o2]0x82,%f0	! (4_0) fy0 = *(float*)((char*)py + addrc0);
530	sub	%i3,%o2,%o4		! (4_0) (char*)px - addrc0;
531	faddd	%f24,K3,%f24		! (0_0) dtmp0 += K3;
532
533	lda	[%o4]0x82,%f2		! (4_0) fx0 = *(float*)((char*)px - addrc0);
534
535	cmp	%o5,_0x7f800000		! (4_0) b0 ? 0x7f800000
536	bge,pn	%icc,.update3		! (4_0) if ( b0 > 0x7f800000 )
537	nop
538.cont3:
539	fmuld	%f40,%f16,%f26		! (2_0) dtmp0 *= x20;
540	sll	%l5,5,%l6		! (4_0) ltmp0 = ldiff0 << 5;
541	add	%i1,stridey,%i1		! py += stridey
542	fstod	%f0,%f40		! (4_0) y0 = (double)fy0;
543
544	faddd	%f4,K5,%f62		! (1_0) dtmp0 += K5;
545	add	%i3,stridex,%i3		! px += stridex
546	fmuld	%f24,%f20,%f24		! (0_0) dtmp0 *= x20;
547
548	fstod	%f2,%f2			! (4_0) x0 = (double)fx0;
549	sra	%l3,27,%o5		! (4_0) signx0 = ux0 >> 27;
550	sra	%l4,28,%o4		! (4_0) signy0 = uy0 >> 28;
551.d3:
552	lda	[%i1]0x82,%l3		! (5_0) uy0 = *(int*)py;
553	add	%l6,cadd_arr,%l6	! (4_0) ltmp0 += (char*)cadd_arr;
554	faddd	%f26,K7,%f26		! (2_0) dtmp0 += K7;
555
556	fmuld	%f62,%f18,%f4		! (1_0) dtmp0 *= x20;
557	and	%o5,-16,%o5		! (4_0) signx0 &= -16;
558
559	lda	[%i3]0x82,%l4		! (5_1) ux0 = *(int*)px;
560	fdivd	%f40,%f2,%f62		! (4_1) x0 = y0 / x0;
561	faddd	%f24,K2,%f40		! (0_1) dtmp0 += K2;
562
563	and	%o4,-8,%o4		! (4_1) signy0 &= -8;
564	fmuld	%f6,%f6,%f24		! (3_1) x20 = x0 * x0;
565
566	add	%l6,%o5,%o1		! (4_1) ltmp0 += signx0;
567	fmuld	%f26,%f16,%f26		! (2_1) dtmp0 *= x20;
568
569	and	%l4,_0x7fffffff,%l6	! (5_1) ax0 = ux0 & 0x7fffffff;
570	sethi	%hi(0x00800000),%o5
571	faddd	%f4,K4,%f4		! (1_1) dtmp0 += K4;
572
573	cmp	%l6,%o5
574	bl,pn	%icc,.u16
575	and	%l3,_0x7fffffff,%o7	! (5_1) ay0 = uy0 & 0x7fffffff;
576.c16:
577	cmp	%o7,%o5
578	bl,pn	%icc,.u17
579	fmuld	%f40,%f20,%f38		! (0_1) dtmp0 *= x20;
580.c17:
581	cmp	%l6,_0x7f800000
582	bge,pn	%icc,.u18
583	fmuld	K9,%f24,%f40		! (3_1) dtmp0 = K9 * x20;
584.c18:
585	cmp	%o7,_0x7f800000
586	bge,pn	%icc,.u19
587	faddd	%f26,K6,%f22		! (2_1) dtmp0 += K6;
588.c19:
589	ldd	[%o1+%o4],%f26		! (4_1) cadd0 = *(double*)(ltmp0 + signy0);
590	fmuld	%f4,%f18,%f4		! (1_1) dtmp0 *= x20;
591
592	sub	%l6,%o7,%o1		! (5_1) ldiff0 = ax0 - ay0;
593
594	sra	%o1,31,%o7		! (5_1) ldiff0 >>= 31;
595	sub	%i3,%i1,%l6		! (5_1) addrc0 = (char*)px - (char*)py;
596	faddd	%f38,K1,%f38		! (0_1) dtmp0 += K1;
597
598	faddd	%f40,K8,%f40		! (3_1) dtmp0 += K8;
599	and	%l6,%o7,%o1		! (5_1) addrc0 &= ldiff0;
600	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;
601
602	lda	[%i1+%o1]0x82,%f0	! (5_1) fy0 = *(float*)((char*)py + addrc0);
603	sll	%o7,5,%l6		! (5_1) ltmp0 = ldiff0 << 5;
604	sub	%i3,%o1,%o4		! (5_1) (char*)px - addrc0;
605	faddd	%f4,K3,%f4		! (1_1) dtmp0 += K3;
606
607	lda	[%o4]0x82,%f1		! (5_1) fx0 = *(float*)((char*)px - addrc0);
608
609	fmuld	%f38,%f20,%f38		! (0_1) dtmp0 *= x20;
610	cmp	%o5,_0x7f800000		! (5_1) b0 ? 0x7f800000
611	bge,pn	%icc,.update4		! (5_1) if ( b0 > 0x7f800000 )
612	nop
613.cont4:
614	fmuld	%f40,%f24,%f36		! (3_1) dtmp0 *= x20;
615	fstod	%f0,%f40		! (5_1) y0 = (double)fy0;
616
617	faddd	%f22,K5,%f14		! (2_1) dtmp0 += K5;
618	fmuld	%f4,%f18,%f4		! (1_1) dtmp0 *= x20;
619
620	add	%i3,stridex,%i3		! px += stridex
621	sll	%l7,3,%l7		! (0_1) cmul0_ind = ldiff0 << 3;
622	fstod	%f1,%f2			! (5_1) x0 = (double)fx0;
623.d4:
624	sra	%l3,28,%o4		! (5_1) signy0 = uy0 >> 28;
625	add	%i1,stridey,%i1		! py += stridey
626
627	faddd	%f36,K7,%f36		! (3_1) dtmp0 += K7;
628	sra	%l4,27,%o5		! (5_1) signx0 = ux0 >> 27;
629
630	lda	[%i1]0x82,%l4		! (0_0) uy0 = *(int*)py;
631	add	%l6,cadd_arr,%l6	! (5_1) ltmp0 += (char*)cadd_arr;
632	fmuld	%f14,%f16,%f22		! (2_1) dtmp0 *= x20;
633	faddd	%f38,K0,%f38		! (0_1) dtmp0 += K0;
634
635	lda	[%i3]0x82,%l3		! (0_0) ux0 = *(int*)px;
636	and	%o5,-16,%o5		! (5_1) signx0 &= -16;
637	fdivd	%f40,%f2,%f14		! (5_1) x0 = y0 / x0;
638	faddd	%f4,K2,%f40		! (1_1) dtmp0 += K2;
639
640	fmuld	%f62,%f62,%f4		! (4_1) x20 = x0 * x0;
641
642	ldd	[cmul_arr+%l7],%f0	! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
643	add	%l6,%o5,%o2		! (5_1) ltmp0 += signx0;
644	and	%o4,-8,%o4		! (5_1) signy0 &= -8;
645	fmuld	%f36,%f24,%f36		! (3_1) dtmp0 *= x20;
646
647	fmuld	%f38,%f12,%f12		! (0_1) x0 = dtmp0 * x0;
648	and	%l4,_0x7fffffff,%l7	! (0_0) ay0 = uy0 & 0x7fffffff;
649	sethi	%hi(0x00800000),%o5
650	faddd	%f22,K4,%f22		! (2_1) dtmp0 += K4;
651
652	and	%l3,_0x7fffffff,%l6	! (0_0) ax0 = ux0 & 0x7fffffff;
653	cmp	%l7,%o5
654	bl,pn	%icc,.u20
655	fmuld	%f40,%f18,%f38		! (1_1) dtmp0 *= x20;
656.c20:
657	cmp	%l6,%o5
658	bl,pn	%icc,.u21
659	fmuld	K9,%f4,%f40		! (4_1) dtmp0 = K9 * x20;
660.c21:
661	cmp	%l7,_0x7f800000
662	bge,pn	%icc,.u22
663	faddd	%f36,K6,%f20		! (3_1) dtmp0 += K6;
664.c22:
665	ldd	[%o2+%o4],%f36		! (5_1) cadd0 = *(double*)(ltmp0 + signy0);
666	cmp	%l6,_0x7f800000
667	bge,pn	%icc,.u23
668	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;
669.c23:
670	sub	%l6,%l7,%o2		! (0_0) ldiff0 = ax0 - ay0;
671
672	fmuld	%f0,%f12,%f12		! (0_1) dtmp0 = cmul0 * x0;
673	sra	%o2,31,%l7		! (0_0) ldiff0 >>= 31;
674	sub	%i3,%i1,%l6		! (0_0) addrc0 = (char*)px - (char*)py;
675	faddd	%f38,K1,%f38		! (1_1) dtmp0 += K1;
676
677	faddd	%f40,K8,%f40		! (4_1) dtmp0 += K8;
678	and	%l6,%l7,%o2		! (0_0) addrc0 &= ldiff0;
679	fmuld	%f20,%f24,%f20		! (3_1) dtmp0 *= x20;
680
681	lda	[%i1+%o2]0x82,%f0	! (0_0) fy0 = *(float*)((char*)py + addrc0);
682	sll	%g1,3,%g1		! (1_1) cmul0_ind = ldiff0 << 3;
683	sub	%i3,%o2,%o4		! (0_0) (char*)px - addrc0
684	faddd	%f22,K3,%f22		! (2_1) dtmp0 += K3;
685
686	lda	[%o4]0x82,%f2		! (0_0) fx0 = *(float*)((char*)px - addrc0);
687	sll	%l7,5,%l6		! (0_0) ltmp0 = ldiff0 << 5;
688
689	fmuld	%f38,%f18,%f38		! (1_1) dtmp0 *= x20;
690	cmp	%o5,_0x7f800000		! (0_0) b0 ? 0x7f800000
691	bge,pn	%icc,.update5		! (0_0) if ( b0 > 0x7f800000 )
692	faddd	%f34,%f12,%f18		! (0_1) dtmp0 = cadd0 + dtmp0;
693.cont5:
694	fmuld	%f40,%f4,%f34		! (4_1) dtmp0 *= x20;
695	sra	%l3,27,%o5		! (0_0) signx0 = ux0 >> 27;
696	add	%i3,stridex,%i3		! px += stridex
697	fstod	%f0,%f40		! (0_0) y0 = (double)fy0;
698
699	faddd	%f20,K5,%f12		! (3_1) dtmp0 += K5;
700	add	%i1,stridey,%i1		! py += stridey
701	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;
702
703	lda	[%i1]0x82,%l3		! (1_0) uy0 = *(int*)py;
704	sra	%l4,28,%o4		! (0_0) signy0 = uy0 >> 28;
705	add	%l6,cadd_arr,%l6	! (0_0) ltmp0 += (char*)cadd_arr;
706	fstod	%f2,%f2			! (0_0) x0 = (double)fx0;
707.d5:
708	lda	[%i3]0x82,%l4		! (1_0) ux0 = *(int*)px;
709	and	%o5,-16,%o5		! (0_0) signx0 &= -16;
710	faddd	%f34,K7,%f34		! (4_1) dtmp0 += K7;
711
712	ldx	[%fp+tmp_pz],%o1
713	fmuld	%f12,%f24,%f20		! (3_1) dtmp0 *= x20;
714	and	%o4,-8,%o4		! (0_0) signy0 &= -8;
715	faddd	%f38,K0,%f38		! (1_1) dtmp0 += K0;
716
717	fdivd	%f40,%f2,%f12		! (0_0) x0 = y0 / x0;
718	faddd	%f22,K2,%f40		! (2_1) dtmp0 += K2;
719
720	fdtos	%f18,%f2		! (0_1) ftmp0 = (float)dtmp0;
721	st	%f2,[%o1]		! (0_1) *pz = ftmp0
722	add	%o1,stridez,%o2
723	fmuld	%f14,%f14,%f22		! (5_1) x20 = x0 * x0;
724
725	subcc	counter,1,counter
726	bneg,a,pn	%icc,.begin
727	or	%g0,%o2,%o4
728
729	ldd	[cmul_arr+%g1],%f0	! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
730	add	%l6,%o5,%o1		! (0_0) ltmp0 += signx0;
731	fmuld	%f34,%f4,%f34		! (4_1) dtmp0 *= x20;
732
733	fmuld	%f38,%f10,%f10		! (1_1) x0 = dtmp0 * x0;
734	and	%l4,_0x7fffffff,%l6	! (1_0) ax0 = ux0 & 0x7fffffff;
735	sethi	%hi(0x00800000),%o5
736	faddd	%f20,K4,%f20		! (3_1) dtmp0 += K4;
737
738	and	%l3,_0x7fffffff,%g1	! (1_0) ay0 = uy0 & 0x7fffffff;
739	cmp	%l6,%o5
740	bl,pn	%icc,.u24
741	fmuld	%f40,%f16,%f38		! (2_1) dtmp0 *= x20;
742.c24:
743	cmp	%g1,%o5
744	bl,pn	%icc,.u25
745	fmuld	K9,%f22,%f40		! (5_1) dtmp0 = K9 * x20;
746.c25:
747	cmp	%l6,_0x7f800000
748	bge,pn	%icc,.u26
749	faddd	%f34,K6,%f18		! (4_1) dtmp0 += K6;
750.c26:
751	ldd	[%o1+%o4],%f34		! (0_0) cadd0 = *(double*)(ltmp0 + signy0);
752	cmp	%g1,_0x7f800000
753	bge,pn	%icc,.u27
754	fmuld	%f20,%f24,%f20		! (3_1) dtmp0 *= x20;
755.c27:
756	sub	%l6,%g1,%o1		! (1_0) ldiff0 = ax0 - ay0;
757
758	fmuld	%f0,%f10,%f10		! (1_1) dtmp0 = cmul0 * x0;
759	sra	%o1,31,%g1		! (1_0) ldiff0 >>= 31;
760	sub	%i3,%i1,%l6		! (1_0) addrc0 = (char*)px - (char*)py;
761	faddd	%f38,K1,%f38		! (2_1) dtmp0 += K1;
762
763	faddd	%f40,K8,%f40		! (5_1) dtmp0 += K8;
764	and	%l6,%g1,%o1		! (1_0) addrc0 &= ldiff0;
765	fmuld	%f18,%f4,%f18		! (4_1) dtmp0 *= x20;
766
767	lda	[%i1+%o1]0x82,%f0	! (1_0) fy0 = *(float*)((char*)py + addrc0);
768	sll	%g5,3,%g5		! (2_1) cmul0_ind = ldiff0 << 3;
769	sub	%i3,%o1,%o4		! (1_0) (char*)px - addrc0;
770	faddd	%f20,K3,%f20		! (3_1) dtmp0 += K3;
771
772	lda	[%o4]0x82,%f2		! (1_0) fx0 = *(float*)((char*)px - addrc0);
773	sll	%g1,5,%l6		! (1_0) ltmp0 = ldiff0 << 5;
774	add	%o2,stridez,%o1		! pz += stridez
775
776	fmuld	%f38,%f16,%f38		! (2_1) dtmp0 *= x20;
777	cmp	%o5,_0x7f800000		! (1_0) b0 ? 0x7f800000
778	bge,pn	%icc,.update6		! (1_0) if ( b0 > 0x7f800000 )
779	faddd	%f32,%f10,%f16		! (1_1) dtmp0 = cadd0 + dtmp0;
780.cont6:
781	fmuld	%f40,%f22,%f32		! (5_1) dtmp0 *= x20;
782	add	%i1,stridey,%i1		! py += stridey
783	fstod	%f0,%f40		! (1_0) y0 = (double)fy0;
784
785	faddd	%f18,K5,%f10		! (4_1) dtmp0 += K5;
786	sra	%l4,27,%o5		! (1_0) signx0 = ux0 >> 27;
787	add	%i3,stridex,%i3		! px += stridex
788	fmuld	%f20,%f24,%f20		! (3_1) dtmp0 *= x20;
789
790	sra	%l3,28,%o4		! (1_0) signy0 = uy0 >> 28;
791	add	%l6,cadd_arr,%l6	! (1_0) ltmp0 += (char*)cadd_arr;
792	fstod	%f2,%f2			! (1_0) x0 = (double)fx0;
793.d6:
794	faddd	%f32,K7,%f32		! (5_1) dtmp0 += K7;
795	and	%o5,-16,%o5		! (1_0) signx0 &= -16;
796	and	%o4,-8,%o4		! (1_0) signy0 &= -8;
797
798	lda	[%i1]0x82,%l4		! (2_0) uy0 = *(int*)py;
799	fmuld	%f10,%f4,%f18		! (4_1) dtmp0 *= x20;
800	faddd	%f38,K0,%f38		! (2_1) dtmp0 += K0;
801
802	lda	[%i3]0x82,%l3		! (2_0) ux0 = *(int*)px;
803	fdivd	%f40,%f2,%f10		! (1_0) x0 = y0 / x0;
804	faddd	%f20,K2,%f40		! (3_1) dtmp0 += K2;
805
806	fmuld	%f12,%f12,%f20		! (0_0) x20 = x0 * x0;
807	fdtos	%f16,%f2		! (1_1) ftmp0 = (float)dtmp0;
808	st	%f2,[%o2]		! (1_1) *pz = ftmp0;
809
810	subcc	counter,1,counter
811	bneg,a,pn	%icc,.begin
812	or	%g0,%o1,%o4
813
814	ldd	[cmul_arr+%g5],%f0	! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
815	add	%l6,%o5,%o2		! (1_0) ltmp0 += signx0;
816	fmuld	%f32,%f22,%f32		! (5_1) dtmp0 *= x20;
817
818	fmuld	%f38,%f8,%f8		! (2_1) x0 = dtmp0 * x0;
819	and	%l3,_0x7fffffff,%l6	! (2_0) ax0 = ux0 & 0x7fffffff;
820	sethi	%hi(0x00800000),%o5
821	faddd	%f18,K4,%f18		! (4_1) dtmp0 += K4;
822
823	and	%l4,_0x7fffffff,%g5	! (2_0) ay0 = uy0 & 0x7fffffff;
824	cmp	%l6,%o5
825	bl,pn	%icc,.u28
826	fmuld	%f40,%f24,%f38		! (3_1) dtmp0 *= x20;
827.c28:
828	cmp	%g5,%o5
829	bl,pn	%icc,.u29
830	fmuld	K9,%f20,%f40		! (0_0) dtmp0 = K9 * x20;
831.c29:
832	cmp	%l6,_0x7f800000
833	bge,pn	%icc,.u30
834	faddd	%f32,K6,%f16		! (5_1) dtmp0 += K6;
835.c30:
836	ldd	[%o2+%o4],%f32		! (1_0) cadd0 = *(double*)(ltmp0 + signy0);
837	cmp	%g5,_0x7f800000
838	bge,pn	%icc,.u31
839	fmuld	%f18,%f4,%f18		! (4_1) dtmp0 *= x20;
840.c31:
841	sub	%l6,%g5,%o2		! (2_0) ldiff0 = ax0 - ay0;
842
843	fmuld	%f0,%f8,%f8		! (2_1) dtmp0 = cmul0 * x0;
844	sra	%o2,31,%g5		! (2_0) ldiff0 >>= 31;
845	sub	%i3,%i1,%l6		! (2_0) addrc0 = (char*)px - (char*)py;
846	faddd	%f38,K1,%f38		! (3_1) dtmp0 += K1;
847
848	faddd	%f40,K8,%f40		! (0_0) dtmp0 += K8;
849	and	%l6,%g5,%o2		! (2_0) addrc0 &= ldiff0;
850	fmuld	%f16,%f22,%f16		! (5_1) dtmp0 *= x20;
851
852	lda	[%i1+%o2]0x82,%f0	! (2_0) fy0 = *(float*)((char*)py + addrc0);
853	sub	%i3,%o2,%o4		! (2_0) (char*)px - addrc0;
854	add	%o1,stridez,%o2		! pz += stridez
855	faddd	%f18,K3,%f18		! (4_1) dtmp0 += K3;
856
857	lda	[%o4]0x82,%f2		! (2_0) fx0 = *(float*)((char*)px - addrc0);
858	sll	%o0,3,%o0		! (3_1) cmul0_ind = ldiff0 << 3;
859
860	fmuld	%f38,%f24,%f38		! (3_1) dtmp0 *= x20;
861	cmp	%o5,_0x7f800000		! (2_0) b0 ? 0x7f800000
862	bge,pn	%icc,.update7		! (2_0) if ( b0 > 0x7f800000 )
863	faddd	%f30,%f8,%f24		! (2_1) dtmp0 = cadd0 + dtmp0;
864.cont7:
865	fmuld	%f40,%f20,%f30		! (0_0) dtmp0 *= x20;
866	sll	%g5,5,%l6		! (2_0) ltmp0 = ldiff0 << 5;
867	add	%i1,stridey,%i1		! py += stridey
868	fstod	%f0,%f40		! (2_0) y0 = (double)fy0;
869
870	faddd	%f16,K5,%f8		! (5_1) dtmp0 += K5;
871	sra	%l3,27,%o5		! (2_0) signx0 = ux0 >> 27;
872	add	%i3,stridex,%i3		! px += stridex
873	fmuld	%f18,%f4,%f18		! (4_1) dtmp0 *= x20;
874
875	fstod	%f2,%f2			! (2_0) x0 = (double)fx0;
876	sra	%l4,28,%o4		! (2_0) signy0 = uy0 >> 28;
877	add	%l6,cadd_arr,%l6	! (2_0) ltmp0 += (char*)cadd_arr;
878.d7:
879	lda	[%i1]0x82,%l3		! (3_0) uy0 = *(int*)py;
880	and	%o5,-16,%o5		! (2_0) signx0 &= -16;
881	faddd	%f30,K7,%f30		! (0_0) dtmp0 += K7;
882
883	lda	[%i3]0x82,%l4		! (3_0) ux0 = *(int*)px;
884	fmuld	%f8,%f22,%f16		! (5_1) dtmp0 *= x20;
885	faddd	%f38,K0,%f38		! (3_1) dtmp0 += K0;
886
887	fdivd	%f40,%f2,%f8		! (2_0) x0 = y0 / x0;
888	faddd	%f18,K2,%f40		! (4_1) dtmp0 += K2;
889
890	fmuld	%f10,%f10,%f18		! (1_0) x20 = x0 * x0;
891	fdtos	%f24,%f1		! (2_1) ftmp0 = (float)dtmp0;
892	st	%f1,[%o1]		! (2_1) *pz = ftmp0;
893
894	subcc	counter,1,counter
895	bneg,a,pn	%icc,.begin
896	or	%g0,%o2,%o4
897
898	ldd	[cmul_arr+%o0],%f2	! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
899	add	%l6,%o5,%o1		! (2_0) ltmp0 += signx0;
900	and	%o4,-8,%o4		! (2_0) signy0 &= -8;
901	fmuld	%f30,%f20,%f30		! (0_0) dtmp0 *= x20;
902
903	fmuld	%f38,%f6,%f6		! (3_1) x0 = dtmp0 * x0;
904	and	%l4,_0x7fffffff,%l6	! (3_0) ax0 = ux0 & 0x7fffffff;
905	sethi	%hi(0x00800000),%o5
906	faddd	%f16,K4,%f24		! (5_1) dtmp0 += K4;
907
908	and	%l3,_0x7fffffff,%o0	! (3_0) ay0 = uy0 & 0x7fffffff;
909	cmp	%l6,%o5
910	bl,pn	%icc,.u32
911	fmuld	%f40,%f4,%f38		! (4_1) dtmp0 *= x20;
912.c32:
913	cmp	%o0,%o5
914	bl,pn	%icc,.u33
915	fmuld	K9,%f18,%f40		! (1_0) dtmp0 = K9 * x20;
916.c33:
917	cmp	%l6,_0x7f800000
918	bge,pn	%icc,.u34
919	faddd	%f30,K6,%f16		! (0_0) dtmp0 += K6;
920.c34:
921	ldd	[%o1+%o4],%f30		! (2_0) cadd0 = *(double*)(ltmp0 + signy0);
922	cmp	%o0,_0x7f800000
923	bge,pn	%icc,.u35
924	fmuld	%f24,%f22,%f24		! (5_1) dtmp0 *= x20;
925.c35:
926	sub	%l6,%o0,%o1		! (3_0) ldiff0 = ax0 - ay0;
927
928	fmuld	%f2,%f6,%f6		! (3_1) dtmp0 = cmul0 * x0;
929	sra	%o1,31,%o0		! (3_0) ldiff0 >>= 31;
930	sub	%i3,%i1,%l6		! (3_0) addrc0 = (char*)px - (char*)py;
931	faddd	%f38,K1,%f38		! (4_1) dtmp0 += K1;
932
933	faddd	%f40,K8,%f40		! (1_0) dtmp0 += K8;
934	and	%l6,%o0,%o1		! (3_0) addrc0 &= ldiff0;
935	fmuld	%f16,%f20,%f16		! (0_0) dtmp0 *= x20;
936
937	lda	[%i1+%o1]0x82,%f0	! (3_0) fy0 = *(float*)((char*)py + addrc0);
938	sub	%i3,%o1,%o4		! (3_0) (char*)px - addrc0;
939	add	%o2,stridez,%o1		! pz += stridez
940	faddd	%f24,K3,%f24		! (5_1) dtmp0 += K3;
941
942	lda	[%o4]0x82,%f1		! (3_0) fx0 = *(float*)((char*)px - addrc0);
943	sll	%l5,3,%l5		! (4_1) cmul0_ind = ldiff0 << 3;
944
945	fmuld	%f38,%f4,%f38		! (4_1) dtmp0 *= x20;
946	cmp	%o5,_0x7f800000		! (3_0) b0 ? 0x7f800000
947	bge,pn	%icc,.update8		! (3_0) if ( b0 > 0x7f800000 )
948	faddd	%f28,%f6,%f4		! (3_1) dtmp0 = cadd0 + dtmp0;
949.cont8:
950	fmuld	%f40,%f18,%f28		! (1_0) dtmp0 *= x20;
951	sll	%o0,5,%l6		! (3_0) ltmp0 = ldiff0 << 5;
952	add	%i1,stridey,%i1		! py += stridey
953	fstod	%f0,%f40		! (3_0) y0 = (double)fy0;
954
955	faddd	%f16,K5,%f2		! (0_0) dtmp0 += K5;
956	sra	%l4,27,%o5		! (3_0) signx0 = ux0 >> 27;
957	add	%i3,stridex,%i3		! px += stridex
958	fmuld	%f24,%f22,%f24		! (5_1) dtmp0 *= x20;
959
960	sra	%l3,28,%o4		! (3_0) signy0 = uy0 >> 28;
961	fstod	%f1,%f16		! (3_0) x0 = (double)fx0;
962.d8:
963	faddd	%f28,K7,%f28		! (1_0) dtmp0 += K7;
964	add	%l6,cadd_arr,%l6	! (3_0) ltmp0 += (char*)cadd_arr;
965	and	%o5,-16,%o5		! (3_0) signx0 &= -16;
966
967	lda	[%i1]0x82,%l4		! (4_0) uy0 = *(int*)py;
968	fmuld	%f2,%f20,%f2		! (0_0) dtmp0 *= x20;
969	faddd	%f38,K0,%f38		! (4_1) dtmp0 += K0;
970
971	lda	[%i3]0x82,%l3		! (4_0) ux0 = *(int*)px;
972	fdivd	%f40,%f16,%f6		! (3_0) x0 = y0 / x0;
973	faddd	%f24,K2,%f24		! (5_1) dtmp0 += K2;
974
975	fdtos	%f4,%f1			! (3_1) ftmp0 = (float)dtmp0;
976	and	%o4,-8,%o4		! (3_0) signy0 &= -8;
977	st	%f1,[%o2]		! (3_1) *pz = ftmp0;
978	fmuld	%f8,%f8,%f16		! (2_0) x20 = x0 * x0;
979
980	subcc	counter,1,counter
981	bneg,a,pn	%icc,.begin
982	or	%g0,%o1,%o4
983
984	ldd	[cmul_arr+%l5],%f0	! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
985	add	%l6,%o5,%o2		! (3_0) ltmp0 += signx0;
986	fmuld	%f28,%f18,%f28		! (1_0) dtmp0 *= x20;
987
988	fmuld	%f38,%f62,%f62		! (4_1) x0 = dtmp0 * x0;
989	and	%l3,_0x7fffffff,%l6	! (4_0) ax0 = ux0 & 0x7fffffff;
990	sethi	%hi(0x00800000),%o5
991	faddd	%f2,K4,%f2		! (0_0) dtmp0 += K4;
992
993	and	%l4,_0x7fffffff,%l5	! (4_0) ay0 = uy0 & 0x7fffffff;
994	cmp	%l6,%o5
995	bl,pn	%icc,.u36
996	fmuld	%f24,%f22,%f38		! (5_1) dtmp0 *= x20;
997.c36:
998	cmp	%l5,%o5
999	bl,pn	%icc,.u37
1000	fmuld	K9,%f16,%f40		! (2_0) dtmp0 = K9 * x20;
1001.c37:
1002	cmp	%l6,_0x7f800000
1003	bge,pn	%icc,.u38
1004	faddd	%f28,K6,%f4		! (1_0) dtmp0 += K6;
1005.c38:
1006	ldd	[%o2+%o4],%f28		! (3_0) cadd0 = *(double*)(ltmp0 + signy0);
1007	cmp	%l5,_0x7f800000
1008	bge,pn	%icc,.u39
1009	fmuld	%f2,%f20,%f24		! (0_0) dtmp0 *= x20;
1010.c39:
1011	sub	%l6,%l5,%o2		! (4_0) ldiff0 = ax0 - ay0;
1012
1013	fmuld	%f0,%f62,%f62		! (4_1) dtmp0 = cmul0 * x0;
1014	sra	%o2,31,%l5		! (4_0) ldiff0 >>= 31;
1015	sub	%i3,%i1,%l6		! (4_0) addrc0 = (char*)px - (char*)py;
1016	faddd	%f38,K1,%f38		! (5_1) dtmp0 += K1;
1017
1018	faddd	%f40,K8,%f40		! (2_0) dtmp0 += K8;
1019	and	%l6,%l5,%o2		! (4_0) addrc0 &= ldiff0;
1020	fmuld	%f4,%f18,%f4		! (1_0) dtmp0 *= x20;
1021
1022	lda	[%i1+%o2]0x82,%f0	! (4_0) fy0 = *(float*)((char*)py + addrc0);
1023	sub	%i3,%o2,%o4		! (4_0) (char*)px - addrc0;
1024	add	%o1,stridez,%o2		! pz += stridez
1025	faddd	%f24,K3,%f24		! (0_0) dtmp0 += K3;
1026
1027	lda	[%o4]0x82,%f2		! (4_0) fx0 = *(float*)((char*)px - addrc0);
1028	sll	%o7,3,%o7		! (5_1) cmul0_ind = ldiff0 << 3;
1029
1030	fmuld	%f38,%f22,%f38		! (5_1) dtmp0 *= x20;
1031	cmp	%o5,_0x7f800000		! (4_0) b0 ? 0x7f800000
1032	bge,pn	%icc,.update9		! (4_0) if ( b0 > 0x7f800000 )
1033	faddd	%f26,%f62,%f22		! (4_1) dtmp0 = cadd0 + dtmp0;
1034.cont9:
1035	fmuld	%f40,%f16,%f26		! (2_0) dtmp0 *= x20;
1036	sll	%l5,5,%l6		! (4_0) ltmp0 = ldiff0 << 5;
1037	add	%i1,stridey,%i1		! py += stridey
1038	fstod	%f0,%f40		! (4_0) y0 = (double)fy0;
1039
1040	faddd	%f4,K5,%f62		! (1_0) dtmp0 += K5;
1041	sra	%l3,27,%o5		! (4_0) signx0 = ux0 >> 27;
1042	add	%i3,stridex,%i3		! px += stridex
1043	fmuld	%f24,%f20,%f24		! (0_0) dtmp0 *= x20;
1044
1045	fstod	%f2,%f2			! (4_0) x0 = (double)fx0;
1046	sra	%l4,28,%o4		! (4_0) signy0 = uy0 >> 28;
1047.d9:
1048	lda	[%i1]0x82,%l3		! (5_0) uy0 = *(int*)py;
1049	add	%l6,cadd_arr,%l6	! (4_0) ltmp0 += (char*)cadd_arr;
1050	faddd	%f26,K7,%f26		! (2_0) dtmp0 += K7;
1051
1052	fmuld	%f62,%f18,%f4		! (1_0) dtmp0 *= x20;
1053	and	%o5,-16,%o5		! (4_0) signx0 &= -16;
1054	faddd	%f38,K0,%f38		! (5_1) dtmp0 += K0;
1055
1056	subcc	counter,5,counter
1057	bneg,pn	%icc,.tail
1058	nop
1059
1060	ba	.main_loop
1061	nop
1062
1063	.align	16
! .main_loop: steady-state body of the software-pipelined loop.
!
! One trip through this block issues six fdivd and six single-precision
! stores to [%o1]/[%o2] and lowers counter by 6 (at .den5).  It repeats
! while that subtraction leaves counter non-negative (bpos) and otherwise
! falls through to .tail.
!
! Work for several elements is interleaved.  The "(k_j)" tag that opens each
! trailing comment names the element the instruction works on.
! NOTE(review): k appears to be the slot 0..5 inside a group of six and j
! the pipeline generation -- confirm against the prologue of this routine.
!
! Per element, following the C-like trailing comments:
!   1. ux0/uy0 = raw bits of *px / *py, read with "lda [..]0x82"
!      (0x82 is ASI_PRIMARY_NOFAULT in the SPARC V9 ASI map).  %l3 and %l4
!      swap the ux0/uy0 roles from one stanza to the next.
!   2. ax0/ay0 = bits & _0x7fffffff.  Each is compared with 0x00800000
!      (sethi into %o5; branch if less) and with _0x7f800000 (branch if
!      greater or equal).  Those branches go to .upN handlers that are not
!      part of this block.
!   3. ldiff0 = (ax0 - ay0) >> 31 (arithmetic), i.e. all ones when
!      ax0 < ay0, else 0.  addrc0 = (px - py) & ldiff0; fy0 is read from
!      py + addrc0 and fx0 from px - addrc0, so the two inputs are exchanged
!      when ax0 < ay0.
!   4. x0 = y0 / x0 in double precision; x20 = x0 * x0.
!   5. dtmp0 = K9 * x20 + K8, then "*= x20; += Kn" down to K0 (Horner
!      form), then x0 = dtmp0 * x0.
!   6. result = cadd0 + cmul0 * x0 with
!        cmul0 = *(double*)((char*)cmul_arr + (ldiff0 << 3))
!        cadd0 = *(double*)((char*)cadd_arr + (ldiff0 << 5)
!                           + ((ux0 >> 27) & -16) + ((uy0 >> 28) & -8))
!   7. fdtos, then st to the output.  The output pointer alternates between
!      %o1 and %o2, each being set to the other plus stridez.
!
! Registers that carry one element across stanzas (from the code below):
!   slot   quotient x0   cadd0   ldiff0 / cmul0_ind
!    0       %f12         %f34      %l7
!    1       %f10         %f32      %g1
!    2       %f8          %f30      %g5
!    3       %f6          %f28      %o0
!    4       %f62         %f26      %l5
!    5       %f14         %f36      %o7
!
! No branch in this block has the annul bit set, so every delay-slot
! instruction executes whether or not its branch is taken.
!
! .coN, .contN and .denN are entry points used from outside this block
! (NOTE(review): presumably the .upN / .updateN handlers come back through
! them -- those handlers are not visible here).
!
! NOTE(review): on the straight-line path each "cmp %o5,_0x7f800000" reads
! the 0x00800000 that the preceding sethi put in %o5, so the following
! "bge .updateN" would not be taken unless a re-entry through .coN arrives
! with a different %o5 -- confirm the intent against the handlers.
!
! counter, stridex, stridey, stridez, cmul_arr, cadd_arr, K0..K9,
! _0x7fffffff and _0x7f800000 are symbols defined outside this block.
1064.main_loop:
! Stanza 1: load ux0 (5_1), divide (4_1), store (4_2), classify and
! fetch the operands of (5_1).
1065	lda	[%i3]0x82,%l4		! (5_1) ux0 = *(int*)px;
1066	nop
1067	fdivd	%f40,%f2,%f62		! (4_1) x0 = y0 / x0;
1068	faddd	%f24,K2,%f40		! (0_1) dtmp0 += K2;
1069
1070	fdtos	%f22,%f22		! (4_2) ftmp0 = (float)dtmp0;
1071	and	%o4,-8,%o4		! (4_1) signy0 &= -8;
1072	st	%f22,[%o1]		! (4_2) *pz = ftmp0;
1073	fmuld	%f6,%f6,%f24		! (3_1) x20 = x0 * x0;
1074
1075	ldd	[cmul_arr+%o7],%f0	! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1076	add	%l6,%o5,%o1		! (4_1) ltmp0 += signx0;
1077	fmuld	%f26,%f16,%f26		! (2_1) dtmp0 *= x20;
1078
1079	fmuld	%f38,%f14,%f14		! (5_2) x0 = dtmp0 * x0;
1080	and	%l4,_0x7fffffff,%l6	! (5_1) ax0 = ux0 & 0x7fffffff;
1081	sethi	%hi(0x00800000),%o5
1082	faddd	%f4,K4,%f4		! (1_1) dtmp0 += K4;
1083
1084	and	%l3,_0x7fffffff,%o7	! (5_1) ay0 = uy0 & 0x7fffffff;
1085	fmuld	%f40,%f20,%f38		! (0_1) dtmp0 *= x20;
1086
! Range checks on ax0 / ay0 for (5_1); the fmuld/faddd after each branch is
! its delay slot.
1087	cmp	%l6,%o5
1088	bl,pn	%icc,.up0
1089	fmuld	K9,%f24,%f40		! (3_1) dtmp0 = K9 * x20;
1090.co0:
1091	nop
1092	cmp	%o7,%o5
1093	bl,pn	%icc,.up1
1094	faddd	%f26,K6,%f22		! (2_1) dtmp0 += K6;
1095.co1:
1096	ldd	[%o1+%o4],%f26		! (4_1) cadd0 = *(double*)(ltmp0 + signy0);
1097	cmp	%l6,_0x7f800000
1098	bge,pn	%icc,.up2
1099	fmuld	%f4,%f18,%f4		! (1_1) dtmp0 *= x20;
1100.co2:
1101	sub	%l6,%o7,%o1		! (5_1) ldiff0 = ax0 - ay0;
1102	cmp	%o7,_0x7f800000
1103	bge,pn	%icc,.up3
! The fmuld after the blank line is the delay slot of the bge above.
1104
1105	fmuld	%f0,%f14,%f14		! (5_2) dtmp0 = cmul0 * x0;
1106.co3:
1107	sra	%o1,31,%o7		! (5_1) ldiff0 >>= 31;
1108	sub	%i3,%i1,%l6		! (5_1) addrc0 = (char*)px - (char*)py;
1109	faddd	%f38,K1,%f38		! (0_1) dtmp0 += K1;
1110
1111	faddd	%f40,K8,%f40		! (3_1) dtmp0 += K8;
1112	and	%l6,%o7,%o1		! (5_1) addrc0 &= ldiff0;
1113	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;
1114
1115	lda	[%i1+%o1]0x82,%f0	! (5_1) fy0 = *(float*)((char*)py + addrc0);
1116	sll	%o7,5,%l6		! (5_1) ltmp0 = ldiff0 << 5;
1117	sub	%i3,%o1,%o4		! (5_1) (char*)px - addrc0;
1118	faddd	%f4,K3,%f4		! (1_1) dtmp0 += K3;
1119
1120	lda	[%o4]0x82,%f2		! (5_1) fx0 = *(float*)((char*)px - addrc0);
1121
1122	fmuld	%f38,%f20,%f38		! (0_1) dtmp0 *= x20;
1123	cmp	%o5,_0x7f800000		! (5_1) b0 ? 0x7f800000
1124	bge,pn	%icc,.update10		! (5_1) if ( b0 > 0x7f800000 )
1125	faddd	%f36,%f14,%f20		! (5_2) dtmp0 = cadd0 + dtmp0;
1126.cont10:
1127	fmuld	%f40,%f24,%f36		! (3_1) dtmp0 *= x20;
1128	nop
1129	fstod	%f0,%f40		! (5_1) y0 = (double)fy0;
1130
1131	faddd	%f22,K5,%f14		! (2_1) dtmp0 += K5;
1132	add	%o2,stridez,%o1		! pz += stridez
1133	fmuld	%f4,%f18,%f4		! (1_1) dtmp0 *= x20;
1134
1135	sll	%l7,3,%l7		! (0_1) cmul0_ind = ldiff0 << 3;
1136	add	%i3,stridex,%i3		! px += stridex
1137	fstod	%f2,%f2			! (5_1) x0 = (double)fx0;
1138.den0:
! Stanza 2: load uy0/ux0 (0_0), divide (5_1), store (5_2), classify and
! fetch the operands of (0_0).
1139	sra	%l3,28,%o4		! (5_1) signy0 = uy0 >> 28;
1140	add	%i1,stridey,%i1		! py += stridey
1141
1142	faddd	%f36,K7,%f36		! (3_1) dtmp0 += K7;
1143	sra	%l4,27,%o5		! (5_1) signx0 = ux0 >> 27;
1144
1145	lda	[%i1]0x82,%l4		! (0_0) uy0 = *(int*)py;
1146	add	%l6,cadd_arr,%l6	! (5_1) ltmp0 += (char*)cadd_arr;
1147	fmuld	%f14,%f16,%f22		! (2_1) dtmp0 *= x20;
1148	faddd	%f38,K0,%f38		! (0_1) dtmp0 += K0;
1149
1150	lda	[%i3]0x82,%l3		! (0_0) ux0 = *(int*)px;
1151	and	%o5,-16,%o5		! (5_1) signx0 &= -16;
1152	fdivd	%f40,%f2,%f14		! (5_1) x0 = y0 / x0;
1153	faddd	%f4,K2,%f40		! (1_1) dtmp0 += K2;
1154
1155	fdtos	%f20,%f2		! (5_2) ftmp0 = (float)dtmp0;
1156	st	%f2,[%o2]		! (5_2) *pz = ftmp0;
1157	fmuld	%f62,%f62,%f4		! (4_1) x20 = x0 * x0;
1158
1159	ldd	[cmul_arr+%l7],%f0	! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1160	add	%l6,%o5,%o2		! (5_1) ltmp0 += signx0;
1161	and	%o4,-8,%o4		! (5_1) signy0 &= -8;
1162	fmuld	%f36,%f24,%f36		! (3_1) dtmp0 *= x20;
1163
1164	fmuld	%f38,%f12,%f12		! (0_1) x0 = dtmp0 * x0;
1165	and	%l4,_0x7fffffff,%l7	! (0_0) ay0 = uy0 & 0x7fffffff;
1166	sethi	%hi(0x00800000),%o5
1167	faddd	%f22,K4,%f22		! (2_1) dtmp0 += K4;
1168
1169	and	%l3,_0x7fffffff,%l6	! (0_0) ax0 = ux0 & 0x7fffffff;
1170	fmuld	%f40,%f18,%f38		! (1_1) dtmp0 *= x20;
1171
1172	cmp	%l7,%o5
1173	bl,pn	%icc,.up4
1174	fmuld	K9,%f4,%f40		! (4_1) dtmp0 = K9 * x20;
1175.co4:
1176	nop
1177	cmp	%l6,%o5
1178	bl,pn	%icc,.up5
1179	faddd	%f36,K6,%f20		! (3_1) dtmp0 += K6;
1180.co5:
1181	ldd	[%o2+%o4],%f36		! (5_1) cadd0 = *(double*)(ltmp0 + signy0);
1182	cmp	%l7,_0x7f800000
1183	bge,pn	%icc,.up6
1184	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;
1185.co6:
1186	sub	%l6,%l7,%o2		! (0_0) ldiff0 = ax0 - ay0;
1187	cmp	%l6,_0x7f800000
1188	bge,pn	%icc,.up7
! The fmuld after the blank line is the delay slot of the bge above.
1189
1190	fmuld	%f0,%f12,%f12		! (0_1) dtmp0 = cmul0 * x0;
1191.co7:
1192	sra	%o2,31,%l7		! (0_0) ldiff0 >>= 31;
1193	sub	%i3,%i1,%l6		! (0_0) addrc0 = (char*)px - (char*)py;
1194	faddd	%f38,K1,%f38		! (1_1) dtmp0 += K1;
1195
1196	faddd	%f40,K8,%f40		! (4_1) dtmp0 += K8;
1197	and	%l6,%l7,%o2		! (0_0) addrc0 &= ldiff0;
1198	fmuld	%f20,%f24,%f20		! (3_1) dtmp0 *= x20;
1199
1200	lda	[%i1+%o2]0x82,%f0	! (0_0) fy0 = *(float*)((char*)py + addrc0);
1201	sll	%g1,3,%g1		! (1_1) cmul0_ind = ldiff0 << 3;
1202	sub	%i3,%o2,%o4		! (0_0) (char*)px - addrc0
1203	faddd	%f22,K3,%f22		! (2_1) dtmp0 += K3;
1204
1205	lda	[%o4]0x82,%f2		! (0_0) fx0 = *(float*)((char*)px - addrc0);
1206	sll	%l7,5,%l6		! (0_0) ltmp0 = ldiff0 << 5;
1207	add	%o1,stridez,%o2		! pz += stridez
1208
1209	fmuld	%f38,%f18,%f38		! (1_1) dtmp0 *= x20;
1210	cmp	%o5,_0x7f800000		! (0_0) b0 ? 0x7f800000
1211	bge,pn	%icc,.update11		! (0_0) if ( b0 > 0x7f800000 )
1212	faddd	%f34,%f12,%f18		! (0_1) dtmp0 = cadd0 + dtmp0;
1213.cont11:
1214	fmuld	%f40,%f4,%f34		! (4_1) dtmp0 *= x20;
1215	sra	%l3,27,%o5		! (0_0) signx0 = ux0 >> 27;
1216	add	%i3,stridex,%i3		! px += stridex
1217	fstod	%f0,%f40		! (0_0) y0 = (double)fy0;
1218
1219	faddd	%f20,K5,%f12		! (3_1) dtmp0 += K5;
1220	add	%i1,stridey,%i1		! py += stridey
1221	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;
1222
1223	lda	[%i1]0x82,%l3		! (1_0) uy0 = *(int*)py;
1224	sra	%l4,28,%o4		! (0_0) signy0 = uy0 >> 28;
1225	add	%l6,cadd_arr,%l6	! (0_0) ltmp0 += (char*)cadd_arr;
1226	fstod	%f2,%f2			! (0_0) x0 = (double)fx0;
1227.den1:
! Stanza 3: load ux0 (1_0), divide (0_0), store (0_1), classify and
! fetch the operands of (1_0).
1228	lda	[%i3]0x82,%l4		! (1_0) ux0 = *(int*)px;
1229	and	%o5,-16,%o5		! (0_0) signx0 &= -16;
1230	faddd	%f34,K7,%f34		! (4_1) dtmp0 += K7;
1231
1232	fmuld	%f12,%f24,%f20		! (3_1) dtmp0 *= x20;
1233	and	%o4,-8,%o4		! (0_0) signy0 &= -8;
1234	faddd	%f38,K0,%f38		! (1_1) dtmp0 += K0;
1235
1236	fdivd	%f40,%f2,%f12		! (0_0) x0 = y0 / x0;
1237	faddd	%f22,K2,%f40		! (2_1) dtmp0 += K2;
1238
1239	fdtos	%f18,%f2		! (0_1) ftmp0 = (float)dtmp0;
1240	nop
1241	st	%f2,[%o1]		! (0_1) *pz = ftmp0
1242	fmuld	%f14,%f14,%f22		! (5_1) x20 = x0 * x0;
1243
1244	ldd	[cmul_arr+%g1],%f0	! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1245	add	%l6,%o5,%o1		! (0_0) ltmp0 += signx0;
1246	fmuld	%f34,%f4,%f34		! (4_1) dtmp0 *= x20;
1247
1248	fmuld	%f38,%f10,%f10		! (1_1) x0 = dtmp0 * x0;
1249	and	%l4,_0x7fffffff,%l6	! (1_0) ax0 = ux0 & 0x7fffffff;
1250	sethi	%hi(0x00800000),%o5
1251	faddd	%f20,K4,%f20		! (3_1) dtmp0 += K4;
1252
1253	and	%l3,_0x7fffffff,%g1	! (1_0) ay0 = uy0 & 0x7fffffff;
1254	fmuld	%f40,%f16,%f38		! (2_1) dtmp0 *= x20;
1255
1256	cmp	%l6,%o5
1257	bl,pn	%icc,.up8
1258	fmuld	K9,%f22,%f40		! (5_1) dtmp0 = K9 * x20;
1259.co8:
1260	nop
1261	cmp	%g1,%o5
1262	bl,pn	%icc,.up9
1263	faddd	%f34,K6,%f18		! (4_1) dtmp0 += K6;
1264.co9:
1265	ldd	[%o1+%o4],%f34		! (0_0) cadd0 = *(double*)(ltmp0 + signy0);
1266	cmp	%l6,_0x7f800000
1267	bge,pn	%icc,.up10
1268	fmuld	%f20,%f24,%f20		! (3_1) dtmp0 *= x20;
1269.co10:
1270	sub	%l6,%g1,%o1		! (1_0) ldiff0 = ax0 - ay0;
1271	cmp	%g1,_0x7f800000
1272	bge,pn	%icc,.up11
! The fmuld after the blank line is the delay slot of the bge above.
1273
1274	fmuld	%f0,%f10,%f10		! (1_1) dtmp0 = cmul0 * x0;
1275.co11:
1276	sra	%o1,31,%g1		! (1_0) ldiff0 >>= 31;
1277	sub	%i3,%i1,%l6		! (1_0) addrc0 = (char*)px - (char*)py;
1278	faddd	%f38,K1,%f38		! (2_1) dtmp0 += K1;
1279
1280	faddd	%f40,K8,%f40		! (5_1) dtmp0 += K8;
1281	and	%l6,%g1,%o1		! (1_0) addrc0 &= ldiff0;
1282	fmuld	%f18,%f4,%f18		! (4_1) dtmp0 *= x20;
1283
1284	lda	[%i1+%o1]0x82,%f0	! (1_0) fy0 = *(float*)((char*)py + addrc0);
1285	sll	%g5,3,%g5		! (2_1) cmul0_ind = ldiff0 << 3;
1286	sub	%i3,%o1,%o4		! (1_0) (char*)px - addrc0;
1287	faddd	%f20,K3,%f20		! (3_1) dtmp0 += K3;
1288
1289	lda	[%o4]0x82,%f2		! (1_0) fx0 = *(float*)((char*)px - addrc0);
1290	sll	%g1,5,%l6		! (1_0) ltmp0 = ldiff0 << 5;
1291	add	%o2,stridez,%o1		! pz += stridez
1292
1293	fmuld	%f38,%f16,%f38		! (2_1) dtmp0 *= x20;
1294	cmp	%o5,_0x7f800000		! (1_0) b0 ? 0x7f800000
1295	bge,pn	%icc,.update12		! (1_0) if ( b0 > 0x7f800000 )
1296	faddd	%f32,%f10,%f16		! (1_1) dtmp0 = cadd0 + dtmp0;
1297.cont12:
1298	fmuld	%f40,%f22,%f32		! (5_1) dtmp0 *= x20;
1299	add	%i1,stridey,%i1		! py += stridey
1300	nop
1301	fstod	%f0,%f40		! (1_0) y0 = (double)fy0;
1302
1303	faddd	%f18,K5,%f10		! (4_1) dtmp0 += K5;
1304	sra	%l4,27,%o5		! (1_0) signx0 = ux0 >> 27;
1305	add	%i3,stridex,%i3		! px += stridex
1306	fmuld	%f20,%f24,%f20		! (3_1) dtmp0 *= x20;
1307
1308	sra	%l3,28,%o4		! (1_0) signy0 = uy0 >> 28;
1309	add	%l6,cadd_arr,%l6	! (1_0) ltmp0 += (char*)cadd_arr;
1310	fstod	%f2,%f2			! (1_0) x0 = (double)fx0;
1311.den2:
! Stanza 4: load uy0/ux0 (2_0), divide (1_0), store (1_1), classify and
! fetch the operands of (2_0).
1312	faddd	%f32,K7,%f32		! (5_1) dtmp0 += K7;
1313	and	%o5,-16,%o5		! (1_0) signx0 &= -16;
1314	and	%o4,-8,%o4		! (1_0) signy0 &= -8;
1315
1316	lda	[%i1]0x82,%l4		! (2_0) uy0 = *(int*)py;
1317	fmuld	%f10,%f4,%f18		! (4_1) dtmp0 *= x20;
1318	faddd	%f38,K0,%f38		! (2_1) dtmp0 += K0;
1319
1320	lda	[%i3]0x82,%l3		! (2_0) ux0 = *(int*)px;
1321	fdivd	%f40,%f2,%f10		! (1_0) x0 = y0 / x0;
1322	faddd	%f20,K2,%f40		! (3_1) dtmp0 += K2;
1323
1324	fdtos	%f16,%f2		! (1_1) ftmp0 = (float)dtmp0;
1325	nop
1326	st	%f2,[%o2]		! (1_1) *pz = ftmp0;
1327	fmuld	%f12,%f12,%f20		! (0_0) x20 = x0 * x0;
1328
1329	ldd	[cmul_arr+%g5],%f0	! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1330	add	%l6,%o5,%o2		! (1_0) ltmp0 += signx0;
1331	fmuld	%f32,%f22,%f32		! (5_1) dtmp0 *= x20;
1332
1333	fmuld	%f38,%f8,%f8		! (2_1) x0 = dtmp0 * x0;
1334	and	%l3,_0x7fffffff,%l6	! (2_0) ax0 = ux0 & 0x7fffffff;
1335	sethi	%hi(0x00800000),%o5
1336	faddd	%f18,K4,%f18		! (4_1) dtmp0 += K4;
1337
1338	and	%l4,_0x7fffffff,%g5	! (2_0) ay0 = uy0 & 0x7fffffff;
1339	fmuld	%f40,%f24,%f38		! (3_1) dtmp0 *= x20;
1340
1341	cmp	%l6,%o5
1342	bl,pn	%icc,.up12
1343	fmuld	K9,%f20,%f40		! (0_0) dtmp0 = K9 * x20;
1344.co12:
1345	nop
1346	cmp	%g5,%o5
1347	bl,pn	%icc,.up13
1348	faddd	%f32,K6,%f16		! (5_1) dtmp0 += K6;
1349.co13:
1350	ldd	[%o2+%o4],%f32		! (1_0) cadd0 = *(double*)(ltmp0 + signy0);
1351	cmp	%l6,_0x7f800000
1352	bge,pn	%icc,.up14
1353	fmuld	%f18,%f4,%f18		! (4_1) dtmp0 *= x20;
1354.co14:
1355	sub	%l6,%g5,%o2		! (2_0) ldiff0 = ax0 - ay0;
1356	cmp	%g5,_0x7f800000
1357	bge,pn	%icc,.up15
! The fmuld after the blank line is the delay slot of the bge above.
1358
1359	fmuld	%f0,%f8,%f8		! (2_1) dtmp0 = cmul0 * x0;
1360.co15:
1361	sra	%o2,31,%g5		! (2_0) ldiff0 >>= 31;
1362	sub	%i3,%i1,%l6		! (2_0) addrc0 = (char*)px - (char*)py;
1363	faddd	%f38,K1,%f38		! (3_1) dtmp0 += K1;
1364
1365	faddd	%f40,K8,%f40		! (0_0) dtmp0 += K8;
1366	and	%l6,%g5,%o2		! (2_0) addrc0 &= ldiff0;
1367	fmuld	%f16,%f22,%f16		! (5_1) dtmp0 *= x20;
1368
1369	lda	[%i1+%o2]0x82,%f0	! (2_0) fy0 = *(float*)((char*)py + addrc0);
1370	sub	%i3,%o2,%o4		! (2_0) (char*)px - addrc0;
1371	add	%o1,stridez,%o2		! pz += stridez
1372	faddd	%f18,K3,%f18		! (4_1) dtmp0 += K3;
1373
1374	lda	[%o4]0x82,%f2		! (2_0) fx0 = *(float*)((char*)px - addrc0);
1375	sll	%o0,3,%o0		! (3_1) cmul0_ind = ldiff0 << 3;
1376	add	%i3,stridex,%i3		! px += stridex
1377
1378	fmuld	%f38,%f24,%f38		! (3_1) dtmp0 *= x20;
1379	cmp	%o5,_0x7f800000		! (2_0) b0 ? 0x7f800000
1380	bge,pn	%icc,.update13		! (2_0) if ( b0 > 0x7f800000 )
1381	faddd	%f30,%f8,%f24		! (2_1) dtmp0 = cadd0 + dtmp0;
1382.cont13:
1383	fmuld	%f40,%f20,%f30		! (0_0) dtmp0 *= x20;
1384	sll	%g5,5,%l6		! (2_0) ltmp0 = ldiff0 << 5;
1385	add	%i1,stridey,%i1		! py += stridey
1386	fstod	%f0,%f40		! (2_0) y0 = (double)fy0;
1387
1388	faddd	%f16,K5,%f8		! (5_1) dtmp0 += K5;
1389	sra	%l3,27,%o5		! (2_0) signx0 = ux0 >> 27;
1390	fmuld	%f18,%f4,%f18		! (4_1) dtmp0 *= x20;
1391
1392	fstod	%f2,%f2			! (2_0) x0 = (double)fx0;
1393	sra	%l4,28,%o4		! (2_0) signy0 = uy0 >> 28;
1394	add	%l6,cadd_arr,%l6	! (2_0) ltmp0 += (char*)cadd_arr;
1395.den3:
! Stanza 5: load uy0/ux0 (3_0), divide (2_0), store (2_1), classify and
! fetch the operands of (3_0).
1396	lda	[%i1]0x82,%l3		! (3_0) uy0 = *(int*)py;
1397	and	%o5,-16,%o5		! (2_0) signx0 &= -16;
1398	faddd	%f30,K7,%f30		! (0_0) dtmp0 += K7;
1399
1400	lda	[%i3]0x82,%l4		! (3_0) ux0 = *(int*)px;
1401	fmuld	%f8,%f22,%f16		! (5_1) dtmp0 *= x20;
1402	faddd	%f38,K0,%f38		! (3_1) dtmp0 += K0;
1403
1404	fdivd	%f40,%f2,%f8		! (2_0) x0 = y0 / x0;
1405	faddd	%f18,K2,%f40		! (4_1) dtmp0 += K2;
1406
1407	fdtos	%f24,%f1		! (2_1) ftmp0 = (float)dtmp0;
1408	st	%f1,[%o1]		! (2_1) *pz = ftmp0;
1409	fmuld	%f10,%f10,%f18		! (1_0) x20 = x0 * x0;
1410
1411	ldd	[cmul_arr+%o0],%f2	! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1412	add	%l6,%o5,%o1		! (2_0) ltmp0 += signx0;
1413	and	%o4,-8,%o4		! (2_0) signy0 &= -8;
1414	fmuld	%f30,%f20,%f30		! (0_0) dtmp0 *= x20;
1415
1416	fmuld	%f38,%f6,%f6		! (3_1) x0 = dtmp0 * x0;
1417	and	%l4,_0x7fffffff,%l6	! (3_0) ax0 = ux0 & 0x7fffffff;
1418	sethi	%hi(0x00800000),%o5
1419	faddd	%f16,K4,%f24		! (5_1) dtmp0 += K4;
1420
1421	and	%l3,_0x7fffffff,%o0	! (3_0) ay0 = uy0 & 0x7fffffff;
1422	fmuld	%f40,%f4,%f38		! (4_1) dtmp0 *= x20;
1423
1424	cmp	%l6,%o5
1425	bl,pn	%icc,.up16
1426	fmuld	K9,%f18,%f40		! (1_0) dtmp0 = K9 * x20;
1427.co16:
1428	nop
1429	cmp	%o0,%o5
1430	bl,pn	%icc,.up17
1431	faddd	%f30,K6,%f16		! (0_0) dtmp0 += K6;
1432.co17:
1433	ldd	[%o1+%o4],%f30		! (2_0) cadd0 = *(double*)(ltmp0 + signy0);
1434	cmp	%l6,_0x7f800000
1435	bge,pn	%icc,.up18
1436	fmuld	%f24,%f22,%f24		! (5_1) dtmp0 *= x20;
1437.co18:
1438	sub	%l6,%o0,%o1		! (3_0) ldiff0 = ax0 - ay0;
1439	cmp	%o0,_0x7f800000
1440	bge,pn	%icc,.up19
! The fmuld after the blank line is the delay slot of the bge above.
1441
1442	fmuld	%f2,%f6,%f6		! (3_1) dtmp0 = cmul0 * x0;
1443.co19:
1444	sra	%o1,31,%o0		! (3_0) ldiff0 >>= 31;
1445	sub	%i3,%i1,%l6		! (3_0) addrc0 = (char*)px - (char*)py;
1446	faddd	%f38,K1,%f38		! (4_1) dtmp0 += K1;
1447
1448	faddd	%f40,K8,%f40		! (1_0) dtmp0 += K8;
1449	and	%l6,%o0,%o1		! (3_0) addrc0 &= ldiff0;
1450	fmuld	%f16,%f20,%f16		! (0_0) dtmp0 *= x20;
1451
1452	lda	[%i1+%o1]0x82,%f0	! (3_0) fy0 = *(float*)((char*)py + addrc0);
1453	sub	%i3,%o1,%o4		! (3_0) (char*)px - addrc0;
1454	add	%o2,stridez,%o1		! pz += stridez
1455	faddd	%f24,K3,%f24		! (5_1) dtmp0 += K3;
1456
1457	lda	[%o4]0x82,%f1		! (3_0) fx0 = *(float*)((char*)px - addrc0);
1458	sll	%l5,3,%l5		! (4_1) cmul0_ind = ldiff0 << 3;
1459	add	%i3,stridex,%i3		! px += stridex
1460
1461	fmuld	%f38,%f4,%f38		! (4_1) dtmp0 *= x20;
1462	cmp	%o5,_0x7f800000		! (3_0) b0 ? 0x7f800000
1463	bge,pn	%icc,.update14		! (3_0) if ( b0 > 0x7f800000 )
1464	faddd	%f28,%f6,%f4		! (3_1) dtmp0 = cadd0 + dtmp0;
1465.cont14:
1466	fmuld	%f40,%f18,%f28		! (1_0) dtmp0 *= x20;
1467	sll	%o0,5,%l6		! (3_0) ltmp0 = ldiff0 << 5;
1468	add	%i1,stridey,%i1		! py += stridey
1469	fstod	%f0,%f40		! (3_0) y0 = (double)fy0;
1470
1471	faddd	%f16,K5,%f2		! (0_0) dtmp0 += K5;
1472	sra	%l4,27,%o5		! (3_0) signx0 = ux0 >> 27;
1473	fmuld	%f24,%f22,%f24		! (5_1) dtmp0 *= x20;
1474
1475	sra	%l3,28,%o4		! (3_0) signy0 = uy0 >> 28;
1476	fstod	%f1,%f16		! (3_0) x0 = (double)fx0;
1477.den4:
! Stanza 6: load uy0/ux0 (4_0), divide (3_0), store (3_1), classify and
! fetch the operands of (4_0).
1478	faddd	%f28,K7,%f28		! (1_0) dtmp0 += K7;
1479	add	%l6,cadd_arr,%l6	! (3_0) ltmp0 += (char*)cadd_arr;
1480	and	%o5,-16,%o5		! (3_0) signx0 &= -16;
1481
1482	lda	[%i1]0x82,%l4		! (4_0) uy0 = *(int*)py;
1483	fmuld	%f2,%f20,%f2		! (0_0) dtmp0 *= x20;
1484	faddd	%f38,K0,%f38		! (4_1) dtmp0 += K0;
1485
1486	lda	[%i3]0x82,%l3		! (4_0) ux0 = *(int*)px;
1487	fdivd	%f40,%f16,%f6		! (3_0) x0 = y0 / x0;
1488	faddd	%f24,K2,%f24		! (5_1) dtmp0 += K2;
1489
1490	fdtos	%f4,%f1			! (3_1) ftmp0 = (float)dtmp0;
1491	and	%o4,-8,%o4		! (3_0) signy0 &= -8;
1492	st	%f1,[%o2]		! (3_1) *pz = ftmp0;
1493	fmuld	%f8,%f8,%f16		! (2_0) x20 = x0 * x0;
1494
1495	ldd	[cmul_arr+%l5],%f0	! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1496	add	%l6,%o5,%o2		! (3_0) ltmp0 += signx0;
1497	fmuld	%f28,%f18,%f28		! (1_0) dtmp0 *= x20;
1498
1499	fmuld	%f38,%f62,%f62		! (4_1) x0 = dtmp0 * x0;
1500	and	%l3,_0x7fffffff,%l6	! (4_0) ax0 = ux0 & 0x7fffffff;
1501	sethi	%hi(0x00800000),%o5
1502	faddd	%f2,K4,%f2		! (0_0) dtmp0 += K4;
1503
1504	and	%l4,_0x7fffffff,%l5	! (4_0) ay0 = uy0 & 0x7fffffff;
1505	fmuld	%f24,%f22,%f38		! (5_1) dtmp0 *= x20;
1506
1507	cmp	%l6,%o5
1508	bl,pn	%icc,.up20
1509	fmuld	K9,%f16,%f40		! (2_0) dtmp0 = K9 * x20;
1510.co20:
1511	nop
1512	cmp	%l5,%o5
1513	bl,pn	%icc,.up21
1514	faddd	%f28,K6,%f4		! (1_0) dtmp0 += K6;
1515.co21:
1516	ldd	[%o2+%o4],%f28		! (3_0) cadd0 = *(double*)(ltmp0 + signy0);
1517	cmp	%l6,_0x7f800000
1518	bge,pn	%icc,.up22
1519	fmuld	%f2,%f20,%f24		! (0_0) dtmp0 *= x20;
1520.co22:
1521	sub	%l6,%l5,%o2		! (4_0) ldiff0 = ax0 - ay0;
1522	cmp	%l5,_0x7f800000
1523	bge,pn	%icc,.up23
! The fmuld after the blank line is the delay slot of the bge above.
1524
1525	fmuld	%f0,%f62,%f62		! (4_1) dtmp0 = cmul0 * x0;
1526.co23:
1527	sra	%o2,31,%l5		! (4_0) ldiff0 >>= 31;
1528	sub	%i3,%i1,%l6		! (4_0) addrc0 = (char*)px - (char*)py;
1529	faddd	%f38,K1,%f38		! (5_1) dtmp0 += K1;
1530
1531	faddd	%f40,K8,%f40		! (2_0) dtmp0 += K8;
1532	and	%l6,%l5,%o2		! (4_0) addrc0 &= ldiff0;
1533	fmuld	%f4,%f18,%f4		! (1_0) dtmp0 *= x20;
1534
1535	lda	[%i1+%o2]0x82,%f0	! (4_0) fy0 = *(float*)((char*)py + addrc0);
1536	sub	%i3,%o2,%o4		! (4_0) (char*)px - addrc0;
1537	add	%o1,stridez,%o2		! pz += stridez
1538	faddd	%f24,K3,%f24		! (0_0) dtmp0 += K3;
1539
1540	lda	[%o4]0x82,%f2		! (4_0) fx0 = *(float*)((char*)px - addrc0);
1541	sll	%o7,3,%o7		! (5_1) cmul0_ind = ldiff0 << 3;
1542	add	%i3,stridex,%i3		! px += stridex
1543
1544	fmuld	%f38,%f22,%f38		! (5_1) dtmp0 *= x20;
1545	cmp	%o5,_0x7f800000		! (4_0) b0 ? 0x7f800000
1546	bge,pn	%icc,.update15		! (4_0) if ( b0 > 0x7f800000 )
1547	faddd	%f26,%f62,%f22		! (4_1) dtmp0 = cadd0 + dtmp0;
1548.cont15:
1549	fmuld	%f40,%f16,%f26		! (2_0) dtmp0 *= x20;
1550	sll	%l5,5,%l6		! (4_0) ltmp0 = ldiff0 << 5;
1551	add	%i1,stridey,%i1		! py += stridey
1552	fstod	%f0,%f40		! (4_0) y0 = (double)fy0;
1553
1554	faddd	%f4,K5,%f62		! (1_0) dtmp0 += K5;
1555	sra	%l3,27,%o5		! (4_0) signx0 = ux0 >> 27;
1556	fmuld	%f24,%f20,%f24		! (0_0) dtmp0 *= x20;
1557
1558	fstod	%f2,%f2			! (4_0) x0 = (double)fx0;
1559	sra	%l4,28,%o4		! (4_0) signy0 = uy0 >> 28;
1560.den5:
! Loop control: pre-load uy0 for the next trip, take 6 off counter and go
! round again while the result is non-negative.  The faddd after bpos is its
! delay slot and executes on both the looping and the exiting path.
1561	lda	[%i1]0x82,%l3		! (5_0) uy0 = *(int*)py;
1562	subcc	counter,6,counter	! counter?
1563	add	%l6,cadd_arr,%l6	! (4_0) ltmp0 += (char*)cadd_arr;
1564	faddd	%f26,K7,%f26		! (2_0) dtmp0 += K7;
1565
1566	fmuld	%f62,%f18,%f4		! (1_0) dtmp0 *= x20;
1567	and	%o5,-16,%o5		! (4_0) signx0 &= -16;
1568	bpos,pt	%icc,.main_loop
1569	faddd	%f38,K0,%f38		! (5_1) dtmp0 += K0;
1570
! .tail: drain the software pipeline once .main_loop cannot run another
! full trip.
!
! Reached either by the "bneg .tail" just ahead of .main_loop (after
! "subcc counter,5") or by falling out of the bpos that closes .main_loop
! (after "subcc counter,6").  The first instruction adds 5 back to counter.
!
! The block then finishes, one at a time, the elements whose polynomial
! evaluation is already in flight -- (4_2), (5_2), (0_1), (1_1), (2_1) --
! and stores at most five results.  It reads cmul_arr but loads no new
! x/y inputs.
!
! Exit protocol: before the first store and after each of the first four,
! counter is tested (addcc / subcc ...,1).  If the result is negative the
! code branches to .begin.  Those branches are "bneg,a", so the "or" in the
! delay slot executes only when the branch is taken; it copies the pointer
! for the next output element (%o1 or %o2, which alternate and are stepped
! by stridez) into %o4.  The final "ba .begin" is unconditional and its
! delay-slot "or" always executes.
! NOTE(review): .begin is outside this block; it presumably picks up pz
! from %o4 -- confirm.
1571.tail:
1572	addcc	counter,5,counter
1573	bneg,a,pn	%icc,.begin
1574	or	%g0,%o1,%o4
1575
! Element (4_2): only the final conversion and store are left.
1576	faddd	%f24,K2,%f40		! (0_1) dtmp0 += K2;
1577
1578	fdtos	%f22,%f22		! (4_2) ftmp0 = (float)dtmp0;
1579	st	%f22,[%o1]		! (4_2) *pz = ftmp0;
1580
1581	subcc	counter,1,counter
1582	bneg,a,pn	%icc,.begin
1583	or	%g0,%o2,%o4
1584
! Element (5_2): scale by cmul0, add cadd0, store; the polynomials for
! (0_1), (1_1) and (2_1) keep advancing alongside.
1585	ldd	[cmul_arr+%o7],%f0	! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1586	fmuld	%f26,%f16,%f26		! (2_1) dtmp0 *= x20;
1587
1588	fmuld	%f38,%f14,%f14		! (5_2) x0 = dtmp0 * x0;
1589	faddd	%f4,K4,%f4		! (1_1) dtmp0 += K4;
1590
1591	fmuld	%f40,%f20,%f38		! (0_1) dtmp0 *= x20;
1592
1593
1594	faddd	%f26,K6,%f22		! (2_1) dtmp0 += K6;
1595
1596	fmuld	%f4,%f18,%f4		! (1_1) dtmp0 *= x20;
1597
1598	fmuld	%f0,%f14,%f14		! (5_2) dtmp0 = cmul0 * x0;
1599	faddd	%f38,K1,%f38		! (0_1) dtmp0 += K1;
1600
1601	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;
1602
1603	faddd	%f4,K3,%f4		! (1_1) dtmp0 += K3;
1604
1605	fmuld	%f38,%f20,%f38		! (0_1) dtmp0 *= x20;
1606	faddd	%f36,%f14,%f20		! (5_2) dtmp0 = cadd0 + dtmp0;
1607
1608	faddd	%f22,K5,%f14		! (2_1) dtmp0 += K5;
1609	add	%o2,stridez,%o1		! pz += stridez
1610	fmuld	%f4,%f18,%f4		! (1_1) dtmp0 *= x20;
1611
1612	sll	%l7,3,%l7		! (0_1) cmul0_ind = ldiff0 << 3;
1613
1614	fmuld	%f14,%f16,%f22		! (2_1) dtmp0 *= x20;
1615	faddd	%f38,K0,%f38		! (0_1) dtmp0 += K0;
1616
1617	faddd	%f4,K2,%f40		! (1_1) dtmp0 += K2;
1618
1619	fdtos	%f20,%f2		! (5_2) ftmp0 = (float)dtmp0;
1620	st	%f2,[%o2]		! (5_2) *pz = ftmp0;
1621
1622	subcc	counter,1,counter
1623	bneg,a,pn	%icc,.begin
1624	or	%g0,%o1,%o4
1625
! Element (0_1): finish and store; (1_1) and (2_1) keep advancing.
1626	ldd	[cmul_arr+%l7],%f0	! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1627
1628	fmuld	%f38,%f12,%f12		! (0_1) x0 = dtmp0 * x0;
1629	faddd	%f22,K4,%f22		! (2_1) dtmp0 += K4;
1630
1631	fmuld	%f40,%f18,%f38		! (1_1) dtmp0 *= x20;
1632
1633	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;
1634
1635	fmuld	%f0,%f12,%f12		! (0_1) dtmp0 = cmul0 * x0;
1636	faddd	%f38,K1,%f38		! (1_1) dtmp0 += K1;
1637
1638	sll	%g1,3,%g1		! (1_1) cmul0_ind = ldiff0 << 3;
1639	faddd	%f22,K3,%f22		! (2_1) dtmp0 += K3;
1640
1641	add	%o1,stridez,%o2		! pz += stridez
1642
1643	fmuld	%f38,%f18,%f38		! (1_1) dtmp0 *= x20;
1644	faddd	%f34,%f12,%f18		! (0_1) dtmp0 = cadd0 + dtmp0;
1645
1646	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;
1647
1648	faddd	%f38,K0,%f38		! (1_1) dtmp0 += K0;
1649
1650	faddd	%f22,K2,%f40		! (2_1) dtmp0 += K2;
1651
1652	fdtos	%f18,%f2		! (0_1) ftmp0 = (float)dtmp0;
1653	st	%f2,[%o1]		! (0_1) *pz = ftmp0
1654
1655	subcc	counter,1,counter
1656	bneg,a,pn	%icc,.begin
1657	or	%g0,%o2,%o4
1658
! Element (1_1): finish and store; (2_1) keeps advancing.
1659	ldd	[cmul_arr+%g1],%f0	! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1660
1661	fmuld	%f38,%f10,%f10		! (1_1) x0 = dtmp0 * x0;
1662
1663	fmuld	%f40,%f16,%f38		! (2_1) dtmp0 *= x20;
1664
1665	fmuld	%f0,%f10,%f10		! (1_1) dtmp0 = cmul0 * x0;
1666	faddd	%f38,K1,%f38		! (2_1) dtmp0 += K1;
1667
1668	sll	%g5,3,%g5		! (2_1) cmul0_ind = ldiff0 << 3;
1669
1670	add	%o2,stridez,%o1		! pz += stridez
1671
1672	fmuld	%f38,%f16,%f38		! (2_1) dtmp0 *= x20;
1673	faddd	%f32,%f10,%f16		! (1_1) dtmp0 = cadd0 + dtmp0;
1674
1675	faddd	%f38,K0,%f38		! (2_1) dtmp0 += K0;
1676
1677	fdtos	%f16,%f2		! (1_1) ftmp0 = (float)dtmp0;
1678	st	%f2,[%o2]		! (1_1) *pz = ftmp0;
1679
1680	subcc	counter,1,counter
1681	bneg,a,pn	%icc,.begin
1682	or	%g0,%o1,%o4
1683
! Element (2_1): last one in flight; finish, store and leave unconditionally.
1684	ldd	[cmul_arr+%g5],%f0	! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1685
1686	fmuld	%f38,%f8,%f8		! (2_1) x0 = dtmp0 * x0;
1687
1688	fmuld	%f0,%f8,%f8		! (2_1) dtmp0 = cmul0 * x0;
1689
1690	add	%o1,stridez,%o2		! pz += stridez
1691
1692	faddd	%f30,%f8,%f24		! (2_1) dtmp0 = cadd0 + dtmp0;
1693
1694	fdtos	%f24,%f1		! (2_1) ftmp0 = (float)dtmp0;
1695	st	%f1,[%o1]		! (2_1) *pz = ftmp0;
1696
1697	ba	.begin
1698	or	%g0,%o2,%o4
1699
1700	.align	16
1701.spec0:
1702	cmp	%l6,_0x7f800000		! ax0 ? 0x7f800000
1703	bg	2f			! if ( ax0 >= 0x7f800000 )
1704	srl	%l3,30,%l3		! signx0 = (unsigned)ux0 >> 30;
1705
1706	cmp	%l7,_0x7f800000		! ay0 ? 0x7f800000
1707	bg	2f			! if ( ay0 >= 0x7f800000 )
1708	and	%l3,2,%l3		! signx0 &= 2;
1709
1710	sra	%l4,31,%l4		! signy0 = uy0 >> 31;
1711	bne,a	1f			! if (ay0 != 0x7f800000)
1712	add	%l3,%l3,%l3		! signx0 += signx0;
1713
1714	cmp	%l6,_0x7f800000		! ax0 ? 0x7f800000
1715	bne,a	1f			! if ( ax0 != 0x7f800000 )
1716	add	%g0,2,%l3		! signx0 = 2
1717
1718	add	%l3,1,%l3		! signx0 ++;
17191:
1720	sll	%l4,3,%l4		! signy0 <<= 3;
1721	st	%l3,[%fp+tmp_pz]	! STORE signx0
1722
1723	ldd	[cmul_arr+88],%f0	! LOAD M_PI_4
1724
1725	ld	[%fp+tmp_pz],%f2	!