1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vrsqrt.S"
30
31#include "libm.h"
32
33	RO_DATA
34	.align	64
35
/*
 * .CONST_TBL - read-only double-precision constants used by __vrsqrt.
 *
 * Each entry is a pair of 32-bit words with the high word first (see
 * DONE = 0x3ff00000:00000000 = 1.0), i.e. 8 bytes per entry.  The code
 * addresses entries by literal byte offset from the table base, so the
 * order and number of entries must not be changed.
 *
 *   offset  name    role (per the algorithm listing in this file)
 *   0x00    K1      polynomial coefficients, evaluated in Horner form as
 *   0x08    K2        (((((K6*xx + K5)*xx + K4)*xx + K3)*xx + K2)*xx + K1)*xx
 *   0x10    K3
 *   0x18    K4
 *   0x20    K5
 *   0x28    K6
 *   0x30    DC0     AND mask: keeps the low 53 bits of the argument (the
 *                   lowest exponent bit and the 52 fraction bits)
 *   0x38    DC1     OR-ed in after DC0, giving an exponent field of
 *                   0x3fe or 0x3ff
 *   0x40    DC2     added with vis_fpadd32 (0x2000 into the upper word) and
 *   0x48    DC3     then AND-ed (mask 0x7fffc000 on the upper word, lower
 *                   word cleared) to form res_c from res
 *   0x50    DC4     AND mask keeping the low 51 bits; used in the
 *                   0x00080000 <= hx < 0x00100000 branch of the listing
 *   0x58    D2ON51  2^51, added after the integer-to-double conversion in
 *                   that same branch
 *   0x60    DONE    1.0, the numerator of "DONE / res" on the special paths
 */
36.CONST_TBL:
37	.word	0xbfe00000, 0x0000002f	! K1 =-5.00000000000005209867e-01;
38	.word	0x3fd80000, 0x00000058	! K2 = 3.75000000000004884257e-01;
39	.word	0xbfd3ffff, 0xff444bc8	! K3 =-3.12499999317136886551e-01;
40	.word	0x3fd17fff, 0xff5006fe	! K4 = 2.73437499359815081532e-01;
41	.word	0xbfcf80bb, 0xb33ef574	! K5 =-2.46116125605037803130e-01;
42	.word	0x3fcce0af, 0xf8156949	! K6 = 2.25606914648617522896e-01;
43
44	.word	0x001fffff, 0xffffffff	! DC0
45	.word	0x3fe00000, 0x00000000	! DC1
46	.word	0x00002000, 0x00000000	! DC2
47	.word	0x7fffc000, 0x00000000	! DC3
48	.word	0x0007ffff, 0xffffffff	! DC4
49
50	.word	0x43200000, 0x00000000	! D2ON51  = pow(2,51)
51	.word	0x3ff00000, 0x00000000	! DONE   = 1.0
52
/*
 * Integer register aliases.
 *
 *   stridex, stridey  set on entry to %i2 << 3 and %i4 << 3, so they are
 *                     added directly to the input and output pointers.
 *                     stridey shares %l7 with the scratch register handed
 *                     to PIC_SETUP/PIC_SET; it is written only after those
 *                     macros have run.
 *   counter           number of elements still to process in the current
 *                     pass; the main loop subtracts 7 per iteration.
 *   TBL               base address of __vlibm_TBL_rsqrt (set via PIC_SET).
 *   _0x7ff00000,      hold the constants their names spell out (loaded
 *   _0x00100000       with sethi); hx is compared against them to detect
 *                     the special-case ranges.
 */
53#define stridex		%l5
54#define stridey		%l7
55#define counter		%l0
56#define TBL		%l3
57#define _0x7ff00000	%o0
58#define _0x00100000	%o1
59
/*
 * Floating-point registers holding the .CONST_TBL constants of the same
 * name.  Note that %f4 (DONE) is also written as a temporary inside the
 * main loop; the code reloads DONE from table offset 0x60 at .begin1.
 */
60#define DC0		%f56
61#define DC1		%f54
62#define DC2		%f48
63#define DC3		%f46
64#define K6		%f42
65#define K5		%f20
66#define K4		%f52
67#define K3		%f50
68#define K2		%f14
69#define K1		%f12
70#define DONE		%f4
71
/*
 * Element count and input pointer that .begin copies into counter and
 * %i1 when it starts (or restarts) a pass; .begin then clears tmp_counter.
 */
72#define tmp_counter	%g5
73#define tmp_px		%o5
74
/*
 * Eight 8-byte scratch slots addressed as [%fp+tmpN].  The code stores
 * the integer exponent image (iexp << 52) with stx and reads it back
 * with ldd as the double dlexp.  STACK_BIAS is not defined in this file
 * (presumably supplied through libm.h - confirm).
 */
75#define tmp0		STACK_BIAS-0x40
76#define tmp1		STACK_BIAS-0x38
77#define tmp2		STACK_BIAS-0x30
78#define tmp3		STACK_BIAS-0x28
79#define tmp4		STACK_BIAS-0x20
80#define tmp5		STACK_BIAS-0x18
81#define tmp6		STACK_BIAS-0x10
82#define tmp7		STACK_BIAS-0x08
83
/*
 * tmps covers exactly the eight slots above (8 * 8 = 0x40 bytes) and is
 * added to the frame size in the save instruction at function entry.
 */
84! sizeof temp storage - must be a multiple of 16 for V9
85#define tmps		0x40
86
87!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
88!      !!!!!   algorithm   !!!!!
89!  ((float*)&res)[0] = ((float*)px)[0];
90!  ((float*)&res)[1] = ((float*)px)[1];
91!  hx = *(int*)px;
92!  if ( hx >= 0x7ff00000 )
93!  {
94!    res = DONE / res;
95!    ((float*)py)[0] = ((float*)&res)[0];
96!    ((float*)py)[1] = ((float*)&res)[1];
97!    px += stridex;
98!    py += stridey;
99!    continue;
100!  }
101!  if ( hx < 0x00100000 )
102!  {
103!    ax = hx & 0x7fffffff;
104!    lx = ((int*)px)[1];
105!
106!    if ( (ax | lx) == 0 )
107!    {
108!      res = DONE / res;
109!      ((float*)py)[0] = ((float*)&res)[0];
110!      ((float*)py)[1] = ((float*)&res)[1];
111!      px += stridex;
112!      py += stridey;
113!      continue;
114!    }
115!    else if ( hx >= 0 )
116!    {
117!      if ( hx < 0x00080000 )
118!      {
119!        res = *(long long*)&res;
120!        hx = *(int*)&res - (537 << 21);
121!      }
122!      else
123!      {
124!        res = vis_fand(res,DC4);
125!        res = *(long long*)&res;
126!        res += D2ON51;
127!        hx = *(int*)&res - (537 << 21);
128!      }
129!    }
130!    else
131!    {
132!      res = sqrt(res);
133!      ((float*)py)[0] = ((float*)&res)[0];
134!      ((float*)py)[1] = ((float*)&res)[1];
135!      px += stridex;
136!      py += stridey;
137!      continue;
138!    }
139!  }
140!
141!  iexp = hx >> 21;
142!  iexp = -iexp;
143!  iexp += 0x5fe;
144!  lexp = iexp << 52;
145!  dlexp = *(double*)&lexp;
146!  hx >>= 10;
147!  hx &= 0x7f8;
148!  hx += 8;
149!  hx &= -16;
150!
151!  res = vis_fand(res,DC0);
152!  res = vis_for(res,DC1);
153!  res_c = vis_fpadd32(res,DC2);
154!  res_c = vis_fand(res_c,DC3);
155!
156!  addr = (char*)arr + hx;
157!  dexp_hi = ((double*)addr)[0];
158!  dexp_lo = ((double*)addr)[1];
159!  dtmp0 = dexp_hi * dexp_hi;
160!  xx = res - res_c;
161!  xx *= dtmp0;
162!  res = K6 * xx;
163!  res += K5;
164!  res *= xx;
165!  res += K4;
166!  res *= xx;
167!  res += K3;
168!  res *= xx;
169!  res += K2;
170!  res *= xx;
171!  res += K1;
172!  res *= xx;
173!  res = dexp_hi * res;
174!  res += dexp_lo;
175!  res += dexp_hi;
176!
177!  res *= dlexp;
178!
179!  ((float*)py)[0] = ((float*)&res)[0];
180!  ((float*)py)[1] = ((float*)&res)[1];
181!
182!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
183
184	ENTRY(__vrsqrt)
185	save	%sp,-SA(MINFRAME)-tmps,%sp
186	PIC_SETUP(l7)
187	PIC_SET(l7,.CONST_TBL,o3)
188	PIC_SET(l7,__vlibm_TBL_rsqrt,l3)
189	wr	%g0,0x82,%asi
190
191	ldd	[%o3],K1
192	sethi	%hi(0x7ff00000),%o0
193	mov	%i3,%o4
194
195	ldd	[%o3+0x08],K2
196	sethi	%hi(0x00100000),%o1
197	mov	%i1,tmp_px
198
199	ldd	[%o3+0x10],K3
200	sll	%i2,3,stridex
201	mov	%i0,tmp_counter
202
203	ldd	[%o3+0x18],K4
204	sll	%i4,3,stridey
205
206	ldd	[%o3+0x20],K5
207	ldd	[%o3+0x28],K6
208	ldd	[%o3+0x30],DC0
209	ldd	[%o3+0x38],DC1
210	ldd	[%o3+0x40],DC2
211	ldd	[%o3+0x48],DC3
212
213.begin:
214	mov	tmp_counter,counter
215	mov	tmp_px,%i1
216	clr	tmp_counter
217.begin1:
218	cmp	counter,0
219	ble,pn	%icc,.exit
220	ldd	[%o3+0x60],DONE
221
222	lda	[%i1]%asi,%f0		! (6_0) ((float*)res)[0] = ((float*)px)[0];
223	sethi	%hi(0x7ffffc00),%i0
224
225	lda	[%i1+4]%asi,%f1		! (6_0) ((float*)res)[1] = ((float*)px)[1];
226	add	%i0,1023,%i0
227
228	fand	%f0,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
229
230	lda	[%i1]%asi,%g1		! (6_1) hx = *(int*)px;
231	sethi	%hi(0x00080000),%i4
232
233	lda	[%i1+4]%asi,%l4
234	add	%i1,stridex,%l6		! px += stridex
235
236	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
237	lda	[%l6]%asi,%f8		! (0_0) ((float*)res)[0] = ((float*)px)[0];
238	for	%f16,DC1,%f44		! (6_1) res = vis_for(res,DC1);
239
240	lda	[%l6+4]%asi,%f9		! (0_0) ((float*)res)[1] = ((float*)px)[1];
241	sra	%g1,10,%o2		! (6_1) hx >>= 10;
242	and	%g1,%i0,%i2
243
244	cmp	%g1,_0x7ff00000		! (6_1) hx ? 0x7ff00000
245	bge,pn	%icc,.spec0		! (6_1) if ( hx >= 0x7ff00000 )
246	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
247
248	cmp	%g1,_0x00100000		! (6_1) hx ? 0x00100000
249	bl,pn	%icc,.spec1		! (6_1) if ( hx < 0x00100000 )
250	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
251.cont_spec:
252	fand	%f8,DC0,%f16		! (0_0) res = vis_fand(res,DC0);
253
254	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
255
256	add	%o2,8,%l4		! (6_1) hx += 8;
257
258	add	%o7,1534,%o7		! (6_1) iexp += 0x5fe;
259
260	lda	[%l6]%asi,%g1		! (0_0) hx = *(int*)px;
261	sllx	%o7,52,%o7		! (6_1) iexp << 52;
262	and	%l4,-16,%l4		! (6_1) hx = -16;
263
264	add	%l4,TBL,%l4		! (6_1) addr = (char*)arr + hx;
265	stx	%o7,[%fp+tmp1]		! (6_1) dlexp = *(double*)lexp;
266
267	add	%l6,stridex,%l6		! px += stridex
268	ldd	[%l4],%f30		! (6_1) dtmp0 = ((double*)addr)[0];
269
270	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
271	lda	[%l6]%asi,%f0		! (1_0) ((float*)res)[0] = ((float*)px)[0];
272	for	%f16,DC1,%f28		! (0_0) res = vis_for(res,DC1);
273
274	sra	%g1,10,%o2		! (0_0) hx >>= 10;
275	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
276	lda	[%l6+4]%asi,%f1		! (1_0) ((float*)res)[1] = ((float*)px)[1];
277
278	cmp	%g1,_0x7ff00000		! (0_0) hx ? 0x7ff00000
279	bge,pn	%icc,.update0		! (0_0) if ( hx >= 0x7ff00000 )
280	fand	%f18,DC3,%f6		! (6_1) res_c = vis_fand(res_c,DC3);
281.cont0:
282	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
283	fmuld	%f30,%f30,%f10		! (6_1) dtmp0 = dexp_hi * dexp_hi;
284
285	cmp	%g1,_0x00100000		! (0_0) hx ? 0x00100000
286	bl,pn	%icc,.update1		! (0_0) if ( hx < 0x00100000 )
287	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
288.cont1:
289	fand	%f0,DC0,%f16		! (1_0) res = vis_fand(res,DC0);
290
291	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
292
293	add	%o2,8,%l2		! (0_0) hx += 8;
294	fsubd	%f44,%f6,%f6		! (6_1) xx = res - res_c;
295
296	lda	[%l6]%asi,%g1		! (1_0) hx = *(int*)px;
297	sllx	%o7,52,%o7		! (0_0) iexp << 52;
298	and	%l2,-16,%l2		! (0_0) hx = -16;
299
300	add	%l2,TBL,%l2		! (0_0) addr = (char*)arr + hx;
301	add	%l6,stridex,%l6		! px += stridex
302	stx	%o7,[%fp+tmp2]		! (0_0) dlexp = *(double*)lexp;
303
304	fmuld	%f6,%f10,%f26		! (6_1) xx *= dtmp0;
305	ldd	[%l2],%f10		! (0_0) dtmp0 = ((double*)addr)[0];
306
307	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
308	lda	[%l6]%asi,%f6		! (2_0) ((float*)res)[0] = ((float*)px)[0];
309	for	%f16,DC1,%f44		! (1_0) res = vis_for(res,DC1);
310
311	sra	%g1,10,%o2		! (1_0) hx >>= 10;
312	cmp	%g1,_0x7ff00000		! (1_0) hx ? 0x7ff00000
313	bge,pn	%icc,.update2		! (1_0) if ( hx >= 0x7ff00000 )
314	lda	[%l6+4]%asi,%f7		! (2_0) ((float*)res)[1] = ((float*)px)[1];
315.cont2:
316	fand	%f18,DC3,%f8		! (0_0) res_c = vis_fand(res_c,DC3);
317
318	fmuld	%f10,%f10,%f10		! (0_0) dtmp0 = dexp_hi * dexp_hi;
319	cmp	%g1,_0x00100000		! (1_0) hx ? 0x00100000
320	bl,pn	%icc,.update3		! (1_0) if ( hx < 0x00100000 )
321	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
322.cont3:
323	sub	%g0,%o7,%o7		! (1_0) iexp = -iexp;
324	fand	%f6,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
325
326	add	%o7,1534,%o7		! (1_0) iexp += 0x5fe;
327	fpadd32	%f44,DC2,%f18		! (1_0) res_c = vis_fpadd32(res,DC2);
328
329	fmuld	K6,%f26,%f62		! (6_1) res = K6 * xx;
330	add	%o2,8,%i2		! (1_0) hx += 8;
331	fsubd	%f28,%f8,%f32		! (0_0) xx = res - res_c;
332
333	lda	[%l6]%asi,%g1		! (2_0) hx = *(int*)px;
334	sllx	%o7,52,%o7		! (1_0) iexp << 52;
335	and	%i2,-16,%i2		! (1_0) hx = -16;
336
337	add	%i2,TBL,%i2		! (1_0) addr = (char*)arr + hx;
338	stx	%o7,[%fp+tmp3]		! (1_0) dlexp = *(double*)lexp;
339
340	fmuld	%f32,%f10,%f32		! (0_0) xx *= dtmp0;
341	add	%l6,stridex,%l6		! px += stridex
342	ldd	[%i2],%f10		! (1_0) dtmp0 = ((double*)addr)[0];
343	faddd	%f62,K5,%f62		! (6_1) res += K5;
344
345	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
346	lda	[%l6]%asi,%f0		! (3_0) ((float*)res)[0] = ((float*)px)[0];
347	for	%f16,DC1,%f28		! (2_0) res = vis_for(res,DC1);
348
349	sra	%g1,10,%o2		! (2_0) hx >>= 10;
350	cmp	%g1,_0x7ff00000		! (2_0) hx ? 0x7ff00000
351	bge,pn	%icc,.update4		! (2_0) if ( hx >= 0x7ff00000 )
352	lda	[%l6+4]%asi,%f1		! (3_0) ((float*)res)[1] = ((float*)px)[1];
353.cont4:
354	fmuld	%f62,%f26,%f40		! (6_1) res *= xx;
355	fand	%f18,DC3,%f8		! (1_0) res_c = vis_fand(res_c,DC3);
356
357	fmuld	%f10,%f10,%f10		! (1_0) dtmp0 = dexp_hi * dexp_hi;
358	cmp	%g1,_0x00100000		! (2_0) hx ? 0x00100000
359	bl,pn	%icc,.update5		! (2_0) if ( hx < 0x00100000 )
360	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
361.cont5:
362	sub	%g0,%o7,%o7		! (2_0) iexp = -iexp;
363	fand	%f0,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
364
365	add	%o7,1534,%o7		! (2_0) iexp += 0x5fe;
366	fpadd32	%f28,DC2,%f18		! (2_0) res_c = vis_fpadd32(res,DC2);
367
368	fmuld	K6,%f32,%f62		! (0_0) res = K6 * xx;
369	add	%o2,8,%i4		! (2_0) hx += 8;
370	fsubd	%f44,%f8,%f6		! (1_0) xx = res - res_c;
371
372	faddd	%f40,K4,%f40		! (6_1) res += K4;
373
374	lda	[%l6]%asi,%g1		! (3_0) hx = *(int*)px;
375	sllx	%o7,52,%o7		! (2_0) iexp << 52;
376	and	%i4,-16,%i4		! (2_0) hx = -16;
377
378	add	%i4,TBL,%i4		! (2_0) addr = (char*)arr + hx;
379	stx	%o7,[%fp+tmp4]		! (2_0) dlexp = *(double*)lexp;
380
381	fmuld	%f6,%f10,%f38		! (1_0) xx *= dtmp0;
382	ldd	[%i4],%f24		! (2_0) dtmp0 = ((double*)addr)[0];
383	faddd	%f62,K5,%f62		! (0_0) res += K5;
384
385	fmuld	%f40,%f26,%f34		! (6_1) res *= xx;
386	add	%l6,stridex,%l6		! px += stridex
387
388	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
389	lda	[%l6]%asi,%f8		! (4_0) ((float*)res)[0] = ((float*)px)[0];
390	for	%f16,DC1,%f44		! (3_0) res = vis_for(res,DC1);
391
392	sra	%g1,10,%o2		! (3_0) hx >>= 10;
393	cmp	%g1,_0x7ff00000		! (3_0) hx ? 0x7ff00000
394	bge,pn	%icc,.update6		! (3_0) if ( hx >= 0x7ff00000 )
395	lda	[%l6+4]%asi,%f9		! (4_0) ((float*)res)[1] = ((float*)px)[1];
396.cont6:
397	fmuld	%f62,%f32,%f60		! (0_0) res *= xx;
398	cmp	%g1,_0x00100000		! (3_0) hx ? 0x00100000
399	fand	%f18,DC3,%f22		! (2_0) res_c = vis_fand(res_c,DC3);
400
401	fmuld	%f24,%f24,%f24		! (2_0) dtmp0 = dexp_hi * dexp_hi;
402	bl,pn	%icc,.update7		! (3_0) if ( hx < 0x00100000 )
403	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
404	faddd	%f34,K3,%f6		! (6_1) res += K3;
405.cont7:
406	sub	%g0,%o7,%o7		! (3_0) iexp = -iexp;
407	fand	%f8,DC0,%f16		! (4_0) res = vis_fand(res,DC0);
408
409	add	%o7,1534,%o7		! (3_0) iexp += 0x5fe;
410	fpadd32	%f44,DC2,%f18		! (3_0) res_c = vis_fpadd32(res,DC2);
411
412	fmuld	K6,%f38,%f62		! (1_0) res = K6 * xx;
413	add	%o2,8,%i5		! (3_0) hx += 8;
414	fsubd	%f28,%f22,%f28		! (2_0) xx = res - res_c;
415
416	fmuld	%f6,%f26,%f22		! (6_1) res *= xx;
417	faddd	%f60,K4,%f60		! (0_0) res += K4;
418
419	lda	[%l6]%asi,%g1		! (4_0) hx = *(int*)px;
420	sllx	%o7,52,%o7		! (3_0) iexp << 52;
421	and	%i5,-16,%i5		! (3_0) hx = -16;
422
423	add	%i5,TBL,%i5		! (3_0) addr = (char*)arr + hx;
424	stx	%o7,[%fp+tmp5]		! (3_0) dlexp = *(double*)lexp;
425
426	fmuld	%f28,%f24,%f36		! (2_0) xx *= dtmp0;
427	add	%l6,stridex,%i0		! px += stridex
428	ldd	[%i5],%f28		! (3_0) dtmp0 = ((double*)addr)[0];
429	faddd	%f62,K5,%f62		! (1_0) res += K5;
430
431	faddd	%f22,K2,%f10		! (6_1) res += K2;
432	fmuld	%f60,%f32,%f34		! (0_0) res *= xx;
433
434	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
435	lda	[%i0]%asi,%f0		! (5_0) ((float*)res)[0] = ((float*)px)[0];
436	for	%f16,DC1,%f24		! (4_0) res = vis_for(res,DC1);
437
438	sra	%g1,10,%o2		! (4_0) hx >>= 10;
439	cmp	%g1,_0x7ff00000		! (4_0) hx ? 0x7ff00000
440	bge,pn	%icc,.update8		! (4_0) if ( hx >= 0x7ff00000 )
441	lda	[%i0+4]%asi,%f1		! (5_0) ((float*)res)[1] = ((float*)px)[1];
442.cont8:
443	fand	%f18,DC3,%f40		! (3_0) res_c = vis_fand(res_c,DC3);
444	fmuld	%f62,%f38,%f62		! (1_0) res *= xx;
445
446	fmuld	%f10,%f26,%f58		! (6_1) res *= xx;
447	cmp	%g1,_0x00100000		! (4_0) hx ? 0x00100000
448	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
449	faddd	%f34,K3,%f60		! (0_0) res += K3;
450
451	fmuld	%f28,%f28,%f28		! (3_0) dtmp0 = dexp_hi * dexp_hi;
452	bl,pn	%icc,.update9		! (4_0) if ( hx < 0x00100000 )
453	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
454	fand	%f0,DC0,%f16		! (5_0) res = vis_fand(res,DC0);
455.cont9:
456	add	%o7,1534,%o7		! (4_0) iexp += 0x5fe;
457	fpadd32	%f24,DC2,%f18		! (4_0) res_c = vis_fpadd32(res,DC2);
458
459	fmuld	K6,%f36,%f10		! (2_0) res = K6 * xx;
460	add	%o2,8,%l1		! (4_0) hx += 8;
461	fsubd	%f44,%f40,%f44		! (3_0) xx = res - res_c;
462
463	fmuld	%f60,%f32,%f60		! (0_0) res *= xx;
464	faddd	%f62,K4,%f6		! (1_0) res += K4;
465
466	lda	[%i0]%asi,%g1		! (5_0) hx = *(int*)px;
467	sllx	%o7,52,%o7		! (4_0) iexp << 52;
468	and	%l1,-16,%l1		! (4_0) hx = -16;
469	faddd	%f58,K1,%f58		! (6_1) res += K1;
470
471	add	%i0,stridex,%i1		! px += stridex
472	add	%l1,TBL,%l1		! (4_0) addr = (char*)arr + hx;
473	stx	%o7,[%fp+tmp6]		! (4_0) dlexp = *(double*)lexp;
474
475	fmuld	%f44,%f28,%f40		! (3_0) xx *= dtmp0;
476	ldd	[%l1],%f44		! (4_0) dtmp0 = ((double*)addr)[0];
477	faddd	%f10,K5,%f62		! (2_0) res += K5;
478
479	fmuld	%f6,%f38,%f34		! (1_0) res *= xx;
480	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
481	nop
482	faddd	%f60,K2,%f60		! (0_0) res += K2;
483
484	for	%f16,DC1,%f28		! (5_0) res = vis_for(res,DC1);
485	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;
486	lda	[%i1]%asi,%f6		! (6_0) ((float*)res)[0] = ((float*)px)[0];
487	fmuld	%f58,%f26,%f26		! (6_1) res *= xx;
488
489	sra	%g1,10,%o2		! (5_0) hx >>= 10;
490	cmp	%g1,_0x7ff00000		! (5_0) hx ? 0x7ff00000
491	bge,pn	%icc,.update10		! (5_0) if ( hx >= 0x7ff00000 )
492	lda	[%i1+4]%asi,%f7		! (6_0) ((float*)res)[1] = ((float*)px)[1];
493.cont10:
494	fand	%f18,DC3,%f8		! (4_0) res_c = vis_fand(res_c,DC3);
495	fmuld	%f62,%f36,%f62		! (2_0) res *= xx;
496
497	fmuld	%f60,%f32,%f58		! (0_0) res *= xx;
498	cmp	%g1,_0x00100000		! (5_0) hx ? 0x00100000
499	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
500	faddd	%f34,K3,%f34		! (1_0) res += K3;
501
502	fmuld	%f30,%f26,%f26		! (6_1) res = dexp_hi * res;
503	bl,pn	%icc,.update11		! (5_0) if ( hx < 0x00100000 )
504	nop
505	fand	%f6,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
506.cont11:
507	ldd	[%l4+8],%f60		! (6_1) dexp_lo = ((double*)addr)[1];
508	fmuld	%f44,%f44,%f44		! (4_0) dtmp0 = dexp_hi * dexp_hi;
509	fpadd32	%f28,DC2,%f18		! (5_0) res_c = vis_fpadd32(res,DC2);
510
511	fmuld	K6,%f40,%f22		! (3_0) res = K6 * xx;
512	add	%o2,8,%i3		! (5_0) hx += 8;
513	fsubd	%f24,%f8,%f10		! (4_0) xx = res - res_c;
514
515	fmuld	%f34,%f38,%f24		! (1_0) res *= xx;
516	or	%g0,%o4,%i0
517
518	cmp	counter,7
519	bl,pn	%icc,.tail
520	faddd	%f62,K4,%f34		! (2_0) res += K4;
521
522	ba	.main_loop
523	sub	counter,7,counter	! counter
524
525	.align	16
526.main_loop:
527	add	%o7,1534,%o7		! (5_0) iexp += 0x5fe;
528	and	%i3,-16,%i3		! (5_1) hx = -16;
529	lda	[%i1]%asi,%g1		! (6_1) hx = *(int*)px;
530	faddd	%f58,K1,%f58		! (0_1) res += K1;
531
532	add	%i3,TBL,%i3		! (5_1) addr = (char*)arr + hx;
533	sllx	%o7,52,%o7		! (5_1) iexp << 52;
534	stx	%o7,[%fp+tmp0]		! (5_1) dlexp = *(double*)lexp;
535	faddd	%f26,%f60,%f8		! (6_2) res += dexp_lo;
536
537	faddd	%f22,K5,%f62		! (3_1) res += K5;
538	add	%i1,stridex,%l6		! px += stridex
539	ldd	[%i3],%f22		! (5_1) dtmp0 = ((double*)addr)[0];
540	fmuld	%f10,%f44,%f60		! (4_1) xx *= dtmp0;
541
542	faddd	%f24,K2,%f26		! (1_1) res += K2;
543	add	%i0,stridey,%i1		! px += stridey
544	ldd	[%l2],%f24		! (0_1) dexp_hi = ((double*)addr)[0];
545	fmuld	%f34,%f36,%f34		! (2_1) res *= xx;
546
547	fmuld	%f58,%f32,%f58		! (0_1) res *= xx;
548	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
549	lda	[%l6]%asi,%f0		! (0_0) ((float*)res)[0] = ((float*)px)[0];
550	for	%f16,DC1,%f44		! (6_1) res = vis_for(res,DC1);
551
552	lda	[%l6+4]%asi,%f1		! (0_0) ((float*)res)[1] = ((float*)px)[1];
553	sra	%g1,10,%o2		! (6_1) hx >>= 10;
554	fmuld	%f22,%f22,%f10		! (5_1) dtmp0 = dexp_hi * dexp_hi;
555	faddd	%f8,%f30,%f30		! (6_2) res += dexp_hi;
556
557	fmuld	%f62,%f40,%f32		! (3_1) res *= xx;
558	cmp	%g1,_0x7ff00000		! (6_1) hx ? 0x7ff00000
559	ldd	[%fp+tmp1],%f62		! (6_2) dlexp = *(double*)lexp;
560	fand	%f18,DC3,%f8		! (5_1) res_c = vis_fand(res_c,DC3);
561
562	fmuld	%f26,%f38,%f26		! (1_1) res *= xx;
563	bge,pn	%icc,.update12		! (6_1) if ( hx >= 0x7ff00000 )
564	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
565	faddd	%f34,K3,%f34		! (2_1) res += K3;
566.cont12:
567	fmuld	%f24,%f58,%f58		! (0_1) res = dexp_hi * res;
568	cmp	%g1,_0x00100000		! (6_1) hx ? 0x00100000
569	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
570	fand	%f0,DC0,%f16		! (0_0) res = vis_fand(res,DC0);
571
572	fmuld	%f30,%f62,%f2		! (6_2) res *= dlexp;
573	bl,pn	%icc,.update13		! (6_1) if ( hx < 0x00100000 )
574	ldd	[%l2+8],%f30		! (0_1) dexp_lo = ((double*)addr)[1];
575	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
576.cont13:
577	fmuld	K6,%f60,%f62		! (4_1) res = K6 * xx;
578	add	%o2,8,%l4		! (6_1) hx += 8;
579	st	%f2,[%i0]		! (6_2) ((float*)py)[0] = ((float*)res)[0];
580	fsubd	%f28,%f8,%f6		! (5_1) xx = res - res_c;
581
582	fmuld	%f34,%f36,%f28		! (2_1) res *= xx;
583	add	%o7,1534,%o7		! (6_1) iexp += 0x5fe;
584	st	%f3,[%i0+4]		! (6_2) ((float*)py)[1] = ((float*)res)[1];
585	faddd	%f32,K4,%f32		! (3_1) res += K4;
586
587	lda	[%l6]%asi,%g1		! (0_0) hx = *(int*)px;
588	sllx	%o7,52,%o7		! (6_1) iexp << 52;
589	and	%l4,-16,%l4		! (6_1) hx = -16;
590	faddd	%f26,K1,%f26		! (1_1) res += K1;
591
592	add	%i1,stridey,%i0		! px += stridey
593	add	%l4,TBL,%l4		! (6_1) addr = (char*)arr + hx;
594	stx	%o7,[%fp+tmp1]		! (6_1) dlexp = *(double*)lexp;
595	faddd	%f58,%f30,%f8		! (0_1) res += dexp_lo;
596
597	fmuld	%f6,%f10,%f58		! (5_1) xx *= dtmp0;
598	add	%l6,stridex,%l6		! px += stridex
599	ldd	[%l4],%f30		! (6_1) dtmp0 = ((double*)addr)[0];
600	faddd	%f62,K5,%f62		! (4_1) res += K5;
601
602	fmuld	%f32,%f40,%f34		! (3_1) res *= xx;
603	sra	%g1,10,%o2		! (0_0) hx >>= 10;
604	ldd	[%i2],%f4		! (1_1) dexp_hi = ((double*)addr)[0];
605	faddd	%f28,K2,%f32		! (2_1) res += K2;
606
607	fmuld	%f26,%f38,%f26		! (1_1) res *= xx;
608	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
609	lda	[%l6]%asi,%f6		! (1_0) ((float*)res)[0] = ((float*)px)[0];
610	for	%f16,DC1,%f28		! (0_0) res = vis_for(res,DC1);
611
612	fmuld	%f30,%f30,%f30		! (6_1) dtmp0 = dexp_hi * dexp_hi;
613	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
614	lda	[%l6+4]%asi,%f7		! (1_0) ((float*)res)[1] = ((float*)px)[1];
615	faddd	%f8,%f24,%f24		! (0_1) res += dexp_hi;
616
617	fmuld	%f62,%f60,%f38		! (4_1) res *= xx;
618	cmp	%g1,_0x7ff00000		! (0_0) hx ? 0x7ff00000
619	ldd	[%fp+tmp2],%f62		! (0_1) dlexp = *(double*)lexp;
620	fand	%f18,DC3,%f8		! (6_1) res_c = vis_fand(res_c,DC3);
621
622	fmuld	%f32,%f36,%f32		! (2_1) res *= xx;
623	bge,pn	%icc,.update14		! (0_0) if ( hx >= 0x7ff00000 )
624	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
625	faddd	%f34,K3,%f34		! (3_1) res += K3;
626.cont14:
627	fmuld	%f4,%f26,%f26		! (1_1) res = dexp_hi * res;
628	cmp	%g1,_0x00100000		! (0_0) hx ? 0x00100000
629	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
630	fand	%f6,DC0,%f16		! (1_0) res = vis_fand(res,DC0);
631
632	fmuld	%f24,%f62,%f2		! (0_1) res *= dlexp;
633	bl,pn	%icc,.update15		! (0_0) if ( hx < 0x00100000 )
634	ldd	[%i2+8],%f24		! (1_1) dexp_lo = ((double*)addr)[1];
635	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
636.cont15:
637	fmuld	K6,%f58,%f62		! (5_1) res = K6 * xx;
638	add	%o2,8,%l2		! (0_0) hx += 8;
639	st	%f2,[%i1]		! (0_1) ((float*)py)[0] = ((float*)res)[0];
640	fsubd	%f44,%f8,%f10		! (6_1) xx = res - res_c;
641
642	fmuld	%f34,%f40,%f44		! (3_1) res *= xx;
643	nop
644	st	%f3,[%i1+4]		! (0_1) ((float*)py)[1] = ((float*)res)[1];
645	faddd	%f38,K4,%f38		! (4_1) res += K4;
646
647	lda	[%l6]%asi,%g1		! (1_0) hx = *(int*)px;
648	sllx	%o7,52,%o7		! (0_0) iexp << 52;
649	and	%l2,-16,%l2		! (0_0) hx = -16;
650	faddd	%f32,K1,%f32		! (2_1) res += K1;
651
652	add	%l2,TBL,%l2		! (0_0) addr = (char*)arr + hx;
653	add	%l6,stridex,%l6		! px += stridex
654	stx	%o7,[%fp+tmp2]		! (0_0) dlexp = *(double*)lexp;
655	faddd	%f26,%f24,%f8		! (1_1) res += dexp_lo;
656
657	fmuld	%f10,%f30,%f26		! (6_1) xx *= dtmp0;
658	add	%i0,stridey,%i1		! px += stridey
659	ldd	[%l2],%f30		! (0_0) dtmp0 = ((double*)addr)[0];
660	faddd	%f62,K5,%f62		! (5_1) res += K5;
661
662	fmuld	%f38,%f60,%f34		! (4_1) res *= xx;
663	sra	%g1,10,%o2		! (1_0) hx >>= 10;
664	ldd	[%i4],%f24		! (2_1) dexp_hi = ((double*)addr)[0];
665	faddd	%f44,K2,%f38		! (3_1) res += K2;
666
667	fmuld	%f32,%f36,%f32		! (2_1) res *= xx;
668	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
669	lda	[%l6]%asi,%f0		! (2_0) ((float*)res)[0] = ((float*)px)[0];
670	for	%f16,DC1,%f44		! (1_0) res = vis_for(res,DC1);
671
672	fmuld	%f30,%f30,%f30		! (0_0) dtmp0 = dexp_hi * dexp_hi;
673	cmp	%g1,_0x7ff00000		! (1_0) hx ? 0x7ff00000
674	lda	[%l6+4]%asi,%f1		! (2_0) ((float*)res)[1] = ((float*)px)[1];
675	faddd	%f8,%f4,%f4		! (1_1) res += dexp_hi;
676
677	fmuld	%f62,%f58,%f36		! (5_1) res *= xx;
678	bge,pn	%icc,.update16		! (1_0) if ( hx >= 0x7ff00000 )
679	ldd	[%fp+tmp3],%f62		! (1_1) dlexp = *(double*)lexp;
680	fand	%f18,DC3,%f8		! (0_0) res_c = vis_fand(res_c,DC3);
681.cont16:
682	fmuld	%f38,%f40,%f38		! (3_1) res *= xx;
683	cmp	%g1,_0x00100000		! (1_0) hx ? 0x00100000
684	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
685	faddd	%f34,K3,%f34		! (4_1) res += K3;
686
687	fmuld	%f24,%f32,%f32		! (2_1) res = dexp_hi * res;
688	bl,pn	%icc,.update17		! (1_0) if ( hx < 0x00100000 )
689	sub	%g0,%o7,%o7		! (1_0) iexp = -iexp;
690	fand	%f0,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
691.cont17:
692	fmuld	%f4,%f62,%f2		! (1_1) res *= dlexp;
693	add	%o7,1534,%o7		! (1_0) iexp += 0x5fe;
694	ldd	[%i4+8],%f4		! (2_1) dexp_lo = ((double*)addr)[1];
695	fpadd32	%f44,DC2,%f18		! (1_0) res_c = vis_fpadd32(res,DC2);
696
697	fmuld	K6,%f26,%f62		! (6_1) res = K6 * xx;
698	add	%o2,8,%i2		! (1_0) hx += 8;
699	st	%f2,[%i0]		! (1_1) ((float*)py)[0] = ((float*)res)[0];
700	fsubd	%f28,%f8,%f6		! (0_0) xx = res - res_c;
701
702	fmuld	%f34,%f60,%f28		! (4_1) res *= xx;
703	nop
704	st	%f3,[%i0+4]		! (1_1) ((float*)py)[1] = ((float*)res)[1];
705	faddd	%f36,K4,%f36		! (5_1) res += K4;
706
707	lda	[%l6]%asi,%g1		! (2_0) hx = *(int*)px;
708	sllx	%o7,52,%o7		! (1_0) iexp << 52;
709	and	%i2,-16,%i2		! (1_0) hx = -16;
710	faddd	%f38,K1,%f38		! (3_1) res += K1;
711
712	add	%i1,stridey,%i0		! px += stridey
713	add	%i2,TBL,%i2		! (1_0) addr = (char*)arr + hx;
714	stx	%o7,[%fp+tmp3]		! (1_0) dlexp = *(double*)lexp;
715	faddd	%f32,%f4,%f8		! (2_1) res += dexp_lo;
716
717	fmuld	%f6,%f30,%f32		! (0_0) xx *= dtmp0;
718	add	%l6,stridex,%l6		! px += stridex
719	ldd	[%i2],%f30		! (1_0) dtmp0 = ((double*)addr)[0];
720	faddd	%f62,K5,%f62		! (6_1) res += K5;
721
722	fmuld	%f36,%f58,%f34		! (5_1) res *= xx;
723	sra	%g1,10,%o2		! (2_0) hx >>= 10;
724	ldd	[%i5],%f4		! (3_1) dexp_hi = ((double*)addr)[0];
725	faddd	%f28,K2,%f36		! (4_1) res += K2;
726
727	fmuld	%f38,%f40,%f38		! (3_1) res *= xx;
728	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
729	lda	[%l6]%asi,%f6		! (3_0) ((float*)res)[0] = ((float*)px)[0];
730	for	%f16,DC1,%f28		! (2_0) res = vis_for(res,DC1);
731
732	fmuld	%f30,%f30,%f30		! (1_0) dtmp0 = dexp_hi * dexp_hi;
733	cmp	%g1,_0x7ff00000		! (2_0) hx ? 0x7ff00000
734	lda	[%l6+4]%asi,%f7		! (3_0) ((float*)res)[1] = ((float*)px)[1];
735	faddd	%f8,%f24,%f24		! (2_1) res += dexp_hi;
736
737	fmuld	%f62,%f26,%f40		! (6_1) res *= xx;
738	bge,pn	%icc,.update18		! (2_0) if ( hx >= 0x7ff00000 )
739	ldd	[%fp+tmp4],%f62		! (2_1) dlexp = *(double*)lexp;
740	fand	%f18,DC3,%f8		! (1_0) res_c = vis_fand(res_c,DC3);
741.cont18:
742	fmuld	%f36,%f60,%f36		! (4_1) res *= xx;
743	cmp	%g1,_0x00100000		! (2_0) hx ? 0x00100000
744	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
745	faddd	%f34,K3,%f34		! (5_1) res += K3;
746
747	fmuld	%f4,%f38,%f38		! (3_1) res = dexp_hi * res;
748	bl,pn	%icc,.update19		! (2_0) if ( hx < 0x00100000 )
749	sub	%g0,%o7,%o7		! (2_0) iexp = -iexp;
750	fand	%f6,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
751.cont19:
752	fmuld	%f24,%f62,%f2		! (2_1) res *= dlexp;
753	add	%o7,1534,%o7		! (2_0) iexp += 0x5fe;
754	ldd	[%i5+8],%f24		! (3_1) dexp_lo = ((double*)addr)[1];
755	fpadd32	%f28,DC2,%f18		! (2_0) res_c = vis_fpadd32(res,DC2);
756
757	fmuld	K6,%f32,%f62		! (0_0) res = K6 * xx;
758	add	%o2,8,%i4		! (2_0) hx += 8;
759	st	%f2,[%i1]		! (2_1) ((float*)py)[0] = ((float*)res)[0];
760	fsubd	%f44,%f8,%f10		! (1_0) xx = res - res_c;
761
762	fmuld	%f34,%f58,%f44		! (5_1) res *= xx;
763	nop
764	st	%f3,[%i1+4]		! (2_1) ((float*)py)[1] = ((float*)res)[1];
765	faddd	%f40,K4,%f40		! (6_1) res += K4;
766
767	lda	[%l6]%asi,%g1		! (3_0) hx = *(int*)px;
768	sllx	%o7,52,%o7		! (2_0) iexp << 52;
769	and	%i4,-16,%i4		! (2_0) hx = -16;
770	faddd	%f36,K1,%f36		! (4_1) res += K1;
771
772	add	%l6,stridex,%l6		! px += stridex
773	add	%i4,TBL,%i4		! (2_0) addr = (char*)arr + hx;
774	stx	%o7,[%fp+tmp4]		! (2_0) dlexp = *(double*)lexp;
775	faddd	%f38,%f24,%f8		! (3_1) res += dexp_lo;
776
777	fmuld	%f10,%f30,%f38		! (1_0) xx *= dtmp0;
778	add	%i0,stridey,%i1		! px += stridey
779	ldd	[%i4],%f24		! (2_0) dtmp0 = ((double*)addr)[0];
780	faddd	%f62,K5,%f62		! (0_0) res += K5;
781
782	fmuld	%f40,%f26,%f34		! (6_1) res *= xx;
783	sra	%g1,10,%o2		! (3_0) hx >>= 10;
784	ldd	[%l1],%f30		! (4_1) dexp_hi = ((double*)addr)[0];
785	faddd	%f44,K2,%f40		! (5_1) res += K2;
786
787	fmuld	%f36,%f60,%f36		! (4_1) res *= xx;
788	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
789	lda	[%l6]%asi,%f0		! (4_0) ((float*)res)[0] = ((float*)px)[0];
790	for	%f16,DC1,%f44		! (3_0) res = vis_for(res,DC1);
791
792	fmuld	%f24,%f24,%f24		! (2_0) dtmp0 = dexp_hi * dexp_hi;
793	cmp	%g1,_0x7ff00000		! (3_0) hx ? 0x7ff00000
794	lda	[%l6+4]%asi,%f1		! (4_0) ((float*)res)[1] = ((float*)px)[1];
795	faddd	%f8,%f4,%f8		! (3_1) res += dexp_hi;
796
797	fmuld	%f62,%f32,%f60		! (0_0) res *= xx;
798	bge,pn	%icc,.update20		! (3_0) if ( hx >= 0x7ff00000 )
799	ldd	[%fp+tmp5],%f62		! (3_1) dlexp = *(double*)lexp;
800	fand	%f18,DC3,%f4		! (2_0) res_c = vis_fand(res_c,DC3);
801.cont20:
802	fmuld	%f40,%f58,%f40		! (5_1) res *= xx;
803	cmp	%g1,_0x00100000		! (3_0) hx ? 0x00100000
804	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
805	faddd	%f34,K3,%f10		! (6_1) res += K3;
806
807	fmuld	%f30,%f36,%f36		! (4_1) res = dexp_hi * res;
808	bl,pn	%icc,.update21		! (3_0) if ( hx < 0x00100000 )
809	sub	%g0,%o7,%o7		! (3_0) iexp = -iexp;
810	fand	%f0,DC0,%f16		! (4_0) res = vis_fand(res,DC0);
811.cont21:
812	fmuld	%f8,%f62,%f8		! (3_1) res *= dlexp;
813	add	%o7,1534,%o7		! (3_0) iexp += 0x5fe;
814	ldd	[%l1+8],%f34		! (4_1) dexp_lo = ((double*)addr)[1];
815	fpadd32	%f44,DC2,%f18		! (3_0) res_c = vis_fpadd32(res,DC2);
816
817	fmuld	K6,%f38,%f62		! (1_0) res = K6 * xx;
818	add	%o2,8,%i5		! (3_0) hx += 8;
819	st	%f8,[%i0]		! (3_1) ((float*)py)[0] = ((float*)res)[0];
820	fsubd	%f28,%f4,%f28		! (2_0) xx = res - res_c;
821
822	fmuld	%f10,%f26,%f4		! (6_1) res *= xx;
823	nop
824	st	%f9,[%i0+4]		! (3_1) ((float*)py)[1] = ((float*)res)[1];
825	faddd	%f60,K4,%f60		! (0_0) res += K4;
826
827	lda	[%l6]%asi,%g1		! (4_0) hx = *(int*)px;
828	sllx	%o7,52,%o7		! (3_0) iexp << 52;
829	and	%i5,-16,%i5		! (3_0) hx = -16;
830	faddd	%f40,K1,%f40		! (5_1) res += K1;
831
832	add	%l6,stridex,%i0		! px += stridex
833	add	%i5,TBL,%i5		! (3_0) addr = (char*)arr + hx;
834	stx	%o7,[%fp+tmp5]		! (3_0) dlexp = *(double*)lexp;
835	faddd	%f36,%f34,%f8		! (4_1) res += dexp_lo;
836
837	fmuld	%f28,%f24,%f36		! (2_0) xx *= dtmp0;
838	add	%i1,stridey,%l6		! px += stridey
839	ldd	[%i5],%f28		! (3_0) dtmp0 = ((double*)addr)[0];
840	faddd	%f62,K5,%f62		! (1_0) res += K5;
841
842	faddd	%f4,K2,%f10		! (6_1) res += K2;
843	sra	%g1,10,%o2		! (4_0) hx >>= 10;
844	nop
845	fmuld	%f60,%f32,%f34		! (0_0) res *= xx;
846
847	fmuld	%f40,%f58,%f40		! (5_1) res *= xx;
848	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
849	lda	[%i0]%asi,%f6		! (5_0) ((float*)res)[0] = ((float*)px)[0];
850	for	%f16,DC1,%f24		! (4_0) res = vis_for(res,DC1);
851
852	fmuld	%f28,%f28,%f28		! (3_0) dtmp0 = dexp_hi * dexp_hi;
853	cmp	%g1,_0x7ff00000		! (4_0) hx ? 0x7ff00000
854	lda	[%i0+4]%asi,%f7		! (5_0) ((float*)res)[1] = ((float*)px)[1];
855	faddd	%f8,%f30,%f30		! (4_1) res += dexp_hi;
856
857	fand	%f18,DC3,%f8		! (3_0) res_c = vis_fand(res_c,DC3);
858	bge,pn	%icc,.update22		! (4_0) if ( hx >= 0x7ff00000 )
859	ldd	[%fp+tmp6],%f18		! (4_1) dlexp = *(double*)lexp;
860	fmuld	%f62,%f38,%f62		! (1_0) res *= xx;
861.cont22:
862	fmuld	%f10,%f26,%f58		! (6_1) res *= xx;
863	cmp	%g1,_0x00100000		! (4_0) hx ? 0x00100000
864	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
865	faddd	%f34,K3,%f60		! (0_0) res += K3;
866
867	fmuld	%f22,%f40,%f40		! (5_1) res = dexp_hi * res;
868	bl,pn	%icc,.update23		! (4_0) if ( hx < 0x00100000 )
869	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
870	fand	%f6,DC0,%f16		! (5_0) res = vis_fand(res,DC0);
871.cont23:
872	fmuld	%f30,%f18,%f6		! (4_1) res *= dlexp;
873	add	%o7,1534,%o7		! (4_0) iexp += 0x5fe;
874	ldd	[%i3+8],%f34		! (5_1) dexp_lo = ((double*)addr)[1];
875	fpadd32	%f24,DC2,%f18		! (4_0) res_c = vis_fpadd32(res,DC2);
876
877	fmuld	K6,%f36,%f30		! (2_0) res = K6 * xx;
878	add	%o2,8,%l1		! (4_0) hx += 8;
879	st	%f6,[%i1]		! (4_1) ((float*)py)[0] = ((float*)res)[0];
880	fsubd	%f44,%f8,%f44		! (3_0) xx = res - res_c;
881
882	fmuld	%f60,%f32,%f60		! (0_0) res *= xx;
883	sllx	%o7,52,%o7		! (4_0) iexp << 52;
884	st	%f7,[%i1+4]		! (4_1) ((float*)py)[1] = ((float*)res)[1];
885	faddd	%f62,K4,%f6		! (1_0) res += K4;
886
887	lda	[%i0]%asi,%g1		! (5_0) hx = *(int*)px;
888	add	%i0,stridex,%i1		! px += stridex
889	and	%l1,-16,%l1		! (4_0) hx = -16;
890	faddd	%f58,K1,%f58		! (6_1) res += K1;
891
892	add	%l1,TBL,%l1		! (4_0) addr = (char*)arr + hx;
893	add	%l6,stridey,%i0		! px += stridey
894	stx	%o7,[%fp+tmp6]		! (4_0) dlexp = *(double*)lexp;
895	faddd	%f40,%f34,%f8		! (5_1) res += dexp_lo;
896
897	fmuld	%f44,%f28,%f40		! (3_0) xx *= dtmp0;
898	nop
899	ldd	[%l1],%f44		! (4_0) dtmp0 = ((double*)addr)[0];
900	faddd	%f30,K5,%f62		! (2_0) res += K5;
901
902	fmuld	%f6,%f38,%f34		! (1_0) res *= xx;
903	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
904	ldd	[%l4],%f30		! (6_1) dexp_hi = ((double*)addr)[0];
905	faddd	%f60,K2,%f60		! (0_0) res += K2;
906
907	for	%f16,DC1,%f28		! (5_0) res = vis_for(res,DC1);
908	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;
909	lda	[%i1]%asi,%f6		! (6_0) ((float*)res)[0] = ((float*)px)[0];
910	fmuld	%f58,%f26,%f26		! (6_1) res *= xx;
911
912	fmuld	%f44,%f44,%f44		! (4_0) dtmp0 = dexp_hi * dexp_hi;
913	cmp	%g1,_0x7ff00000		! (5_0) hx ? 0x7ff00000
914	lda	[%i1+4]%asi,%f7		! (6_0) ((float*)res)[1] = ((float*)px)[1];
915	faddd	%f8,%f22,%f22		! (5_1) res += dexp_hi;
916
917	fand	%f18,DC3,%f8		! (4_0) res_c = vis_fand(res_c,DC3);
918	bge,pn	%icc,.update24		! (5_0) if ( hx >= 0x7ff00000 )
919	ldd	[%fp+tmp0],%f18		! (5_1) dlexp = *(double*)lexp;
920	fmuld	%f62,%f36,%f62		! (2_0) res *= xx;
921.cont24:
922	fmuld	%f60,%f32,%f58		! (0_0) res *= xx;
923	sra	%g1,10,%o2		! (5_0) hx >>= 10;
924	cmp	%g1,_0x00100000		! (5_0) hx ? 0x00100000
925	faddd	%f34,K3,%f34		! (1_0) res += K3;
926
927	fmuld	%f30,%f26,%f26		! (6_1) res = dexp_hi * res;
928	bl,pn	%icc,.update25		! (5_0) if ( hx < 0x00100000 )
929	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
930	fand	%f6,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
931.cont25:
932	fmuld	%f22,%f18,%f2		! (5_1) res *= dlexp;
933	subcc	counter,7,counter	! counter -= 7;
934	ldd	[%l4+8],%f60		! (6_1) dexp_lo = ((double*)addr)[1];
935	fpadd32	%f28,DC2,%f18		! (5_0) res_c = vis_fpadd32(res,DC2);
936
937	fmuld	K6,%f40,%f22		! (3_0) res = K6 * xx;
938	add	%o2,8,%i3		! (5_0) hx += 8;
939	st	%f2,[%l6]		! (5_1) ((float*)py)[0] = ((float*)res)[0];
940	fsubd	%f24,%f8,%f10		! (4_0) xx = res - res_c;
941
942	fmuld	%f34,%f38,%f24		! (1_0) res *= xx;
943	st	%f3,[%l6+4]		! (5_1) ((float*)py)[1] = ((float*)res)[1];
944	bpos,pt	%icc,.main_loop
945	faddd	%f62,K4,%f34		! (2_0) res += K4;
946
947	add	counter,7,counter
/*
 * .tail: drain the software pipeline.
 *
 * Reached by fall-through from the main loop once counter has had the 7
 * subtracted by the loop test added back; any other entry points are
 * outside this view.
 *
 * The code is a sequence of stages.  Each stage first decrements counter;
 * if it goes negative the stage branches to .begin (outside this view)
 * and, only on that taken path (annulled delay slot), copies the current
 * output pointer into %o4.  Otherwise the stage finishes the polynomial
 * and table fix-up for one more element that is still in flight and
 * stores its two 32-bit halves through the output pointer.  %i0 and %i1
 * alternate as the output pointer, each being set to the other plus
 * stridey.  After the last stage the code branches to .begin
 * unconditionally with %o4 = %i1 + stridey.
 */
948.tail:
949	add	%o7,1534,%o7		! (5_0) iexp += 0x5fe;
950	subcc	counter,1,counter	! counter -= 1;
951	bneg,a	.begin			! if ( counter < 0 ) stop draining
952	mov	%i0,%o4			! (annulled slot, taken path only) %o4 = py
953
954	faddd	%f58,K1,%f58		! (0_1) res += K1;
955
956	faddd	%f26,%f60,%f8		! (6_2) res += dexp_lo;
957
958	faddd	%f22,K5,%f62		! (3_1) res += K5;
959	fmuld	%f10,%f44,%f60		! (4_1) xx *= dtmp0;
960
961	faddd	%f24,K2,%f26		! (1_1) res += K2;
962	add	%i1,stridex,%l6		! px += stridex
963	ldd	[%l2],%f24		! (0_1) dexp_hi = ((double*)addr)[0];
964	fmuld	%f34,%f36,%f34		! (2_1) res *= xx;
965
966	fmuld	%f58,%f32,%f58		! (0_1) res *= xx;
967
968	add	%i0,stridey,%i1		! py += stridey
969	faddd	%f8,%f30,%f30		! (6_2) res += dexp_hi;
970
971	fmuld	%f62,%f40,%f32		! (3_1) res *= xx;
972	ldd	[%fp+tmp1],%f62		! (6_2) dlexp = *(double*)lexp;
973
974	fmuld	%f26,%f38,%f26		! (1_1) res *= xx;
975	faddd	%f34,K3,%f34		! (2_1) res += K3;
976
977	fmuld	%f24,%f58,%f58		! (0_1) res = dexp_hi * res;
978
979	fmuld	%f30,%f62,%f2		! (6_2) res *= dlexp;
980	ldd	[%l2+8],%f30		! (0_1) dexp_lo = ((double*)addr)[1];
981
982	fmuld	K6,%f60,%f62		! (4_1) res = K6 * xx;
983	st	%f2,[%i0]		! (6_2) ((float*)py)[0] = ((float*)res)[0];
984
985	fmuld	%f34,%f36,%f28		! (2_1) res *= xx;
986	st	%f3,[%i0+4]		! (6_2) ((float*)py)[1] = ((float*)res)[1];
987	faddd	%f32,K4,%f32		! (3_1) res += K4;
988
989	subcc	counter,1,counter	! counter -= 1;
990	bneg,a	.begin			! if ( counter < 0 ) stop draining
991	mov	%i1,%o4			! (annulled slot, taken path only) %o4 = py
992
993	faddd	%f26,K1,%f26		! (1_1) res += K1;
994
995	faddd	%f58,%f30,%f8		! (0_1) res += dexp_lo;
996
997	add	%l6,stridex,%l6		! px += stridex
998	faddd	%f62,K5,%f62		! (4_1) res += K5;
999
1000	fmuld	%f32,%f40,%f34		! (3_1) res *= xx;
1001	add	%i1,stridey,%i0		! py += stridey
1002	ldd	[%i2],%f22		! (1_1) dexp_hi = ((double*)addr)[0];
1003	faddd	%f28,K2,%f32		! (2_1) res += K2;
1004
1005	fmuld	%f26,%f38,%f26		! (1_1) res *= xx;
1006
1007	faddd	%f8,%f24,%f24		! (0_1) res += dexp_hi;
1008
1009	fmuld	%f62,%f60,%f38		! (4_1) res *= xx;
1010	ldd	[%fp+tmp2],%f62		! (0_1) dlexp = *(double*)lexp;
1011
1012	fmuld	%f32,%f36,%f32		! (2_1) res *= xx;
1013	faddd	%f34,K3,%f34		! (3_1) res += K3;
1014
1015	fmuld	%f22,%f26,%f26		! (1_1) res = dexp_hi * res;
1016
1017	fmuld	%f24,%f62,%f2		! (0_1) res *= dlexp;
1018	ldd	[%i2+8],%f24		! (1_1) dexp_lo = ((double*)addr)[1];
1019
1020	st	%f2,[%i1]		! (0_1) ((float*)py)[0] = ((float*)res)[0];
1021
1022	fmuld	%f34,%f40,%f44		! (3_1) res *= xx;
1023	st	%f3,[%i1+4]		! (0_1) ((float*)py)[1] = ((float*)res)[1];
1024	faddd	%f38,K4,%f38		! (4_1) res += K4;
1025
1026	subcc	counter,1,counter	! counter -= 1;
1027	bneg,a	.begin			! if ( counter < 0 ) stop draining
1028	mov	%i0,%o4			! (annulled slot, taken path only) %o4 = py
1029
1030	faddd	%f32,K1,%f32		! (2_1) res += K1;
1031
1032	add	%l6,stridex,%l6		! px += stridex
1033	faddd	%f26,%f24,%f8		! (1_1) res += dexp_lo;
1034
1035	add	%i0,stridey,%i1		! py += stridey
1036
1037	fmuld	%f38,%f60,%f34		! (4_1) res *= xx;
1038	ldd	[%i4],%f24		! (2_1) dexp_hi = ((double*)addr)[0];
1039	faddd	%f44,K2,%f38		! (3_1) res += K2;
1040
1041	fmuld	%f32,%f36,%f32		! (2_1) res *= xx;
1042
1043	faddd	%f8,%f22,%f22		! (1_1) res += dexp_hi;
1044
1045	ldd	[%fp+tmp3],%f62		! (1_1) dlexp = *(double*)lexp;
1046
1047	fmuld	%f38,%f40,%f38		! (3_1) res *= xx;
1048	faddd	%f34,K3,%f34		! (4_1) res += K3;
1049
1050	fmuld	%f24,%f32,%f32		! (2_1) res = dexp_hi * res;
1051
1052	fmuld	%f22,%f62,%f2		! (1_1) res *= dlexp;
1053	ldd	[%i4+8],%f22		! (2_1) dexp_lo = ((double*)addr)[1];
1054
1055	st	%f2,[%i0]		! (1_1) ((float*)py)[0] = ((float*)res)[0];
1056
1057	fmuld	%f34,%f60,%f28		! (4_1) res *= xx;
1058	st	%f3,[%i0+4]		! (1_1) ((float*)py)[1] = ((float*)res)[1];
1059
1060	subcc	counter,1,counter	! counter -= 1;
1061	bneg,a	.begin			! if ( counter < 0 ) stop draining
1062	mov	%i1,%o4			! (annulled slot, taken path only) %o4 = py
1063
1064	faddd	%f38,K1,%f38		! (3_1) res += K1;
1065
1066	faddd	%f32,%f22,%f8		! (2_1) res += dexp_lo;
1067
1068	add	%l6,stridex,%l6		! px += stridex
1069
1070	add	%i1,stridey,%i0		! py += stridey
1071	ldd	[%i5],%f22		! (3_1) dexp_hi = ((double*)addr)[0];
1072	faddd	%f28,K2,%f36		! (4_1) res += K2;
1073
1074	fmuld	%f38,%f40,%f38		! (3_1) res *= xx;
1075
1076	faddd	%f8,%f24,%f24		! (2_1) res += dexp_hi;
1077
1078	ldd	[%fp+tmp4],%f62		! (2_1) dlexp = *(double*)lexp;
1079
1080	fmuld	%f36,%f60,%f36		! (4_1) res *= xx;
1081
1082	fmuld	%f22,%f38,%f38		! (3_1) res = dexp_hi * res;
1083
1084	fmuld	%f24,%f62,%f2		! (2_1) res *= dlexp;
1085	ldd	[%i5+8],%f24		! (3_1) dexp_lo = ((double*)addr)[1];
1086
1087	st	%f2,[%i1]		! (2_1) ((float*)py)[0] = ((float*)res)[0];
1088
1089	st	%f3,[%i1+4]		! (2_1) ((float*)py)[1] = ((float*)res)[1];
1090
1091	subcc	counter,1,counter	! counter -= 1;
1092	bneg,a	.begin			! if ( counter < 0 ) stop draining
1093	mov	%i0,%o4			! (annulled slot, taken path only) %o4 = py
1094
1095	faddd	%f36,K1,%f36		! (4_1) res += K1;
1096
1097	faddd	%f38,%f24,%f8		! (3_1) res += dexp_lo;
1098
1099	add	%i0,stridey,%i1		! py += stridey
1100
1101	add	%l6,stridex,%l6		! px += stridex
1102	ldd	[%l1],%f30		! (4_1) dexp_hi = ((double*)addr)[0];
1103
1104	fmuld	%f36,%f60,%f36		! (4_1) res *= xx;
1105
1106	faddd	%f8,%f22,%f8		! (3_1) res += dexp_hi;
1107
1108	ldd	[%fp+tmp5],%f62		! (3_1) dlexp = *(double*)lexp;
1109
1110	fmuld	%f30,%f36,%f36		! (4_1) res = dexp_hi * res;
1111
1112	fmuld	%f8,%f62,%f8		! (3_1) res *= dlexp;
1113	ldd	[%l1+8],%f34		! (4_1) dexp_lo = ((double*)addr)[1];
1114
1115	st	%f8,[%i0]		! (3_1) ((float*)py)[0] = ((float*)res)[0];
1116
1117	st	%f9,[%i0+4]		! (3_1) ((float*)py)[1] = ((float*)res)[1];
1118
1119	subcc	counter,1,counter	! counter -= 1;
1120	bneg,a	.begin			! if ( counter < 0 ) stop draining
1121	mov	%i1,%o4			! (annulled slot, taken path only) %o4 = py
1122
1123	faddd	%f36,%f34,%f8		! (4_1) res += dexp_lo;
1124
1125	add	%l6,stridex,%i0		! px += stridex
1126
1127	add	%i1,stridey,%l6		! py += stridey
1128
1129	faddd	%f8,%f30,%f30		! (4_1) res += dexp_hi;
1130
1131	ldd	[%fp+tmp6],%f18		! (4_1) dlexp = *(double*)lexp;
1132
1133	fmuld	%f30,%f18,%f6		! (4_1) res *= dlexp;
1134
1135	st	%f6,[%i1]		! (4_1) ((float*)py)[0] = ((float*)res)[0];
1136
1137	st	%f7,[%i1+4]		! (4_1) ((float*)py)[1] = ((float*)res)[1];
1138
1139	ba	.begin			! last stage done
1140	add	%i1,stridey,%o4		! (delay slot) %o4 = py + stridey
1141
1142	.align	16
/*
 * .spec0: special-case path that produces the result with a plain divide.
 *
 * Computes res = DONE / res in %f0, stores both 32-bit halves through
 * %o4, advances %i1 by stridex and %o4 by stridey, and branches to
 * .begin1 (outside this view) with counter decremented in the delay slot.
 * NOTE(review): the test that routes an element here is outside this
 * view - presumably Inf/NaN inputs; confirm at the branch site.
 */
1143.spec0:
1144	fdivd	DONE,%f0,%f0		! res = DONE / res;
1145	add	%i1,stridex,%i1		! px += stridex
1146	st	%f0,[%o4]		! ((float*)py)[0] = ((float*)&res)[0];
1147	st	%f1,[%o4+4]		! ((float*)py)[1] = ((float*)&res)[1];
1148	add	%o4,stridey,%o4		! py += stridey
1149	ba	.begin1			! continue with the next element
1150	sub	counter,1,counter	! (delay slot) counter -= 1;
1151
1152	.align	16
/*
 * .spec1: special-case path with three outcomes, tested in this order.
 *
 *  - (%i2 | %l4) == 0: res = DONE / res, stored at label 2.
 *    NOTE(review): %i2 and %l4 are set outside this view - presumably
 *    the sign-stripped high word and the low word of x, which would make
 *    this the +-0 case; confirm.
 *  - hx (%g1) < 0: res = sqrt(res), stored at label 2.
 *  - otherwise the bits of res are reinterpreted as a 64-bit integer and
 *    converted to double with fxtod.  When hx >= %i4 (label 1) the value
 *    is first masked with DC4 and D2ON51 is added after the conversion.
 *    The high word of the converted value becomes the new hx, from which
 *    iexp (%o7, reduced by 537 and negated) and hx >> 10 & 0x7f8 (%o2)
 *    are rebuilt before joining .cont_spec (outside this view).
 *    NOTE(review): 537 is 1074 / 2; presumably it compensates for the
 *    scale introduced by the integer conversion - confirm.
 *
 * Label 2 stores both halves of %f0 through %o4, advances %i1 by stridex
 * and %o4 by stridey, and branches to .begin1 with counter decremented.
 * The loads from [%o3+0x50] and [%o3+0x58] match DC4 and D2ON51 in
 * .CONST_TBL if %o3 holds the table address (set outside this view).
 */
1153.spec1:
1154	orcc	%i2,%l4,%g0		! %i2 | %l4 ? 0
1155	bz,a	2f			! if ( (%i2 | %l4) == 0 ) store DONE / res
1156	fdivd	DONE,%f0,%f0		! res = DONE / res;
1157
1158	cmp	%g1,0			! hx ? 0
1159	bl,a	2f			! if ( hx < 0 ) store sqrt(res)
1160	fsqrtd	%f0,%f0			! res = sqrt(res);
1161
1162	cmp	%g1,%i4			! hx ? %i4
1163	bge,a	1f			! if ( hx >= %i4 ) goto 1
1164	ldd	[%o3+0x50],%f18		! (annulled slot, taken path only) %f18 = DC4
1165
1166	fxtod	%f0,%f0			! res = *(long long*)&res;
1167	st	%f0,[%fp+tmp0]		! spill high word of res
1168
1169	fand	%f0,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
1170	ld	[%fp+tmp0],%g1		! hx = high word of converted res
1171
1172	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
1173	for	%f16,DC1,%f44		! (6_1) res = vis_for(res,DC1);
1174
1175	sra	%g1,10,%o2		! (6_1) hx >>= 10;
1176	sub	%o7,537,%o7		! iexp -= 537;
1177
1178	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
1179	ba	.cont_spec		! rejoin the common path
1180	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
1181
11821:
1183	fand	%f0,%f18,%f0		! res = vis_fand(res,DC4);
1184
1185	ldd	[%o3+0x58],%f28		! %f28 = D2ON51
1186	fxtod	%f0,%f0			! res = *(long long*)&res;
1187
1188	faddd	%f0,%f28,%f0		! res += D2ON51;
1189	st	%f0,[%fp+tmp0]		! spill high word of res
1190
1191	fand	%f0,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
1192	ld	[%fp+tmp0],%g1		! hx = high word of converted res
1193
1194	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
1195	for	%f16,DC1,%f44		! (6_1) res = vis_for(res,DC1);
1196
1197	sra	%g1,10,%o2		! (6_1) hx >>= 10;
1198	sub	%o7,537,%o7		! iexp -= 537;
1199
1200	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
1201	ba	.cont_spec		! rejoin the common path
1202	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
1203
12042:
1205	add	%i1,stridex,%i1		! px += stridex
1206	st	%f0,[%o4]		! ((float*)py)[0] = ((float*)&res)[0];
1207	st	%f1,[%o4+4]		! ((float*)py)[1] = ((float*)&res)[1];
1208	add	%o4,stridey,%o4		! py += stridey
1209	ba	.begin1			! continue with the next element
1210	sub	counter,1,counter	! (delay slot) counter -= 1;
1211
1212	.align	16
/*
 * .update0: out-of-line handler that cuts the current batch short.
 *
 * If counter <= 1 it returns to .cont0 unchanged.  Otherwise it saves
 * %l6 - stridex in tmp_px and counter - 1 in tmp_counter, sets counter
 * to 1 and returns to .cont0.
 * NOTE(review): the branch site is outside this view; by analogy with
 * the .update22/.update24 sites in the main loop it is presumably taken
 * when hx >= 0x7ff00000 - confirm.  tmp_px and tmp_counter are macros
 * defined outside this view.
 */
1213.update0:
1214	cmp	counter,1		! counter ? 1
1215	ble	.cont0			! if ( counter <= 1 ) nothing to record
1216	nop
1217
1218	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1219	sub	counter,1,tmp_counter	! tmp_counter = counter - 1;
1220
1221	ba	.cont0			! resume the pipeline
1222	mov	1,counter		! (delay slot) counter = 1;
1223
1224	.align	16
/*
 * .update1: out-of-line fix-up for an element whose high word hx (%g1)
 * needs special handling.
 * NOTE(review): the branch site is outside this view; by analogy with
 * the .update23/.update25 sites in the main loop it is presumably taken
 * when hx < 0x00100000 - confirm.
 *
 *  - counter <= 1: return to .cont1 (the delay-slot sub still runs).
 *  - hx < 0, or (hx | word at [%i1+4]) == 0 (label 1): save
 *    %l6 - stridex in tmp_px and counter - 1 in tmp_counter, set counter
 *    to 1 and return to .cont1.
 *  - otherwise: reinterpret the bits of res (%f8) as a 64-bit integer
 *    and convert to double (fxtod).  When hx >= 0x00080000 (label 2) the
 *    value is first masked with the double at [%o3+0x50] and the double
 *    at [%o3+0x58] is added after the conversion (DC4 and D2ON51 if %o3
 *    points at .CONST_TBL - TODO confirm).  hx, iexp (%o7, reduced by
 *    537, negated, plus 0x5fe) and hx >> 10 & 0x7f8 (%o2) are rebuilt
 *    from the converted value before returning to .cont1.
 */
1225.update1:
1226	cmp	counter,1		! counter ? 1
1227	ble	.cont1			! if ( counter <= 1 ) nothing to do
1228	sub	%l6,stridex,%i1		! (delay slot) %i1 = %l6 - stridex;
1229
1230	ld	[%i1+4],%i2		! %i2 = *(int*)(%i1 + 4);
1231	cmp	%g1,0			! hx ? 0
1232	bl	1f			! if ( hx < 0 ) goto 1
1233
1234	orcc	%g1,%i2,%g0		! (delay slot) hx | %i2 ? 0
1235	bz	1f			! if ( (hx | %i2) == 0 ) goto 1
1236	sethi	%hi(0x00080000),%i3	! (delay slot) %i3 = 0x00080000;
1237
1238	cmp	%g1,%i3			! hx ? 0x00080000
1239	bge,a	2f			! if ( hx >= 0x00080000 ) goto 2
1240	ldd	[%o3+0x50],%f18		! (annulled slot, taken path only) mask
1241
1242	fxtod	%f8,%f8			! res = *(long long*)&res;
1243	st	%f8,[%fp+tmp7]		! spill high word of res
1244
1245	fand	%f8,DC0,%f16		! (0_0) res = vis_fand(res,DC0);
1246	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1247
1248	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
1249	sra	%g1,10,%o2		! (0_0) hx >>= 10;
1250	for	%f16,DC1,%f28		! (0_0) res = vis_for(res,DC1);
1251
1252	sub	%o7,537,%o7		! iexp -= 537;
1253
1254	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
1255
1256	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
1257	ba	.cont1			! resume the pipeline
1258	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
12592:
1260	fand	%f8,%f18,%f8		! res = vis_fand(res, mask);
1261	fxtod	%f8,%f8			! res = *(long long*)&res;
1262	ldd	[%o3+0x58],%f18		! %f18 = double at [%o3+0x58]
1263	faddd	%f8,%f18,%f8		! res += %f18;
1264	st	%f8,[%fp+tmp7]		! spill high word of res
1265
1266	fand	%f8,DC0,%f16		! (0_0) res = vis_fand(res,DC0);
1267	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1268
1269	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
1270	sra	%g1,10,%o2		! (0_0) hx >>= 10;
1271	for	%f16,DC1,%f28		! (0_0) res = vis_for(res,DC1);
1272
1273	sub	%o7,537,%o7		! iexp -= 537;
1274
1275	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
1276
1277	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
1278	ba	.cont1			! resume the pipeline
1279	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
12801:
1281	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1282	sub	counter,1,tmp_counter	! tmp_counter = counter - 1;
1283
1284	ba	.cont1			! resume the pipeline
1285	mov	1,counter		! (delay slot) counter = 1;
1286
1287	.align	16
/*
 * .update2: out-of-line handler that cuts the current batch short.
 *
 * If counter <= 2 it returns to .cont2 unchanged.  Otherwise it saves
 * %l6 - stridex in tmp_px and counter - 2 in tmp_counter, sets counter
 * to 2 and returns to .cont2.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx >= 0x7ff00000, as at the .update22/.update24 sites - confirm.
 */
1288.update2:
1289	cmp	counter,2		! counter ? 2
1290	ble	.cont2			! if ( counter <= 2 ) nothing to record
1291	nop
1292
1293	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1294	sub	counter,2,tmp_counter	! tmp_counter = counter - 2;
1295
1296	ba	.cont2			! resume the pipeline
1297	mov	2,counter		! (delay slot) counter = 2;
1298
1299	.align	16
/*
 * .update3: out-of-line fix-up for an element whose high word hx (%g1)
 * needs special handling.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx < 0x00100000, as at the .update23/.update25 sites - confirm.
 *
 *  - counter <= 2: return to .cont3 (the delay-slot sub still runs).
 *  - hx < 0, or (hx | word at [%i1+4]) == 0 (label 1): save
 *    %l6 - stridex in tmp_px and counter - 2 in tmp_counter, set counter
 *    to 2 and return to .cont3.
 *  - otherwise: reinterpret the bits of res (%f0) as a 64-bit integer
 *    and convert to double (fxtod).  When hx >= 0x00080000 (label 2) the
 *    value is first masked with the double at [%o3+0x50] and the double
 *    at [%o3+0x58] is added after the conversion (DC4 and D2ON51 if %o3
 *    points at .CONST_TBL - TODO confirm).  hx, iexp (%o7, reduced by
 *    537) and hx >> 10 & 0x7f8 (%o2) are rebuilt from the converted
 *    value before returning to .cont3.
 */
1300.update3:
1301	cmp	counter,2		! counter ? 2
1302	ble	.cont3			! if ( counter <= 2 ) nothing to do
1303	sub	%l6,stridex,%i1		! (delay slot) %i1 = %l6 - stridex;
1304
1305	ld	[%i1+4],%i2		! %i2 = *(int*)(%i1 + 4);
1306	cmp	%g1,0			! hx ? 0
1307	bl	1f			! if ( hx < 0 ) goto 1
1308
1309	orcc	%g1,%i2,%g0		! (delay slot) hx | %i2 ? 0
1310	bz	1f			! if ( (hx | %i2) == 0 ) goto 1
1311	sethi	%hi(0x00080000),%i3	! (delay slot) %i3 = 0x00080000;
1312
1313	cmp	%g1,%i3			! hx ? 0x00080000
1314	bge,a	2f			! if ( hx >= 0x00080000 ) goto 2
1315	ldd	[%o3+0x50],%f18		! (annulled slot, taken path only) mask
1316
1317	fxtod	%f0,%f0			! res = *(long long*)&res;
1318	st	%f0,[%fp+tmp7]		! spill high word of res
1319
1320	fand	%f0,DC0,%f16		! (1_0) res = vis_fand(res,DC0);
1321	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1322
1323	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
1324	for	%f16,DC1,%f44		! (1_0) res = vis_for(res,DC1);
1325
1326	sra	%g1,10,%o2		! (1_0) hx >>= 10;
1327	sub	%o7,537,%o7		! iexp -= 537;
1328	ba	.cont3			! resume the pipeline
1329	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
13302:
1331	fand	%f0,%f18,%f0		! res = vis_fand(res, mask);
1332	fxtod	%f0,%f0			! res = *(long long*)&res;
1333	ldd	[%o3+0x58],%f18		! %f18 = double at [%o3+0x58]
1334	faddd	%f0,%f18,%f0		! res += %f18;
1335	st	%f0,[%fp+tmp7]		! spill high word of res
1336
1337	fand	%f0,DC0,%f16		! (1_0) res = vis_fand(res,DC0);
1338	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1339
1340	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
1341	for	%f16,DC1,%f44		! (1_0) res = vis_for(res,DC1);
1342
1343	sra	%g1,10,%o2		! (1_0) hx >>= 10;
1344	sub	%o7,537,%o7		! iexp -= 537;
1345	ba	.cont3			! resume the pipeline
1346	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
13471:
1348	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1349	sub	counter,2,tmp_counter	! tmp_counter = counter - 2;
1350
1351	ba	.cont3			! resume the pipeline
1352	mov	2,counter		! (delay slot) counter = 2;
1353
1354	.align	16
/*
 * .update4: out-of-line handler that cuts the current batch short.
 *
 * If counter <= 3 it returns to .cont4 unchanged.  Otherwise it saves
 * %l6 - stridex in tmp_px and counter - 3 in tmp_counter, sets counter
 * to 3 and returns to .cont4.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx >= 0x7ff00000, as at the .update22/.update24 sites - confirm.
 */
1355.update4:
1356	cmp	counter,3		! counter ? 3
1357	ble	.cont4			! if ( counter <= 3 ) nothing to record
1358	nop
1359
1360	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1361	sub	counter,3,tmp_counter	! tmp_counter = counter - 3;
1362
1363	ba	.cont4			! resume the pipeline
1364	mov	3,counter		! (delay slot) counter = 3;
1365
1366	.align	16
/*
 * .update5: out-of-line fix-up for an element whose high word hx (%g1)
 * needs special handling.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx < 0x00100000, as at the .update23/.update25 sites - confirm.
 *
 *  - counter <= 3: return to .cont5 (the delay-slot sub still runs).
 *  - hx < 0, or (hx | word at [%i1+4]) == 0 (label 1): save
 *    %l6 - stridex in tmp_px and counter - 3 in tmp_counter, set counter
 *    to 3 and return to .cont5.
 *  - otherwise: reinterpret the bits of res (%f6) as a 64-bit integer
 *    and convert to double (fxtod).  When hx >= 0x00080000 (label 2) the
 *    value is first masked with the double at [%o3+0x50] and the double
 *    at [%o3+0x58] is added after the conversion (DC4 and D2ON51 if %o3
 *    points at .CONST_TBL - TODO confirm).  hx, iexp (%o7, reduced by
 *    537) and hx >> 10 & 0x7f8 (%o2) are rebuilt from the converted
 *    value before returning to .cont5.
 */
1367.update5:
1368	cmp	counter,3		! counter ? 3
1369	ble	.cont5			! if ( counter <= 3 ) nothing to do
1370	sub	%l6,stridex,%i1		! (delay slot) %i1 = %l6 - stridex;
1371
1372	ld	[%i1+4],%i3		! %i3 = *(int*)(%i1 + 4);
1373	cmp	%g1,0			! hx ? 0
1374	bl	1f			! if ( hx < 0 ) goto 1
1375
1376	orcc	%g1,%i3,%g0		! (delay slot) hx | %i3 ? 0
1377	bz	1f			! if ( (hx | %i3) == 0 ) goto 1
1378	sethi	%hi(0x00080000),%i4	! (delay slot) %i4 = 0x00080000;
1379
1380	cmp	%g1,%i4			! hx ? 0x00080000
1381	bge,a	2f			! if ( hx >= 0x00080000 ) goto 2
1382	ldd	[%o3+0x50],%f18		! (annulled slot, taken path only) mask
1383
1384	fxtod	%f6,%f6			! res = *(long long*)&res;
1385	st	%f6,[%fp+tmp7]		! spill high word of res
1386
1387	fand	%f6,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
1388	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1389
1390	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
1391	sra	%g1,10,%o2		! (2_0) hx >>= 10;
1392
1393	sub	%o7,537,%o7		! iexp -= 537;
1394	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
1395	ba	.cont5			! resume the pipeline
1396	for	%f16,DC1,%f28		! (2_0) res = vis_for(res,DC1);
13972:
1398	fand	%f6,%f18,%f6		! res = vis_fand(res, mask);
1399	fxtod	%f6,%f6			! res = *(long long*)&res;
1400	ldd	[%o3+0x58],%f18		! %f18 = double at [%o3+0x58]
1401	faddd	%f6,%f18,%f6		! res += %f18;
1402	st	%f6,[%fp+tmp7]		! spill high word of res
1403
1404	fand	%f6,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
1405	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1406
1407	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
1408	sra	%g1,10,%o2		! (2_0) hx >>= 10;
1409
1410	sub	%o7,537,%o7		! iexp -= 537;
1411	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
1412	ba	.cont5			! resume the pipeline
1413	for	%f16,DC1,%f28		! (2_0) res = vis_for(res,DC1);
14141:
1415	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1416	sub	counter,3,tmp_counter	! tmp_counter = counter - 3;
1417
1418	ba	.cont5			! resume the pipeline
1419	mov	3,counter		! (delay slot) counter = 3;
1420
1421	.align	16
/*
 * .update6: out-of-line handler that cuts the current batch short.
 *
 * If counter <= 4 it returns to .cont6 unchanged.  Otherwise it saves
 * %l6 - stridex in tmp_px and counter - 4 in tmp_counter, sets counter
 * to 4 and returns to .cont6.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx >= 0x7ff00000, as at the .update22/.update24 sites - confirm.
 */
1422.update6:
1423	cmp	counter,4		! counter ? 4
1424	ble	.cont6			! if ( counter <= 4 ) nothing to record
1425	nop
1426
1427	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1428	sub	counter,4,tmp_counter	! tmp_counter = counter - 4;
1429
1430	ba	.cont6			! resume the pipeline
1431	mov	4,counter		! (delay slot) counter = 4;
1432
1433	.align	16
/*
 * .update7: out-of-line fix-up for an element whose high word hx (%g1)
 * needs special handling.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx < 0x00100000, as at the .update23/.update25 sites - confirm.
 *
 *  - counter <= 4: return to .cont7 (the delay-slot faddd of another
 *    pipeline stage still runs).
 *  - hx < 0, or (hx | word at [%i1+4]) == 0 (label 1): save
 *    %l6 - stridex in tmp_px and counter - 4 in tmp_counter, set counter
 *    to 4 and return to .cont7.
 *  - otherwise: reinterpret the bits of res (%f0) as a 64-bit integer
 *    and convert to double (fxtod).  When hx >= 0x00080000 (label 2) the
 *    value is first masked with the double at [%o3+0x50] and the double
 *    at [%o3+0x58] is added after the conversion (DC4 and D2ON51 if %o3
 *    points at .CONST_TBL - TODO confirm).  hx, iexp (%o7, reduced by
 *    537) and hx >> 10 & 0x7f8 (%o2) are rebuilt from the converted
 *    value before returning to .cont7.
 */
1434.update7:
1435	sub	%l6,stridex,%i1		! %i1 = %l6 - stridex;
1436	cmp	counter,4		! counter ? 4
1437	ble	.cont7			! if ( counter <= 4 ) nothing to do
1438	faddd	%f34,K3,%f6		! (6_1) res += K3;
1439
1440	ld	[%i1+4],%i3		! %i3 = *(int*)(%i1 + 4);
1441	cmp	%g1,0			! hx ? 0
1442	bl	1f			! if ( hx < 0 ) goto 1
1443
1444	orcc	%g1,%i3,%g0		! (delay slot) hx | %i3 ? 0
1445	bz	1f			! if ( (hx | %i3) == 0 ) goto 1
1446	sethi	%hi(0x00080000),%i5	! (delay slot) %i5 = 0x00080000;
1447
1448	cmp	%g1,%i5			! hx ? 0x00080000
1449	bge,a	2f			! if ( hx >= 0x00080000 ) goto 2
1450	ldd	[%o3+0x50],%f18		! (annulled slot, taken path only) mask
1451
1452	fxtod	%f0,%f0			! res = *(long long*)&res;
1453	st	%f0,[%fp+tmp7]		! spill high word of res
1454
1455	fand	%f0,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
1456	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1457
1458	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
1459	sra	%g1,10,%o2		! (3_0) hx >>= 10;
1460
1461	sub	%o7,537,%o7		! iexp -= 537;
1462	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
1463	ba	.cont7			! resume the pipeline
1464	for	%f16,DC1,%f44		! (3_0) res = vis_for(res,DC1);
14652:
1466	fand	%f0,%f18,%f0		! res = vis_fand(res, mask);
1467	fxtod	%f0,%f0			! res = *(long long*)&res;
1468	ldd	[%o3+0x58],%f18		! %f18 = double at [%o3+0x58]
1469	faddd	%f0,%f18,%f0		! res += %f18;
1470	st	%f0,[%fp+tmp7]		! spill high word of res
1471
1472	fand	%f0,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
1473	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1474
1475	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
1476	sra	%g1,10,%o2		! (3_0) hx >>= 10;
1477
1478	sub	%o7,537,%o7		! iexp -= 537;
1479	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
1480	ba	.cont7			! resume the pipeline
1481	for	%f16,DC1,%f44		! (3_0) res = vis_for(res,DC1);
14821:
1483	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1484	sub	counter,4,tmp_counter	! tmp_counter = counter - 4;
1485
1486	ba	.cont7			! resume the pipeline
1487	mov	4,counter		! (delay slot) counter = 4;
1488
1489	.align	16
/*
 * .update8: out-of-line handler that cuts the current batch short.
 *
 * If counter <= 5 it returns to .cont8 unchanged.  Otherwise it saves
 * %l6 in tmp_px and counter - 5 in tmp_counter, sets counter to 5 and
 * returns to .cont8.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx >= 0x7ff00000, as at the .update22/.update24 sites - confirm.
 */
1490.update8:
1491	cmp	counter,5		! counter ? 5
1492	ble	.cont8			! if ( counter <= 5 ) nothing to record
1493	nop
1494
1495	mov	%l6,tmp_px		! tmp_px = %l6;
1496	sub	counter,5,tmp_counter	! tmp_counter = counter - 5;
1497
1498	ba	.cont8			! resume the pipeline
1499	mov	5,counter		! (delay slot) counter = 5;
1500
1501	.align	16
/*
 * .update9: out-of-line fix-up for an element whose high word hx (%g1)
 * needs special handling.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx < 0x00100000, as at the .update23/.update25 sites - confirm.
 *
 *  - counter <= 5: return to .cont9 (the delay-slot fand of another
 *    pipeline stage still runs).
 *  - hx < 0, or (hx | word at [%l6+4]) == 0 (label 1): save %l6 in
 *    tmp_px and counter - 5 in tmp_counter, set counter to 5 and return
 *    to .cont9.
 *  - otherwise: reinterpret the bits of res (%f8) as a 64-bit integer
 *    and convert to double (fxtod).  When hx >= 0x00080000 (label 2) the
 *    value is first masked with the double at [%o3+0x50] and the double
 *    at [%o3+0x58] is added after the conversion (DC4 and D2ON51 if %o3
 *    points at .CONST_TBL - TODO confirm).  hx, iexp (%o7, reduced by
 *    537 and negated) and hx >> 10 & 0x7f8 (%o2) are rebuilt from the
 *    converted value before returning to .cont9.
 */
1502.update9:
1503	ld	[%l6+4],%i3		! %i3 = *(int*)(%l6 + 4);
1504	cmp	counter,5		! counter ? 5
1505	ble	.cont9			! if ( counter <= 5 ) nothing to do
1506	fand	%f0,DC0,%f16		! (5_0) res = vis_fand(res,DC0);
1507
1508	cmp	%g1,0			! hx ? 0
1509	bl	1f			! if ( hx < 0 ) goto 1
1510
1511	orcc	%g1,%i3,%g0		! (delay slot) hx | %i3 ? 0
1512	bz	1f			! if ( (hx | %i3) == 0 ) goto 1
1513	sethi	%hi(0x00080000),%i1	! (delay slot) %i1 = 0x00080000;
1514
1515	cmp	%g1,%i1			! hx ? 0x00080000
1516	bge,a	2f			! if ( hx >= 0x00080000 ) goto 2
1517	ldd	[%o3+0x50],%f18		! (annulled slot, taken path only) mask
1518
1519	fxtod	%f8,%f8			! res = *(long long*)&res;
1520	st	%f8,[%fp+tmp7]		! spill high word of res
1521
1522	fand	%f8,DC0,%f24		! (4_0) res = vis_fand(res,DC0);
1523	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1524
1525	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
1526	sra	%g1,10,%o2		! (4_0) hx >>= 10;
1527
1528	sub	%o7,537,%o7		! iexp -= 537;
1529
1530	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
1531	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
1532	ba	.cont9			! resume the pipeline
1533	for	%f24,DC1,%f24		! (4_0) res = vis_for(res,DC1);
15342:
1535	fand	%f8,%f18,%f8		! res = vis_fand(res, mask);
1536	fxtod	%f8,%f8			! res = *(long long*)&res;
1537	ldd	[%o3+0x58],%f18		! %f18 = double at [%o3+0x58]
1538	faddd	%f8,%f18,%f8		! res += %f18;
1539	st	%f8,[%fp+tmp7]		! spill high word of res
1540
1541	fand	%f8,DC0,%f24		! (4_0) res = vis_fand(res,DC0);
1542	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1543
1544	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
1545	sra	%g1,10,%o2		! (4_0) hx >>= 10;
1546
1547	sub	%o7,537,%o7		! iexp -= 537;
1548
1549	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
1550	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
1551	ba	.cont9			! resume the pipeline
1552	for	%f24,DC1,%f24		! (4_0) res = vis_for(res,DC1);
15531:
1554	mov	%l6,tmp_px		! tmp_px = %l6;
1555	sub	counter,5,tmp_counter	! tmp_counter = counter - 5;
1556
1557	ba	.cont9			! resume the pipeline
1558	mov	5,counter		! (delay slot) counter = 5;
1559
1560	.align	16
/*
 * .update10: out-of-line handler that cuts the current batch short.
 *
 * If counter <= 6 it returns to .cont10 unchanged.  Otherwise it saves
 * %i0 in tmp_px and counter - 6 in tmp_counter, sets counter to 6 and
 * returns to .cont10.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx >= 0x7ff00000, as at the .update22/.update24 sites - confirm.
 */
1561.update10:
1562	cmp	counter,6		! counter ? 6
1563	ble	.cont10			! if ( counter <= 6 ) nothing to record
1564	nop
1565
1566	mov	%i0,tmp_px		! tmp_px = %i0;
1567	sub	counter,6,tmp_counter	! tmp_counter = counter - 6;
1568
1569	ba	.cont10			! resume the pipeline
1570	mov	6,counter		! (delay slot) counter = 6;
1571
1572	.align	16
/*
 * .update11: out-of-line fix-up for an element whose high word hx (%g1)
 * needs special handling.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx < 0x00100000, as at the .update23/.update25 sites - confirm.
 *
 *  - counter <= 6: return to .cont11 (the delay-slot fand of another
 *    pipeline stage still runs).
 *  - hx < 0, or (hx | word at [%i0+4]) == 0 (label 1): save %i0 in
 *    tmp_px and counter - 6 in tmp_counter, set counter to 6 and return
 *    to .cont11.
 *  - otherwise: reinterpret the bits of res (%f0) as a 64-bit integer
 *    and convert to double (fxtod).  When hx >= 0x00080000 (label 2) the
 *    value is first masked with the double at [%o3+0x50] and the double
 *    at [%o3+0x58] is added after the conversion (DC4 and D2ON51 if %o3
 *    points at .CONST_TBL - TODO confirm).  hx, iexp (%o7, reduced by
 *    537 and negated) and hx >> 10 & 0x7f8 (%o2) are rebuilt from the
 *    converted value before returning to .cont11.
 *
 * %i3 first holds the loaded word and is then reused for 0x00080000.
 */
1573.update11:
1574	ld	[%i0+4],%i3		! %i3 = *(int*)(%i0 + 4);
1575	cmp	counter,6		! counter ? 6
1576	ble	.cont11			! if ( counter <= 6 ) nothing to do
1577	fand	%f6,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
1578
1579	cmp	%g1,0			! hx ? 0
1580	bl	1f			! if ( hx < 0 ) goto 1
1581
1582	orcc	%g1,%i3,%g0		! (delay slot) hx | %i3 ? 0
1583	bz	1f			! if ( (hx | %i3) == 0 ) goto 1
1584	sethi	%hi(0x00080000),%i3	! (delay slot) %i3 = 0x00080000;
1585
1586	cmp	%g1,%i3			! hx ? 0x00080000
1587	bge,a	2f			! if ( hx >= 0x00080000 ) goto 2
1588	ldd	[%o3+0x50],%f18		! (annulled slot, taken path only) mask
1589
1590	fxtod	%f0,%f0			! res = *(long long*)&res;
1591	st	%f0,[%fp+tmp7]		! spill high word of res
1592
1593	fand	%f0,DC0,%f28		! (5_0) res = vis_fand(res,DC0);
1594	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1595
1596	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
1597	sra	%g1,10,%o2		! (5_0) hx >>= 10;
1598
1599	sub	%o7,537,%o7		! iexp -= 537;
1600
1601	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;
1602
1603	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
1604	ba	.cont11			! resume the pipeline
1605	for	%f28,DC1,%f28		! (5_0) res = vis_for(res,DC1);
16062:
1607	fand	%f0,%f18,%f0		! res = vis_fand(res, mask);
1608	fxtod	%f0,%f0			! res = *(long long*)&res;
1609	ldd	[%o3+0x58],%f18		! %f18 = double at [%o3+0x58]
1610	faddd	%f0,%f18,%f0		! res += %f18;
1611	st	%f0,[%fp+tmp7]		! spill high word of res
1612
1613	fand	%f0,DC0,%f28		! (5_0) res = vis_fand(res,DC0);
1614	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1615
1616	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
1617	sra	%g1,10,%o2		! (5_0) hx >>= 10;
1618
1619	sub	%o7,537,%o7		! iexp -= 537;
1620
1621	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;
1622
1623	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
1624	ba	.cont11			! resume the pipeline
1625	for	%f28,DC1,%f28		! (5_0) res = vis_for(res,DC1);
16261:
1627	mov	%i0,tmp_px		! tmp_px = %i0;
1628	sub	counter,6,tmp_counter	! tmp_counter = counter - 6;
1629
1630	ba	.cont11			! resume the pipeline
1631	mov	6,counter		! (delay slot) counter = 6;
1632
1633	.align	16
/*
 * .update12: out-of-line handler that cuts the current batch short.
 *
 * If counter <= 0 it returns to .cont12; the faddd in the delay slot
 * (work belonging to another pipeline stage) runs on both paths.
 * Otherwise it saves %l6 - stridex in tmp_px and counter in tmp_counter
 * (counter - 0), sets counter to 0 and returns to .cont12.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx >= 0x7ff00000, as at the .update22/.update24 sites - confirm.
 */
1634.update12:
1635	cmp	counter,0		! counter ? 0
1636	ble	.cont12			! if ( counter <= 0 ) nothing to record
1637	faddd	%f34,K3,%f34		! (2_1) res += K3;
1638
1639	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1640	sub	counter,0,tmp_counter	! tmp_counter = counter - 0;
1641
1642	ba	.cont12			! resume the pipeline
1643	mov	0,counter		! (delay slot) counter = 0;
1644
1645	.align	16
/*
 * .update13: out-of-line fix-up for an element whose high word hx (%g1)
 * needs special handling.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx < 0x00100000, as at the .update23/.update25 sites - confirm.
 *
 *  - counter <= 0: return to .cont13 (the delay-slot fpadd32 still
 *    runs).
 *  - hx < 0, or (hx | word at [%l6 - stridex + 4]) == 0 (label 1): save
 *    %l6 - stridex in tmp_px and counter in tmp_counter, set counter to
 *    0 and return to .cont13.
 *  - otherwise: reinterpret the bits of res (%f6) as a 64-bit integer
 *    and convert to double (fxtod).  When hx >= 0x00080000 (label 2) the
 *    value is first masked with the double at [%o3+0x50] and the double
 *    at [%o3+0x58] is added after the conversion (DC4 and D2ON51 if %o3
 *    points at .CONST_TBL - TODO confirm).  hx, iexp (%o7, reduced by
 *    537 and negated), hx >> 10 & 0x7f8 (%o2) and res_c (%f18) are
 *    rebuilt from the converted value before returning to .cont13.
 *
 * %l4 is used as scratch: address, then loaded word, then 0x00080000.
 */
1646.update13:
1647	sub	%l6,stridex,%l4		! %l4 = %l6 - stridex;
1648	cmp	counter,0		! counter ? 0
1649	ble	.cont13			! if ( counter <= 0 ) nothing to do
1650	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
1651
1652	ld	[%l4+4],%l4		! %l4 = *(int*)(%l4 + 4);
1653	cmp	%g1,0			! hx ? 0
1654	bl	1f			! if ( hx < 0 ) goto 1
1655
1656	orcc	%g1,%l4,%g0		! (delay slot) hx | %l4 ? 0
1657	bz	1f			! if ( (hx | %l4) == 0 ) goto 1
1658	sethi	%hi(0x00080000),%l4	! (delay slot) %l4 = 0x00080000;
1659
1660	cmp	%g1,%l4			! hx ? 0x00080000
1661	bge,a	2f			! if ( hx >= 0x00080000 ) goto 2
1662	ldd	[%o3+0x50],%f62		! (annulled slot, taken path only) mask
1663
1664	fxtod	%f6,%f6			! res = *(long long*)&res;
1665	st	%f6,[%fp+tmp7]		! spill high word of res
1666
1667	fand	%f6,DC0,%f44		! (6_0) res = vis_fand(res,DC0);
1668	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1669
1670	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
1671	sra	%g1,10,%o2		! (6_1) hx >>= 10;
1672
1673	sub	%o7,537,%o7		! iexp -= 537;
1674	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
1675	for	%f44,DC1,%f44		! (6_1) res = vis_for(res,DC1);
1676
1677	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
1678	ba	.cont13			! resume the pipeline
1679	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
16802:
1681	fand	%f6,%f62,%f6		! res = vis_fand(res, mask);
1682	fxtod	%f6,%f6			! res = *(long long*)&res;
1683	ldd	[%o3+0x58],%f62		! %f62 = double at [%o3+0x58]
1684	faddd	%f6,%f62,%f6		! res += %f62;
1685	st	%f6,[%fp+tmp7]		! spill high word of res
1686
1687	fand	%f6,DC0,%f44		! (6_0) res = vis_fand(res,DC0);
1688	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1689
1690	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
1691	sra	%g1,10,%o2		! (6_1) hx >>= 10;
1692	for	%f44,DC1,%f44		! (6_1) res = vis_for(res,DC1);
1693
1694	sub	%o7,537,%o7		! iexp -= 537;
1695
1696	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
1697	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
1698	ba	.cont13			! resume the pipeline
1699	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
17001:
1701	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1702	sub	counter,0,tmp_counter	! tmp_counter = counter - 0;
1703
1704	ba	.cont13			! resume the pipeline
1705	mov	0,counter		! (delay slot) counter = 0;
1706
1707	.align	16
/*
 * .update14: out-of-line handler that cuts the current batch short.
 *
 * If counter <= 1 it returns to .cont14; the faddd in the delay slot
 * (work belonging to another pipeline stage) runs on both paths.
 * Otherwise it saves %l6 - stridex in tmp_px and counter - 1 in
 * tmp_counter, sets counter to 1 and returns to .cont14.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx >= 0x7ff00000, as at the .update22/.update24 sites - confirm.
 */
1708.update14:
1709	cmp	counter,1		! counter ? 1
1710	ble	.cont14			! if ( counter <= 1 ) nothing to record
1711	faddd	%f34,K3,%f34		! (3_1) res += K3;
1712
1713	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1714	sub	counter,1,tmp_counter	! tmp_counter = counter - 1;
1715
1716	ba	.cont14			! resume the pipeline
1717	mov	1,counter		! (delay slot) counter = 1;
1718
1719	.align	16
/*
 * .update15: out-of-line fix-up for an element whose high word hx (%g1)
 * needs special handling.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx < 0x00100000, as at the .update23/.update25 sites - confirm.
 *
 *  - counter <= 1: return to .cont15 (the delay-slot fpadd32 still
 *    runs).
 *  - hx < 0, or (hx | word at [%l6 - stridex + 4]) == 0 (label 1): save
 *    %l6 - stridex in tmp_px and counter - 1 in tmp_counter, set counter
 *    to 1 and return to .cont15.
 *  - otherwise: reinterpret the bits of res (%f0) as a 64-bit integer
 *    and convert to double (fxtod).  When hx >= 0x00080000 (label 2) the
 *    value is first masked with the double at [%o3+0x50] and the double
 *    at [%o3+0x58] is added after the conversion (DC4 and D2ON51 if %o3
 *    points at .CONST_TBL - TODO confirm).  hx, iexp (%o7, reduced by
 *    537, negated, plus 0x5fe), hx >> 10 & 0x7f8 (%o2) and res_c (%f18)
 *    are rebuilt from the converted value before returning to .cont15.
 *
 * %l2 is used as scratch: address, then loaded word, then 0x00080000.
 */
1720.update15:
1721	sub	%l6,stridex,%l2		! %l2 = %l6 - stridex;
1722	cmp	counter,1		! counter ? 1
1723	ble	.cont15			! if ( counter <= 1 ) nothing to do
1724	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
1725
1726	ld	[%l2+4],%l2		! %l2 = *(int*)(%l2 + 4);
1727	cmp	%g1,0			! hx ? 0
1728	bl	1f			! if ( hx < 0 ) goto 1
1729
1730	orcc	%g1,%l2,%g0		! (delay slot) hx | %l2 ? 0
1731	bz	1f			! if ( (hx | %l2) == 0 ) goto 1
1732	sethi	%hi(0x00080000),%l2	! (delay slot) %l2 = 0x00080000;
1733
1734	cmp	%g1,%l2			! hx ? 0x00080000
1735	bge,a	2f			! if ( hx >= 0x00080000 ) goto 2
1736	ldd	[%o3+0x50],%f62		! (annulled slot, taken path only) mask
1737
1738	fxtod	%f0,%f0			! res = *(long long*)&res;
1739	st	%f0,[%fp+tmp7]		! spill high word of res
1740
1741	fand	%f0,DC0,%f18		! (0_0) res = vis_fand(res,DC0);
1742	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1743
1744	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
1745	sra	%g1,10,%o2		! (0_0) hx >>= 10;
1746
1747	sub	%o7,537,%o7		! iexp -= 537;
1748	for	%f18,DC1,%f28		! (0_0) res = vis_for(res,DC1);
1749
1750	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
1751
1752	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
1753	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
1754	ba	.cont15			! resume the pipeline
1755	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
17562:
1757	fand	%f0,%f62,%f0		! res = vis_fand(res, mask);
1758	fxtod	%f0,%f0			! res = *(long long*)&res;
1759	ldd	[%o3+0x58],%f62		! %f62 = double at [%o3+0x58]
1760	faddd	%f0,%f62,%f0		! res += %f62;
1761	st	%f0,[%fp+tmp7]		! spill high word of res
1762
1763	fand	%f0,DC0,%f18		! (0_0) res = vis_fand(res,DC0);
1764	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1765
1766	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
1767	sra	%g1,10,%o2		! (0_0) hx >>= 10;
1768	for	%f18,DC1,%f28		! (0_0) res = vis_for(res,DC1);
1769
1770	sub	%o7,537,%o7		! iexp -= 537;
1771
1772	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
1773
1774	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
1775	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
1776	ba	.cont15			! resume the pipeline
1777	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
17781:
1779	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1780	sub	counter,1,tmp_counter	! tmp_counter = counter - 1;
1781
1782	ba	.cont15			! resume the pipeline
1783	mov	1,counter		! (delay slot) counter = 1;
1784
1785	.align	16
/*
 * .update16: out-of-line handler that cuts the current batch short.
 *
 * If counter <= 2 it returns to .cont16; the fand in the delay slot
 * (work belonging to another pipeline stage) runs on both paths.
 * Otherwise it saves %l6 - stridex in tmp_px and counter - 2 in
 * tmp_counter, sets counter to 2 and returns to .cont16.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx >= 0x7ff00000, as at the .update22/.update24 sites - confirm.
 */
1786.update16:
1787	cmp	counter,2		! counter ? 2
1788	ble	.cont16			! if ( counter <= 2 ) nothing to record
1789	fand	%f18,DC3,%f8		! (0_0) res_c = vis_fand(res_c,DC3);
1790
1791	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1792	sub	counter,2,tmp_counter	! tmp_counter = counter - 2;
1793
1794	ba	.cont16			! resume the pipeline
1795	mov	2,counter		! (delay slot) counter = 2;
1796
1797	.align	16
/*
 * .update17: out-of-line fix-up for an element whose high word hx (%g1)
 * needs special handling.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx < 0x00100000, as at the .update23/.update25 sites - confirm.
 *
 *  - counter <= 2: return to .cont17 (the delay-slot fand of another
 *    pipeline stage still runs).
 *  - hx < 0, or (hx | word at [%l6 - stridex + 4]) == 0 (label 1): save
 *    %l6 - stridex in tmp_px and counter - 2 in tmp_counter, set counter
 *    to 2 and return to .cont17.
 *  - otherwise: reinterpret the bits of res (%f6) as a 64-bit integer
 *    and convert to double (fxtod).  When hx >= 0x00080000 (label 2) the
 *    value is first masked with the double at [%o3+0x50] and the double
 *    at [%o3+0x58] is added after the conversion (DC4 and D2ON51 if %o3
 *    points at .CONST_TBL - TODO confirm).  hx, iexp (%o7, reduced by
 *    537 and negated) and hx >> 10 & 0x7f8 (%o2) are rebuilt from the
 *    converted value before returning to .cont17.
 *
 * %i2 is used as scratch: address, then loaded word, then 0x00080000.
 * %f2 is used as the temporary for the two table constants.
 */
1798.update17:
1799	sub	%l6,stridex,%i2		! %i2 = %l6 - stridex;
1800	cmp	counter,2		! counter ? 2
1801	ble	.cont17			! if ( counter <= 2 ) nothing to do
1802	fand	%f0,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
1803
1804	ld	[%i2+4],%i2		! %i2 = *(int*)(%i2 + 4);
1805	cmp	%g1,0			! hx ? 0
1806	bl	1f			! if ( hx < 0 ) goto 1
1807
1808	orcc	%g1,%i2,%g0		! (delay slot) hx | %i2 ? 0
1809	bz	1f			! if ( (hx | %i2) == 0 ) goto 1
1810	sethi	%hi(0x00080000),%i2	! (delay slot) %i2 = 0x00080000;
1811
1812	cmp	%g1,%i2			! hx ? 0x00080000
1813	bge,a	2f			! if ( hx >= 0x00080000 ) goto 2
1814	ldd	[%o3+0x50],%f2		! (annulled slot, taken path only) mask
1815
1816	fxtod	%f6,%f6			! res = *(long long*)&res;
1817	st	%f6,[%fp+tmp7]		! spill high word of res
1818
1819	fand	%f6,DC0,%f44		! (1_0) res = vis_fand(res,DC0);
1820	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1821
1822	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
1823	sra	%g1,10,%o2		! (1_0) hx >>= 10;
1824
1825	sub	%o7,537,%o7		! iexp -= 537;
1826
1827	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
1828	sub	%g0,%o7,%o7		! (1_0) iexp = -iexp;
1829	ba	.cont17			! resume the pipeline
1830	for	%f44,DC1,%f44		! (1_0) res = vis_for(res,DC1);
18312:
1832	fand	%f6,%f2,%f6		! res = vis_fand(res, mask);
1833	fxtod	%f6,%f6			! res = *(long long*)&res;
1834	ldd	[%o3+0x58],%f2		! %f2 = double at [%o3+0x58]
1835	faddd	%f6,%f2,%f6		! res += %f2;
1836	st	%f6,[%fp+tmp7]		! spill high word of res
1837
1838	fand	%f6,DC0,%f44		! (1_0) res = vis_fand(res,DC0);
1839	ld	[%fp+tmp7],%g1		! hx = high word of converted res
1840
1841	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
1842	sra	%g1,10,%o2		! (1_0) hx >>= 10;
1843
1844	sub	%o7,537,%o7		! iexp -= 537;
1845
1846	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
1847	sub	%g0,%o7,%o7		! (1_0) iexp = -iexp;
1848	ba	.cont17			! resume the pipeline
1849	for	%f44,DC1,%f44		! (1_0) res = vis_for(res,DC1);
18501:
1851	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1852	sub	counter,2,tmp_counter	! tmp_counter = counter - 2;
1853
1854	ba	.cont17			! resume the pipeline
1855	mov	2,counter		! (delay slot) counter = 2;
1856
1857	.align	16
/*
 * .update18: out-of-line handler that cuts the current batch short.
 *
 * If counter <= 3 it returns to .cont18; the fand in the delay slot
 * (work belonging to another pipeline stage) runs on both paths.
 * Otherwise it saves %l6 - stridex in tmp_px and counter - 3 in
 * tmp_counter, sets counter to 3 and returns to .cont18.
 * NOTE(review): the branch site is outside this view; presumably taken
 * when hx >= 0x7ff00000, as at the .update22/.update24 sites - confirm.
 */
1858.update18:
1859	cmp	counter,3		! counter ? 3
1860	ble	.cont18			! if ( counter <= 3 ) nothing to record
1861	fand	%f18,DC3,%f8		! (1_0) res_c = vis_fand(res_c,DC3);
1862
1863	sub	%l6,stridex,tmp_px	! tmp_px = %l6 - stridex;
1864	sub	counter,3,tmp_counter	! tmp_counter = counter - 3;
1865
1866	ba	.cont18			! resume the pipeline
1867	mov	3,counter		! (delay slot) counter = 3;
1868
1869	.align	16
1870.update19:
1871	sub	%l6,stridex,%i4
1872	cmp	counter,3
1873	ble	.cont19
1874	fand	%f6,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
1875
1876	ld	[%i4+4],%i4
1877	cmp	%g1,0
1878	bl	1f
1879
1880	orcc	%g1,%i4,%g0
1881	bz	1f
1882	sethi	%hi(0x00080000),%i4
1883
1884	cmp	%g1,%i4
1885	bge,a	2f
1886	ldd	[%o3+0x50],%f2
1887
1888	fxtod	%f0,%f0			! res = *(long long*)&res;
1889	st	%f0,[%fp+tmp7]
1890
1891	fand	%f0,DC0,%f28		! (2_0) res = vis_fand(res,DC0);
1892	ld	[%fp+tmp7],%g1
1893
1894	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
1895
1896	sra	%g1,10,%o2		! (2_0) hx >>= 10;
1897	sub	%o7,537,%o7
1898
1899	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
1900	sub	%g0,%o7,%o7		! (2_0) iexp = -iexp;
1901	ba	.cont19
1902	for	%f28,DC1,%f28		! (2_0) res = vis_for(res,DC1);
19032:
1904	fand	%f0,%f2,%f0
1905	fxtod	%f0,%f0			! res = *(long long*)&res;
1906	ldd	[%o3+0x58],%f2
1907	faddd	%f0,%f2,%f0
1908	st	%f0,[%fp+tmp7]
1909
1910	fand	%f0,DC0,%f28		! (2_0) res = vis_fand(res,DC0);
1911	ld	[%fp+tmp7],%g1
1912
1913	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
1914
1915	sra	%g1,10,%o2		! (2_0) hx >>= 10;
1916	sub	%o7,537,%o7
1917
1918	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
1919	sub	%g0,%o7,%o7		! (2_0) iexp = -iexp;
1920	ba	.cont19
1921	for	%f28,DC1,%f28		! (2_0) res = vis_for(res,DC1);
19221:
1923	sub	%l6,stridex,tmp_px
1924	sub	counter,3,tmp_counter
1925
1926	ba	.