1*25c28e83SPiotr Jasiukajtis/*
2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis *
4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis *
8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis *
13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis *
19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis */
21*25c28e83SPiotr Jasiukajtis/*
22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23*25c28e83SPiotr Jasiukajtis */
24*25c28e83SPiotr Jasiukajtis/*
25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms.
27*25c28e83SPiotr Jasiukajtis */
28*25c28e83SPiotr Jasiukajtis
29*25c28e83SPiotr Jasiukajtis	.file	"__vrsqrtf.S"
30*25c28e83SPiotr Jasiukajtis
31*25c28e83SPiotr Jasiukajtis#include "libm.h"
32*25c28e83SPiotr Jasiukajtis
33*25c28e83SPiotr Jasiukajtis	RO_DATA
34*25c28e83SPiotr Jasiukajtis	.align	64
35*25c28e83SPiotr Jasiukajtis
36*25c28e83SPiotr Jasiukajtis! i = [0,63]
37*25c28e83SPiotr Jasiukajtis! TBL[2*i  ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24;
38*25c28e83SPiotr Jasiukajtis! TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
39*25c28e83SPiotr Jasiukajtis! i = [64,127]
40*25c28e83SPiotr Jasiukajtis! TBL[2*i  ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23;
41*25c28e83SPiotr Jasiukajtis! TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
42*25c28e83SPiotr Jasiukajtis
43*25c28e83SPiotr Jasiukajtis.CONST_TBL:
44*25c28e83SPiotr Jasiukajtis	.word	0x3e800000, 0x00000000, 0x3ff6a09e, 0x667f3bcd,
45*25c28e83SPiotr Jasiukajtis	.word	0x3e7f81f8, 0x1f81f820, 0x3ff673e3, 0x2ef63a03,
46*25c28e83SPiotr Jasiukajtis	.word	0x3e7f07c1, 0xf07c1f08, 0x3ff6482d, 0x37a5a3d2,
47*25c28e83SPiotr Jasiukajtis	.word	0x3e7e9131, 0xabf0b767, 0x3ff61d72, 0xb7978671,
48*25c28e83SPiotr Jasiukajtis	.word	0x3e7e1e1e, 0x1e1e1e1e, 0x3ff5f3aa, 0x673fa911,
49*25c28e83SPiotr Jasiukajtis	.word	0x3e7dae60, 0x76b981db, 0x3ff5cacb, 0x7802f342,
50*25c28e83SPiotr Jasiukajtis	.word	0x3e7d41d4, 0x1d41d41d, 0x3ff5a2cd, 0x8c69d61a,
51*25c28e83SPiotr Jasiukajtis	.word	0x3e7cd856, 0x89039b0b, 0x3ff57ba8, 0xb0ee01b9,
52*25c28e83SPiotr Jasiukajtis	.word	0x3e7c71c7, 0x1c71c71c, 0x3ff55555, 0x55555555,
53*25c28e83SPiotr Jasiukajtis	.word	0x3e7c0e07, 0x0381c0e0, 0x3ff52fcc, 0x468d6b54,
54*25c28e83SPiotr Jasiukajtis	.word	0x3e7bacf9, 0x14c1bad0, 0x3ff50b06, 0xa8fc6b70,
55*25c28e83SPiotr Jasiukajtis	.word	0x3e7b4e81, 0xb4e81b4f, 0x3ff4e6fd, 0xf33cf032,
56*25c28e83SPiotr Jasiukajtis	.word	0x3e7af286, 0xbca1af28, 0x3ff4c3ab, 0xe93bcf74,
57*25c28e83SPiotr Jasiukajtis	.word	0x3e7a98ef, 0x606a63be, 0x3ff4a10a, 0x97af7b92,
58*25c28e83SPiotr Jasiukajtis	.word	0x3e7a41a4, 0x1a41a41a, 0x3ff47f14, 0x4fe17f9f,
59*25c28e83SPiotr Jasiukajtis	.word	0x3e79ec8e, 0x951033d9, 0x3ff45dc3, 0xa3c34fa3,
60*25c28e83SPiotr Jasiukajtis	.word	0x3e799999, 0x9999999a, 0x3ff43d13, 0x6248490f,
61*25c28e83SPiotr Jasiukajtis	.word	0x3e7948b0, 0xfcd6e9e0, 0x3ff41cfe, 0x93ff5199,
62*25c28e83SPiotr Jasiukajtis	.word	0x3e78f9c1, 0x8f9c18fa, 0x3ff3fd80, 0x77e70577,
63*25c28e83SPiotr Jasiukajtis	.word	0x3e78acb9, 0x0f6bf3aa, 0x3ff3de94, 0x8077db58,
64*25c28e83SPiotr Jasiukajtis	.word	0x3e786186, 0x18618618, 0x3ff3c036, 0x50e00e03,
65*25c28e83SPiotr Jasiukajtis	.word	0x3e781818, 0x18181818, 0x3ff3a261, 0xba6d7a37,
66*25c28e83SPiotr Jasiukajtis	.word	0x3e77d05f, 0x417d05f4, 0x3ff38512, 0xba21f51e,
67*25c28e83SPiotr Jasiukajtis	.word	0x3e778a4c, 0x8178a4c8, 0x3ff36845, 0x766eec92,
68*25c28e83SPiotr Jasiukajtis	.word	0x3e7745d1, 0x745d1746, 0x3ff34bf6, 0x3d156826,
69*25c28e83SPiotr Jasiukajtis	.word	0x3e7702e0, 0x5c0b8170, 0x3ff33021, 0x8127c0e0,
70*25c28e83SPiotr Jasiukajtis	.word	0x3e76c16c, 0x16c16c17, 0x3ff314c3, 0xd92a9e91,
71*25c28e83SPiotr Jasiukajtis	.word	0x3e768168, 0x16816817, 0x3ff2f9d9, 0xfd52fd50,
72*25c28e83SPiotr Jasiukajtis	.word	0x3e7642c8, 0x590b2164, 0x3ff2df60, 0xc5df2c9e,
73*25c28e83SPiotr Jasiukajtis	.word	0x3e760581, 0x60581606, 0x3ff2c555, 0x2988e428,
74*25c28e83SPiotr Jasiukajtis	.word	0x3e75c988, 0x2b931057, 0x3ff2abb4, 0x3c0eb0f4,
75*25c28e83SPiotr Jasiukajtis	.word	0x3e758ed2, 0x308158ed, 0x3ff2927b, 0x2cd320f5,
76*25c28e83SPiotr Jasiukajtis	.word	0x3e755555, 0x55555555, 0x3ff279a7, 0x4590331c,
77*25c28e83SPiotr Jasiukajtis	.word	0x3e751d07, 0xeae2f815, 0x3ff26135, 0xe91daf55,
78*25c28e83SPiotr Jasiukajtis	.word	0x3e74e5e0, 0xa72f0539, 0x3ff24924, 0x92492492,
79*25c28e83SPiotr Jasiukajtis	.word	0x3e74afd6, 0xa052bf5b, 0x3ff23170, 0xd2be638a,
80*25c28e83SPiotr Jasiukajtis	.word	0x3e747ae1, 0x47ae147b, 0x3ff21a18, 0x51ff630a,
81*25c28e83SPiotr Jasiukajtis	.word	0x3e7446f8, 0x6562d9fb, 0x3ff20318, 0xcc6a8f5d,
82*25c28e83SPiotr Jasiukajtis	.word	0x3e741414, 0x14141414, 0x3ff1ec70, 0x124e98f9,
83*25c28e83SPiotr Jasiukajtis	.word	0x3e73e22c, 0xbce4a902, 0x3ff1d61c, 0x070ae7d3,
84*25c28e83SPiotr Jasiukajtis	.word	0x3e73b13b, 0x13b13b14, 0x3ff1c01a, 0xa03be896,
85*25c28e83SPiotr Jasiukajtis	.word	0x3e738138, 0x13813814, 0x3ff1aa69, 0xe4f2777f,
86*25c28e83SPiotr Jasiukajtis	.word	0x3e73521c, 0xfb2b78c1, 0x3ff19507, 0xecf5b9e9,
87*25c28e83SPiotr Jasiukajtis	.word	0x3e7323e3, 0x4a2b10bf, 0x3ff17ff2, 0xe00ec3ee,
88*25c28e83SPiotr Jasiukajtis	.word	0x3e72f684, 0xbda12f68, 0x3ff16b28, 0xf55d72d4,
89*25c28e83SPiotr Jasiukajtis	.word	0x3e72c9fb, 0x4d812ca0, 0x3ff156a8, 0x72b5ef62,
90*25c28e83SPiotr Jasiukajtis	.word	0x3e729e41, 0x29e4129e, 0x3ff1426f, 0xac0654db,
91*25c28e83SPiotr Jasiukajtis	.word	0x3e727350, 0xb8812735, 0x3ff12e7d, 0x02c40253,
92*25c28e83SPiotr Jasiukajtis	.word	0x3e724924, 0x92492492, 0x3ff11ace, 0xe560242a,
93*25c28e83SPiotr Jasiukajtis	.word	0x3e721fb7, 0x8121fb78, 0x3ff10763, 0xcec30b26,
94*25c28e83SPiotr Jasiukajtis	.word	0x3e71f704, 0x7dc11f70, 0x3ff0f43a, 0x45cdedad,
95*25c28e83SPiotr Jasiukajtis	.word	0x3e71cf06, 0xada2811d, 0x3ff0e150, 0xdce2b60c,
96*25c28e83SPiotr Jasiukajtis	.word	0x3e71a7b9, 0x611a7b96, 0x3ff0cea6, 0x317186dc,
97*25c28e83SPiotr Jasiukajtis	.word	0x3e718118, 0x11811812, 0x3ff0bc38, 0xeb8ba412,
98*25c28e83SPiotr Jasiukajtis	.word	0x3e715b1e, 0x5f75270d, 0x3ff0aa07, 0xbd7b7488,
99*25c28e83SPiotr Jasiukajtis	.word	0x3e7135c8, 0x1135c811, 0x3ff09811, 0x63615499,
100*25c28e83SPiotr Jasiukajtis	.word	0x3e711111, 0x11111111, 0x3ff08654, 0xa2d4f6db,
101*25c28e83SPiotr Jasiukajtis	.word	0x3e70ecf5, 0x6be69c90, 0x3ff074d0, 0x4a8b1438,
102*25c28e83SPiotr Jasiukajtis	.word	0x3e70c971, 0x4fbcda3b, 0x3ff06383, 0x31ff307a,
103*25c28e83SPiotr Jasiukajtis	.word	0x3e70a681, 0x0a6810a7, 0x3ff0526c, 0x39213bfa,
104*25c28e83SPiotr Jasiukajtis	.word	0x3e708421, 0x08421084, 0x3ff0418a, 0x4806de7d,
105*25c28e83SPiotr Jasiukajtis	.word	0x3e70624d, 0xd2f1a9fc, 0x3ff030dc, 0x4ea03a72,
106*25c28e83SPiotr Jasiukajtis	.word	0x3e704104, 0x10410410, 0x3ff02061, 0x446ffa9a,
107*25c28e83SPiotr Jasiukajtis	.word	0x3e702040, 0x81020408, 0x3ff01018, 0x28467ee9,
108*25c28e83SPiotr Jasiukajtis	.word	0x3e800000, 0x00000000, 0x3ff00000, 0x00000000,
109*25c28e83SPiotr Jasiukajtis	.word	0x3e7f81f8, 0x1f81f820, 0x3fefc0bd, 0x88a0f1d9,
110*25c28e83SPiotr Jasiukajtis	.word	0x3e7f07c1, 0xf07c1f08, 0x3fef82ec, 0x882c0f9b,
111*25c28e83SPiotr Jasiukajtis	.word	0x3e7e9131, 0xabf0b767, 0x3fef467f, 0x2814b0cc,
112*25c28e83SPiotr Jasiukajtis	.word	0x3e7e1e1e, 0x1e1e1e1e, 0x3fef0b68, 0x48d2af1c,
113*25c28e83SPiotr Jasiukajtis	.word	0x3e7dae60, 0x76b981db, 0x3feed19b, 0x75e78957,
114*25c28e83SPiotr Jasiukajtis	.word	0x3e7d41d4, 0x1d41d41d, 0x3fee990c, 0xdad55ed2,
115*25c28e83SPiotr Jasiukajtis	.word	0x3e7cd856, 0x89039b0b, 0x3fee61b1, 0x38f18adc,
116*25c28e83SPiotr Jasiukajtis	.word	0x3e7c71c7, 0x1c71c71c, 0x3fee2b7d, 0xddfefa66,
117*25c28e83SPiotr Jasiukajtis	.word	0x3e7c0e07, 0x0381c0e0, 0x3fedf668, 0x9b7e6350,
118*25c28e83SPiotr Jasiukajtis	.word	0x3e7bacf9, 0x14c1bad0, 0x3fedc267, 0xbea45549,
119*25c28e83SPiotr Jasiukajtis	.word	0x3e7b4e81, 0xb4e81b4f, 0x3fed8f72, 0x08e6b82d,
120*25c28e83SPiotr Jasiukajtis	.word	0x3e7af286, 0xbca1af28, 0x3fed5d7e, 0xa914b937,
121*25c28e83SPiotr Jasiukajtis	.word	0x3e7a98ef, 0x606a63be, 0x3fed2c85, 0x34ed6d86,
122*25c28e83SPiotr Jasiukajtis	.word	0x3e7a41a4, 0x1a41a41a, 0x3fecfc7d, 0xa32a9213,
123*25c28e83SPiotr Jasiukajtis	.word	0x3e79ec8e, 0x951033d9, 0x3feccd60, 0x45f5d358,
124*25c28e83SPiotr Jasiukajtis	.word	0x3e799999, 0x9999999a, 0x3fec9f25, 0xc5bfedd9,
125*25c28e83SPiotr Jasiukajtis	.word	0x3e7948b0, 0xfcd6e9e0, 0x3fec71c7, 0x1c71c71c,
126*25c28e83SPiotr Jasiukajtis	.word	0x3e78f9c1, 0x8f9c18fa, 0x3fec453d, 0x90f057a2,
127*25c28e83SPiotr Jasiukajtis	.word	0x3e78acb9, 0x0f6bf3aa, 0x3fec1982, 0xb2ece47b,
128*25c28e83SPiotr Jasiukajtis	.word	0x3e786186, 0x18618618, 0x3febee90, 0x56fb9c39,
129*25c28e83SPiotr Jasiukajtis	.word	0x3e781818, 0x18181818, 0x3febc460, 0x92eb3118,
130*25c28e83SPiotr Jasiukajtis	.word	0x3e77d05f, 0x417d05f4, 0x3feb9aed, 0xba588347,
131*25c28e83SPiotr Jasiukajtis	.word	0x3e778a4c, 0x8178a4c8, 0x3feb7232, 0x5b79db11,
132*25c28e83SPiotr Jasiukajtis	.word	0x3e7745d1, 0x745d1746, 0x3feb4a29, 0x3c1d9550,
133*25c28e83SPiotr Jasiukajtis	.word	0x3e7702e0, 0x5c0b8170, 0x3feb22cd, 0x56d87d7e,
134*25c28e83SPiotr Jasiukajtis	.word	0x3e76c16c, 0x16c16c17, 0x3feafc19, 0xd8606169,
135*25c28e83SPiotr Jasiukajtis	.word	0x3e768168, 0x16816817, 0x3fead60a, 0x1d0fb394,
136*25c28e83SPiotr Jasiukajtis	.word	0x3e7642c8, 0x590b2164, 0x3feab099, 0xae8f539a,
137*25c28e83SPiotr Jasiukajtis	.word	0x3e760581, 0x60581606, 0x3fea8bc4, 0x41a3d02c,
138*25c28e83SPiotr Jasiukajtis	.word	0x3e75c988, 0x2b931057, 0x3fea6785, 0xb41bacf7,
139*25c28e83SPiotr Jasiukajtis	.word	0x3e758ed2, 0x308158ed, 0x3fea43da, 0x0adc6899,
140*25c28e83SPiotr Jasiukajtis	.word	0x3e755555, 0x55555555, 0x3fea20bd, 0x700c2c3e,
141*25c28e83SPiotr Jasiukajtis	.word	0x3e751d07, 0xeae2f815, 0x3fe9fe2c, 0x315637ee,
142*25c28e83SPiotr Jasiukajtis	.word	0x3e74e5e0, 0xa72f0539, 0x3fe9dc22, 0xbe484458,
143*25c28e83SPiotr Jasiukajtis	.word	0x3e74afd6, 0xa052bf5b, 0x3fe9ba9d, 0xa6c73588,
144*25c28e83SPiotr Jasiukajtis	.word	0x3e747ae1, 0x47ae147b, 0x3fe99999, 0x9999999a,
145*25c28e83SPiotr Jasiukajtis	.word	0x3e7446f8, 0x6562d9fb, 0x3fe97913, 0x63068b54,
146*25c28e83SPiotr Jasiukajtis	.word	0x3e741414, 0x14141414, 0x3fe95907, 0xeb87ab44,
147*25c28e83SPiotr Jasiukajtis	.word	0x3e73e22c, 0xbce4a902, 0x3fe93974, 0x368cfa31,
148*25c28e83SPiotr Jasiukajtis	.word	0x3e73b13b, 0x13b13b14, 0x3fe91a55, 0x6151761c,
149*25c28e83SPiotr Jasiukajtis	.word	0x3e738138, 0x13813814, 0x3fe8fba8, 0xa1bf6f96,
150*25c28e83SPiotr Jasiukajtis	.word	0x3e73521c, 0xfb2b78c1, 0x3fe8dd6b, 0x4563a009,
151*25c28e83SPiotr Jasiukajtis	.word	0x3e7323e3, 0x4a2b10bf, 0x3fe8bf9a, 0xb06e1af3,
152*25c28e83SPiotr Jasiukajtis	.word	0x3e72f684, 0xbda12f68, 0x3fe8a234, 0x5cc04426,
153*25c28e83SPiotr Jasiukajtis	.word	0x3e72c9fb, 0x4d812ca0, 0x3fe88535, 0xd90703c6,
154*25c28e83SPiotr Jasiukajtis	.word	0x3e729e41, 0x29e4129e, 0x3fe8689c, 0xc7e07e7d,
155*25c28e83SPiotr Jasiukajtis	.word	0x3e727350, 0xb8812735, 0x3fe84c66, 0xdf0ca4c2,
156*25c28e83SPiotr Jasiukajtis	.word	0x3e724924, 0x92492492, 0x3fe83091, 0xe6a7f7e7,
157*25c28e83SPiotr Jasiukajtis	.word	0x3e721fb7, 0x8121fb78, 0x3fe8151b, 0xb86fee1d,
158*25c28e83SPiotr Jasiukajtis	.word	0x3e71f704, 0x7dc11f70, 0x3fe7fa02, 0x3f1068d1,
159*25c28e83SPiotr Jasiukajtis	.word	0x3e71cf06, 0xada2811d, 0x3fe7df43, 0x7579b9b5,
160*25c28e83SPiotr Jasiukajtis	.word	0x3e71a7b9, 0x611a7b96, 0x3fe7c4dd, 0x663ebb88,
161*25c28e83SPiotr Jasiukajtis	.word	0x3e718118, 0x11811812, 0x3fe7aace, 0x2afa8b72,
162*25c28e83SPiotr Jasiukajtis	.word	0x3e715b1e, 0x5f75270d, 0x3fe79113, 0xebbd7729,
163*25c28e83SPiotr Jasiukajtis	.word	0x3e7135c8, 0x1135c811, 0x3fe777ac, 0xde80baea,
164*25c28e83SPiotr Jasiukajtis	.word	0x3e711111, 0x11111111, 0x3fe75e97, 0x46a0b098,
165*25c28e83SPiotr Jasiukajtis	.word	0x3e70ecf5, 0x6be69c90, 0x3fe745d1, 0x745d1746,
166*25c28e83SPiotr Jasiukajtis	.word	0x3e70c971, 0x4fbcda3b, 0x3fe72d59, 0xc45f1fc5,
167*25c28e83SPiotr Jasiukajtis	.word	0x3e70a681, 0x0a6810a7, 0x3fe7152e, 0x9f44f01f,
168*25c28e83SPiotr Jasiukajtis	.word	0x3e708421, 0x08421084, 0x3fe6fd4e, 0x79325467,
169*25c28e83SPiotr Jasiukajtis	.word	0x3e70624d, 0xd2f1a9fc, 0x3fe6e5b7, 0xd16657e1,
170*25c28e83SPiotr Jasiukajtis	.word	0x3e704104, 0x10410410, 0x3fe6ce69, 0x31d5858d,
171*25c28e83SPiotr Jasiukajtis	.word	0x3e702040, 0x81020408, 0x3fe6b761, 0x2ec892f6,
172*25c28e83SPiotr Jasiukajtis
173*25c28e83SPiotr Jasiukajtis	.word	0x3fefffff, 0xfee7f18f	! K0 =  9.99999997962321453275e-01
174*25c28e83SPiotr Jasiukajtis	.word	0xbfdfffff, 0xfe07e52f	! K1 = -4.99999998166077580600e-01
175*25c28e83SPiotr Jasiukajtis	.word	0x3fd80118, 0x0ca296d9	! K2 =  3.75066768969515586277e-01
176*25c28e83SPiotr Jasiukajtis	.word	0xbfd400fc, 0x0bbb8e78	! K3 = -3.12560092408808548438e-01
177*25c28e83SPiotr Jasiukajtis	.word	0x7ffe0000, 0x7ffe0000	! DC0 = 0x7ffe0000 in each 32-bit half (mask operand of fand)
178*25c28e83SPiotr Jasiukajtis	.word	0x3f800000, 0x40000000	! FONE = 1.0f, FTWO = 2.0f (loaded together by one ldd)
179*25c28e83SPiotr Jasiukajtis
180*25c28e83SPiotr Jasiukajtis#define stridex		%l4
181*25c28e83SPiotr Jasiukajtis#define stridex2	%l1
182*25c28e83SPiotr Jasiukajtis#define stridey		%l3
183*25c28e83SPiotr Jasiukajtis#define stridey2	%i2
184*25c28e83SPiotr Jasiukajtis#define TBL		%l2
185*25c28e83SPiotr Jasiukajtis#define counter		%i5
186*25c28e83SPiotr Jasiukajtis
187*25c28e83SPiotr Jasiukajtis#define K3		%f38
188*25c28e83SPiotr Jasiukajtis#define K2		%f36
189*25c28e83SPiotr Jasiukajtis#define K1		%f34
190*25c28e83SPiotr Jasiukajtis#define K0		%f32
191*25c28e83SPiotr Jasiukajtis#define DC0		%f4
192*25c28e83SPiotr Jasiukajtis#define FONE		%f2
193*25c28e83SPiotr Jasiukajtis#define FTWO		%f3
194*25c28e83SPiotr Jasiukajtis
195*25c28e83SPiotr Jasiukajtis#define _0x00800000	%o2
196*25c28e83SPiotr Jasiukajtis#define _0x7f800000	%o4
197*25c28e83SPiotr Jasiukajtis
198*25c28e83SPiotr Jasiukajtis#define tmp0		STACK_BIAS-0x30
199*25c28e83SPiotr Jasiukajtis#define tmp1		STACK_BIAS-0x28
200*25c28e83SPiotr Jasiukajtis#define tmp2		STACK_BIAS-0x20
201*25c28e83SPiotr Jasiukajtis#define tmp3		STACK_BIAS-0x18
202*25c28e83SPiotr Jasiukajtis#define tmp_counter	STACK_BIAS-0x10
203*25c28e83SPiotr Jasiukajtis#define tmp_px		STACK_BIAS-0x08
204*25c28e83SPiotr Jasiukajtis
205*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9
206*25c28e83SPiotr Jasiukajtis#define tmps		0x30
207*25c28e83SPiotr Jasiukajtis
208*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
209*25c28e83SPiotr Jasiukajtis!      !!!!!   algorithm   !!!!!
210*25c28e83SPiotr Jasiukajtis!  ((float*)&ddx0)[0] = *px;
211*25c28e83SPiotr Jasiukajtis!  ax0 = *(int*)px;
212*25c28e83SPiotr Jasiukajtis!
213*25c28e83SPiotr Jasiukajtis!  ((float*)&ddx0)[1] = *(px + stridex);
214*25c28e83SPiotr Jasiukajtis!  ax1 = *(int*)(px + stridex);
215*25c28e83SPiotr Jasiukajtis!
216*25c28e83SPiotr Jasiukajtis!  px += stridex2;
217*25c28e83SPiotr Jasiukajtis!
218*25c28e83SPiotr Jasiukajtis!  if ( ax0 >= 0x7f800000 )
219*25c28e83SPiotr Jasiukajtis!  {
220*25c28e83SPiotr Jasiukajtis!    RETURN ( FONE / ((float*)&dres0)[0] );
221*25c28e83SPiotr Jasiukajtis!  }
222*25c28e83SPiotr Jasiukajtis!  if ( ax0 < 0x00800000 )
223*25c28e83SPiotr Jasiukajtis!  {
224*25c28e83SPiotr Jasiukajtis!    float res = ((float*)&dres0)[0];
225*25c28e83SPiotr Jasiukajtis!
226*25c28e83SPiotr Jasiukajtis!    if ( (ax0 & 0x7fffffff) == 0 )  /* |X| = zero  */
227*25c28e83SPiotr Jasiukajtis!    {
228*25c28e83SPiotr Jasiukajtis!      RETURN ( FONE / res )
229*25c28e83SPiotr Jasiukajtis!    }
230*25c28e83SPiotr Jasiukajtis!    else if ( ax0 >= 0 )  /* X = denormal  */
231*25c28e83SPiotr Jasiukajtis!    {
232*25c28e83SPiotr Jasiukajtis!      double    res0, xx0, tbl_div0, tbl_sqrt0;
233*25c28e83SPiotr Jasiukajtis!      float    fres0;
234*25c28e83SPiotr Jasiukajtis!      int    iax0, si0, iexp0;
235*25c28e83SPiotr Jasiukajtis!
236*25c28e83SPiotr Jasiukajtis!      res = *(int*)&res;
237*25c28e83SPiotr Jasiukajtis!      res *= FTWO;
238*25c28e83SPiotr Jasiukajtis!      ax0 = *(int*)&res;
239*25c28e83SPiotr Jasiukajtis!      iexp0 = ax0 >> 24;
240*25c28e83SPiotr Jasiukajtis!      iexp0 = 0x3f + 0x4b - iexp0;
241*25c28e83SPiotr Jasiukajtis!      iexp0 = iexp0 << 23;
242*25c28e83SPiotr Jasiukajtis!
243*25c28e83SPiotr Jasiukajtis!      si0 = (ax0 >> 13) & 0x7f0;
244*25c28e83SPiotr Jasiukajtis!
245*25c28e83SPiotr Jasiukajtis!      tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
246*25c28e83SPiotr Jasiukajtis!      tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
247*25c28e83SPiotr Jasiukajtis!      iax0 = ax0 & 0x7ffe0000;
248*25c28e83SPiotr Jasiukajtis!      iax0 = ax0 - iax0;
249*25c28e83SPiotr Jasiukajtis!      xx0 = iax0 * tbl_div0;
250*25c28e83SPiotr Jasiukajtis!      res0 = tbl_sqrt0 * (((K3 * xx0 + K2) * xx0 + K1) * xx0 + K0);
251*25c28e83SPiotr Jasiukajtis!
252*25c28e83SPiotr Jasiukajtis!      fres0 = res0;
253*25c28e83SPiotr Jasiukajtis!      iexp0 += *(int*)&fres0;
254*25c28e83SPiotr Jasiukajtis!      RETURN(*(float*)&iexp0)
255*25c28e83SPiotr Jasiukajtis!    }
256*25c28e83SPiotr Jasiukajtis!    else  /* X = negative  */
257*25c28e83SPiotr Jasiukajtis!    {
258*25c28e83SPiotr Jasiukajtis!      RETURN ( sqrtf(res) )
259*25c28e83SPiotr Jasiukajtis!    }
260*25c28e83SPiotr Jasiukajtis!  }
261*25c28e83SPiotr Jasiukajtis!  if ( ax1 >= 0x7f800000 )
262*25c28e83SPiotr Jasiukajtis!  {
263*25c28e83SPiotr Jasiukajtis!    RETURN ( FONE / ((float*)&dres0)[1] )
264*25c28e83SPiotr Jasiukajtis!  }
265*25c28e83SPiotr Jasiukajtis!  if ( ax1 < 0x00800000 )
266*25c28e83SPiotr Jasiukajtis!  {
267*25c28e83SPiotr Jasiukajtis!    float res = ((float*)&dres0)[1];
268*25c28e83SPiotr Jasiukajtis!    if ( (ax1 & 0x7fffffff) == 0 )  /* |X| = zero  */
269*25c28e83SPiotr Jasiukajtis!    {
270*25c28e83SPiotr Jasiukajtis!      RETURN ( FONE / res )
271*25c28e83SPiotr Jasiukajtis!    }
272*25c28e83SPiotr Jasiukajtis!    else if ( ax1 >= 0 )  /* X = denormal  */
273*25c28e83SPiotr Jasiukajtis!    {
274*25c28e83SPiotr Jasiukajtis!      double    res0, xx0, tbl_div0, tbl_sqrt0;
275*25c28e83SPiotr Jasiukajtis!      float    fres0;
276*25c28e83SPiotr Jasiukajtis!      int    iax1, si0, iexp0;
277*25c28e83SPiotr Jasiukajtis!
278*25c28e83SPiotr Jasiukajtis!      res = *(int*)&res;
279*25c28e83SPiotr Jasiukajtis!      res *= FTWO;
280*25c28e83SPiotr Jasiukajtis!      ax1 = *(int*)&res;
281*25c28e83SPiotr Jasiukajtis!      iexp0 = ax1 >> 24;
282*25c28e83SPiotr Jasiukajtis!      iexp0 = 0x3f + 0x4b - iexp0;
283*25c28e83SPiotr Jasiukajtis!      iexp0 = iexp0 << 23;
284*25c28e83SPiotr Jasiukajtis!
285*25c28e83SPiotr Jasiukajtis!      si0 = (ax1 >> 13) & 0x7f0;
286*25c28e83SPiotr Jasiukajtis!
287*25c28e83SPiotr Jasiukajtis!      tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
288*25c28e83SPiotr Jasiukajtis!      tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
289*25c28e83SPiotr Jasiukajtis!      iax1 = ax1 & 0x7ffe0000;
290*25c28e83SPiotr Jasiukajtis!      iax1 = ax1 - iax1;
291*25c28e83SPiotr Jasiukajtis!      xx0 = iax1 * tbl_div0;
292*25c28e83SPiotr Jasiukajtis!      res0 = tbl_sqrt0 * (((K3 * xx0 + K2) * xx0 + K1) * xx0 + K0);
293*25c28e83SPiotr Jasiukajtis!
294*25c28e83SPiotr Jasiukajtis!      fres0 = res0;
295*25c28e83SPiotr Jasiukajtis!      iexp0 += *(int*)&fres0;
296*25c28e83SPiotr Jasiukajtis!      RETURN(*(float*)&iexp0)
297*25c28e83SPiotr Jasiukajtis!    }
298*25c28e83SPiotr Jasiukajtis!    else  /* X = negative  */
299*25c28e83SPiotr Jasiukajtis!    {
300*25c28e83SPiotr Jasiukajtis!      RETURN ( sqrtf(res) )
301*25c28e83SPiotr Jasiukajtis!    }
302*25c28e83SPiotr Jasiukajtis!  }
303*25c28e83SPiotr Jasiukajtis!
304*25c28e83SPiotr Jasiukajtis!  iexp0 = ax0 >> 24;
305*25c28e83SPiotr Jasiukajtis!  iexp1 = ax1 >> 24;
306*25c28e83SPiotr Jasiukajtis!  iexp0 = 0x3f - iexp0;
307*25c28e83SPiotr Jasiukajtis!  iexp1 = 0x3f - iexp1;
308*25c28e83SPiotr Jasiukajtis!  iexp1 &= 0x1ff;
309*25c28e83SPiotr Jasiukajtis!  lexp0 = iexp0 << 55;
310*25c28e83SPiotr Jasiukajtis!  lexp1 = iexp1 << 23;
311*25c28e83SPiotr Jasiukajtis!
312*25c28e83SPiotr Jasiukajtis!  lexp0 |= lexp1;
313*25c28e83SPiotr Jasiukajtis!
314*25c28e83SPiotr Jasiukajtis!  fdx0 = *((double*)&lexp0);
315*25c28e83SPiotr Jasiukajtis!
316*25c28e83SPiotr Jasiukajtis!  si0 = ax0 >> 13;
317*25c28e83SPiotr Jasiukajtis!  si1 = ax1 >> 13;
318*25c28e83SPiotr Jasiukajtis!  si0 &= 0x7f0;
319*25c28e83SPiotr Jasiukajtis!  si1 &= 0x7f0;
320*25c28e83SPiotr Jasiukajtis!
321*25c28e83SPiotr Jasiukajtis!  addr0 = (char*)TBL + si0;
322*25c28e83SPiotr Jasiukajtis!  addr1 = (char*)TBL + si1;
323*25c28e83SPiotr Jasiukajtis!  tbl_div0 = ((double*)((char*)TBL + si0))[0];
324*25c28e83SPiotr Jasiukajtis!  tbl_div1 = ((double*)((char*)TBL + si1))[0];
325*25c28e83SPiotr Jasiukajtis!  tbl_sqrt0 = ((double*)addr0)[1];
326*25c28e83SPiotr Jasiukajtis!  tbl_sqrt1 = ((double*)addr1)[1];
327*25c28e83SPiotr Jasiukajtis!  dfx0 = vis_fand(ddx0,DC0);
328*25c28e83SPiotr Jasiukajtis!  dfx0 = vis_fpsub32(ddx0,dfx0);
329*25c28e83SPiotr Jasiukajtis!  dtmp0 = (double)(((int*)&dfx0)[0]);
330*25c28e83SPiotr Jasiukajtis!  dtmp1 = (double)(((int*)&dfx0)[1]);
331*25c28e83SPiotr Jasiukajtis!  xx0 = dtmp0 * tbl_div0;
332*25c28e83SPiotr Jasiukajtis!  xx1 = dtmp1 * tbl_div1;
333*25c28e83SPiotr Jasiukajtis!  res0 = K3 * xx0;
334*25c28e83SPiotr Jasiukajtis!  res1 = K3 * xx1;
335*25c28e83SPiotr Jasiukajtis!  res0 += K2;
336*25c28e83SPiotr Jasiukajtis!  res1 += K2;
337*25c28e83SPiotr Jasiukajtis!  res0 *= xx0;
338*25c28e83SPiotr Jasiukajtis!  res1 *= xx1;
339*25c28e83SPiotr Jasiukajtis!  res0 += K1;
340*25c28e83SPiotr Jasiukajtis!  res1 += K1;
341*25c28e83SPiotr Jasiukajtis!  res0 *= xx0;
342*25c28e83SPiotr Jasiukajtis!  res1 *= xx1;
343*25c28e83SPiotr Jasiukajtis!  res0 += K0;
344*25c28e83SPiotr Jasiukajtis!  res1 += K0;
345*25c28e83SPiotr Jasiukajtis!  res0 = tbl_sqrt0 * res0;
346*25c28e83SPiotr Jasiukajtis!  res1 = tbl_sqrt1 * res1;
347*25c28e83SPiotr Jasiukajtis!  ((float*)&dres0)[0] = (float)res0;
348*25c28e83SPiotr Jasiukajtis!  ((float*)&dres0)[1] = (float)res1;
349*25c28e83SPiotr Jasiukajtis!  dres0 = vis_fpadd32(dres0,fdx0);
350*25c28e83SPiotr Jasiukajtis!  *py = ((float*)&dres0)[0];
351*25c28e83SPiotr Jasiukajtis!  *(py + stridey) = ((float*)&dres0)[1];
352*25c28e83SPiotr Jasiukajtis!  py += stridey2;
353*25c28e83SPiotr Jasiukajtis!
354*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
355*25c28e83SPiotr Jasiukajtis
356*25c28e83SPiotr Jasiukajtis	ENTRY(__vrsqrtf)
357*25c28e83SPiotr Jasiukajtis	save	%sp,-SA(MINFRAME)-tmps,%sp
358*25c28e83SPiotr Jasiukajtis	PIC_SETUP(l7)
359*25c28e83SPiotr Jasiukajtis	PIC_SET(l7,.CONST_TBL,l2)
360*25c28e83SPiotr Jasiukajtis
361*25c28e83SPiotr Jasiukajtis	st	%i0,[%fp+tmp_counter]
362*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
363*25c28e83SPiotr Jasiukajtis
364*25c28e83SPiotr Jasiukajtis	ldd	[TBL+2048],K0
365*25c28e83SPiotr Jasiukajtis	sll	%i2,2,stridex
366*25c28e83SPiotr Jasiukajtis
367*25c28e83SPiotr Jasiukajtis	ldd	[TBL+2048+8],K1
368*25c28e83SPiotr Jasiukajtis	sll	%i4,2,stridey
369*25c28e83SPiotr Jasiukajtis	mov	%i3,%i2
370*25c28e83SPiotr Jasiukajtis
371*25c28e83SPiotr Jasiukajtis	ldd	[TBL+2048+16],K2
372*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),_0x7f800000
373*25c28e83SPiotr Jasiukajtis	sll	stridex,1,stridex2
374*25c28e83SPiotr Jasiukajtis
375*25c28e83SPiotr Jasiukajtis	ldd	[TBL+2048+24],K3
376*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00800000),_0x00800000
377*25c28e83SPiotr Jasiukajtis
378*25c28e83SPiotr Jasiukajtis	ldd	[TBL+2048+32],DC0
379*25c28e83SPiotr Jasiukajtis	add	%g0,0x3f,%l0
380*25c28e83SPiotr Jasiukajtis
381*25c28e83SPiotr Jasiukajtis	ldd	[TBL+2048+40],FONE
382*25c28e83SPiotr Jasiukajtis!	ld	[TBL+2048+44],FTWO
383*25c28e83SPiotr Jasiukajtis.begin:
384*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp_counter],counter
385*25c28e83SPiotr Jasiukajtis	ldx	[%fp+tmp_px],%l7
386*25c28e83SPiotr Jasiukajtis	st	%g0,[%fp+tmp_counter]
387*25c28e83SPiotr Jasiukajtis.begin1:
388*25c28e83SPiotr Jasiukajtis	cmp	counter,0
389*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.exit
390*25c28e83SPiotr Jasiukajtis
391*25c28e83SPiotr Jasiukajtis	lda	[%l7]0x82,%f14		! (4_0) ((float*)&ddx0)[0] = *px;
392*25c28e83SPiotr Jasiukajtis
393*25c28e83SPiotr Jasiukajtis	lda	[stridex+%l7]0x82,%f15	! (5_0) ((float*)&ddx0)[1] = *(px + stridex);
394*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%o0
395*25c28e83SPiotr Jasiukajtis
396*25c28e83SPiotr Jasiukajtis	lda	[%l7]0x82,%g1		! (4_0) ax0 = *(int*)px;
397*25c28e83SPiotr Jasiukajtis	add	%l7,stridex2,%i1	! px += stridex2
398*25c28e83SPiotr Jasiukajtis	add	%o0,0x3ff,%o0
399*25c28e83SPiotr Jasiukajtis
400*25c28e83SPiotr Jasiukajtis	lda	[stridex+%l7]0x82,%g5	! (5_0) ax1 = *(int*)(px + stridex);
401*25c28e83SPiotr Jasiukajtis	fand	%f14,DC0,%f16		! (4_0) dfx0 = vis_fand(ddx0,DC0);
402*25c28e83SPiotr Jasiukajtis
403*25c28e83SPiotr Jasiukajtis	sra	%g1,13,%l5		! (4_0) si0 = ax0 >> 13;
404*25c28e83SPiotr Jasiukajtis	add	%i1,stridex2,%o5	! px += stridex2
405*25c28e83SPiotr Jasiukajtis
406*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7f800000		! (4_1) ax0 ? 0x7f800000
407*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.spec0		! (4_1) if ( ax0 >= 0x7f800000 )
408*25c28e83SPiotr Jasiukajtis	nop
409*25c28e83SPiotr Jasiukajtis
410*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00800000		! (4_1) ax0 ? 0x00800000
411*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.spec1		! (4_1) if ( ax0 < 0x00800000 )
412*25c28e83SPiotr Jasiukajtis	sra	%g5,13,%l6		! (5_0) si1 = ax1 >> 13;
413*25c28e83SPiotr Jasiukajtis.cont_spec:
414*25c28e83SPiotr Jasiukajtis	and	%l5,2032,%l5		! (4_0) si0 &= 0x7f0;
415*25c28e83SPiotr Jasiukajtis
416*25c28e83SPiotr Jasiukajtis	ldd	[%l5+TBL],%f54		! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
417*25c28e83SPiotr Jasiukajtis	sra	%g5,24,%l7		! (5_0) iexp1 = ax1 >> 24;
418*25c28e83SPiotr Jasiukajtis	and	%l6,2032,%l6		! (5_0) si1 &= 0x7f0;
419*25c28e83SPiotr Jasiukajtis	fpsub32	%f14,%f16,%f16		! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
420*25c28e83SPiotr Jasiukajtis
421*25c28e83SPiotr Jasiukajtis	ldd	[%l6+TBL],%f46		! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
422*25c28e83SPiotr Jasiukajtis	sra	%g1,24,%i3		! (4_0) iexp0 = ax0 >> 24;
423*25c28e83SPiotr Jasiukajtis	sub	%l0,%l7,%l7		! (5_0) iexp1 = 0x3f - iexp1;
424*25c28e83SPiotr Jasiukajtis
425*25c28e83SPiotr Jasiukajtis	and	%l7,511,%l1		! (5_0) iexp1 &= 0x1ff;
426*25c28e83SPiotr Jasiukajtis	add	%l6,TBL,%l6		! (5_0) addr1 = (char*)TBL + si1;
427*25c28e83SPiotr Jasiukajtis
428*25c28e83SPiotr Jasiukajtis	sllx	%l1,23,%l1		! (5_0) lexp1 = iexp1 << 23;
429*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%o0		! (4_0) iexp0 = 0x3f - iexp0;
430*25c28e83SPiotr Jasiukajtis	fitod	%f16,%f56		! (4_0) dtmp0 = (double)(((int*)dfx0)[0]);
431*25c28e83SPiotr Jasiukajtis
432*25c28e83SPiotr Jasiukajtis	sllx	%o0,55,%o0		! (4_0) lexp0 = iexp0 << 55;
433*25c28e83SPiotr Jasiukajtis	fitod	%f17,%f44		! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
434*25c28e83SPiotr Jasiukajtis
435*25c28e83SPiotr Jasiukajtis	or	%o0,%l1,%o0		! (4_0) lexp0 |= lexp1;
436*25c28e83SPiotr Jasiukajtis
437*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp0]		! (4_0) fdx0 = *((double*)lexp0);
438*25c28e83SPiotr Jasiukajtis
439*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f54,%f40		! (4_0) xx0 = dtmp0 * tbl_div0;
440*25c28e83SPiotr Jasiukajtis
441*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f18		! (0_0) ((float*)&ddx0)[0] = *px;
442*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f46,%f46		! (5_1) xx1 = dtmp1 * tbl_div1;
443*25c28e83SPiotr Jasiukajtis
444*25c28e83SPiotr Jasiukajtis	lda	[stridex+%i1]0x82,%f19	! (1_0) ((float*)&ddx0)[1] = *(px + stridex);
445*25c28e83SPiotr Jasiukajtis
446*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%g1		! (0_0) ax0 = *(int*)px;
447*25c28e83SPiotr Jasiukajtis
448*25c28e83SPiotr Jasiukajtis	lda	[stridex+%i1]0x82,%i4	! (1_0) ax1 = *(int*)(px + stridex);
449*25c28e83SPiotr Jasiukajtis	cmp	%g5,_0x7f800000		! (5_1) ax1 ? 0x7f800000
450*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update0		! (5_1) if ( ax1 >= 0x7f800000 )
451*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f40,%f52		! (4_1) res0 = K3 * xx0;
452*25c28e83SPiotr Jasiukajtis.cont0:
453*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f46,%f50		! (5_1) res1 = K3 * xx1;
454*25c28e83SPiotr Jasiukajtis	cmp	%g5,_0x00800000		! (5_1) ax1 ? 0x00800000
455*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update1		! (5_1) if ( ax1 < 0x00800000 )
456*25c28e83SPiotr Jasiukajtis	fand	%f18,DC0,%f56		! (0_0) dfx0 = vis_fand(ddx0,DC0);
457*25c28e83SPiotr Jasiukajtis.cont1:
458*25c28e83SPiotr Jasiukajtis	sra	%g1,13,%o0		! (0_0) si0 = ax0 >> 13;
459*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7f800000		! (0_0) ax0 ? 0x7f800000
460*25c28e83SPiotr Jasiukajtis
461*25c28e83SPiotr Jasiukajtis	sra	%i4,13,%g5		! (1_0) si1 = ax1 >> 13;
462*25c28e83SPiotr Jasiukajtis	and	%o0,2032,%o0		! (0_0) si0 &= 0x7f0;
463*25c28e83SPiotr Jasiukajtis
464*25c28e83SPiotr Jasiukajtis	ldd	[%o0+TBL],%f54		! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
465*25c28e83SPiotr Jasiukajtis	sra	%i4,24,%i1		! (1_0) iexp1 = ax1 >> 24;
466*25c28e83SPiotr Jasiukajtis	and	%g5,2032,%o7		! (1_0) si1 &= 0x7f0;
467*25c28e83SPiotr Jasiukajtis	fpsub32	%f18,%f56,%f30		! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
468*25c28e83SPiotr Jasiukajtis
469*25c28e83SPiotr Jasiukajtis	ldd	[%o7+TBL],%f44		! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
470*25c28e83SPiotr Jasiukajtis	sra	%g1,24,%i3		! (0_0) iexp0 = ax0 >> 24;
471*25c28e83SPiotr Jasiukajtis	sub	%l0,%i1,%i1		! (1_0) iexp1 = 0x3f - iexp1;
472*25c28e83SPiotr Jasiukajtis	faddd	%f52,K2,%f62		! (4_1) res0 += K2;
473*25c28e83SPiotr Jasiukajtis
474*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%g5		! (0_0) iexp0 = 0x3f - iexp0;
475*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update2		! (0_0) if ( ax0 >= 0x7f800000 )
476*25c28e83SPiotr Jasiukajtis	faddd	%f50,K2,%f60		! (5_1) res1 += K2;
477*25c28e83SPiotr Jasiukajtis.cont2:
478*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00800000		! (0_0) ax0 ? 0x00800000
479*25c28e83SPiotr Jasiukajtis	and	%i1,511,%i0		! (1_0) iexp1 &= 0x1ff;
480*25c28e83SPiotr Jasiukajtis	fitod	%f30,%f56		! (0_0) dtmp0 = (double)(((int*)dfx0)[0]);
481*25c28e83SPiotr Jasiukajtis
482*25c28e83SPiotr Jasiukajtis	sllx	%i0,23,%i0		! (1_0) lexp1 = iexp1 << 23;
483*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update3		! (0_0) if ( ax0 < 0x00800000 )
484*25c28e83SPiotr Jasiukajtis	fitod	%f31,%f50		! (1_0) dtmp1 = (double)(((int*)dfx0)[1]);
485*25c28e83SPiotr Jasiukajtis.cont3:
486*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f40,%f30		! (4_1) res0 *= xx0;
487*25c28e83SPiotr Jasiukajtis	sllx	%g5,55,%g5		! (0_0) lexp0 = iexp0 << 55;
488*25c28e83SPiotr Jasiukajtis
489*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f46,%f48		! (5_1) res1 *= xx1;
490*25c28e83SPiotr Jasiukajtis	or	%g5,%i0,%g5		! (0_0) lexp0 |= lexp1;
491*25c28e83SPiotr Jasiukajtis	stx	%g5,[%fp+tmp1]		! (0_0) fdx0 = *((double*)lexp0);
492*25c28e83SPiotr Jasiukajtis
493*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f54,%f26		! (0_0) xx0 = dtmp0 * tbl_div0;
494*25c28e83SPiotr Jasiukajtis	sll	stridex,1,stridex2	! stridex2 = stridex * 2;
495*25c28e83SPiotr Jasiukajtis
496*25c28e83SPiotr Jasiukajtis	lda	[%o5]0x82,%f24		! (2_0) ((float*)&ddx0)[0] = *px;
497*25c28e83SPiotr Jasiukajtis	add	%o7,TBL,%o7		! (1_0) addr0 = (char*)TBL + si0;
498*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f44,%f44		! (1_0) xx0 = dtmp0 * tbl_div0;
499*25c28e83SPiotr Jasiukajtis
500*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o5]0x82,%f25	! (3_0) ((float*)&ddx0)[1] = *(px + stridex);
501*25c28e83SPiotr Jasiukajtis	add	%l5,TBL,%l5		! (4_1) addr0 = (char*)TBL + si0;
502*25c28e83SPiotr Jasiukajtis	faddd	%f30,K1,%f62		! (4_1) res0 += K1;
503*25c28e83SPiotr Jasiukajtis
504*25c28e83SPiotr Jasiukajtis	lda	[%o5]0x82,%g1		! (2_0) ax0 = *(int*)px;
505*25c28e83SPiotr Jasiukajtis	add	%o5,stridex2,%l7	! px += stridex2
506*25c28e83SPiotr Jasiukajtis	faddd	%f48,K1,%f42		! (5_1) res1 += K1;
507*25c28e83SPiotr Jasiukajtis
508*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o5]0x82,%o5	! (3_0) ax1 = *(int*)(px + stridex);
509*25c28e83SPiotr Jasiukajtis	cmp	%i4,_0x7f800000		! (1_0) ax1 ? 0x7f800000
510*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update4		! (1_0) if ( ax1 >= 0x7f800000 )
511*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f26,%f52		! (0_0) res0 = K3 * xx0;
512*25c28e83SPiotr Jasiukajtis.cont4:
513*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f44,%f50		! (1_0) res1 = K3 * xx1;
514*25c28e83SPiotr Jasiukajtis	cmp	%i4,_0x00800000		! (1_0) ax1 ? 0x00800000
515*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update5		! (1_0) if ( ax1 < 0x00800000 )
516*25c28e83SPiotr Jasiukajtis	fand	%f24,DC0,%f54		! (2_0) dfx0 = vis_fand(ddx0,DC0);
517*25c28e83SPiotr Jasiukajtis.cont5:
518*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f40,%f48		! (4_1) res0 *= xx0;
519*25c28e83SPiotr Jasiukajtis	sra	%g1,13,%i0		! (2_0) si0 = ax0 >> 13;
520*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7f800000		! (2_0) ax0 ? 0x7f800000
521*25c28e83SPiotr Jasiukajtis
522*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f46,%f58		! (5_1) res1 *= xx1;
523*25c28e83SPiotr Jasiukajtis	sra	%o5,13,%o1		! (3_0) si1 = ax1 >> 13;
524*25c28e83SPiotr Jasiukajtis	and	%i0,2032,%i0		! (2_0) si0 &= 0x7f0;
525*25c28e83SPiotr Jasiukajtis
526*25c28e83SPiotr Jasiukajtis	ldd	[%i0+TBL],%f30		! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
527*25c28e83SPiotr Jasiukajtis	sra	%o5,24,%o3		! (3_0) iexp1 = ax1 >> 24;
528*25c28e83SPiotr Jasiukajtis	and	%o1,2032,%o1		! (3_0) si1 &= 0x7f0;
529*25c28e83SPiotr Jasiukajtis	fpsub32	%f24,%f54,%f12		! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
530*25c28e83SPiotr Jasiukajtis
531*25c28e83SPiotr Jasiukajtis	ldd	[%o1+TBL],%f46		! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
532*25c28e83SPiotr Jasiukajtis	sra	%g1,24,%i3		! (2_0) iexp0 = ax0 >> 24;
533*25c28e83SPiotr Jasiukajtis	sub	%l0,%o3,%o3		! (3_0) iexp1 = 0x3f - iexp1;
534*25c28e83SPiotr Jasiukajtis	faddd	%f52,K2,%f40		! (0_0) res0 += K2;
535*25c28e83SPiotr Jasiukajtis
536*25c28e83SPiotr Jasiukajtis	ldd	[%l5+8],%f42		! (4_1) tbl_sqrt0 = ((double*)addr0)[1];
537*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%g5		! (2_0) iexp0 = 0x3f - iexp0;
538*25c28e83SPiotr Jasiukajtis	and	%o3,511,%i3		! (3_0) iexp1 &= 0x1ff;
539*25c28e83SPiotr Jasiukajtis	faddd	%f50,K2,%f60		! (1_0) res0 += K2;
540*25c28e83SPiotr Jasiukajtis
541*25c28e83SPiotr Jasiukajtis	ldd	[%l6+8],%f28		! (5_1) tbl_sqrt1 = ((double*)addr1)[1];
542*25c28e83SPiotr Jasiukajtis	sllx	%g5,55,%g5		! (2_0) lexp0 = iexp0 << 55;
543*25c28e83SPiotr Jasiukajtis	add	%i0,TBL,%i0		! (2_0) addr0 = (char*)TBL + si0;
544*25c28e83SPiotr Jasiukajtis	fitod	%f12,%f56		! (2_0) dtmp0 = (double)(((int*)dfx0)[0]);
545*25c28e83SPiotr Jasiukajtis
546*25c28e83SPiotr Jasiukajtis	sllx	%i3,23,%i3		! (3_0) lexp1 = iexp1 << 23;
547*25c28e83SPiotr Jasiukajtis	fitod	%f13,%f50		! (3_0) dtmp1 = (double)(((int*)dfx0)[1]);
548*25c28e83SPiotr Jasiukajtis
549*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f26,%f40		! (0_0) res0 *= xx0;
550*25c28e83SPiotr Jasiukajtis	or	%g5,%i3,%g5		! (2_0) lexp0 |= lexp1;
551*25c28e83SPiotr Jasiukajtis	faddd	%f48,K0,%f62		! (4_1) res0 += K0;
552*25c28e83SPiotr Jasiukajtis
553*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f44,%f48		! (1_0) res1 *= xx1;
554*25c28e83SPiotr Jasiukajtis	add	%o1,TBL,%o1		! (3_0) addr1 = (char*)TBL + si1;
555*25c28e83SPiotr Jasiukajtis	stx	%g5,[%fp+tmp2]		! (2_0) fdx0 = *((double*)lexp0);
556*25c28e83SPiotr Jasiukajtis	faddd	%f58,K0,%f60		! (5_1) res1 += K0;
557*25c28e83SPiotr Jasiukajtis
558*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f30,%f30		! (2_0) xx0 = dtmp0 * tbl_div0;
559*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update6		! (2_0) if ( ax0 >= 0x7f800000 )
560*25c28e83SPiotr Jasiukajtis	lda	[%l7]0x82,%f14		! (4_0) ((float*)&ddx0)[0] = *px;
561*25c28e83SPiotr Jasiukajtis.cont6:
562*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00800000		! (2_0) ax0 ? 0x00800000
563*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update7		! (2_0) if ( ax0 < 0x00800000 )
564*25c28e83SPiotr Jasiukajtis	nop
565*25c28e83SPiotr Jasiukajtis.cont7:
566*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f46,%f24		! (3_0) xx1 = dtmp1 * tbl_div1;
567*25c28e83SPiotr Jasiukajtis
568*25c28e83SPiotr Jasiukajtis	lda	[stridex+%l7]0x82,%f15	! (5_0) ((float*)&ddx0)[1] = *(px + stridex);
569*25c28e83SPiotr Jasiukajtis	cmp	%o5,_0x7f800000		! (3_0) ax1 ? 0x7f800000
570*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f62,%f58		! (4_1) res0 = tbl_sqrt0 * res0;
571*25c28e83SPiotr Jasiukajtis	faddd	%f40,K1,%f46		! (0_0) res0 += K1;
572*25c28e83SPiotr Jasiukajtis
573*25c28e83SPiotr Jasiukajtis	lda	[%l7]0x82,%g1		! (4_0) ax0 = *(int*)px;
574*25c28e83SPiotr Jasiukajtis	add	%l7,stridex2,%i1	! px += stridex2
575*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f60,%f56		! (5_1) res1 = tbl_sqrt1 * res1;
576*25c28e83SPiotr Jasiukajtis	faddd	%f48,K1,%f62		! (1_0) res1 += K1;
577*25c28e83SPiotr Jasiukajtis
578*25c28e83SPiotr Jasiukajtis	lda	[stridex+%l7]0x82,%g5	! (5_0) ax1 = *(int*)(px + stridex);
579*25c28e83SPiotr Jasiukajtis	add	%o0,TBL,%o0		! (0_0) addr0 = (char*)TBL + si0;
580*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update8		! (3_0) if ( ax1 >= 0x7f800000 )
581*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f30,%f52		! (2_0) res0 = K3 * xx0;
582*25c28e83SPiotr Jasiukajtis.cont8:
583*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f24,%f50		! (3_0) res1 = K3 * xx1;
584*25c28e83SPiotr Jasiukajtis	cmp	%o5,_0x00800000		! (3_0) ax1 ? 0x00800000
585*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update9		! (3_0) if ( ax1 < 0x00800000 )
586*25c28e83SPiotr Jasiukajtis	fand	%f14,DC0,%f16		! (4_0) dfx0 = vis_fand(ddx0,DC0);
587*25c28e83SPiotr Jasiukajtis.cont9:
588*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f26,%f48		! (0_0) res0 *= xx0;
589*25c28e83SPiotr Jasiukajtis	sra	%g1,13,%l5		! (4_0) si0 = ax0 >> 13;
590*25c28e83SPiotr Jasiukajtis	add	%i1,stridex2,%o5	! px += stridex2
591*25c28e83SPiotr Jasiukajtis	fdtos	%f58,%f6		! (4_1) ((float*)&dres0)[0] = (float)res0;
592*25c28e83SPiotr Jasiukajtis
593*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f44,%f40		! (1_0) res1 *= xx1;
594*25c28e83SPiotr Jasiukajtis	sra	%g5,13,%l6		! (5_0) si1 = ax1 >> 13;
595*25c28e83SPiotr Jasiukajtis	and	%l5,2032,%l5		! (4_0) si0 &= 0x7f0;
596*25c28e83SPiotr Jasiukajtis	fdtos	%f56,%f7		! (5_1) ((float*)&dres0)[1] = (float)res1;
597*25c28e83SPiotr Jasiukajtis
598*25c28e83SPiotr Jasiukajtis	ldd	[%l5+TBL],%f54		! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
599*25c28e83SPiotr Jasiukajtis	sra	%g5,24,%l7		! (5_0) iexp1 = ax1 >> 24;
600*25c28e83SPiotr Jasiukajtis	and	%l6,2032,%l6		! (5_0) si1 &= 0x7f0;
601*25c28e83SPiotr Jasiukajtis	fpsub32	%f14,%f16,%f16		! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
602*25c28e83SPiotr Jasiukajtis
603*25c28e83SPiotr Jasiukajtis	ldd	[%l6+TBL],%f46		! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
604*25c28e83SPiotr Jasiukajtis	sra	%g1,24,%i3		! (4_0) iexp0 = ax0 >> 24;
605*25c28e83SPiotr Jasiukajtis	sub	%l0,%l7,%l7		! (5_0) iexp1 = 0x3f - iexp1;
606*25c28e83SPiotr Jasiukajtis	faddd	%f52,K2,%f58		! (2_0) res0 += K2;
607*25c28e83SPiotr Jasiukajtis
608*25c28e83SPiotr Jasiukajtis	ldd	[%o0+8],%f42		! (0_0) tbl_sqrt0 = ((double*)addr0)[1];
609*25c28e83SPiotr Jasiukajtis	and	%l7,511,%l1		! (5_0) iexp1 = 0x1ff;
610*25c28e83SPiotr Jasiukajtis	add	%l6,TBL,%l6		! (5_0) addr1 = (char*)TBL + si1;
611*25c28e83SPiotr Jasiukajtis	faddd	%f50,K2,%f60		! (3_0) res1 += K2;
612*25c28e83SPiotr Jasiukajtis
613*25c28e83SPiotr Jasiukajtis	ldd	[%o7+8],%f28		! (1_0) tbl_sqrt1 = ((double*)addr1)[1];
614*25c28e83SPiotr Jasiukajtis	sllx	%l1,23,%l1		! (5_0) lexp1 = iexp1 << 23;
615*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%o0		! (4_0) iexp0 = 0x3f - iexp0;
616*25c28e83SPiotr Jasiukajtis	fitod	%f16,%f56		! (4_0) dtmp0 = (double)(((int*)dfx0)[0]);
617*25c28e83SPiotr Jasiukajtis
618*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp0],%f52		! (4_1) fdx0 = *((double*)lexp0);
619*25c28e83SPiotr Jasiukajtis	sllx	%o0,55,%o0		! (4_0) lexp0 = iexp0 << 55;
620*25c28e83SPiotr Jasiukajtis	fitod	%f17,%f44		! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
621*25c28e83SPiotr Jasiukajtis
622*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f30,%f62		! (2_0) res0 *= xx0;
623*25c28e83SPiotr Jasiukajtis	or	%o0,%l1,%o0		! (4_0) lexp0 |= lexp1;
624*25c28e83SPiotr Jasiukajtis	faddd	%f48,K0,%f22		! (0_0) res0 += K0;
625*25c28e83SPiotr Jasiukajtis
626*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f24,%f58		! (3_0) res1 *= xx1;
627*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp0]		! (4_0) fdx0 = *((double*)lexp0);
628*25c28e83SPiotr Jasiukajtis	faddd	%f40,K0,%f26		! (1_0) res1 += K0;
629*25c28e83SPiotr Jasiukajtis
630*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f54,%f40		! (4_0) xx0 = dtmp0 * tbl_div0;
631*25c28e83SPiotr Jasiukajtis	fpadd32	%f6,%f52,%f10		! (4_1) dres0 = vis_fpadd32(dres0,fdx0);
632*25c28e83SPiotr Jasiukajtis
633*25c28e83SPiotr Jasiukajtis	or	%g0,%i2,%l7
634*25c28e83SPiotr Jasiukajtis	add	stridey,stridey,stridey2
635*25c28e83SPiotr Jasiukajtis
636*25c28e83SPiotr Jasiukajtis	cmp	counter,6
637*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.tail
638*25c28e83SPiotr Jasiukajtis	nop
639*25c28e83SPiotr Jasiukajtis
640*25c28e83SPiotr Jasiukajtis	ba	.main_loop
641*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter	! counter
642*25c28e83SPiotr Jasiukajtis
643*25c28e83SPiotr Jasiukajtis	.align	16
! Software-pipelined main loop.  Each pass loads six new inputs from px
! (tags 0_0 .. 5_0), stores the six results whose computation was begun
! earlier (tags ending in _1 / _2), and subtracts 6 from counter.  Every
! loaded ax value is compared against 0x7f800000 and 0x00800000; inputs
! outside that range branch to an out-of-line .updateN fixup that comes
! back at the matching .contN label.
! NOTE(review): the (n_k) tags appear to mean "element slot n, pipeline
! generation k" -- inferred from the comments, confirm against the C model.
644*25c28e83SPiotr Jasiukajtis.main_loop:
645*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f18		! (0_0) ((float*)&ddx0)[0] = *px;
646*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7f800000		! (4_1) ax0 ? 0x7f800000
647*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update10		! (4_1) if ( ax0 >= 0x7f800000 )
648*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f46,%f46		! (5_1) xx1 = dtmp1 * tbl_div1;
649*25c28e83SPiotr Jasiukajtis.cont10:
650*25c28e83SPiotr Jasiukajtis	lda	[stridex+%i1]0x82,%f19	! (1_0) ((float*)&ddx0)[1] = *(px + stridex);
651*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00800000		! (4_1) ax0 ? 0x00800000
652*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f22,%f44		! (0_1) res0 = tbl_sqrt0 * res0;
653*25c28e83SPiotr Jasiukajtis	faddd	%f62,K1,%f42		! (2_1) res0 += K1;
654*25c28e83SPiotr Jasiukajtis
655*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%g1		! (0_0) ax0 = *(int*)px;
656*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f26,%f60		! (1_1) res1 = tbl_sqrt1 * res1;
657*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update11		! (4_1) if ( ax0 < 0x00800000 )
658*25c28e83SPiotr Jasiukajtis	faddd	%f58,K1,%f62		! (3_1) res1 += K1;
659*25c28e83SPiotr Jasiukajtis.cont11:
660*25c28e83SPiotr Jasiukajtis	lda	[stridex+%i1]0x82,%i4	! (1_0) ax1 = *(int*)(px + stridex);
661*25c28e83SPiotr Jasiukajtis	cmp	%g5,_0x7f800000		! (5_1) ax1 ? 0x7f800000
662*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update12		! (5_1) if ( ax1 >= 0x7f800000 )
663*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f40,%f52		! (4_1) res0 = K3 * xx0;
664*25c28e83SPiotr Jasiukajtis.cont12:
665*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f46,%f50		! (5_1) res1 = K3 * xx1;
666*25c28e83SPiotr Jasiukajtis	cmp	%g5,_0x00800000		! (5_1) ax1 ? 0x00800000
667*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update13		! (5_1) if ( ax1 < 0x00800000 )
668*25c28e83SPiotr Jasiukajtis	fand	%f18,DC0,%f56		! (0_0) dfx0 = vis_fand(ddx0,DC0);
669*25c28e83SPiotr Jasiukajtis.cont13:
670*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f30,%f48		! (2_1) res0 *= xx0;
671*25c28e83SPiotr Jasiukajtis	sra	%g1,13,%o0		! (0_0) si0 = ax0 >> 13;
672*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7f800000		! (0_0) ax0 ? 0x7f800000
673*25c28e83SPiotr Jasiukajtis	fdtos	%f44,%f8		! (0_1) ((float*)&dres0)[0] = (float)res0;
674*25c28e83SPiotr Jasiukajtis
675*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f24,%f58		! (3_1) res1 *= xx1;
676*25c28e83SPiotr Jasiukajtis	sra	%i4,13,%g5		! (1_0) si1 = ax1 >> 13;
677*25c28e83SPiotr Jasiukajtis	and	%o0,2032,%o0		! (0_0) si0 &= 0x7f0;
678*25c28e83SPiotr Jasiukajtis	fdtos	%f60,%f9		! (1_1) ((float*)&dres0)[1] = (float)res1;
679*25c28e83SPiotr Jasiukajtis
680*25c28e83SPiotr Jasiukajtis	ldd	[%o0+TBL],%f54		! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
681*25c28e83SPiotr Jasiukajtis	sra	%i4,24,%i1		! (1_0) iexp1 = ax1 >> 24;
682*25c28e83SPiotr Jasiukajtis	and	%g5,2032,%o7		! (1_0) si1 &= 0x7f0;
683*25c28e83SPiotr Jasiukajtis	fpsub32	%f18,%f56,%f30		! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
684*25c28e83SPiotr Jasiukajtis
685*25c28e83SPiotr Jasiukajtis	ldd	[%o7+TBL],%f44		! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
686*25c28e83SPiotr Jasiukajtis	sra	%g1,24,%i3		! (0_0) iexp0 = ax0 >> 24;
687*25c28e83SPiotr Jasiukajtis	sub	%l0,%i1,%i1		! (1_0) iexp1 = 0x3f - iexp1;
688*25c28e83SPiotr Jasiukajtis	faddd	%f52,K2,%f62		! (4_1) res0 += K2;
689*25c28e83SPiotr Jasiukajtis
690*25c28e83SPiotr Jasiukajtis	ldd	[%i0+8],%f42		! (2_1) tbl_sqrt0 = ((double*)addr0)[1];
691*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%g5		! (0_0) iexp0 = 0x3f - iexp0;
692*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update14		! (0_0) if ( ax0 >= 0x7f800000 )
693*25c28e83SPiotr Jasiukajtis	faddd	%f50,K2,%f60		! (5_1) res1 += K2;
694*25c28e83SPiotr Jasiukajtis.cont14:
695*25c28e83SPiotr Jasiukajtis	ldd	[%o1+8],%f28		! (3_1) tbl_sqrt1 = ((double*)addr1)[1];
696*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00800000		! (0_0) ax0 ? 0x00800000
697*25c28e83SPiotr Jasiukajtis	and	%i1,511,%i0		! (1_0) iexp1 &= 0x1ff;
698*25c28e83SPiotr Jasiukajtis	fitod	%f30,%f56		! (0_0) dtmp0 = (double)(((int*)dfx0)[0]);
699*25c28e83SPiotr Jasiukajtis
700*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp1],%f52		! (0_1) fdx0 = *((double*)lexp0);
701*25c28e83SPiotr Jasiukajtis	sllx	%i0,23,%i0		! (1_0) lexp1 = iexp1 << 23;
702*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update15		! (0_0) if ( ax0 < 0x00800000 )
703*25c28e83SPiotr Jasiukajtis	fitod	%f31,%f50		! (1_0) dtmp1 = (double)(((int*)dfx0)[1]);
704*25c28e83SPiotr Jasiukajtis.cont15:
705*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f40,%f30		! (4_1) res0 *= xx0;
706*25c28e83SPiotr Jasiukajtis	sllx	%g5,55,%g5		! (0_0) lexp0 = iexp0 << 55;
707*25c28e83SPiotr Jasiukajtis	st	%f10,[%l7]		! (4_2) *py = ((float*)&dres0)[0];
708*25c28e83SPiotr Jasiukajtis	faddd	%f48,K0,%f62		! (2_1) res0 += K0;
709*25c28e83SPiotr Jasiukajtis
710*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f46,%f48		! (5_1) res1 *= xx1;
711*25c28e83SPiotr Jasiukajtis	or	%g5,%i0,%g5		! (0_0) lexp0 |= lexp1;
712*25c28e83SPiotr Jasiukajtis	stx	%g5,[%fp+tmp1]		! (0_0) fdx0 = *((double*)lexp0);
713*25c28e83SPiotr Jasiukajtis	faddd	%f58,K0,%f60		! (3_1) res1 += K0;
714*25c28e83SPiotr Jasiukajtis
715*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f54,%f26		! (0_0) xx0 = dtmp0 * tbl_div0;
716*25c28e83SPiotr Jasiukajtis	sll	stridex,1,stridex2	! stridex2 = stridex * 2;
717*25c28e83SPiotr Jasiukajtis	st	%f11,[stridey+%l7]	! (5_2) *(py + stridey) = ((float*)&dres0)[1];
718*25c28e83SPiotr Jasiukajtis	fpadd32	%f8,%f52,%f10		! (0_1) dres0 = vis_fpadd32(dres0,fdx0);
719*25c28e83SPiotr Jasiukajtis
720*25c28e83SPiotr Jasiukajtis	lda	[%o5]0x82,%f24		! (2_0) ((float*)&ddx0)[0] = *px;
721*25c28e83SPiotr Jasiukajtis	add	%l7,stridey2,%i1	! py += stridey2
722*25c28e83SPiotr Jasiukajtis	add	%o7,TBL,%o7		! (1_0) addr1 = (char*)TBL + si1;
723*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f44,%f44		! (1_0) xx1 = dtmp1 * tbl_div1;
724*25c28e83SPiotr Jasiukajtis
725*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o5]0x82,%f25	! (3_0) ((float*)&ddx0)[1] = *(px + stridex);
726*25c28e83SPiotr Jasiukajtis	add	%l5,TBL,%l5		! (4_1) addr0 = (char*)TBL + si0;
727*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f62,%f58		! (2_1) res0 = tbl_sqrt0 * res0;
728*25c28e83SPiotr Jasiukajtis	faddd	%f30,K1,%f62		! (4_1) res0 += K1;
729*25c28e83SPiotr Jasiukajtis
730*25c28e83SPiotr Jasiukajtis	lda	[%o5]0x82,%g1		! (2_0) ax0 = *(int*)px;
731*25c28e83SPiotr Jasiukajtis	add	%o5,stridex2,%l7	! px += stridex2
732*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f60,%f56		! (3_1) res1 = tbl_sqrt1 * res1;
733*25c28e83SPiotr Jasiukajtis	faddd	%f48,K1,%f42		! (5_1) res1 += K1;
734*25c28e83SPiotr Jasiukajtis
735*25c28e83SPiotr Jasiukajtis	lda	[stridex+%o5]0x82,%o5	! (3_0) ax1 = *(int*)(px + stridex);
736*25c28e83SPiotr Jasiukajtis	cmp	%i4,_0x7f800000		! (1_0) ax1 ? 0x7f800000
737*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update16		! (1_0) if ( ax1 >= 0x7f800000 )
738*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f26,%f52		! (0_0) res0 = K3 * xx0;
739*25c28e83SPiotr Jasiukajtis.cont16:
740*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f44,%f50		! (1_0) res1 = K3 * xx1;
741*25c28e83SPiotr Jasiukajtis	cmp	%i4,_0x00800000		! (1_0) ax1 ? 0x00800000
742*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update17		! (1_0) if ( ax1 < 0x00800000 )
743*25c28e83SPiotr Jasiukajtis	fand	%f24,DC0,%f54		! (2_0) dfx0 = vis_fand(ddx0,DC0);
744*25c28e83SPiotr Jasiukajtis.cont17:
745*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f40,%f48		! (4_1) res0 *= xx0;
746*25c28e83SPiotr Jasiukajtis	sra	%g1,13,%i0		! (2_0) si0 = ax0 >> 13;
747*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7f800000		! (2_0) ax0 ? 0x7f800000
748*25c28e83SPiotr Jasiukajtis	fdtos	%f58,%f20		! (2_1) ((float*)&dres0)[0] = (float)res0;
749*25c28e83SPiotr Jasiukajtis
750*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f46,%f58		! (5_1) res1 *= xx1;
751*25c28e83SPiotr Jasiukajtis	sra	%o5,13,%o1		! (3_0) si1 = ax1 >> 13;
752*25c28e83SPiotr Jasiukajtis	and	%i0,2032,%i0		! (2_0) si0 &= 0x7f0;
753*25c28e83SPiotr Jasiukajtis	fdtos	%f56,%f21		! (3_1) ((float*)&dres0)[1] = (float)res1;
754*25c28e83SPiotr Jasiukajtis
755*25c28e83SPiotr Jasiukajtis	ldd	[%i0+TBL],%f30		! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
756*25c28e83SPiotr Jasiukajtis	sra	%o5,24,%o3		! (3_0) iexp1 = ax1 >> 24;
757*25c28e83SPiotr Jasiukajtis	and	%o1,2032,%o1		! (3_0) si1 &= 0x7f0;
758*25c28e83SPiotr Jasiukajtis	fpsub32	%f24,%f54,%f12		! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
759*25c28e83SPiotr Jasiukajtis
760*25c28e83SPiotr Jasiukajtis	ldd	[%o1+TBL],%f46		! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
761*25c28e83SPiotr Jasiukajtis	sra	%g1,24,%i3		! (2_0) iexp0 = ax0 >> 24;
762*25c28e83SPiotr Jasiukajtis	sub	%l0,%o3,%o3		! (3_0) iexp1 = 0x3f - iexp1;
763*25c28e83SPiotr Jasiukajtis	faddd	%f52,K2,%f40		! (0_0) res0 += K2;
764*25c28e83SPiotr Jasiukajtis
765*25c28e83SPiotr Jasiukajtis	ldd	[%l5+8],%f42		! (4_1) tbl_sqrt0 = ((double*)addr0)[1];
766*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%g5		! (2_0) iexp0 = 0x3f - iexp0;
767*25c28e83SPiotr Jasiukajtis	and	%o3,511,%i3		! (3_0) iexp1 &= 0x1ff;
768*25c28e83SPiotr Jasiukajtis	faddd	%f50,K2,%f60		! (1_0) res1 += K2;
769*25c28e83SPiotr Jasiukajtis
770*25c28e83SPiotr Jasiukajtis	ldd	[%l6+8],%f28		! (5_1) tbl_sqrt1 = ((double*)addr1)[1];
771*25c28e83SPiotr Jasiukajtis	sllx	%g5,55,%g5		! (2_0) lexp0 = iexp0 << 55;
772*25c28e83SPiotr Jasiukajtis	add	%i0,TBL,%i0		! (2_0) addr0 = (char*)TBL + si0;
773*25c28e83SPiotr Jasiukajtis	fitod	%f12,%f56		! (2_0) dtmp0 = (double)(((int*)dfx0)[0]);
774*25c28e83SPiotr Jasiukajtis
775*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp2],%f52		! (2_1) fdx0 = *((double*)lexp0);
776*25c28e83SPiotr Jasiukajtis	sllx	%i3,23,%i3		! (3_0) lexp1 = iexp1 << 23;
777*25c28e83SPiotr Jasiukajtis	add	%i1,stridey2,%o3	! py += stridey2
778*25c28e83SPiotr Jasiukajtis	fitod	%f13,%f50		! (3_0) dtmp1 = (double)(((int*)dfx0)[1]);
779*25c28e83SPiotr Jasiukajtis
780*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f26,%f40		! (0_0) res0 *= xx0;
781*25c28e83SPiotr Jasiukajtis	or	%g5,%i3,%g5		! (2_0) lexp0 |= lexp1;
782*25c28e83SPiotr Jasiukajtis	st	%f10,[%i1]		! (0_1) *py = ((float*)&dres0)[0];
783*25c28e83SPiotr Jasiukajtis	faddd	%f48,K0,%f62		! (4_1) res0 += K0;
784*25c28e83SPiotr Jasiukajtis
785*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f44,%f48		! (1_0) res1 *= xx1;
786*25c28e83SPiotr Jasiukajtis	add	%o1,TBL,%o1		! (3_0) addr1 = (char*)TBL + si1;
787*25c28e83SPiotr Jasiukajtis	stx	%g5,[%fp+tmp2]		! (2_0) fdx0 = *((double*)lexp0);
788*25c28e83SPiotr Jasiukajtis	faddd	%f58,K0,%f60		! (5_1) res1 += K0;
789*25c28e83SPiotr Jasiukajtis
790*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f30,%f30		! (2_0) xx0 = dtmp0 * tbl_div0;
791*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update18		! (2_0) if ( ax0 >= 0x7f800000 )
792*25c28e83SPiotr Jasiukajtis	st	%f11,[stridey+%i1]	! (1_1) *(py + stridey) = ((float*)&dres0)[1];
793*25c28e83SPiotr Jasiukajtis	fpadd32	%f20,%f52,%f0		! (2_1) dres0 = vis_fpadd32(dres0,fdx0);
794*25c28e83SPiotr Jasiukajtis.cont18:
795*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00800000		! (2_0) ax0 ? 0x00800000
796*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update19		! (2_0) if ( ax0 < 0x00800000 )
797*25c28e83SPiotr Jasiukajtis	lda	[%l7]0x82,%f14		! (4_0) ((float*)&ddx0)[0] = *px;
798*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f46,%f24		! (3_0) xx1 = dtmp1 * tbl_div1;
799*25c28e83SPiotr Jasiukajtis.cont19:
800*25c28e83SPiotr Jasiukajtis	lda	[stridex+%l7]0x82,%f15	! (5_0) ((float*)&ddx0)[1] = *(px + stridex);
801*25c28e83SPiotr Jasiukajtis	cmp	%o5,_0x7f800000		! (3_0) ax1 ? 0x7f800000
802*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f62,%f58		! (4_1) res0 = tbl_sqrt0 * res0;
803*25c28e83SPiotr Jasiukajtis	faddd	%f40,K1,%f46		! (0_0) res0 += K1;
804*25c28e83SPiotr Jasiukajtis
805*25c28e83SPiotr Jasiukajtis	lda	[%l7]0x82,%g1		! (4_0) ax0 = *(int*)px;
806*25c28e83SPiotr Jasiukajtis	add	%l7,stridex2,%i1	! px += stridex2
807*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f60,%f56		! (5_1) res1 = tbl_sqrt1 * res1;
808*25c28e83SPiotr Jasiukajtis	faddd	%f48,K1,%f62		! (1_0) res1 += K1;
809*25c28e83SPiotr Jasiukajtis
810*25c28e83SPiotr Jasiukajtis	lda	[stridex+%l7]0x82,%g5	! (5_0) ax1 = *(int*)(px + stridex);
811*25c28e83SPiotr Jasiukajtis	add	%o0,TBL,%o0		! (0_0) addr0 = (char*)TBL + si0;
812*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update20		! (3_0) if ( ax1 >= 0x7f800000 )
813*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f30,%f52		! (2_0) res0 = K3 * xx0;
814*25c28e83SPiotr Jasiukajtis.cont20:
815*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f24,%f50		! (3_0) res1 = K3 * xx1;
816*25c28e83SPiotr Jasiukajtis	cmp	%o5,_0x00800000		! (3_0) ax1 ? 0x00800000
817*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update21		! (3_0) if ( ax1 < 0x00800000 )
818*25c28e83SPiotr Jasiukajtis	fand	%f14,DC0,%f16		! (4_0) dfx0 = vis_fand(ddx0,DC0);
819*25c28e83SPiotr Jasiukajtis.cont21:
820*25c28e83SPiotr Jasiukajtis	fmuld	%f46,%f26,%f48		! (0_0) res0 *= xx0;
821*25c28e83SPiotr Jasiukajtis	sra	%g1,13,%l5		! (4_0) si0 = ax0 >> 13;
822*25c28e83SPiotr Jasiukajtis	add	%i1,stridex2,%o5	! px += stridex2
823*25c28e83SPiotr Jasiukajtis	fdtos	%f58,%f6		! (4_1) ((float*)&dres0)[0] = (float)res0;
824*25c28e83SPiotr Jasiukajtis
825*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f44,%f40		! (1_0) res1 *= xx1;
826*25c28e83SPiotr Jasiukajtis	sra	%g5,13,%l6		! (5_0) si1 = ax1 >> 13;
827*25c28e83SPiotr Jasiukajtis	and	%l5,2032,%l5		! (4_0) si0 &= 0x7f0;
828*25c28e83SPiotr Jasiukajtis	fdtos	%f56,%f7		! (5_1) ((float*)&dres0)[1] = (float)res1;
829*25c28e83SPiotr Jasiukajtis
830*25c28e83SPiotr Jasiukajtis	ldd	[%l5+TBL],%f54		! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
831*25c28e83SPiotr Jasiukajtis	sra	%g5,24,%l7		! (5_0) iexp1 = ax1 >> 24;
832*25c28e83SPiotr Jasiukajtis	and	%l6,2032,%l6		! (5_0) si1 &= 0x7f0;
833*25c28e83SPiotr Jasiukajtis	fpsub32	%f14,%f16,%f16		! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
834*25c28e83SPiotr Jasiukajtis
835*25c28e83SPiotr Jasiukajtis	ldd	[%l6+TBL],%f46		! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
836*25c28e83SPiotr Jasiukajtis	sra	%g1,24,%i3		! (4_0) iexp0 = ax0 >> 24;
837*25c28e83SPiotr Jasiukajtis	sub	%l0,%l7,%l7		! (5_0) iexp1 = 0x3f - iexp1;
838*25c28e83SPiotr Jasiukajtis	faddd	%f52,K2,%f58		! (2_0) res0 += K2;
839*25c28e83SPiotr Jasiukajtis
840*25c28e83SPiotr Jasiukajtis	ldd	[%o0+8],%f42		! (0_0) tbl_sqrt0 = ((double*)addr0)[1];
841*25c28e83SPiotr Jasiukajtis	and	%l7,511,%l1		! (5_0) iexp1 &= 0x1ff;
842*25c28e83SPiotr Jasiukajtis	add	%l6,TBL,%l6		! (5_0) addr1 = (char*)TBL + si1;
843*25c28e83SPiotr Jasiukajtis	faddd	%f50,K2,%f60		! (3_0) res1 += K2;
844*25c28e83SPiotr Jasiukajtis
845*25c28e83SPiotr Jasiukajtis	ldd	[%o7+8],%f28		! (1_0) tbl_sqrt1 = ((double*)addr1)[1];
846*25c28e83SPiotr Jasiukajtis	sllx	%l1,23,%l1		! (5_0) lexp1 = iexp1 << 23;
847*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%o0		! (4_0) iexp0 = 0x3f - iexp0;
848*25c28e83SPiotr Jasiukajtis	fitod	%f16,%f56		! (4_0) dtmp0 = (double)(((int*)dfx0)[0]);
849*25c28e83SPiotr Jasiukajtis
850*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp0],%f52		! (4_1) fdx0 = *((double*)lexp0);
851*25c28e83SPiotr Jasiukajtis	sllx	%o0,55,%o0		! (4_0) lexp0 = iexp0 << 55;
852*25c28e83SPiotr Jasiukajtis	add	%o3,stridey2,%l7	! py += stridey2
853*25c28e83SPiotr Jasiukajtis	fitod	%f17,%f44		! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
854*25c28e83SPiotr Jasiukajtis
855*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f30,%f62		! (2_0) res0 *= xx0;
856*25c28e83SPiotr Jasiukajtis	or	%o0,%l1,%o0		! (4_0) lexp0 |= lexp1;
857*25c28e83SPiotr Jasiukajtis	st	%f0,[%o3]		! (2_1) *py = ((float*)&dres0)[0];
858*25c28e83SPiotr Jasiukajtis	faddd	%f48,K0,%f22		! (0_0) res0 += K0;
859*25c28e83SPiotr Jasiukajtis
860*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f24,%f58		! (3_0) res1 *= xx1;
861*25c28e83SPiotr Jasiukajtis	subcc	counter,6,counter	! counter -= 6;
862*25c28e83SPiotr Jasiukajtis	stx	%o0,[%fp+tmp0]		! (4_0) fdx0 = *((double*)lexp0);
863*25c28e83SPiotr Jasiukajtis	faddd	%f40,K0,%f26		! (1_0) res1 += K0;
864*25c28e83SPiotr Jasiukajtis
865*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f54,%f40		! (4_0) xx0 = dtmp0 * tbl_div0;
866*25c28e83SPiotr Jasiukajtis	st	%f1,[stridey+%o3]	! (3_1) *(py + stridey) = ((float*)&dres0)[1];
867*25c28e83SPiotr Jasiukajtis	bpos,pt	%icc,.main_loop
868*25c28e83SPiotr Jasiukajtis	fpadd32	%f6,%f52,%f10		! (4_1) dres0 = vis_fpadd32(dres0,fdx0);
869*25c28e83SPiotr Jasiukajtis
870*25c28e83SPiotr Jasiukajtis	add	counter,6,counter	! counter += 6; undo the final subcc that went negative
! .tail: pipeline drain.  Finishes and stores, one at a time, up to five
! results that are still in flight.  counter is decremented and tested
! before every store; as soon as it goes negative the code branches to
! .begin, with the annulled delay slot (executed only when the branch is
! taken) leaving the address of the next unwritten output in %i2.
871*25c28e83SPiotr Jasiukajtis.tail:
872*25c28e83SPiotr Jasiukajtis	sll	stridex,1,stridex2	! stridex2 = stridex * 2;
873*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter	! counter--;
874*25c28e83SPiotr Jasiukajtis	bneg,a	.begin			! if ( counter < 0 ) nothing left to store
875*25c28e83SPiotr Jasiukajtis	mov	%l7,%i2			! (taken only) %i2 = %l7
876*25c28e83SPiotr Jasiukajtis
877*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f22,%f44		! (0_1) res0 = tbl_sqrt0 * res0;
878*25c28e83SPiotr Jasiukajtis	faddd	%f62,K1,%f42		! (2_1) res0 += K1;
879*25c28e83SPiotr Jasiukajtis
880*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f26,%f60		! (1_1) res1 = tbl_sqrt1 * res1;
881*25c28e83SPiotr Jasiukajtis
882*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f30,%f48		! (2_1) res0 *= xx0;
883*25c28e83SPiotr Jasiukajtis	fdtos	%f44,%f8		! (0_1) ((float*)&dres0)[0] = (float)res0;
884*25c28e83SPiotr Jasiukajtis
885*25c28e83SPiotr Jasiukajtis	fdtos	%f60,%f9		! (1_1) ((float*)&dres0)[1] = (float)res1;
886*25c28e83SPiotr Jasiukajtis
887*25c28e83SPiotr Jasiukajtis	ldd	[%i0+8],%f42		! (2_1) tbl_sqrt0 = ((double*)addr0)[1];
888*25c28e83SPiotr Jasiukajtis
889*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp1],%f52		! (0_1) fdx0 = *((double*)lexp0);
890*25c28e83SPiotr Jasiukajtis
891*25c28e83SPiotr Jasiukajtis	st	%f10,[%l7]		! (4_2) *py = ((float*)&dres0)[0];
892*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter	! counter--;
893*25c28e83SPiotr Jasiukajtis	bneg,a	.begin			! if ( counter < 0 ) done
894*25c28e83SPiotr Jasiukajtis	add	%l7,stridey,%i2		! (taken only) %i2 = %l7 + stridey
895*25c28e83SPiotr Jasiukajtis
896*25c28e83SPiotr Jasiukajtis	faddd	%f48,K0,%f62		! (2_1) res0 += K0;
897*25c28e83SPiotr Jasiukajtis	st	%f11,[stridey+%l7]	! (5_2) *(py + stridey) = ((float*)&dres0)[1];
898*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter	! counter--;
899*25c28e83SPiotr Jasiukajtis	bneg,a	.begin			! if ( counter < 0 ) done
900*25c28e83SPiotr Jasiukajtis	add	%l7,stridey2,%i2	! (taken only) %i2 = %l7 + stridey2
901*25c28e83SPiotr Jasiukajtis	fpadd32	%f8,%f52,%f10		! (0_1) dres0 = vis_fpadd32(dres0,fdx0);
902*25c28e83SPiotr Jasiukajtis
903*25c28e83SPiotr Jasiukajtis	add	%l7,stridey2,%i1	! py += stridey2
904*25c28e83SPiotr Jasiukajtis
905*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f62,%f58		! (2_1) res0 = tbl_sqrt0 * res0;
906*25c28e83SPiotr Jasiukajtis
907*25c28e83SPiotr Jasiukajtis	fdtos	%f58,%f20		! (2_1) ((float*)&dres0)[0] = (float)res0;
908*25c28e83SPiotr Jasiukajtis
909*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp2],%f52		! (2_1) fdx0 = *((double*)lexp0);
910*25c28e83SPiotr Jasiukajtis	add	%i1,stridey2,%o3	! py += stridey2
911*25c28e83SPiotr Jasiukajtis
912*25c28e83SPiotr Jasiukajtis	st	%f10,[%i1]		! (0_1) *py = ((float*)&dres0)[0];
913*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter	! counter--;
914*25c28e83SPiotr Jasiukajtis	bneg,a	.begin			! if ( counter < 0 ) done
915*25c28e83SPiotr Jasiukajtis	add	%i1,stridey,%i2		! (taken only) %i2 = %i1 + stridey
916*25c28e83SPiotr Jasiukajtis
917*25c28e83SPiotr Jasiukajtis	st	%f11,[stridey+%i1]	! (1_1) *(py + stridey) = ((float*)&dres0)[1];
918*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter	! counter--;
919*25c28e83SPiotr Jasiukajtis	bneg,a	.begin			! if ( counter < 0 ) done
920*25c28e83SPiotr Jasiukajtis	mov	%o3,%i2			! (taken only) %i2 = %o3
921*25c28e83SPiotr Jasiukajtis	fpadd32	%f20,%f52,%f0		! (2_1) dres0 = vis_fpadd32(dres0,fdx0);
922*25c28e83SPiotr Jasiukajtis
923*25c28e83SPiotr Jasiukajtis	st	%f0,[%o3]		! (2_1) *py = ((float*)&dres0)[0];
924*25c28e83SPiotr Jasiukajtis	ba	.begin			! fifth result stored; back to .begin
925*25c28e83SPiotr Jasiukajtis	add	%o3,stridey,%i2		! (delay slot) %i2 = %o3 + stridey
926*25c28e83SPiotr Jasiukajtis
927*25c28e83SPiotr Jasiukajtis	.align	16
! .spec0: scalar special case.  Stores FONE / x0 for the current element,
! advances px and py by one stride, decrements counter and resumes at
! .begin1.
! NOTE(review): the branch into .spec0 is outside this view; presumably it
! is taken for Inf/NaN inputs (ax0 >= 0x7f800000) -- confirm at the caller.
928*25c28e83SPiotr Jasiukajtis.spec0:
929*25c28e83SPiotr Jasiukajtis	fdivs	FONE,%f14,%f14		! x0 = FONE / x0;
930*25c28e83SPiotr Jasiukajtis	add	%l7,stridex,%l7		! px += stridex
931*25c28e83SPiotr Jasiukajtis	st	%f14,[%i2]		! *py = x0;
932*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! counter--;
933*25c28e83SPiotr Jasiukajtis	ba	.begin1			! resume with the next element
934*25c28e83SPiotr Jasiukajtis	add	%i2,stridey,%i2		! py += stridey
935*25c28e83SPiotr Jasiukajtis
936*25c28e83SPiotr Jasiukajtis	.align	16
! .spec1: scalar special case with three outcomes:
!   (ax0 & %o0) == 0  ->  *py = FONE / x0, continue at .begin1
!   ax0 < 0           ->  *py = sqrtf(x0), continue at .begin1
!   otherwise         ->  rebuild x0 from its integer bit pattern, multiply
!                         by FTWO, subtract 0x4b000000 from the reloaded
!                         ax0 and rejoin the normal path at .cont_spec
! The two branches to 1f are annulled, so the fdivs / fsqrts in their
! delay slots run only when the respective branch is taken.
! NOTE(review): %o0 is set outside this view; presumably it holds
! 0x7fffffff (the mask .update1 builds for the same test), which would
! make the first case "x0 is +-0" -- confirm.  The entry condition
! (presumably ax0 < 0x00800000) is also outside this view.
937*25c28e83SPiotr Jasiukajtis.spec1:
938*25c28e83SPiotr Jasiukajtis	andcc	%g1,%o0,%g0		! set icc from ax0 & %o0 (result discarded)
939*25c28e83SPiotr Jasiukajtis	bz,a	1f			! if ( (ax0 & %o0) == 0 )
940*25c28e83SPiotr Jasiukajtis	fdivs	FONE,%f14,%f14		! x0 = FONE / x0;
941*25c28e83SPiotr Jasiukajtis
942*25c28e83SPiotr Jasiukajtis	cmp	%g1,0			! ax0 ? 0
943*25c28e83SPiotr Jasiukajtis	bl,a	1f			! if ( ax0 < 0 )
944*25c28e83SPiotr Jasiukajtis	fsqrts	%f14,%f14		! x0 = sqrtf(x0);
945*25c28e83SPiotr Jasiukajtis
946*25c28e83SPiotr Jasiukajtis	fitod	%f14,%f0		! convert x0's bit pattern, read as an int, to double
947*25c28e83SPiotr Jasiukajtis	fdtos	%f0,%f14		! ... and narrow it back to single
948*25c28e83SPiotr Jasiukajtis	fmuls	%f14,FTWO,%f14		! x0 *= FTWO;
949*25c28e83SPiotr Jasiukajtis	st	%f14,[%fp+tmp3]		! spill x0 so its bits can be read as an integer
950*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%g1		! ax0 = *(int*)&x0;
951*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x4b000000),%o0	! %o0 = 0x4b000000
952*25c28e83SPiotr Jasiukajtis	sra	%g1,13,%l5		! (4_0) si0 = ax0 >> 13;
953*25c28e83SPiotr Jasiukajtis	fands	%f14,DC0,%f16		! (4_0) dfx0 = vis_fand(ddx0,DC0);
954*25c28e83SPiotr Jasiukajtis	ba	.cont_spec		! rejoin the regular computation
955*25c28e83SPiotr Jasiukajtis	sub	%g1,%o0,%g1		! (delay slot) ax0 -= 0x4b000000;
956*25c28e83SPiotr Jasiukajtis1:
957*25c28e83SPiotr Jasiukajtis	add	%l7,stridex,%l7		! px += stridex
958*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! counter--;
959*25c28e83SPiotr Jasiukajtis	st	%f14,[%i2]		! *py = x0;
960*25c28e83SPiotr Jasiukajtis	ba	.begin1			! resume with the next element
961*25c28e83SPiotr Jasiukajtis	add	%i2,stridey,%i2		! py += stridey
962*25c28e83SPiotr Jasiukajtis
963*25c28e83SPiotr Jasiukajtis	.align	16
! .update0: out-of-line fixup that returns to .cont0.
! When counter <= 1 it returns immediately.  Otherwise it saves
! %i1 - stridex in tmp_px and counter - 1 in tmp_counter on the stack
! frame, then forces counter to 1 before returning.
! NOTE(review): the branch into .update0 and the code that reloads
! tmp_px / tmp_counter are outside this view; by analogy with the visible
! bge/bl ... .updateN branches this is presumably entered when an input
! fails a range test, so that the run is cut short and restarted at the
! saved element -- confirm.
964*25c28e83SPiotr Jasiukajtis.update0:
965*25c28e83SPiotr Jasiukajtis	cmp	counter,1		! counter ? 1
966*25c28e83SPiotr Jasiukajtis	ble	.cont0			! if ( counter <= 1 ) nothing to save
967*25c28e83SPiotr Jasiukajtis	nop				! (delay slot)
968*25c28e83SPiotr Jasiukajtis
969*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%o1		! %o1 = %i1 - stridex
970*25c28e83SPiotr Jasiukajtis	stx	%o1,[%fp+tmp_px]	! tmp_px = %o1;
971*25c28e83SPiotr Jasiukajtis
972*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! counter--;
973*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;
974*25c28e83SPiotr Jasiukajtis
975*25c28e83SPiotr Jasiukajtis	ba	.cont0			! return to the pipeline
976*25c28e83SPiotr Jasiukajtis	mov	1,counter		! (delay slot) counter = 1;
977*25c28e83SPiotr Jasiukajtis
978*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update1 - special-input handler that resumes at .cont1.
 *
 * Tests the 32-bit pattern in %g5 (the float copy is taken from %f15):
 *   - counter <= 1: return to .cont1 at once (only %o0 was written).
 *   - (%g5 & 0x7fffffff) == 0 or %g5 < 0: save %i1 - stridex in tmp_px
 *     and counter - 1 in tmp_counter, return with counter = 1.
 *   - otherwise: rescale the value and redo the stage (5_0)/(5_1) setup
 *     (%l6, %l7, %l1, tmp0+4, %f17, %f44, %f46, %f50).
 *
 * Rescale: fitod/fdtos convert the integer held in %f15 to a float of
 * that numeric value, fmuls scales it by FTWO, and 0x4b000000
 * (150 << 23) is subtracted from the new bit pattern to form %g5.
 *
 * NOTE(review): this reads like subnormal handling (assuming FTWO is
 * 2.0f and %f15 holds the same bits as %g5); neither is visible in this
 * chunk - confirm.
 */
979*25c28e83SPiotr Jasiukajtis.update1:
980*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%o0
981*25c28e83SPiotr Jasiukajtis	cmp	counter,1
982*25c28e83SPiotr Jasiukajtis	ble	.cont1
983*25c28e83SPiotr Jasiukajtis
/* delay slot of the ble: %o0 = 0x7ffffc00 + 0x3ff = 0x7fffffff */
984*25c28e83SPiotr Jasiukajtis	add	%o0,0x3ff,%o0
985*25c28e83SPiotr Jasiukajtis
/* all bits below the sign bit clear -> 1f */
986*25c28e83SPiotr Jasiukajtis	andcc	%g5,%o0,%g0
987*25c28e83SPiotr Jasiukajtis	bz,a	1f
988*25c28e83SPiotr Jasiukajtis	nop
989*25c28e83SPiotr Jasiukajtis
/* sign bit set -> 1f */
990*25c28e83SPiotr Jasiukajtis	cmp	%g5,0
991*25c28e83SPiotr Jasiukajtis	bl,a	1f
992*25c28e83SPiotr Jasiukajtis	nop
993*25c28e83SPiotr Jasiukajtis
/* rescale %f15 and rebuild %g5 through the tmp3 stack slot */
994*25c28e83SPiotr Jasiukajtis	fitod	%f15,%f0
995*25c28e83SPiotr Jasiukajtis	fdtos	%f0,%f15
996*25c28e83SPiotr Jasiukajtis	fmuls	%f15,FTWO,%f15
997*25c28e83SPiotr Jasiukajtis	st	%f15,[%fp+tmp3]
998*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%g5
999*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x4b000000),%o0
1000*25c28e83SPiotr Jasiukajtis	sub	%g5,%o0,%g5
1001*25c28e83SPiotr Jasiukajtis
/* redo the per-element setup from the new %g5/%f15 */
1002*25c28e83SPiotr Jasiukajtis	fands	%f15,DC0,%f17		! (4_0) dfx0 = vis_fand(ddx0,DC0);
1003*25c28e83SPiotr Jasiukajtis
1004*25c28e83SPiotr Jasiukajtis	sra	%g5,13,%l6		! (5_0) si1 = ax1 >> 13;
1005*25c28e83SPiotr Jasiukajtis
1006*25c28e83SPiotr Jasiukajtis	sra	%g5,24,%l7		! (5_0) iexp1 = ax1 >> 24;
1007*25c28e83SPiotr Jasiukajtis	and	%l6,2032,%l6		! (5_0) si1 &= 0x7f0;
1008*25c28e83SPiotr Jasiukajtis
1009*25c28e83SPiotr Jasiukajtis	fpsub32s	%f15,%f17,%f17	! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1010*25c28e83SPiotr Jasiukajtis
1011*25c28e83SPiotr Jasiukajtis	ldd	[%l6+TBL],%f46		! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
1012*25c28e83SPiotr Jasiukajtis	sub	%l0,%l7,%l1		! (5_0) iexp1 = 0x3f - iexp1;
1013*25c28e83SPiotr Jasiukajtis
1014*25c28e83SPiotr Jasiukajtis	sll	%l1,23,%l1		! (5_0) lexp1 = iexp1 << 23;
1015*25c28e83SPiotr Jasiukajtis	add	%l6,TBL,%l6		! (5_0) addr1 = (char*)TBL + si1;
1016*25c28e83SPiotr Jasiukajtis	st	%l1,[%fp+tmp0+4]	! (4_0) fdx0 = *((double*)lexp0);
1017*25c28e83SPiotr Jasiukajtis	fitod	%f17,%f44		! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
1018*25c28e83SPiotr Jasiukajtis
1019*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f46,%f46		! (5_1) xx1 = dtmp1 * tbl_div1;
1020*25c28e83SPiotr Jasiukajtis
1021*25c28e83SPiotr Jasiukajtis	ba	.cont1
1022*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f46,%f50		! (5_1) res1 = K3 * xx1;
/* zero-magnitude or negative pattern: record restart point, counter = 1 */
1023*25c28e83SPiotr Jasiukajtis1:
1024*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%o1
1025*25c28e83SPiotr Jasiukajtis	stx	%o1,[%fp+tmp_px]
1026*25c28e83SPiotr Jasiukajtis
1027*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
1028*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1029*25c28e83SPiotr Jasiukajtis
1030*25c28e83SPiotr Jasiukajtis	ba	.cont1
1031*25c28e83SPiotr Jasiukajtis	mov	1,counter
1032*25c28e83SPiotr Jasiukajtis
1033*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update2 - shorten the current pass to two elements, resume at .cont2.
 *
 * If counter <= 2 control returns to .cont2; the delay-slot sub has still
 * written %o1 = %o5 - stridex.  Otherwise %o5 - 2*stridex is saved in
 * tmp_px, counter - 2 is saved in tmp_counter, and .cont2 is resumed with
 * counter forced to 2.
 *
 * NOTE(review): tmp_px/tmp_counter look like a restart point that code
 * outside this chunk reloads after the shortened pass - confirm.
 */
1034*25c28e83SPiotr Jasiukajtis.update2:
1035*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1036*25c28e83SPiotr Jasiukajtis	ble	.cont2
/* delay slot: executes whether or not the ble is taken */
1037*25c28e83SPiotr Jasiukajtis	sub	%o5,stridex,%o1
1038*25c28e83SPiotr Jasiukajtis
1039*25c28e83SPiotr Jasiukajtis	sub	%o1,stridex,%o1
1040*25c28e83SPiotr Jasiukajtis	stx	%o1,[%fp+tmp_px]
1041*25c28e83SPiotr Jasiukajtis
1042*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter
1043*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1044*25c28e83SPiotr Jasiukajtis
/* delay slot: counter = 2 on return to .cont2 */
1045*25c28e83SPiotr Jasiukajtis	ba	.cont2
1046*25c28e83SPiotr Jasiukajtis	mov	2,counter
1047*25c28e83SPiotr Jasiukajtis
1048*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update3 - special-input handler that resumes at .cont3.
 *
 * Tests the 32-bit pattern in %g1 (the float copy is taken from %f18):
 *   - counter <= 2: return to .cont3 at once (only %o1 was written).
 *   - (%g1 & 0x7fffffff) == 0 or %g1 < 0: save %o5 - 2*stridex in tmp_px
 *     and counter - 2 in tmp_counter, return with counter = 2.
 *   - otherwise: rescale the value and redo the stage (0_0) setup
 *     (%o0, %f54, %f56, %f30, %i3, %g5).
 *
 * Rescale: fitod/fdtos convert the integer held in %f18 to a float of
 * that numeric value, fmuls scales it by FTWO, and 0x4b000000
 * (150 << 23) is subtracted from the new bit pattern to form %g1.
 *
 * NOTE(review): this reads like subnormal handling (assuming FTWO is
 * 2.0f and %f18 holds the same bits as %g1); neither is visible in this
 * chunk - confirm.
 * NOTE(review): fand/fpsub32 here are the 64-bit forms, so they also
 * read %f19 and write %f57 and %f31; .update15 uses the single forms at
 * the matching step - confirm the difference is intended.
 */
1049*25c28e83SPiotr Jasiukajtis.update3:
1050*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%o1
1051*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1052*25c28e83SPiotr Jasiukajtis	ble	.cont3
1053*25c28e83SPiotr Jasiukajtis
/* delay slot of the ble: %o1 = 0x7ffffc00 + 0x3ff = 0x7fffffff */
1054*25c28e83SPiotr Jasiukajtis	add	%o1,0x3ff,%o1
1055*25c28e83SPiotr Jasiukajtis
/* annulled branches: the sub runs only when the branch to 1f is taken */
1056*25c28e83SPiotr Jasiukajtis	andcc	%g1,%o1,%g0
1057*25c28e83SPiotr Jasiukajtis	bz,a	1f
1058*25c28e83SPiotr Jasiukajtis	sub	%o5,stridex,%o1
1059*25c28e83SPiotr Jasiukajtis
1060*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1061*25c28e83SPiotr Jasiukajtis	bl,a	1f
1062*25c28e83SPiotr Jasiukajtis	sub	%o5,stridex,%o1
1063*25c28e83SPiotr Jasiukajtis
/* rescale %f18 and rebuild %g1 through the tmp3 stack slot */
1064*25c28e83SPiotr Jasiukajtis	fitod	%f18,%f0
1065*25c28e83SPiotr Jasiukajtis	fdtos	%f0,%f18
1066*25c28e83SPiotr Jasiukajtis	fmuls	%f18,FTWO,%f18
1067*25c28e83SPiotr Jasiukajtis	st	%f18,[%fp+tmp3]
1068*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%g1
1069*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x4b000000),%o1
1070*25c28e83SPiotr Jasiukajtis	sub	%g1,%o1,%g1
1071*25c28e83SPiotr Jasiukajtis
/* redo the per-element setup from the new %g1/%f18 */
1072*25c28e83SPiotr Jasiukajtis	fand	%f18,DC0,%f56		! (0_0) dfx0 = vis_fand(ddx0,DC0);
1073*25c28e83SPiotr Jasiukajtis	sra	%g1,13,%o0		! (0_0) si0 = ax0 >> 13;
1074*25c28e83SPiotr Jasiukajtis
1075*25c28e83SPiotr Jasiukajtis	and	%o0,2032,%o0		! (0_0) si0 &= 0x7f0;
1076*25c28e83SPiotr Jasiukajtis
1077*25c28e83SPiotr Jasiukajtis	ldd	[%o0+TBL],%f54		! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
1078*25c28e83SPiotr Jasiukajtis	fpsub32	%f18,%f56,%f30		! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1079*25c28e83SPiotr Jasiukajtis
1080*25c28e83SPiotr Jasiukajtis	sra	%g1,24,%i3		! (0_0) iexp0 = ax0 >> 24;
1081*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%g5		! (0_0) iexp0 = 0x3f - iexp0;
1082*25c28e83SPiotr Jasiukajtis	ba	.cont3
1083*25c28e83SPiotr Jasiukajtis	fitod	%f30,%f56		! (0_0) dtmp0 = (double)(((int*)dfx0)[0]);
/* reached with %o1 = %o5 - stridex from the annulled delay slot */
1084*25c28e83SPiotr Jasiukajtis1:
1085*25c28e83SPiotr Jasiukajtis	sub	%o1,stridex,%o1
1086*25c28e83SPiotr Jasiukajtis	stx	%o1,[%fp+tmp_px]
1087*25c28e83SPiotr Jasiukajtis
1088*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter
1089*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1090*25c28e83SPiotr Jasiukajtis
1091*25c28e83SPiotr Jasiukajtis	ba	.cont3
1092*25c28e83SPiotr Jasiukajtis	mov	2,counter
1093*25c28e83SPiotr Jasiukajtis
1094*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update4 - shorten the current pass to three elements, resume at .cont4.
 *
 * If counter <= 3 control returns to .cont4; the delay-slot sub has still
 * written %o1 = %l7 - stridex2.  Otherwise %l7 - stridex2 - stridex is
 * saved in tmp_px, counter - 3 is saved in tmp_counter, and .cont4 is
 * resumed with counter forced to 3.
 *
 * NOTE(review): tmp_px/tmp_counter look like a restart point that code
 * outside this chunk reloads after the shortened pass - confirm.
 */
1095*25c28e83SPiotr Jasiukajtis.update4:
1096*25c28e83SPiotr Jasiukajtis	cmp	counter,3
1097*25c28e83SPiotr Jasiukajtis	ble	.cont4
/* delay slot: executes whether or not the ble is taken */
1098*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex2,%o1
1099*25c28e83SPiotr Jasiukajtis
1100*25c28e83SPiotr Jasiukajtis	sub	%o1,stridex,%o1
1101*25c28e83SPiotr Jasiukajtis	stx	%o1,[%fp+tmp_px]
1102*25c28e83SPiotr Jasiukajtis
1103*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter
1104*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1105*25c28e83SPiotr Jasiukajtis
/* delay slot: counter = 3 on return to .cont4 */
1106*25c28e83SPiotr Jasiukajtis	ba	.cont4
1107*25c28e83SPiotr Jasiukajtis	mov	3,counter
1108*25c28e83SPiotr Jasiukajtis
1109*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update5 - special-input handler that resumes at .cont5.
 *
 * Tests the 32-bit pattern in %i4 (the float copy is taken from %f19):
 *   - counter <= 3: return to .cont5 at once (only %o1 was written).
 *   - (%i4 & 0x7fffffff) == 0 or %i4 < 0: save %l7 - stridex2 - stridex
 *     in tmp_px and counter - 3 in tmp_counter, return with counter = 3.
 *   - otherwise: rescale the value and redo the stage (1_0) setup
 *     (%g5, %i1, %o7, %i0, tmp1+4, %f0, %f31, %f44, %f50).
 *
 * Rescale: fitod/fdtos convert the integer held in %f19 to a float of
 * that numeric value, fmuls scales it by FTWO, and 0x4b000000
 * (150 << 23) is subtracted from the new bit pattern to form %i4.
 *
 * NOTE(review): this reads like subnormal handling (assuming FTWO is
 * 2.0f and %f19 holds the same bits as %i4); neither is visible in this
 * chunk - confirm.
 */
1110*25c28e83SPiotr Jasiukajtis.update5:
1111*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%o1
1112*25c28e83SPiotr Jasiukajtis	cmp	counter,3
1113*25c28e83SPiotr Jasiukajtis	ble	.cont5
1114*25c28e83SPiotr Jasiukajtis
/* delay slot of the ble: %o1 = 0x7ffffc00 + 0x3ff = 0x7fffffff */
1115*25c28e83SPiotr Jasiukajtis	add	%o1,0x3ff,%o1
1116*25c28e83SPiotr Jasiukajtis
/* annulled branches: the sub runs only when the branch to 1f is taken */
1117*25c28e83SPiotr Jasiukajtis	andcc	%i4,%o1,%g0
1118*25c28e83SPiotr Jasiukajtis	bz,a	1f
1119*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex2,%o1
1120*25c28e83SPiotr Jasiukajtis
1121*25c28e83SPiotr Jasiukajtis	cmp	%i4,0
1122*25c28e83SPiotr Jasiukajtis	bl,a	1f
1123*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex2,%o1
1124*25c28e83SPiotr Jasiukajtis
/* rescale %f19 and rebuild %i4 through the tmp3 stack slot */
1125*25c28e83SPiotr Jasiukajtis	fitod	%f19,%f0
1126*25c28e83SPiotr Jasiukajtis	fdtos	%f0,%f19
1127*25c28e83SPiotr Jasiukajtis	fmuls	%f19,FTWO,%f19
1128*25c28e83SPiotr Jasiukajtis	st	%f19,[%fp+tmp3]
1129*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%i4
1130*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x4b000000),%o1
1131*25c28e83SPiotr Jasiukajtis	sub	%i4,%o1,%i4
1132*25c28e83SPiotr Jasiukajtis
/* redo the per-element setup from the new %i4/%f19 */
1133*25c28e83SPiotr Jasiukajtis	fands	%f19,DC0,%f0		! (0_0) dfx0 = vis_fand(ddx0,DC0);
1134*25c28e83SPiotr Jasiukajtis
1135*25c28e83SPiotr Jasiukajtis	sra	%i4,13,%g5		! (1_0) si1 = ax1 >> 13;
1136*25c28e83SPiotr Jasiukajtis
1137*25c28e83SPiotr Jasiukajtis	sra	%i4,24,%i1		! (1_0) iexp1 = ax1 >> 24;
1138*25c28e83SPiotr Jasiukajtis	and	%g5,2032,%o7		! (1_0) si1 &= 0x7f0;
1139*25c28e83SPiotr Jasiukajtis	fpsub32s	%f19,%f0,%f31	! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1140*25c28e83SPiotr Jasiukajtis
1141*25c28e83SPiotr Jasiukajtis	ldd	[%o7+TBL],%f44		! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
1142*25c28e83SPiotr Jasiukajtis	sub	%l0,%i1,%i0		! (1_0) iexp1 = 0x3f - iexp1;
1143*25c28e83SPiotr Jasiukajtis
1144*25c28e83SPiotr Jasiukajtis	sll	%i0,23,%i0		! (1_0) lexp1 = iexp1 << 23;
1145*25c28e83SPiotr Jasiukajtis	fitod	%f31,%f50		! (1_0) dtmp0 = (double)(((int*)dfx0)[0]);
1146*25c28e83SPiotr Jasiukajtis
1147*25c28e83SPiotr Jasiukajtis	st	%i0,[%fp+tmp1+4]	! (0_0) fdx0 = *((double*)lexp0);
1148*25c28e83SPiotr Jasiukajtis
1149*25c28e83SPiotr Jasiukajtis	add	%o7,TBL,%o7		! (1_0) addr0 = (char*)TBL + si0;
1150*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f44,%f44		! (1_0) xx0 = dtmp0 * tbl_div0;
1151*25c28e83SPiotr Jasiukajtis
1152*25c28e83SPiotr Jasiukajtis	ba	.cont5
1153*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f44,%f50		! (1_0) res1 = K3 * xx1;
/* reached with %o1 = %l7 - stridex2 from the annulled delay slot */
1154*25c28e83SPiotr Jasiukajtis1:
1155*25c28e83SPiotr Jasiukajtis	sub	%o1,stridex,%o1
1156*25c28e83SPiotr Jasiukajtis	stx	%o1,[%fp+tmp_px]
1157*25c28e83SPiotr Jasiukajtis
1158*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter
1159*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1160*25c28e83SPiotr Jasiukajtis
1161*25c28e83SPiotr Jasiukajtis	ba	.cont5
1162*25c28e83SPiotr Jasiukajtis	mov	3,counter
1163*25c28e83SPiotr Jasiukajtis
1164*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update6 - shorten the current pass to four elements, resume at .cont6.
 *
 * If counter <= 4 control returns to .cont6; the delay-slot sub has still
 * written %o3 = %l7 - stridex.  Otherwise %l7 - 2*stridex is saved in
 * tmp_px, counter - 4 is saved in tmp_counter, and .cont6 is resumed with
 * counter forced to 4.
 *
 * NOTE(review): tmp_px/tmp_counter look like a restart point that code
 * outside this chunk reloads after the shortened pass - confirm.
 */
1165*25c28e83SPiotr Jasiukajtis.update6:
1166*25c28e83SPiotr Jasiukajtis	cmp	counter,4
1167*25c28e83SPiotr Jasiukajtis	ble	.cont6
/* delay slot: executes whether or not the ble is taken */
1168*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex,%o3
1169*25c28e83SPiotr Jasiukajtis
1170*25c28e83SPiotr Jasiukajtis	sub	%o3,stridex,%o3
1171*25c28e83SPiotr Jasiukajtis	stx	%o3,[%fp+tmp_px]
1172*25c28e83SPiotr Jasiukajtis
1173*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
1174*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1175*25c28e83SPiotr Jasiukajtis
/* delay slot: counter = 4 on return to .cont6 */
1176*25c28e83SPiotr Jasiukajtis	ba	.cont6
1177*25c28e83SPiotr Jasiukajtis	mov	4,counter
1178*25c28e83SPiotr Jasiukajtis
1179*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update7 - special-input handler that resumes at .cont7.
 *
 * Tests the 32-bit pattern in %g1 (the float copy is taken from %f24):
 *   - counter <= 4: return to .cont7 at once (only %o3 was written).
 *   - (%g1 & 0x7fffffff) == 0 or %g1 < 0: save %l7 - 2*stridex in tmp_px
 *     and counter - 4 in tmp_counter, return with counter = 4.
 *   - otherwise: rescale the value and redo the stage (2_0) setup
 *     (%i0, %i3, %g5, tmp2, %f0, %f12, %f30, %f56).
 *
 * Rescale: fitod/fdtos convert the integer held in %f24 to a float of
 * that numeric value, fmuls scales it by FTWO, and 0x4b000000
 * (150 << 23) is subtracted from the new bit pattern to form %g1.
 *
 * NOTE(review): this reads like subnormal handling (assuming FTWO is
 * 2.0f and %f24 holds the same bits as %g1); neither is visible in this
 * chunk - confirm.
 */
1180*25c28e83SPiotr Jasiukajtis.update7:
1181*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%o3
1182*25c28e83SPiotr Jasiukajtis	cmp	counter,4
1183*25c28e83SPiotr Jasiukajtis	ble	.cont7
1184*25c28e83SPiotr Jasiukajtis
/* delay slot of the ble: %o3 = 0x7ffffc00 + 0x3ff = 0x7fffffff */
1185*25c28e83SPiotr Jasiukajtis	add	%o3,0x3ff,%o3
1186*25c28e83SPiotr Jasiukajtis
/* annulled branches: the sub runs only when the branch to 1f is taken */
1187*25c28e83SPiotr Jasiukajtis	andcc	%g1,%o3,%g0
1188*25c28e83SPiotr Jasiukajtis	bz,a	1f
1189*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex,%o3
1190*25c28e83SPiotr Jasiukajtis
1191*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1192*25c28e83SPiotr Jasiukajtis	bl,a	1f
1193*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex,%o3
1194*25c28e83SPiotr Jasiukajtis
/* rescale %f24 and rebuild %g1 through the tmp3 stack slot */
1195*25c28e83SPiotr Jasiukajtis	fitod	%f24,%f0
1196*25c28e83SPiotr Jasiukajtis	fdtos	%f0,%f24
1197*25c28e83SPiotr Jasiukajtis	fmuls	%f24,FTWO,%f24
1198*25c28e83SPiotr Jasiukajtis	st	%f24,[%fp+tmp3]
1199*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%g1
1200*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x4b000000),%o3
1201*25c28e83SPiotr Jasiukajtis	sub	%g1,%o3,%g1
1202*25c28e83SPiotr Jasiukajtis
/* redo the per-element setup from the new %g1/%f24 */
1203*25c28e83SPiotr Jasiukajtis	fands	%f24,DC0,%f0		! (2_0) dfx0 = vis_fand(ddx0,DC0);
1204*25c28e83SPiotr Jasiukajtis	sra	%g1,13,%i0		! (2_0) si0 = ax0 >> 13;
1205*25c28e83SPiotr Jasiukajtis
1206*25c28e83SPiotr Jasiukajtis	and	%i0,2032,%i0		! (2_0) si0 &= 0x7f0;
1207*25c28e83SPiotr Jasiukajtis
1208*25c28e83SPiotr Jasiukajtis	ldd	[%i0+TBL],%f30		! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
1209*25c28e83SPiotr Jasiukajtis	fpsub32s	%f24,%f0,%f12	! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1210*25c28e83SPiotr Jasiukajtis
1211*25c28e83SPiotr Jasiukajtis	sra	%g1,24,%i3		! (2_0) iexp0 = ax0 >> 24;
1212*25c28e83SPiotr Jasiukajtis
1213*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%g5		! (2_0) iexp0 = 0x3f - iexp0;
1214*25c28e83SPiotr Jasiukajtis
/*
 * The shift below is by 23 bits and the st that follows writes 32 bits at
 * tmp2+0.  NOTE(review): with big-endian data layout that word is the
 * upper half of the 8-byte slot, which would match the "<< 55" wording in
 * the trailing comment - confirm.
 */
1215*25c28e83SPiotr Jasiukajtis	sll	%g5,23,%g5		! (2_0) lexp0 = iexp0 << 55;
1216*25c28e83SPiotr Jasiukajtis	add	%i0,TBL,%i0		! (2_0) addr0 = (char*)TBL + si0;
1217*25c28e83SPiotr Jasiukajtis	fitod	%f12,%f56		! (2_0) dtmp0 = (double)(((int*)dfx0)[0]);
1218*25c28e83SPiotr Jasiukajtis
1219*25c28e83SPiotr Jasiukajtis	st	%g5,[%fp+tmp2]		! (2_0) fdx0 = *((double*)lexp0);
1220*25c28e83SPiotr Jasiukajtis	ba	.cont7
1221*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f30,%f30		! (2_0) xx0 = dtmp0 * tbl_div0;
/* reached with %o3 = %l7 - stridex from the annulled delay slot */
1222*25c28e83SPiotr Jasiukajtis1:
1223*25c28e83SPiotr Jasiukajtis	sub	%o3,stridex,%o3
1224*25c28e83SPiotr Jasiukajtis	stx	%o3,[%fp+tmp_px]
1225*25c28e83SPiotr Jasiukajtis
1226*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
1227*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1228*25c28e83SPiotr Jasiukajtis
1229*25c28e83SPiotr Jasiukajtis	ba	.cont7
1230*25c28e83SPiotr Jasiukajtis	mov	4,counter
1231*25c28e83SPiotr Jasiukajtis
1232*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update8 - shorten the current pass to five elements, resume at .cont8.
 *
 * If counter <= 5 control returns to .cont8 with nothing changed.
 * Otherwise %l7 - stridex is saved in tmp_px, counter - 5 is saved in
 * tmp_counter, and .cont8 is resumed with counter forced to 5.  %o3 is
 * scratch.
 *
 * NOTE(review): tmp_px/tmp_counter look like a restart point that code
 * outside this chunk reloads after the shortened pass - confirm.
 */
1233*25c28e83SPiotr Jasiukajtis.update8:
1234*25c28e83SPiotr Jasiukajtis	cmp	counter,5
1235*25c28e83SPiotr Jasiukajtis	ble	.cont8
1236*25c28e83SPiotr Jasiukajtis	nop
1237*25c28e83SPiotr Jasiukajtis
1238*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex,%o3
1239*25c28e83SPiotr Jasiukajtis	stx	%o3,[%fp+tmp_px]
1240*25c28e83SPiotr Jasiukajtis
1241*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter
1242*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1243*25c28e83SPiotr Jasiukajtis
/* delay slot: counter = 5 on return to .cont8 */
1244*25c28e83SPiotr Jasiukajtis	ba	.cont8
1245*25c28e83SPiotr Jasiukajtis	mov	5,counter
1246*25c28e83SPiotr Jasiukajtis
1247*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update9 - special-input handler that resumes at .cont9.
 *
 * Tests the 32-bit pattern in %o5; the float copy is loaded into %f0
 * from the address %l7 - stridex:
 *   - counter <= 5: return to .cont9 at once (%o3 and %i3 were written).
 *   - (%o5 & 0x7fffffff) == 0 or %o5 < 0: save %l7 - stridex in tmp_px
 *     and counter - 5 in tmp_counter, return with counter = 5.
 *   - otherwise: rescale the value and redo the stage (3_0) setup
 *     (%o1, %o3, %i3, tmp2+4, %f0, %f8, %f24, %f50).
 *
 * Rescale: fitod/fdtos convert the integer held in %f0 to a float of
 * that numeric value, fmuls scales it by FTWO, and 0x4b000000
 * (150 << 23) is subtracted from the new bit pattern to form %o5.
 *
 * NOTE(review): this reads like subnormal handling (assuming FTWO is
 * 2.0f and the word at %l7 - stridex is the same bits as %o5); neither
 * is visible in this chunk - confirm.
 */
1248*25c28e83SPiotr Jasiukajtis.update9:
1249*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%o3
1250*25c28e83SPiotr Jasiukajtis	cmp	counter,5
1251*25c28e83SPiotr Jasiukajtis	ble	.cont9
/* delay slot: %i3 = %l7 - stridex on every path */
1252*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex,%i3
1253*25c28e83SPiotr Jasiukajtis
/* %o3 = 0x7ffffc00 + 0x3ff = 0x7fffffff */
1254*25c28e83SPiotr Jasiukajtis	add	%o3,0x3ff,%o3
1255*25c28e83SPiotr Jasiukajtis
/* the bz is not annulled, so the ld in its delay slot always executes */
1256*25c28e83SPiotr Jasiukajtis	andcc	%o5,%o3,%g0
1257*25c28e83SPiotr Jasiukajtis	bz	1f
1258*25c28e83SPiotr Jasiukajtis	ld	[%i3],%f0
1259*25c28e83SPiotr Jasiukajtis
1260*25c28e83SPiotr Jasiukajtis	cmp	%o5,0
1261*25c28e83SPiotr Jasiukajtis	bl,a	1f
1262*25c28e83SPiotr Jasiukajtis	nop
1263*25c28e83SPiotr Jasiukajtis
/* rescale %f0 and rebuild %o5 through the tmp3 stack slot */
1264*25c28e83SPiotr Jasiukajtis	fitod	%f0,%f0
1265*25c28e83SPiotr Jasiukajtis	fdtos	%f0,%f0
1266*25c28e83SPiotr Jasiukajtis	fmuls	%f0,FTWO,%f0
1267*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp3]
1268*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%o5
1269*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x4b000000),%o3
1270*25c28e83SPiotr Jasiukajtis	sub	%o5,%o3,%o5
1271*25c28e83SPiotr Jasiukajtis
/* redo the per-element setup from the new %o5/%f0 */
1272*25c28e83SPiotr Jasiukajtis	fands	%f0,DC0,%f8		! (2_0) dfx0 = vis_fand(ddx0,DC0);
1273*25c28e83SPiotr Jasiukajtis
1274*25c28e83SPiotr Jasiukajtis	sra	%o5,13,%o1		! (3_0) si1 = ax1 >> 13;
1275*25c28e83SPiotr Jasiukajtis
1276*25c28e83SPiotr Jasiukajtis	sra	%o5,24,%o3		! (3_0) iexp1 = ax1 >> 24;
1277*25c28e83SPiotr Jasiukajtis	and	%o1,2032,%o1		! (3_0) si1 &= 0x7f0;
1278*25c28e83SPiotr Jasiukajtis	fpsub32s	%f0,%f8,%f0	! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1279*25c28e83SPiotr Jasiukajtis
1280*25c28e83SPiotr Jasiukajtis	ldd	[%o1+TBL],%f8		! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
1281*25c28e83SPiotr Jasiukajtis	sub	%l0,%o3,%i3		! (3_0) iexp1 = 0x3f - iexp1;
1282*25c28e83SPiotr Jasiukajtis
1283*25c28e83SPiotr Jasiukajtis	sllx	%i3,23,%i3		! (3_0) lexp1 = iexp1 << 23;
1284*25c28e83SPiotr Jasiukajtis	fitod	%f0,%f50		! (3_0) dtmp1 = (double)(((int*)dfx0)[1]);
1285*25c28e83SPiotr Jasiukajtis
1286*25c28e83SPiotr Jasiukajtis	add	%o1,TBL,%o1		! (3_0) addr1 = (char*)TBL + si1;
1287*25c28e83SPiotr Jasiukajtis	st	%i3,[%fp+tmp2+4]	! (2_0) fdx0 = *((double*)lexp0);
1288*25c28e83SPiotr Jasiukajtis
1289*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f8,%f24		! (3_0) xx1 = dtmp1 * tbl_div1;
1290*25c28e83SPiotr Jasiukajtis
1291*25c28e83SPiotr Jasiukajtis	ba	.cont9
1292*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f24,%f50		! (3_0) res1 = K3 * xx1;
/* %i3 still holds %l7 - stridex from the first delay slot */
1293*25c28e83SPiotr Jasiukajtis1:
1294*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_px]
1295*25c28e83SPiotr Jasiukajtis
1296*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter
1297*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1298*25c28e83SPiotr Jasiukajtis
1299*25c28e83SPiotr Jasiukajtis	ba	.cont9
1300*25c28e83SPiotr Jasiukajtis	mov	5,counter
1301*25c28e83SPiotr Jasiukajtis
1302*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update10 - shorten the current pass to zero elements, resume at
 * .cont10.
 *
 * If counter <= 0 control returns to .cont10; the delay-slot sub has
 * still written %o3 = %i1 - stridex.  Otherwise %i1 - 2*stridex is saved
 * in tmp_px, the whole of counter is saved in tmp_counter (nothing is
 * subtracted here), and .cont10 is resumed with counter forced to 0.
 *
 * NOTE(review): tmp_px/tmp_counter look like a restart point that code
 * outside this chunk reloads after the shortened pass - confirm.
 */
1303*25c28e83SPiotr Jasiukajtis.update10:
1304*25c28e83SPiotr Jasiukajtis	cmp	counter,0
1305*25c28e83SPiotr Jasiukajtis	ble	.cont10
/* delay slot: executes whether or not the ble is taken */
1306*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%o3
1307*25c28e83SPiotr Jasiukajtis
1308*25c28e83SPiotr Jasiukajtis	sub	%o3,stridex,%o3
1309*25c28e83SPiotr Jasiukajtis	stx	%o3,[%fp+tmp_px]
1310*25c28e83SPiotr Jasiukajtis
1311*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1312*25c28e83SPiotr Jasiukajtis
/* delay slot: counter = 0 on return to .cont10 */
1313*25c28e83SPiotr Jasiukajtis	ba	.cont10
1314*25c28e83SPiotr Jasiukajtis	mov	0,counter
1315*25c28e83SPiotr Jasiukajtis
1316*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update11 - special-input handler that resumes at .cont11.
 *
 * Loads the 32-bit pattern at %i1 - 2*stridex into %i3 and tests it (the
 * float copy is taken from %f14):
 *   - counter <= 0: return to .cont11 at once (%i4 and %o3 were written).
 *   - (%i3 & 0x7fffffff) == 0 or %i3 < 0: save %i1 - 2*stridex in tmp_px
 *     and counter in tmp_counter, return with counter = 0.
 *   - otherwise: rescale the value and redo the stage (4_0) setup
 *     (%i3, %l5, %o0, tmp0, %f16, %f54, %f56, %f40).
 *
 * Rescale: fitod/fdtos convert the integer held in %f14 to a float of
 * that numeric value, fmuls scales it by FTWO, and 0x4b000000
 * (150 << 23) is subtracted from the new bit pattern to form %i3.
 *
 * NOTE(review): this reads like subnormal handling (assuming FTWO is
 * 2.0f and %f14 holds the same bits as the word just loaded); neither is
 * visible in this chunk - confirm.
 */
1317*25c28e83SPiotr Jasiukajtis.update11:
1318*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%i4
1319*25c28e83SPiotr Jasiukajtis	cmp	counter,0
1320*25c28e83SPiotr Jasiukajtis	ble	.cont11
/* delay slot: executes whether or not the ble is taken */
1321*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%o3
1322*25c28e83SPiotr Jasiukajtis
/* %o3 = %i1 - 2*stridex, %i4 = 0x7fffffff, %i3 = word at %o3 */
1323*25c28e83SPiotr Jasiukajtis	sub	%o3,stridex,%o3
1324*25c28e83SPiotr Jasiukajtis	add	%i4,0x3ff,%i4
1325*25c28e83SPiotr Jasiukajtis	ld	[%o3],%i3
1326*25c28e83SPiotr Jasiukajtis
/* the bz is not annulled: the cmp after it is its delay slot */
1327*25c28e83SPiotr Jasiukajtis	andcc	%i3,%i4,%g0
1328*25c28e83SPiotr Jasiukajtis	bz	1f
1329*25c28e83SPiotr Jasiukajtis
1330*25c28e83SPiotr Jasiukajtis	cmp	%i3,0
1331*25c28e83SPiotr Jasiukajtis	bl,a	1f
1332*25c28e83SPiotr Jasiukajtis	nop
1333*25c28e83SPiotr Jasiukajtis
/* rescale %f14 and rebuild %i3 through the tmp3 stack slot */
1334*25c28e83SPiotr Jasiukajtis	fitod	%f14,%f0
1335*25c28e83SPiotr Jasiukajtis	fdtos	%f0,%f14
1336*25c28e83SPiotr Jasiukajtis	fmuls	%f14,FTWO,%f14
1337*25c28e83SPiotr Jasiukajtis	st	%f14,[%fp+tmp3]
1338*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%i3
1339*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x4b000000),%o3
1340*25c28e83SPiotr Jasiukajtis	sub	%i3,%o3,%i3
1341*25c28e83SPiotr Jasiukajtis
/* redo the per-element setup from the new %i3/%f14 */
1342*25c28e83SPiotr Jasiukajtis	fands	%f14,DC0,%f16		! (4_0) dfx0 = vis_fand(ddx0,DC0);
1343*25c28e83SPiotr Jasiukajtis	sra	%i3,13,%l5		! (4_0) si0 = ax0 >> 13;
1344*25c28e83SPiotr Jasiukajtis
1345*25c28e83SPiotr Jasiukajtis	and	%l5,2032,%l5		! (4_0) si0 &= 0x7f0;
1346*25c28e83SPiotr Jasiukajtis
1347*25c28e83SPiotr Jasiukajtis	ldd	[%l5+TBL],%f54		! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
1348*25c28e83SPiotr Jasiukajtis	fpsub32s	%f14,%f16,%f16	! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1349*25c28e83SPiotr Jasiukajtis
1350*25c28e83SPiotr Jasiukajtis	sra	%i3,24,%i3		! (4_0) iexp0 = ax0 >> 24;
1351*25c28e83SPiotr Jasiukajtis
1352*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%o0		! (4_0) iexp0 = 0x3f - iexp0;
1353*25c28e83SPiotr Jasiukajtis	fitod	%f16,%f56		! (4_0) dtmp0 = (double)(((int*)dfx0)[0]);
1354*25c28e83SPiotr Jasiukajtis
/* shift is by 23 bits; only the low 32 bits are stored to tmp0+0 below */
1355*25c28e83SPiotr Jasiukajtis	sllx	%o0,23,%o0		! (4_0) lexp0 = iexp0 << 55;
1356*25c28e83SPiotr Jasiukajtis
1357*25c28e83SPiotr Jasiukajtis	st	%o0,[%fp+tmp0]		! (4_0) fdx0 = *((double*)lexp0);
1358*25c28e83SPiotr Jasiukajtis
1359*25c28e83SPiotr Jasiukajtis	ba	.cont11
1360*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f54,%f40		! (4_0) xx0 = dtmp0 * tbl_div0;
/* %o3 still holds %i1 - 2*stridex here */
1361*25c28e83SPiotr Jasiukajtis1:
1362*25c28e83SPiotr Jasiukajtis	stx	%o3,[%fp+tmp_px]
1363*25c28e83SPiotr Jasiukajtis
1364*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1365*25c28e83SPiotr Jasiukajtis
1366*25c28e83SPiotr Jasiukajtis	ba	.cont11
1367*25c28e83SPiotr Jasiukajtis	mov	0,counter
1368*25c28e83SPiotr Jasiukajtis
1369*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update12 - shorten the current pass to one element, resume at .cont12.
 *
 * If counter <= 1 control returns to .cont12 with nothing changed.
 * Otherwise %i1 is itself stepped back by stridex and saved in tmp_px,
 * counter - 1 is saved in tmp_counter, and .cont12 is resumed with
 * counter forced to 1.  Unlike .update0 this path modifies %i1 in place.
 *
 * NOTE(review): tmp_px/tmp_counter look like a restart point that code
 * outside this chunk reloads after the shortened pass - confirm.
 */
1370*25c28e83SPiotr Jasiukajtis.update12:
1371*25c28e83SPiotr Jasiukajtis	cmp	counter,1
1372*25c28e83SPiotr Jasiukajtis	ble	.cont12
1373*25c28e83SPiotr Jasiukajtis	nop
1374*25c28e83SPiotr Jasiukajtis
1375*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%i1
1376*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
1377*25c28e83SPiotr Jasiukajtis
1378*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
1379*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1380*25c28e83SPiotr Jasiukajtis
/* delay slot: counter = 1 on return to .cont12 */
1381*25c28e83SPiotr Jasiukajtis	ba	.cont12
1382*25c28e83SPiotr Jasiukajtis	mov	1,counter
1383*25c28e83SPiotr Jasiukajtis
1384*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update13 - special-input handler that resumes at .cont13.
 *
 * Tests the 32-bit pattern in %g5 (the float copy is taken from %f15):
 *   - counter <= 1: return to .cont13 at once (only %o3 was written).
 *   - (%g5 & 0x7fffffff) == 0 or %g5 < 0: step %i1 back by stridex in
 *     place, save it in tmp_px and counter - 1 in tmp_counter, return
 *     with counter = 1.
 *   - otherwise: rescale the value and redo the stage (5_0)/(5_1) setup
 *     (%l6, %o3, %l1, tmp0+4, %f17, %f0, %f46, %f50).
 *
 * Rescale: fitod/fdtos convert the integer held in %f15 to a float of
 * that numeric value, fmuls scales it by FTWO, and 0x4b000000
 * (150 << 23) is subtracted from the new bit pattern to form %g5.
 *
 * NOTE(review): this reads like subnormal handling (assuming FTWO is
 * 2.0f and %f15 holds the same bits as %g5); neither is visible in this
 * chunk - confirm.
 */
1385*25c28e83SPiotr Jasiukajtis.update13:
1386*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%o3
1387*25c28e83SPiotr Jasiukajtis	cmp	counter,1
1388*25c28e83SPiotr Jasiukajtis	ble	.cont13
1389*25c28e83SPiotr Jasiukajtis
/* delay slot of the ble: %o3 = 0x7ffffc00 + 0x3ff = 0x7fffffff */
1390*25c28e83SPiotr Jasiukajtis	add	%o3,0x3ff,%o3
1391*25c28e83SPiotr Jasiukajtis
/* the bz is not annulled: the cmp after it is its delay slot */
1392*25c28e83SPiotr Jasiukajtis	andcc	%g5,%o3,%g0
1393*25c28e83SPiotr Jasiukajtis	bz	1f
1394*25c28e83SPiotr Jasiukajtis
1395*25c28e83SPiotr Jasiukajtis	cmp	%g5,0
1396*25c28e83SPiotr Jasiukajtis	bl,a	1f
1397*25c28e83SPiotr Jasiukajtis	nop
1398*25c28e83SPiotr Jasiukajtis
/* rescale %f15 and rebuild %g5 through the tmp3 stack slot */
1399*25c28e83SPiotr Jasiukajtis	fitod	%f15,%f0
1400*25c28e83SPiotr Jasiukajtis	fdtos	%f0,%f15
1401*25c28e83SPiotr Jasiukajtis	fmuls	%f15,FTWO,%f15
1402*25c28e83SPiotr Jasiukajtis	st	%f15,[%fp+tmp3]
1403*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%g5
1404*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x4b000000),%o3
1405*25c28e83SPiotr Jasiukajtis	sub	%g5,%o3,%g5
1406*25c28e83SPiotr Jasiukajtis
/* redo the per-element setup from the new %g5/%f15 */
1407*25c28e83SPiotr Jasiukajtis	fands	%f15,DC0,%f17		! (4_0) dfx0 = vis_fand(ddx0,DC0);
1408*25c28e83SPiotr Jasiukajtis
1409*25c28e83SPiotr Jasiukajtis	sra	%g5,13,%l6		! (5_0) si1 = ax1 >> 13;
1410*25c28e83SPiotr Jasiukajtis	sra	%g5,24,%o3		! (5_0) iexp1 = ax1 >> 24;
1411*25c28e83SPiotr Jasiukajtis	and	%l6,2032,%l6		! (5_0) si1 &= 0x7f0;
1412*25c28e83SPiotr Jasiukajtis	fpsub32s	%f15,%f17,%f17	! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1413*25c28e83SPiotr Jasiukajtis
1414*25c28e83SPiotr Jasiukajtis	ldd	[%l6+TBL],%f46		! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
1415*25c28e83SPiotr Jasiukajtis	sub	%l0,%o3,%l1		! (5_0) iexp1 = 0x3f - iexp1;
1416*25c28e83SPiotr Jasiukajtis
1417*25c28e83SPiotr Jasiukajtis	add	%l6,TBL,%l6		! (5_0) addr1 = (char*)TBL + si1;
1418*25c28e83SPiotr Jasiukajtis
1419*25c28e83SPiotr Jasiukajtis	sllx	%l1,23,%l1		! (5_0) lexp1 = iexp1 << 23;
1420*25c28e83SPiotr Jasiukajtis	st	%l1,[%fp+tmp0+4]	! (4_0) fdx0 = *((double*)lexp0);
1421*25c28e83SPiotr Jasiukajtis
1422*25c28e83SPiotr Jasiukajtis	fitod	%f17,%f0		! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
1423*25c28e83SPiotr Jasiukajtis
1424*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f46,%f46		! (5_1) xx1 = dtmp1 * tbl_div1;
1425*25c28e83SPiotr Jasiukajtis	ba	.cont13
1426*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f46,%f50		! (5_1) res1 = K3 * xx1;
/* zero-magnitude or negative pattern: record restart point, counter = 1 */
1427*25c28e83SPiotr Jasiukajtis1:
1428*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%i1
1429*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
1430*25c28e83SPiotr Jasiukajtis
1431*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
1432*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1433*25c28e83SPiotr Jasiukajtis
1434*25c28e83SPiotr Jasiukajtis	ba	.cont13
1435*25c28e83SPiotr Jasiukajtis	mov	1,counter
1436*25c28e83SPiotr Jasiukajtis
1437*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update14 - shorten the current pass to two elements, resume at
 * .cont14.
 *
 * If counter <= 2 control returns to .cont14; the delay-slot sub has
 * still written %o3 = %o5 - stridex.  Otherwise %o5 - 2*stridex is saved
 * in tmp_px, counter - 2 is saved in tmp_counter, and .cont14 is resumed
 * with counter forced to 2.
 *
 * NOTE(review): tmp_px/tmp_counter look like a restart point that code
 * outside this chunk reloads after the shortened pass - confirm.
 */
1438*25c28e83SPiotr Jasiukajtis.update14:
1439*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1440*25c28e83SPiotr Jasiukajtis	ble	.cont14
/* delay slot: executes whether or not the ble is taken */
1441*25c28e83SPiotr Jasiukajtis	sub	%o5,stridex,%o3
1442*25c28e83SPiotr Jasiukajtis
1443*25c28e83SPiotr Jasiukajtis	sub	%o3,stridex,%o3
1444*25c28e83SPiotr Jasiukajtis	stx	%o3,[%fp+tmp_px]
1445*25c28e83SPiotr Jasiukajtis
1446*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter
1447*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1448*25c28e83SPiotr Jasiukajtis
/* delay slot: counter = 2 on return to .cont14 */
1449*25c28e83SPiotr Jasiukajtis	ba	.cont14
1450*25c28e83SPiotr Jasiukajtis	mov	2,counter
1451*25c28e83SPiotr Jasiukajtis
1452*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update15 - special-input handler that resumes at .cont15.
 *
 * Tests the 32-bit pattern in %g1 (the float copy is taken from %f18):
 *   - counter <= 2: return to .cont15 at once (%i3 and %o3 were written).
 *   - (%g1 & 0x7fffffff) == 0 or %g1 < 0: save %o5 - 2*stridex in tmp_px
 *     and counter - 2 in tmp_counter, return with counter = 2.
 *   - otherwise: rescale the value and redo the stage (0_0) setup
 *     (%o0, %i3, %g5, %f0, %f30, %f54, %f56).
 *
 * Rescale: fitod/fdtos convert the integer held in %f18 to a float of
 * that numeric value, fmuls scales it by FTWO, and 0x4b000000
 * (150 << 23) is subtracted from the new bit pattern to form %g1.
 *
 * NOTE(review): this reads like subnormal handling (assuming FTWO is
 * 2.0f and %f18 holds the same bits as %g1); neither is visible in this
 * chunk - confirm.
 */
1453*25c28e83SPiotr Jasiukajtis.update15:
1454*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%i3
1455*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1456*25c28e83SPiotr Jasiukajtis	ble	.cont15
/* delay slot: %o3 = %o5 - stridex on every path */
1457*25c28e83SPiotr Jasiukajtis	sub	%o5,stridex,%o3
1458*25c28e83SPiotr Jasiukajtis
/* %i3 = 0x7ffffc00 + 0x3ff = 0x7fffffff */
1459*25c28e83SPiotr Jasiukajtis	add	%i3,0x3ff,%i3
1460*25c28e83SPiotr Jasiukajtis
/* the bz is not annulled, so %o3 becomes %o5 - 2*stridex either way */
1461*25c28e83SPiotr Jasiukajtis	andcc	%g1,%i3,%g0
1462*25c28e83SPiotr Jasiukajtis	bz	1f
1463*25c28e83SPiotr Jasiukajtis	sub	%o3,stridex,%o3
1464*25c28e83SPiotr Jasiukajtis
1465*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1466*25c28e83SPiotr Jasiukajtis	bl,a	1f
1467*25c28e83SPiotr Jasiukajtis	nop
1468*25c28e83SPiotr Jasiukajtis
/* rescale %f18 and rebuild %g1 through the tmp3 stack slot */
1469*25c28e83SPiotr Jasiukajtis	fitod	%f18,%f0
1470*25c28e83SPiotr Jasiukajtis	fdtos	%f0,%f18
1471*25c28e83SPiotr Jasiukajtis	fmuls	%f18,FTWO,%f18
1472*25c28e83SPiotr Jasiukajtis	st	%f18,[%fp+tmp3]
1473*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%g1
1474*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x4b000000),%o3
1475*25c28e83SPiotr Jasiukajtis	sub	%g1,%o3,%g1
1476*25c28e83SPiotr Jasiukajtis
/* redo the per-element setup from the new %g1/%f18 */
1477*25c28e83SPiotr Jasiukajtis	fands	%f18,DC0,%f0		! (0_0) dfx0 = vis_fand(ddx0,DC0);
1478*25c28e83SPiotr Jasiukajtis	sra	%g1,13,%o0		! (0_0) si0 = ax0 >> 13;
1479*25c28e83SPiotr Jasiukajtis	and	%o0,2032,%o0		! (0_0) si0 &= 0x7f0;
1480*25c28e83SPiotr Jasiukajtis
1481*25c28e83SPiotr Jasiukajtis	ldd	[%o0+TBL],%f54		! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
1482*25c28e83SPiotr Jasiukajtis	fpsub32s	%f18,%f0,%f30	! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1483*25c28e83SPiotr Jasiukajtis
1484*25c28e83SPiotr Jasiukajtis	sra	%g1,24,%i3		! (0_0) iexp0 = ax0 >> 24;
1485*25c28e83SPiotr Jasiukajtis
1486*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%g5		! (0_0) iexp0 = 0x3f - iexp0;
1487*25c28e83SPiotr Jasiukajtis
1488*25c28e83SPiotr Jasiukajtis	ba	.cont15
1489*25c28e83SPiotr Jasiukajtis	fitod	%f30,%f56		! (0_0) dtmp0 = (double)(((int*)dfx0)[0]);
/* %o3 holds %o5 - 2*stridex here */
1490*25c28e83SPiotr Jasiukajtis1:
1491*25c28e83SPiotr Jasiukajtis	stx	%o3,[%fp+tmp_px]
1492*25c28e83SPiotr Jasiukajtis
1493*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter
1494*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1495*25c28e83SPiotr Jasiukajtis
1496*25c28e83SPiotr Jasiukajtis	ba	.cont15
1497*25c28e83SPiotr Jasiukajtis	mov	2,counter
1498*25c28e83SPiotr Jasiukajtis
1499*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update16 - shorten the current pass to three elements, resume at
 * .cont16.
 *
 * If counter <= 3 control returns to .cont16; the delay-slot sub has
 * still written %o3 = %l7 - stridex2.  Otherwise
 * %l7 - stridex2 - stridex is saved in tmp_px, counter - 3 is saved in
 * tmp_counter, and .cont16 is resumed with counter forced to 3.
 *
 * NOTE(review): tmp_px/tmp_counter look like a restart point that code
 * outside this chunk reloads after the shortened pass - confirm.
 */
1500*25c28e83SPiotr Jasiukajtis.update16:
1501*25c28e83SPiotr Jasiukajtis	cmp	counter,3
1502*25c28e83SPiotr Jasiukajtis	ble	.cont16
/* delay slot: executes whether or not the ble is taken */
1503*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex2,%o3
1504*25c28e83SPiotr Jasiukajtis
1505*25c28e83SPiotr Jasiukajtis	sub	%o3,stridex,%o3
1506*25c28e83SPiotr Jasiukajtis	stx	%o3,[%fp+tmp_px]
1507*25c28e83SPiotr Jasiukajtis
1508*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter
1509*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1510*25c28e83SPiotr Jasiukajtis
/* delay slot: counter = 3 on return to .cont16 */
1511*25c28e83SPiotr Jasiukajtis	ba	.cont16
1512*25c28e83SPiotr Jasiukajtis	mov	3,counter
1513*25c28e83SPiotr Jasiukajtis
1514*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update17 - special-input handler that resumes at .cont17.
 *
 * Tests the 32-bit pattern in %i4 (the float copy is taken from %f19):
 *   - counter <= 3: return to .cont17 at once (%i3 and %o3 were written).
 *   - (%i4 & 0x7fffffff) == 0 or %i4 < 0: save %l7 - stridex2 - stridex
 *     in tmp_px and counter - 3 in tmp_counter, return with counter = 3.
 *   - otherwise: rescale the value and redo the stage (1_0) setup
 *     (%g5, %i0, %o7, tmp1+4, %f0, %f31, %f44, %f50).
 *
 * Rescale: fitod/fdtos convert the integer held in %f19 to a float of
 * that numeric value, fmuls scales it by FTWO, and 0x4b000000
 * (150 << 23) is subtracted from the new bit pattern to form %i4.
 *
 * NOTE(review): this reads like subnormal handling (assuming FTWO is
 * 2.0f and %f19 holds the same bits as %i4); neither is visible in this
 * chunk - confirm.
 */
1515*25c28e83SPiotr Jasiukajtis.update17:
1516*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%i3
1517*25c28e83SPiotr Jasiukajtis	cmp	counter,3
1518*25c28e83SPiotr Jasiukajtis	ble	.cont17
/* delay slot: %o3 = %l7 - stridex2 on every path */
1519*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex2,%o3
1520*25c28e83SPiotr Jasiukajtis
/* %i3 = 0x7ffffc00 + 0x3ff = 0x7fffffff */
1521*25c28e83SPiotr Jasiukajtis	add	%i3,0x3ff,%i3
1522*25c28e83SPiotr Jasiukajtis
/* the bz is not annulled, so %o3 loses another stridex either way */
1523*25c28e83SPiotr Jasiukajtis	andcc	%i4,%i3,%g0
1524*25c28e83SPiotr Jasiukajtis	bz	1f
1525*25c28e83SPiotr Jasiukajtis	sub	%o3,stridex,%o3
1526*25c28e83SPiotr Jasiukajtis
1527*25c28e83SPiotr Jasiukajtis	cmp	%i4,0
1528*25c28e83SPiotr Jasiukajtis	bl,a	1f
1529*25c28e83SPiotr Jasiukajtis	nop
1530*25c28e83SPiotr Jasiukajtis
/* rescale %f19 and rebuild %i4 through the tmp3 stack slot */
1531*25c28e83SPiotr Jasiukajtis	fitod	%f19,%f0
1532*25c28e83SPiotr Jasiukajtis	fdtos	%f0,%f19
1533*25c28e83SPiotr Jasiukajtis	fmuls	%f19,FTWO,%f19
1534*25c28e83SPiotr Jasiukajtis	st	%f19,[%fp+tmp3]
1535*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%i4
1536*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x4b000000),%o3
1537*25c28e83SPiotr Jasiukajtis	sub	%i4,%o3,%i4
1538*25c28e83SPiotr Jasiukajtis
/* redo the per-element setup from the new %i4/%f19 */
1539*25c28e83SPiotr Jasiukajtis	fands	%f19,DC0,%f0		! (0_0) dfx0 = vis_fand(ddx0,DC0);
1540*25c28e83SPiotr Jasiukajtis
1541*25c28e83SPiotr Jasiukajtis	sra	%i4,13,%g5		! (1_0) si1 = ax1 >> 13;
1542*25c28e83SPiotr Jasiukajtis
1543*25c28e83SPiotr Jasiukajtis	sra	%i4,24,%i0		! (1_0) iexp1 = ax1 >> 24;
1544*25c28e83SPiotr Jasiukajtis	and	%g5,2032,%o7		! (1_0) si1 &= 0x7f0;
1545*25c28e83SPiotr Jasiukajtis	fpsub32s	%f19,%f0,%f31	! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1546*25c28e83SPiotr Jasiukajtis
1547*25c28e83SPiotr Jasiukajtis	ldd	[%o7+TBL],%f44		! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
1548*25c28e83SPiotr Jasiukajtis	sub	%l0,%i0,%i0		! (1_0) iexp1 = 0x3f - iexp1;
1549*25c28e83SPiotr Jasiukajtis
1550*25c28e83SPiotr Jasiukajtis	sllx	%i0,23,%i0		! (1_0) lexp1 = iexp1 << 23;
1551*25c28e83SPiotr Jasiukajtis	fitod	%f31,%f50		! (1_0) dtmp0 = (double)(((int*)dfx0)[0]);
1552*25c28e83SPiotr Jasiukajtis
1553*25c28e83SPiotr Jasiukajtis	st	%i0,[%fp+tmp1+4]	! (0_0) fdx0 = *((double*)lexp0);
1554*25c28e83SPiotr Jasiukajtis
1555*25c28e83SPiotr Jasiukajtis	add	%o7,TBL,%o7		! (1_0) addr0 = (char*)TBL + si0;
1556*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f44,%f44		! (1_0) xx0 = dtmp0 * tbl_div0;
1557*25c28e83SPiotr Jasiukajtis
1558*25c28e83SPiotr Jasiukajtis	ba	.cont17
1559*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f44,%f50		! (1_0) res1 = K3 * xx1;
/* %o3 holds %l7 - stridex2 - stridex here */
1560*25c28e83SPiotr Jasiukajtis1:
1561*25c28e83SPiotr Jasiukajtis	stx	%o3,[%fp+tmp_px]
1562*25c28e83SPiotr Jasiukajtis
1563*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter
1564*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1565*25c28e83SPiotr Jasiukajtis
1566*25c28e83SPiotr Jasiukajtis	ba	.cont17
1567*25c28e83SPiotr Jasiukajtis	mov	3,counter
1568*25c28e83SPiotr Jasiukajtis
1569*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update18 - shorten the current pass to four elements, resume at
 * .cont18.
 *
 * The fpadd32 in the delay slot of the ble executes on both paths, so
 * %f0 = fpadd32(%f20, %f52) is always produced before .cont18 is reached.
 * If counter <= 4 nothing else changes.  Otherwise %l7 - stridex2 is
 * saved in tmp_px, counter - 4 is saved in tmp_counter, and .cont18 is
 * resumed with counter forced to 4.  %i3 is scratch.
 *
 * NOTE(review): tmp_px/tmp_counter look like a restart point that code
 * outside this chunk reloads after the shortened pass - confirm.
 */
1570*25c28e83SPiotr Jasiukajtis.update18:
1571*25c28e83SPiotr Jasiukajtis	cmp	counter,4
1572*25c28e83SPiotr Jasiukajtis	ble	.cont18
1573*25c28e83SPiotr Jasiukajtis	fpadd32	%f20,%f52,%f0		! (2_1) dres0 = vis_fpadd32(dres0,fdx0);
1574*25c28e83SPiotr Jasiukajtis
1575*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex2,%i3
1576*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_px]
1577*25c28e83SPiotr Jasiukajtis
1578*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
1579*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1580*25c28e83SPiotr Jasiukajtis
/* delay slot: counter = 4 on return to .cont18 */
1581*25c28e83SPiotr Jasiukajtis	ba	.cont18
1582*25c28e83SPiotr Jasiukajtis	mov	4,counter
1583*25c28e83SPiotr Jasiukajtis
1584*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update19 - special-input handler that resumes at .cont19.
 *
 * Every exit executes fmuld %f50,%f46,%f24 in the delay slot of its
 * branch to .cont19, so %f24 is overwritten with that product on all
 * three paths.
 *
 * Tests the 32-bit pattern in %g1 (the float copy is taken from %f24):
 *   - counter <= 4: return to .cont19 at once (only %i3 was written).
 *   - (%g1 & 0x7fffffff) == 0 or %g1 < 0: save %l7 - stridex2 in tmp_px
 *     and counter - 4 in tmp_counter, return with counter = 4.
 *   - otherwise: rescale the value and redo the stage (2_0) setup
 *     (%i0, %i3, %g5, tmp2, %f8, %f12, %f30, %f56).
 *
 * Rescale: fitod/fdtos convert the integer held in %f24 to a float of
 * that numeric value (using %f24/%f25 as the double temporary), fmuls
 * scales it by FTWO, and 0x4b000000 (150 << 23) is subtracted from the
 * new bit pattern to form %g1.
 *
 * NOTE(review): this reads like subnormal handling (assuming FTWO is
 * 2.0f and %f24 holds the same bits as %g1); neither is visible in this
 * chunk - confirm.
 */
1585*25c28e83SPiotr Jasiukajtis.update19:
1586*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%i3
1587*25c28e83SPiotr Jasiukajtis	cmp	counter,4
/* annulled: the fmuld runs only when this branch is taken */
1588*25c28e83SPiotr Jasiukajtis	ble,a	.cont19
1589*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f46,%f24		! (3_0) xx1 = dtmp1 * tbl_div1;
1590*25c28e83SPiotr Jasiukajtis
/* %i3 = 0x7ffffc00 + 0x3ff = 0x7fffffff */
1591*25c28e83SPiotr Jasiukajtis	add	%i3,0x3ff,%i3
1592*25c28e83SPiotr Jasiukajtis
1593*25c28e83SPiotr Jasiukajtis	andcc	%g1,%i3,%g0
1594*25c28e83SPiotr Jasiukajtis	bz	1f
1595*25c28e83SPiotr Jasiukajtis	nop
1596*25c28e83SPiotr Jasiukajtis
1597*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1598*25c28e83SPiotr Jasiukajtis	bl,a	1f
1599*25c28e83SPiotr Jasiukajtis	nop
1600*25c28e83SPiotr Jasiukajtis
/* rescale %f24 and rebuild %g1 through the tmp3 stack slot */
1601*25c28e83SPiotr Jasiukajtis	fitod	%f24,%f24
1602*25c28e83SPiotr Jasiukajtis	fdtos	%f24,%f24
1603*25c28e83SPiotr Jasiukajtis	fmuls	%f24,FTWO,%f24
1604*25c28e83SPiotr Jasiukajtis	st	%f24,[%fp+tmp3]
1605*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%g1
1606*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x4b000000),%i3
1607*25c28e83SPiotr Jasiukajtis	sub	%g1,%i3,%g1
1608*25c28e83SPiotr Jasiukajtis
/* redo the per-element setup from the new %g1/%f24 */
1609*25c28e83SPiotr Jasiukajtis	fands	%f24,DC0,%f8		! (2_0) dfx0 = vis_fand(ddx0,DC0);
1610*25c28e83SPiotr Jasiukajtis	sra	%g1,13,%i0		! (2_0) si0 = ax0 >> 13;
1611*25c28e83SPiotr Jasiukajtis
1612*25c28e83SPiotr Jasiukajtis	and	%i0,2032,%i0		! (2_0) si0 &= 0x7f0;
1613*25c28e83SPiotr Jasiukajtis
1614*25c28e83SPiotr Jasiukajtis	ldd	[%i0+TBL],%f30		! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
1615*25c28e83SPiotr Jasiukajtis	fpsub32s	%f24,%f8,%f12	! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1616*25c28e83SPiotr Jasiukajtis
1617*25c28e83SPiotr Jasiukajtis	sra	%g1,24,%i3		! (2_0) iexp0 = ax0 >> 24;
1618*25c28e83SPiotr Jasiukajtis
1619*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%g5		! (2_0) iexp0 = 0x3f - iexp0;
1620*25c28e83SPiotr Jasiukajtis
/* shift is by 23 bits; only the low 32 bits are stored to tmp2+0 below */
1621*25c28e83SPiotr Jasiukajtis	sllx	%g5,23,%g5		! (2_0) lexp0 = iexp0 << 55;
1622*25c28e83SPiotr Jasiukajtis	add	%i0,TBL,%i0		! (2_0) addr0 = (char*)TBL + si0;
1623*25c28e83SPiotr Jasiukajtis	fitod	%f12,%f56		! (2_0) dtmp0 = (double)(((int*)dfx0)[0]);
1624*25c28e83SPiotr Jasiukajtis
1625*25c28e83SPiotr Jasiukajtis	st	%g5,[%fp+tmp2]		! (2_0) fdx0 = *((double*)lexp0);
1626*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f30,%f30		! (2_0) xx0 = dtmp0 * tbl_div0;
1627*25c28e83SPiotr Jasiukajtis
1628*25c28e83SPiotr Jasiukajtis	ba	.cont19
1629*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f46,%f24		! (3_0) xx1 = dtmp1 * tbl_div1;
/* zero-magnitude or negative pattern: record restart point, counter = 4 */
1630*25c28e83SPiotr Jasiukajtis1:
1631*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex2,%i3
1632*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_px]
1633*25c28e83SPiotr Jasiukajtis
1634*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
1635*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1636*25c28e83SPiotr Jasiukajtis
1637*25c28e83SPiotr Jasiukajtis	mov	4,counter
1638*25c28e83SPiotr Jasiukajtis	ba	.cont19
1639*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f46,%f24		! (3_0) xx1 = dtmp1 * tbl_div1;
1640*25c28e83SPiotr Jasiukajtis
1641*25c28e83SPiotr Jasiukajtis	.align	16
1642*25c28e83SPiotr Jasiukajtis.update20:	! fix-up path that always rejoins at .cont20; caps counter at 5 and saves the remainder
1643*25c28e83SPiotr Jasiukajtis	cmp	counter,5		! at most 5 elements left?
1644*25c28e83SPiotr Jasiukajtis	ble	.cont20			! yes: nothing to save, rejoin at .cont20
1645*25c28e83SPiotr Jasiukajtis	nop				! delay slot
1646*25c28e83SPiotr Jasiukajtis
1647*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex,%i3		! %i3 = %l7 - stridex
1648*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_px]	! saved in tmp_px; NOTE(review): presumably the restart pointer for a later pass, confirm
1649*25c28e83SPiotr Jasiukajtis
1650*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter	! elements beyond the first 5 ...
1651*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! ... are recorded in tmp_counter
1652*25c28e83SPiotr Jasiukajtis
1653*25c28e83SPiotr Jasiukajtis	ba	.cont20			! rejoin at .cont20
1654*25c28e83SPiotr Jasiukajtis	mov	5,counter		! delay slot: counter = 5
1655*25c28e83SPiotr Jasiukajtis
1656*25c28e83SPiotr Jasiukajtis	.align	16
1657*25c28e83SPiotr Jasiukajtis.update21:	! (3_0) fix-up path that always rejoins at .cont21; either rebuilds the stage inputs from %o5 or splits the run at 5 elements
1658*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%i3	! upper bits of the mask, completed below; executed even on the early exit
1659*25c28e83SPiotr Jasiukajtis	cmp	counter,5		! at most 5 elements left?
1660*25c28e83SPiotr Jasiukajtis	ble,a	.cont21			! yes: rejoin at .cont21 with only %i3 changed
1661*25c28e83SPiotr Jasiukajtis	nop				! delay slot (annulled when the branch is not taken)
1662*25c28e83SPiotr Jasiukajtis
1663*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex,%i4		! %i4 = %l7 - stridex, address read in the delay slot below
1664*25c28e83SPiotr Jasiukajtis	add	%i3,0x3ff,%i3		! %i3 = 0x7fffffff, every bit except the sign
1665*25c28e83SPiotr Jasiukajtis
1666*25c28e83SPiotr Jasiukajtis	andcc	%o5,%i3,%g0		! test the non-sign bits of %o5
1667*25c28e83SPiotr Jasiukajtis	bz	1f			! all clear (+0 or -0 bit pattern): go to the split path
1668*25c28e83SPiotr Jasiukajtis	ld	[%i4],%f8		! delay slot: %f8 = word at [%i4], loaded on both outcomes
1669*25c28e83SPiotr Jasiukajtis
1670*25c28e83SPiotr Jasiukajtis	cmp	%o5,0			! sign test on %o5
1671*25c28e83SPiotr Jasiukajtis	bl,a	1f			! negative: go to the split path
1672*25c28e83SPiotr Jasiukajtis	nop				! delay slot (annulled when the branch is not taken)
1673*25c28e83SPiotr Jasiukajtis
1674*25c28e83SPiotr Jasiukajtis	fitod	%f8,%f8			! reinterpret the loaded word as an integer and convert it to double
1675*25c28e83SPiotr Jasiukajtis	fdtos	%f8,%f8			! narrow back to single precision
1676*25c28e83SPiotr Jasiukajtis	fmuls	%f8,FTWO,%f8		! scale by FTWO
1677*25c28e83SPiotr Jasiukajtis	st	%f8,[%fp+tmp3]		! bounce the scaled value through tmp3 ...
1678*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%o5		! ... to get its bit pattern into %o5
1679*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x4b000000),%i3	! %i3 = 0x4b000000
1680*25c28e83SPiotr Jasiukajtis	sub	%o5,%i3,%o5		! NOTE(review): presumably undoes the exponent offset added by the int-to-float rescaling; confirm
1681*25c28e83SPiotr Jasiukajtis
1682*25c28e83SPiotr Jasiukajtis	fands	%f8,DC0,%f24		! (2_0) dfx0 = vis_fand(ddx0,DC0);
1683*25c28e83SPiotr Jasiukajtis
1684*25c28e83SPiotr Jasiukajtis	sra	%o5,13,%o1		! (3_0) si1 = ax1 >> 13;
1685*25c28e83SPiotr Jasiukajtis
1686*25c28e83SPiotr Jasiukajtis	sra	%o5,24,%i3		! (3_0) iexp1 = ax1 >> 24;
1687*25c28e83SPiotr Jasiukajtis	and	%o1,2032,%o1		! (3_0) si1 &= 0x7f0;
1688*25c28e83SPiotr Jasiukajtis	fpsub32s	%f8,%f24,%f24	! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1689*25c28e83SPiotr Jasiukajtis
1690*25c28e83SPiotr Jasiukajtis	ldd	[%o1+TBL],%f8		! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
1691*25c28e83SPiotr Jasiukajtis	sub	%l0,%i3,%i3		! (3_0) iexp1 = 0x3f - iexp1;
1692*25c28e83SPiotr Jasiukajtis
1693*25c28e83SPiotr Jasiukajtis	sllx	%i3,23,%i3		! (3_0) lexp1 = iexp1 << 23;
1694*25c28e83SPiotr Jasiukajtis	fitod	%f24,%f50		! (3_0) dtmp1 = (double)(((int*)dfx0)[1]);
1695*25c28e83SPiotr Jasiukajtis
1696*25c28e83SPiotr Jasiukajtis	add	%o1,TBL,%o1		! (3_0) addr1 = (char*)TBL + si1;
1697*25c28e83SPiotr Jasiukajtis	st	%i3,[%fp+tmp2+4]	! (2_0) fdx0 = *((double*)lexp0);
1698*25c28e83SPiotr Jasiukajtis
1699*25c28e83SPiotr Jasiukajtis	fmuld	%f50,%f8,%f24		! (3_0) xx1 = dtmp1 * tbl_div1;
1700*25c28e83SPiotr Jasiukajtis
1701*25c28e83SPiotr Jasiukajtis	ba	.cont21			! rejoin at .cont21 with the recomputed values
1702*25c28e83SPiotr Jasiukajtis	fmuld	K3,%f24,%f50		! (3_0) res1 = K3 * xx1;
1703*25c28e83SPiotr Jasiukajtis1:	! split path: save %l7 - stridex and the surplus count, then continue with counter = 5
1704*25c28e83SPiotr Jasiukajtis	sub	%l7,stridex,%i3		! %i3 = %l7 - stridex
1705*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_px]	! saved in tmp_px; NOTE(review): presumably the restart pointer for a later pass, confirm
1706*25c28e83SPiotr Jasiukajtis
1707*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter	! elements beyond the first 5 ...
1708*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]	! ... are recorded in tmp_counter
1709*25c28e83SPiotr Jasiukajtis
1710*25c28e83SPiotr Jasiukajtis	ba	.cont21			! rejoin at .cont21
1711*25c28e83SPiotr Jasiukajtis	mov	5,counter		! delay slot: counter = 5
1712*25c28e83SPiotr Jasiukajtis
1713*25c28e83SPiotr Jasiukajtis	.align	16
1714*25c28e83SPiotr Jasiukajtis.exit:	! common return point of __vrsqrtf
1715*25c28e83SPiotr Jasiukajtis	ret			! return to the caller
1716*25c28e83SPiotr Jasiukajtis	restore			! delay slot: restore the caller register window
1717*25c28e83SPiotr Jasiukajtis
1718*25c28e83SPiotr Jasiukajtis	SET_SIZE(__vrsqrtf)	! NOTE(review): macro is not defined in this chunk; presumably records the symbol size, confirm in the included headers
1719*25c28e83SPiotr Jasiukajtis
1720