/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vatan2f.S"

#include "libm.h"

	RO_DATA
	.align	64
.CONST_TBL:
	.word	0xbff921fb, 0x54442d18	! -M_PI_2
	.word	0x3ff921fb, 0x54442d18	! M_PI_2
	.word	0xbff921fb, 0x54442d18	! -M_PI_2
	.word	0x3ff921fb, 0x54442d18	! M_PI_2
	.word	0xc00921fb, 0x54442d18	! -M_PI
	.word	0x400921fb, 0x54442d18	! M_PI
	.word	0x80000000, 0x00000000	! -0.0
	.word	0x00000000, 0x00000000	! 0.0

	.word	0xbff00000, 0x00000000	! -1.0
	.word	0x3ff00000, 0x00000000	! 1.0

	.word	0x3fefffff, 0xfe79bf93	! K0 = 9.99999997160545464888e-01
	.word	0xbfd55552, 0xf0db4320	! K1 = -3.33332762919825514315e-01
	.word	0x3fc998f8, 0x2493d066	! K2 = 1.99980752811487135558e-01
	.word	0xbfc240b8, 0xd994abf9	! K3 = -1.42600160828209047720e-01
	.word	0x3fbbfc9e, 0x8c2b0243	! K4 = 1.09323415013030928421e-01
	.word	0xbfb56013, 0x64b1cac3	! K5 = -8.34972496830160174704e-02
	.word	0x3fad3ad7, 0x9f53e142	! K6 = 5.70895559303061900411e-02
	.word	0xbf9f148f, 0x2a829af1	! K7 = -3.03518647857811706139e-02
	.word	0x3f857a8c, 0x747ed314	! K8 = 1.04876492549493055747e-02
	.word	0xbf5bdf39, 0x729124b6	! K9 = -1.70117006406859722727e-03
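
! Note: K0 .. K9 above appear to be minimax-style coefficients of the
! odd degree-19 approximation
!	atan(t) ~= t * (K0 + K1*t^2 + K2*t^4 + ... + K9*t^18), |t| <= 1
! (K0 ~= 1, K1 ~= -1/3, K2 ~= 1/5, ... as in the atan series),
! evaluated by Horner's rule in the code below.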

	.word	0x3fe921fb, 0x54442d18	! M_PI_4
	.word	0x36a00000, 0x00000000	! 2^(-149)

#define counter		%o3
#define stridex		%i4
#define stridey		%i5
#define stridez		%l1
#define cmul_arr	%i0
#define cadd_arr	%i2
#define _0x7fffffff	%l0
#define _0x7f800000	%l2

#define K0	%f42
#define K1	%f44
#define K2	%f46
#define K3	%f48
#define K4	%f50
#define K5	%f52
#define K6	%f54
#define K7	%f56
#define K8	%f58
#define K9	%f60

#define tmp_counter	STACK_BIAS-32
#define tmp_py		STACK_BIAS-24
#define tmp_px		STACK_BIAS-16
#define tmp_pz		STACK_BIAS-8

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x20

!--------------------------------------------------------------------
! !!!!! vatan2f algorithm !!!!!
!  uy0 = *(int*)py;
!  ux0 = *(int*)px;
!  ay0 = uy0 & 0x7fffffff;
!  ax0 = ux0 & 0x7fffffff;
!  if ( ax0 >= 0x7f800000 || ay0 >= 0x7f800000 )
!  {
!    /* |X| or |Y| = Nan */
!    if ( ax0 > 0x7f800000 || ay0 > 0x7f800000 )
!    {
!      ftmp0 = *(float*)&ax0 * *(float*)&ay0;
!      *pz = ftmp0;
!    }
!    signx0 = (unsigned)ux0 >> 30;
!    signx0 &= 2;
!    signy0 = uy0 >> 31;
!    if (ay0 == 0x7f800000)
!      signx0 = (ax0 == 0x7f800000) ? signx0 + 1 : 2;
!    else
!      signx0 += signx0;
!    res = signx0 * M_PI_4;
!    signy0 <<= 3;
!    dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0);
!    res *= dtmp0;
!    ftmp0 = (float) res;
!    *pz = ftmp0;
!    goto next;
!  }
!  if ( ax0 == 0 && ay0 == 0 )
!  {
!    signy0 = uy0 >> 28;
!    signx0 = ux0 >> 27;
!    ldiff0 = ax0 - ay0;
!    ldiff0 >>= 31;
!    signx0 &= -16;
!    signy0 &= -8;
!    ldiff0 <<= 5;
!    signx0 += signy0;
!    res = *(double*)((char*)(cadd_arr + 7) + ldiff0 + signx0 + signy0);
!    ftmp0 = (float) res;
!    *pz = ftmp0;
!    goto next;
!  }
!  ldiff0 = ax0 - ay0;
!  ldiff0 >>= 31;
!  addrc0 = (char*)px - (char*)py;
!  addrc0 &= ldiff0;
!  fy0 = *(float*)((char*)py + addrc0);
!  fx0 = *(float*)((char*)px - addrc0);
!  itmp0 = *(int*)&fy0;
!  if((itmp0 & 0x7fffffff) < 0x00800000)
!  {
!    itmp0 >>= 28;
!    itmp0 &= -8;
!    fy0 = fabsf(fy0);
!    dtmp0 = (double) *(int*)&fy0;
!    dtmp0 *= C2ONM149;
!    dsign = *(double*)((char*)cmul_arr + itmp0);
!    dtmp0 *= dsign;
!    y0 = dtmp0;
!  }
!  else
!    y0 = (double)fy0;
!  itmp0 = *(int*)&fx0;
!  if((itmp0 & 0x7fffffff) < 0x00800000)
!  {
!    itmp0 >>= 28;
!    itmp0 &= -8;
!    fx0 = fabsf(fx0);
!    dtmp0 = (double) *(int*)&fx0;
!    dtmp0 *= C2ONM149;
!    dsign = *(double*)((char*)cmul_arr + itmp0);
!    dtmp0 *= dsign;
!    x0 = dtmp0;
!  }
!  else
!    x0 = (double)fx0;
!  px += stridex;
!  py += stridey;
!  x0 = y0 / x0;
!  x20 = x0 * x0;
!  dtmp0 = K9 * x20;
!  dtmp0 += K8;
!  dtmp0 *= x20;
!  dtmp0 += K7;
!  dtmp0 *= x20;
!  dtmp0 += K6;
!  dtmp0 *= x20;
!  dtmp0 += K5;
!  dtmp0 *= x20;
!  dtmp0 += K4;
!  dtmp0 *= x20;
!  dtmp0 += K3;
!  dtmp0 *= x20;
!  dtmp0 += K2;
!  dtmp0 *= x20;
!  dtmp0 += K1;
!  dtmp0 *= x20;
!  dtmp0 += K0;
!  x0 = dtmp0 * x0;
!  signy0 = uy0 >> 28;
!  signy0 &= -8;
!  signx0 = ux0 >> 27;
!  signx0 &= -16;
!  ltmp0 = ldiff0 << 5;
!  ltmp0 += (char*)cadd_arr;
!  ltmp0 += signx0;
!  cadd0 = *(double*)(ltmp0 + signy0);
!  cmul0_ind = ldiff0 << 3;
!  cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
!  dtmp0 = cmul0 * x0;
!  dtmp0 = cadd0 + dtmp0;
!  ftmp0 = (float)dtmp0;
!  *pz = ftmp0;
!  pz += stridez;
!
!--------------------------------------------------------------------
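!
! For reference, a scalar C sketch of the main path above (illustrative
! only: vatan2f_one and its locals are hypothetical names, not part of
! this source; subnormal inputs and the NaN/Inf/zero special cases are
! omitted; fabsf is from <math.h>):
!
!   static float vatan2f_one(float y, float x)
!   {
!     double t, t2, p;
!     /* divide the smaller magnitude by the larger, so |t| <= 1 */
!     t  = (fabsf(x) >= fabsf(y)) ? (double)y / x : (double)x / y;
!     t2 = t * t;
!     p  = K9;                       /* Horner: atan(t)/t on |t| <= 1 */
!     p  = p * t2 + K8;  p = p * t2 + K7;  p = p * t2 + K6;
!     p  = p * t2 + K5;  p = p * t2 + K4;  p = p * t2 + K3;
!     p  = p * t2 + K2;  p = p * t2 + K1;  p = p * t2 + K0;
!     /* cadd0 in { +-0, +-pi/2, +-pi } and cmul0 in { +-1 } are read
!        from .CONST_TBL, indexed by the signs of x and y and by which
!        ratio was taken */
!     return (float)(cadd0 + cmul0 * (p * t));
!   }
!--------------------------------------------------------------------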

	ENTRY(__vatan2f)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,.CONST_TBL,g5)

#ifdef __sparcv9
	ldx	[%fp+STACK_BIAS+176],%l7
#else
	ld	[%fp+STACK_BIAS+92],%l7
#endif

	st	%i0,[%fp+tmp_counter]
	sethi	%hi(0x7ffffc00),_0x7fffffff
	add	_0x7fffffff,1023,_0x7fffffff
	or	%g0,%i2,%o2
	sll	%l7,2,stridez

	sethi	%hi(0x7f800000),_0x7f800000
	mov	%g5,%g1

	or	%g0,stridey,%o4
	add	%g1,56,cadd_arr

	sll	%o2,2,stridey
	add	%g1,72,cmul_arr

	ldd	[%g1+80],K0
	ldd	[%g1+80+8],K1
	ldd	[%g1+80+16],K2
	ldd	[%g1+80+24],K3
	ldd	[%g1+80+32],K4
	ldd	[%g1+80+40],K5
	ldd	[%g1+80+48],K6
	ldd	[%g1+80+56],K7
	ldd	[%g1+80+64],K8
	ldd	[%g1+80+72],K9

	sll	stridex,2,stridex

	stx	%i1,[%fp+tmp_py]
	stx	%i3,[%fp+tmp_px]
.begin:
	ld	[%fp+tmp_counter],counter
	ldx	[%fp+tmp_py],%i1
	ldx	[%fp+tmp_px],%i3
	st	%g0,[%fp+tmp_counter]
.begin1:
	subcc	counter,1,counter
	bneg,pn	%icc,.exit
	nop

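! In the comments below, a tag (i_j) appears to mark which of the six
! unrolled element slots (i = 0..5) and which loop generation (j) the
! value produced by that instruction belongs to.
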
	lda	[%i1]0x82,%l4		! (0_0) uy0 = *(int*)py;

	lda	[%i3]0x82,%l3		! (0_0) ux0 = *(int*)px;

	and	%l4,_0x7fffffff,%l7	! (0_0) ay0 = uy0 & 0x7fffffff;

	cmp	%l7,_0x7f800000
	bge,pn	%icc,.spec0
	and	%l3,_0x7fffffff,%l6	! (0_0) ax0 = ux0 & 0x7fffffff;

	cmp	%l6,_0x7f800000
	bge,pn	%icc,.spec0
	sethi	%hi(0x00800000),%o5

	cmp	%l6,%o5
	bl,pn	%icc,.spec1
	sub	%l6,%l7,%o2		! (0_0) ldiff0 = ax0 - ay0;

	cmp	%l7,%o5
	bl,pn	%icc,.spec1
	nop

	stx	%o4,[%fp+tmp_pz]
	sra	%o2,31,%l7		! (0_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (0_0) addrc0 = (char*)px - (char*)py;

	and	%l6,%l7,%o2		! (0_0) addrc0 &= ldiff0;

	lda	[%i1+%o2]0x82,%f0	! (0_0) fy0 = *(float*)((char*)py + addrc0);
	sub	%i3,%o2,%o4		! (0_0) (char*)px - addrc0

	lda	[%o4]0x82,%f2		! (0_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%l7,5,%l6		! (0_0) ltmp0 = ldiff0 << 5;

	sra	%l3,27,%o5		! (0_0) signx0 = ux0 >> 27;
	add	%i1,stridey,%i1		! py += stridey

	add	%i3,stridex,%i3		! px += stridex

	lda	[%i1]0x82,%l3		! (1_0) uy0 = *(int*)py;
	sra	%l4,28,%o4		! (0_0) signy0 = uy0 >> 28;

	add	%l6,cadd_arr,%l6	! (0_0) ltmp0 += (char*)cadd_arr;

	fstod	%f0,%f40		! (0_0) y0 = (double)fy0;

	fstod	%f2,%f2			! (0_0) x0 = (double)fx0;

.spec1_cont:
	lda	[%i3]0x82,%l4		! (1_0) ux0 = *(int*)px;
	and	%o5,-16,%o5		! (0_0) signx0 &= -16;

	and	%o4,-8,%o4		! (0_0) signy0 &= -8;

	fdivd	%f40,%f2,%f12		! (0_0) x0 = y0 / x0;

	add	%l6,%o5,%o1		! (0_0) ltmp0 += signx0;

	and	%l4,_0x7fffffff,%l6	! (1_0) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5

	cmp	%l6,%o5
	bl,pn	%icc,.u0
	and	%l3,_0x7fffffff,%g1	! (1_0) ay0 = uy0 & 0x7fffffff;
.c0:
	cmp	%g1,%o5
	bl,pn	%icc,.u1
	ldd	[%o1+%o4],%f34		! (0_0) cadd0 = *(double*)(ltmp0 + signy0);
.c1:
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.u2
	sub	%l6,%g1,%o1		! (1_0) ldiff0 = ax0 - ay0;
.c2:
	cmp	%g1,_0x7f800000
	bge,pn	%icc,.u3
	nop
.c3:
	sra	%o1,31,%g1		! (1_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (1_0) addrc0 = (char*)px - (char*)py;

	and	%l6,%g1,%o1		! (1_0) addrc0 &= ldiff0;

	lda	[%i1+%o1]0x82,%f0	! (1_0) fy0 = *(float*)((char*)py + addrc0);
	sub	%i3,%o1,%o4		! (1_0) (char*)px - addrc0;

	lda	[%o4]0x82,%f2		! (1_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%g1,5,%l6		! (1_0) ltmp0 = ldiff0 << 5;

	cmp	%o5,_0x7f800000		! (1_0) b0 ? 0x7f800000
	bge,pn	%icc,.update0		! (1_0) if ( b0 > 0x7f800000 )
	nop
.cont0:
	add	%i1,stridey,%i1		! py += stridey
	fstod	%f0,%f40		! (1_0) y0 = (double)fy0;

	sra	%l4,27,%o5		! (1_0) signx0 = ux0 >> 27;
	add	%i3,stridex,%i3		! px += stridex

	sra	%l3,28,%o4		! (1_0) signy0 = uy0 >> 28;
	add	%l6,cadd_arr,%l6	! (1_0) ltmp0 += (char*)cadd_arr;
	fstod	%f2,%f2			! (1_0) x0 = (double)fx0;
.d0:
	and	%o5,-16,%o5		! (1_0) signx0 &= -16;
	and	%o4,-8,%o4		! (1_0) signy0 &= -8;

	lda	[%i1]0x82,%l4		! (2_0) uy0 = *(int*)py;

	lda	[%i3]0x82,%l3		! (2_0) ux0 = *(int*)px;
	fdivd	%f40,%f2,%f10		! (1_0) x0 = y0 / x0;

	fmuld	%f12,%f12,%f20		! (0_0) x20 = x0 * x0;

	add	%l6,%o5,%o2		! (1_0) ltmp0 += signx0;

	and	%l3,_0x7fffffff,%l6	! (2_0) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5

	cmp	%l6,%o5
	bl,pn	%icc,.u4
	and	%l4,_0x7fffffff,%g5	! (2_0) ay0 = uy0 & 0x7fffffff;
.c4:
	cmp	%g5,%o5
	bl,pn	%icc,.u5
	fmuld	K9,%f20,%f40		! (0_0) dtmp0 = K9 * x20;
.c5:
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.u6
	ldd	[%o2+%o4],%f32		! (1_0) cadd0 = *(double*)(ltmp0 + signy0);
.c6:
	cmp	%g5,_0x7f800000
	bge,pn	%icc,.u7
	sub	%l6,%g5,%o2		! (2_0) ldiff0 = ax0 - ay0;
.c7:
	sra	%o2,31,%g5		! (2_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (2_0) addrc0 = (char*)px - (char*)py;

	faddd	%f40,K8,%f40		! (0_0) dtmp0 += K8;
	and	%l6,%g5,%o2		! (2_0) addrc0 &= ldiff0;

	lda	[%i1+%o2]0x82,%f0	! (2_0) fy0 = *(float*)((char*)py + addrc0);
	sub	%i3,%o2,%o4		! (2_0) (char*)px - addrc0;

	lda	[%o4]0x82,%f2		! (2_0) fx0 = *(float*)((char*)px - addrc0);

	cmp	%o5,_0x7f800000		! (2_0) b0 ? 0x7f800000
	bge,pn	%icc,.update1		! (2_0) if ( b0 > 0x7f800000 )
	nop
.cont1:
	fmuld	%f40,%f20,%f30		! (0_0) dtmp0 *= x20;
	sll	%g5,5,%l6		! (2_0) ltmp0 = ldiff0 << 5;
	add	%i1,stridey,%i1		! py += stridey
	fstod	%f0,%f40		! (2_0) y0 = (double)fy0;

	sra	%l3,27,%o5		! (2_0) signx0 = ux0 >> 27;
	add	%i3,stridex,%i3		! px += stridex

	fstod	%f2,%f2			! (2_0) x0 = (double)fx0;
	sra	%l4,28,%o4		! (2_0) signy0 = uy0 >> 28;
	add	%l6,cadd_arr,%l6	! (2_0) ltmp0 += (char*)cadd_arr;
.d1:
	lda	[%i1]0x82,%l3		! (3_0) uy0 = *(int*)py;
	and	%o5,-16,%o5		! (2_0) signx0 &= -16;
	faddd	%f30,K7,%f30		! (0_0) dtmp0 += K7;

	lda	[%i3]0x82,%l4		! (3_0) ux0 = *(int*)px;

	fdivd	%f40,%f2,%f8		! (2_0) x0 = y0 / x0;

	fmuld	%f10,%f10,%f18		! (1_0) x20 = x0 * x0;

	add	%l6,%o5,%o1		! (2_0) ltmp0 += signx0;
	and	%o4,-8,%o4		! (2_0) signy0 &= -8;
	fmuld	%f30,%f20,%f30		! (0_0) dtmp0 *= x20;

	and	%l4,_0x7fffffff,%l6	! (3_0) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5

	cmp	%l6,%o5
	bl,pn	%icc,.u8
	and	%l3,_0x7fffffff,%o0	! (3_0) ay0 = uy0 & 0x7fffffff;
.c8:
	cmp	%o0,%o5
	bl,pn	%icc,.u9
	fmuld	K9,%f18,%f40		! (1_0) dtmp0 = K9 * x20;
.c9:
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.u10
	faddd	%f30,K6,%f16		! (0_0) dtmp0 += K6;
.c10:
	cmp	%o0,_0x7f800000
	bge,pn	%icc,.u11
	ldd	[%o1+%o4],%f30		! (2_0) cadd0 = *(double*)(ltmp0 + signy0);
.c11:
	sub	%l6,%o0,%o1		! (3_0) ldiff0 = ax0 - ay0;

	sra	%o1,31,%o0		! (3_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (3_0) addrc0 = (char*)px - (char*)py;

	faddd	%f40,K8,%f40		! (1_0) dtmp0 += K8;
	and	%l6,%o0,%o1		! (3_0) addrc0 &= ldiff0;
	fmuld	%f16,%f20,%f16		! (0_0) dtmp0 *= x20;

	lda	[%i1+%o1]0x82,%f0	! (3_0) fy0 = *(float*)((char*)py + addrc0);
	sub	%i3,%o1,%o4		! (3_0) (char*)px - addrc0;

	lda	[%o4]0x82,%f1		! (3_0) fx0 = *(float*)((char*)px - addrc0);

	cmp	%o5,_0x7f800000		! (3_0) b0 ? 0x7f800000
	bge,pn	%icc,.update2		! (3_0) if ( b0 > 0x7f800000 )
	nop
.cont2:
	fmuld	%f40,%f18,%f28		! (1_0) dtmp0 *= x20;
	sll	%o0,5,%l6		! (3_0) ltmp0 = ldiff0 << 5;
	add	%i1,stridey,%i1		! py += stridey
	fstod	%f0,%f40		! (3_0) y0 = (double)fy0;

	faddd	%f16,K5,%f2		! (0_0) dtmp0 += K5;
	sra	%l4,27,%o5		! (3_0) signx0 = ux0 >> 27;
	add	%i3,stridex,%i3		! px += stridex

	sra	%l3,28,%o4		! (3_0) signy0 = uy0 >> 28;
	fstod	%f1,%f16		! (3_0) x0 = (double)fx0;
.d2:
	faddd	%f28,K7,%f28		! (1_0) dtmp0 += K7;
	add	%l6,cadd_arr,%l6	! (3_0) ltmp0 += (char*)cadd_arr;
	and	%o5,-16,%o5		! (3_0) signx0 &= -16;

	lda	[%i1]0x82,%l4		! (4_0) uy0 = *(int*)py;
	fmuld	%f2,%f20,%f2		! (0_0) dtmp0 *= x20;

	lda	[%i3]0x82,%l3		! (4_0) ux0 = *(int*)px;
	fdivd	%f40,%f16,%f6		! (3_0) x0 = y0 / x0;

	and	%o4,-8,%o4		! (3_0) signy0 &= -8;
	fmuld	%f8,%f8,%f16		! (2_0) x20 = x0 * x0;

	add	%l6,%o5,%o2		! (3_0) ltmp0 += signx0;
	fmuld	%f28,%f18,%f28		! (1_0) dtmp0 *= x20;

	and	%l3,_0x7fffffff,%l6	! (4_0) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f2,K4,%f2		! (0_0) dtmp0 += K4;

	cmp	%l6,%o5
	bl,pn	%icc,.u12
	and	%l4,_0x7fffffff,%l5	! (4_0) ay0 = uy0 & 0x7fffffff;
.c12:
	cmp	%l5,%o5
	bl,pn	%icc,.u13
	fmuld	K9,%f16,%f40		! (2_0) dtmp0 = K9 * x20;
.c13:
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.u14
	faddd	%f28,K6,%f4		! (1_0) dtmp0 += K6;
.c14:
	ldd	[%o2+%o4],%f28		! (3_0) cadd0 = *(double*)(ltmp0 + signy0);
	cmp	%l5,_0x7f800000
	bge,pn	%icc,.u15
	fmuld	%f2,%f20,%f24		! (0_0) dtmp0 *= x20;
.c15:
	sub	%l6,%l5,%o2		! (4_0) ldiff0 = ax0 - ay0;

	sra	%o2,31,%l5		! (4_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (4_0) addrc0 = (char*)px - (char*)py;

	faddd	%f40,K8,%f40		! (2_0) dtmp0 += K8;
	and	%l6,%l5,%o2		! (4_0) addrc0 &= ldiff0;
	fmuld	%f4,%f18,%f4		! (1_0) dtmp0 *= x20;

	lda	[%i1+%o2]0x82,%f0	! (4_0) fy0 = *(float*)((char*)py + addrc0);
	sub	%i3,%o2,%o4		! (4_0) (char*)px - addrc0;
	faddd	%f24,K3,%f24		! (0_0) dtmp0 += K3;

	lda	[%o4]0x82,%f2		! (4_0) fx0 = *(float*)((char*)px - addrc0);

	cmp	%o5,_0x7f800000		! (4_0) b0 ? 0x7f800000
	bge,pn	%icc,.update3		! (4_0) if ( b0 > 0x7f800000 )
	nop
.cont3:
	fmuld	%f40,%f16,%f26		! (2_0) dtmp0 *= x20;
	sll	%l5,5,%l6		! (4_0) ltmp0 = ldiff0 << 5;
	add	%i1,stridey,%i1		! py += stridey
	fstod	%f0,%f40		! (4_0) y0 = (double)fy0;

	faddd	%f4,K5,%f62		! (1_0) dtmp0 += K5;
	add	%i3,stridex,%i3		! px += stridex
	fmuld	%f24,%f20,%f24		! (0_0) dtmp0 *= x20;

	fstod	%f2,%f2			! (4_0) x0 = (double)fx0;
	sra	%l3,27,%o5		! (4_0) signx0 = ux0 >> 27;
	sra	%l4,28,%o4		! (4_0) signy0 = uy0 >> 28;
.d3:
	lda	[%i1]0x82,%l3		! (5_0) uy0 = *(int*)py;
	add	%l6,cadd_arr,%l6	! (4_0) ltmp0 += (char*)cadd_arr;
	faddd	%f26,K7,%f26		! (2_0) dtmp0 += K7;

	fmuld	%f62,%f18,%f4		! (1_0) dtmp0 *= x20;
	and	%o5,-16,%o5		! (4_0) signx0 &= -16;

	lda	[%i3]0x82,%l4		! (5_1) ux0 = *(int*)px;
	fdivd	%f40,%f2,%f62		! (4_1) x0 = y0 / x0;
	faddd	%f24,K2,%f40		! (0_1) dtmp0 += K2;

	and	%o4,-8,%o4		! (4_1) signy0 &= -8;
	fmuld	%f6,%f6,%f24		! (3_1) x20 = x0 * x0;

	add	%l6,%o5,%o1		! (4_1) ltmp0 += signx0;
	fmuld	%f26,%f16,%f26		! (2_1) dtmp0 *= x20;

	and	%l4,_0x7fffffff,%l6	! (5_1) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f4,K4,%f4		! (1_1) dtmp0 += K4;

	cmp	%l6,%o5
	bl,pn	%icc,.u16
	and	%l3,_0x7fffffff,%o7	! (5_1) ay0 = uy0 & 0x7fffffff;
.c16:
	cmp	%o7,%o5
	bl,pn	%icc,.u17
	fmuld	%f40,%f20,%f38		! (0_1) dtmp0 *= x20;
.c17:
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.u18
	fmuld	K9,%f24,%f40		! (3_1) dtmp0 = K9 * x20;
.c18:
	cmp	%o7,_0x7f800000
	bge,pn	%icc,.u19
	faddd	%f26,K6,%f22		! (2_1) dtmp0 += K6;
.c19:
	ldd	[%o1+%o4],%f26		! (4_1) cadd0 = *(double*)(ltmp0 + signy0);
	fmuld	%f4,%f18,%f4		! (1_1) dtmp0 *= x20;

	sub	%l6,%o7,%o1		! (5_1) ldiff0 = ax0 - ay0;

	sra	%o1,31,%o7		! (5_1) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (5_1) addrc0 = (char*)px - (char*)py;
	faddd	%f38,K1,%f38		! (0_1) dtmp0 += K1;

	faddd	%f40,K8,%f40		! (3_1) dtmp0 += K8;
	and	%l6,%o7,%o1		! (5_1) addrc0 &= ldiff0;
	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;

	lda	[%i1+%o1]0x82,%f0	! (5_1) fy0 = *(float*)((char*)py + addrc0);
	sll	%o7,5,%l6		! (5_1) ltmp0 = ldiff0 << 5;
	sub	%i3,%o1,%o4		! (5_1) (char*)px - addrc0;
	faddd	%f4,K3,%f4		! (1_1) dtmp0 += K3;

	lda	[%o4]0x82,%f1		! (5_1) fx0 = *(float*)((char*)px - addrc0);

	fmuld	%f38,%f20,%f38		! (0_1) dtmp0 *= x20;
	cmp	%o5,_0x7f800000		! (5_1) b0 ? 0x7f800000
	bge,pn	%icc,.update4		! (5_1) if ( b0 > 0x7f800000 )
	nop
.cont4:
	fmuld	%f40,%f24,%f36		! (3_1) dtmp0 *= x20;
	fstod	%f0,%f40		! (5_1) y0 = (double)fy0;

	faddd	%f22,K5,%f14		! (2_1) dtmp0 += K5;
	fmuld	%f4,%f18,%f4		! (1_1) dtmp0 *= x20;

	add	%i3,stridex,%i3		! px += stridex
	sll	%l7,3,%l7		! (0_1) cmul0_ind = ldiff0 << 3;
	fstod	%f1,%f2			! (5_1) x0 = (double)fx0;
.d4:
	sra	%l3,28,%o4		! (5_1) signy0 = uy0 >> 28;
	add	%i1,stridey,%i1		! py += stridey

	faddd	%f36,K7,%f36		! (3_1) dtmp0 += K7;
	sra	%l4,27,%o5		! (5_1) signx0 = ux0 >> 27;

	lda	[%i1]0x82,%l4		! (0_0) uy0 = *(int*)py;
	add	%l6,cadd_arr,%l6	! (5_1) ltmp0 += (char*)cadd_arr;
	fmuld	%f14,%f16,%f22		! (2_1) dtmp0 *= x20;
	faddd	%f38,K0,%f38		! (0_1) dtmp0 += K0;

	lda	[%i3]0x82,%l3		! (0_0) ux0 = *(int*)px;
	and	%o5,-16,%o5		! (5_1) signx0 &= -16;
	fdivd	%f40,%f2,%f14		! (5_1) x0 = y0 / x0;
	faddd	%f4,K2,%f40		! (1_1) dtmp0 += K2;

	fmuld	%f62,%f62,%f4		! (4_1) x20 = x0 * x0;

	ldd	[cmul_arr+%l7],%f0	! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
	add	%l6,%o5,%o2		! (5_1) ltmp0 += signx0;
	and	%o4,-8,%o4		! (5_1) signy0 &= -8;
	fmuld	%f36,%f24,%f36		! (3_1) dtmp0 *= x20;

	fmuld	%f38,%f12,%f12		! (0_1) x0 = dtmp0 * x0;
	and	%l4,_0x7fffffff,%l7	! (0_0) ay0 = uy0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f22,K4,%f22		! (2_1) dtmp0 += K4;

	and	%l3,_0x7fffffff,%l6	! (0_0) ax0 = ux0 & 0x7fffffff;
	cmp	%l7,%o5
	bl,pn	%icc,.u20
	fmuld	%f40,%f18,%f38		! (1_1) dtmp0 *= x20;
.c20:
	cmp	%l6,%o5
	bl,pn	%icc,.u21
	fmuld	K9,%f4,%f40		! (4_1) dtmp0 = K9 * x20;
.c21:
	cmp	%l7,_0x7f800000
	bge,pn	%icc,.u22
	faddd	%f36,K6,%f20		! (3_1) dtmp0 += K6;
.c22:
	ldd	[%o2+%o4],%f36		! (5_1) cadd0 = *(double*)(ltmp0 + signy0);
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.u23
	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;
.c23:
	sub	%l6,%l7,%o2		! (0_0) ldiff0 = ax0 - ay0;

	fmuld	%f0,%f12,%f12		! (0_1) dtmp0 = cmul0 * x0;
	sra	%o2,31,%l7		! (0_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (0_0) addrc0 = (char*)px - (char*)py;
	faddd	%f38,K1,%f38		! (1_1) dtmp0 += K1;

	faddd	%f40,K8,%f40		! (4_1) dtmp0 += K8;
	and	%l6,%l7,%o2		! (0_0) addrc0 &= ldiff0;
	fmuld	%f20,%f24,%f20		! (3_1) dtmp0 *= x20;

	lda	[%i1+%o2]0x82,%f0	! (0_0) fy0 = *(float*)((char*)py + addrc0);
	sll	%g1,3,%g1		! (1_1) cmul0_ind = ldiff0 << 3;
	sub	%i3,%o2,%o4		! (0_0) (char*)px - addrc0
	faddd	%f22,K3,%f22		! (2_1) dtmp0 += K3;

	lda	[%o4]0x82,%f2		! (0_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%l7,5,%l6		! (0_0) ltmp0 = ldiff0 << 5;

	fmuld	%f38,%f18,%f38		! (1_1) dtmp0 *= x20;
	cmp	%o5,_0x7f800000		! (0_0) b0 ? 0x7f800000
	bge,pn	%icc,.update5		! (0_0) if ( b0 > 0x7f800000 )
	faddd	%f34,%f12,%f18		! (0_1) dtmp0 = cadd0 + dtmp0;
.cont5:
	fmuld	%f40,%f4,%f34		! (4_1) dtmp0 *= x20;
	sra	%l3,27,%o5		! (0_0) signx0 = ux0 >> 27;
	add	%i3,stridex,%i3		! px += stridex
	fstod	%f0,%f40		! (0_0) y0 = (double)fy0;

	faddd	%f20,K5,%f12		! (3_1) dtmp0 += K5;
	add	%i1,stridey,%i1		! py += stridey
	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;

	lda	[%i1]0x82,%l3		! (1_0) uy0 = *(int*)py;
	sra	%l4,28,%o4		! (0_0) signy0 = uy0 >> 28;
	add	%l6,cadd_arr,%l6	! (0_0) ltmp0 += (char*)cadd_arr;
	fstod	%f2,%f2			! (0_0) x0 = (double)fx0;
.d5:
	lda	[%i3]0x82,%l4		! (1_0) ux0 = *(int*)px;
	and	%o5,-16,%o5		! (0_0) signx0 &= -16;
	faddd	%f34,K7,%f34		! (4_1) dtmp0 += K7;

	ldx	[%fp+tmp_pz],%o1
	fmuld	%f12,%f24,%f20		! (3_1) dtmp0 *= x20;
	and	%o4,-8,%o4		! (0_0) signy0 &= -8;
	faddd	%f38,K0,%f38		! (1_1) dtmp0 += K0;

	fdivd	%f40,%f2,%f12		! (0_0) x0 = y0 / x0;
	faddd	%f22,K2,%f40		! (2_1) dtmp0 += K2;

	fdtos	%f18,%f2		! (0_1) ftmp0 = (float)dtmp0;
	st	%f2,[%o1]		! (0_1) *pz = ftmp0
	add	%o1,stridez,%o2
	fmuld	%f14,%f14,%f22		! (5_1) x20 = x0 * x0;

	subcc	counter,1,counter
	bneg,a,pn %icc,.begin
	or	%g0,%o2,%o4

	ldd	[cmul_arr+%g1],%f0	! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
	add	%l6,%o5,%o1		! (0_0) ltmp0 += signx0;
	fmuld	%f34,%f4,%f34		! (4_1) dtmp0 *= x20;

	fmuld	%f38,%f10,%f10		! (1_1) x0 = dtmp0 * x0;
	and	%l4,_0x7fffffff,%l6	! (1_0) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f20,K4,%f20		! (3_1) dtmp0 += K4;

	and	%l3,_0x7fffffff,%g1	! (1_0) ay0 = uy0 & 0x7fffffff;
	cmp	%l6,%o5
	bl,pn	%icc,.u24
	fmuld	%f40,%f16,%f38		! (2_1) dtmp0 *= x20;
.c24:
	cmp	%g1,%o5
	bl,pn	%icc,.u25
	fmuld	K9,%f22,%f40		! (5_1) dtmp0 = K9 * x20;
.c25:
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.u26
	faddd	%f34,K6,%f18		! (4_1) dtmp0 += K6;
.c26:
	ldd	[%o1+%o4],%f34		! (0_0) cadd0 = *(double*)(ltmp0 + signy0);
	cmp	%g1,_0x7f800000
	bge,pn	%icc,.u27
	fmuld	%f20,%f24,%f20		! (3_1) dtmp0 *= x20;
.c27:
	sub	%l6,%g1,%o1		! (1_0) ldiff0 = ax0 - ay0;

	fmuld	%f0,%f10,%f10		! (1_1) dtmp0 = cmul0 * x0;
	sra	%o1,31,%g1		! (1_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (1_0) addrc0 = (char*)px - (char*)py;
	faddd	%f38,K1,%f38		! (2_1) dtmp0 += K1;

	faddd	%f40,K8,%f40		! (5_1) dtmp0 += K8;
	and	%l6,%g1,%o1		! (1_0) addrc0 &= ldiff0;
	fmuld	%f18,%f4,%f18		! (4_1) dtmp0 *= x20;

	lda	[%i1+%o1]0x82,%f0	! (1_0) fy0 = *(float*)((char*)py + addrc0);
	sll	%g5,3,%g5		! (2_1) cmul0_ind = ldiff0 << 3;
	sub	%i3,%o1,%o4		! (1_0) (char*)px - addrc0;
	faddd	%f20,K3,%f20		! (3_1) dtmp0 += K3;

	lda	[%o4]0x82,%f2		! (1_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%g1,5,%l6		! (1_0) ltmp0 = ldiff0 << 5;
	add	%o2,stridez,%o1		! pz += stridez

	fmuld	%f38,%f16,%f38		! (2_1) dtmp0 *= x20;
	cmp	%o5,_0x7f800000		! (1_0) b0 ? 0x7f800000
	bge,pn	%icc,.update6		! (1_0) if ( b0 > 0x7f800000 )
	faddd	%f32,%f10,%f16		! (1_1) dtmp0 = cadd0 + dtmp0;
.cont6:
	fmuld	%f40,%f22,%f32		! (5_1) dtmp0 *= x20;
	add	%i1,stridey,%i1		! py += stridey
	fstod	%f0,%f40		! (1_0) y0 = (double)fy0;

	faddd	%f18,K5,%f10		! (4_1) dtmp0 += K5;
	sra	%l4,27,%o5		! (1_0) signx0 = ux0 >> 27;
	add	%i3,stridex,%i3		! px += stridex
	fmuld	%f20,%f24,%f20		! (3_1) dtmp0 *= x20;

	sra	%l3,28,%o4		! (1_0) signy0 = uy0 >> 28;
	add	%l6,cadd_arr,%l6	! (1_0) ltmp0 += (char*)cadd_arr;
	fstod	%f2,%f2			! (1_0) x0 = (double)fx0;
.d6:
	faddd	%f32,K7,%f32		! (5_1) dtmp0 += K7;
	and	%o5,-16,%o5		! (1_0) signx0 &= -16;
	and	%o4,-8,%o4		! (1_0) signy0 &= -8;

	lda	[%i1]0x82,%l4		! (2_0) uy0 = *(int*)py;
	fmuld	%f10,%f4,%f18		! (4_1) dtmp0 *= x20;
	faddd	%f38,K0,%f38		! (2_1) dtmp0 += K0;

	lda	[%i3]0x82,%l3		! (2_0) ux0 = *(int*)px;
	fdivd	%f40,%f2,%f10		! (1_0) x0 = y0 / x0;
	faddd	%f20,K2,%f40		! (3_1) dtmp0 += K2;

	fmuld	%f12,%f12,%f20		! (0_0) x20 = x0 * x0;
	fdtos	%f16,%f2		! (1_1) ftmp0 = (float)dtmp0;
	st	%f2,[%o2]		! (1_1) *pz = ftmp0;

	subcc	counter,1,counter
	bneg,a,pn %icc,.begin
	or	%g0,%o1,%o4

	ldd	[cmul_arr+%g5],%f0	! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
	add	%l6,%o5,%o2		! (1_0) ltmp0 += signx0;
	fmuld	%f32,%f22,%f32		! (5_1) dtmp0 *= x20;

	fmuld	%f38,%f8,%f8		! (2_1) x0 = dtmp0 * x0;
	and	%l3,_0x7fffffff,%l6	! (2_0) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f18,K4,%f18		! (4_1) dtmp0 += K4;

	and	%l4,_0x7fffffff,%g5	! (2_0) ay0 = uy0 & 0x7fffffff;
	cmp	%l6,%o5
	bl,pn	%icc,.u28
	fmuld	%f40,%f24,%f38		! (3_1) dtmp0 *= x20;
.c28:
	cmp	%g5,%o5
	bl,pn	%icc,.u29
	fmuld	K9,%f20,%f40		! (0_0) dtmp0 = K9 * x20;
.c29:
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.u30
	faddd	%f32,K6,%f16		! (5_1) dtmp0 += K6;
.c30:
	ldd	[%o2+%o4],%f32		! (1_0) cadd0 = *(double*)(ltmp0 + signy0);
	cmp	%g5,_0x7f800000
	bge,pn	%icc,.u31
	fmuld	%f18,%f4,%f18		! (4_1) dtmp0 *= x20;
.c31:
	sub	%l6,%g5,%o2		! (2_0) ldiff0 = ax0 - ay0;

	fmuld	%f0,%f8,%f8		! (2_1) dtmp0 = cmul0 * x0;
	sra	%o2,31,%g5		! (2_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (2_0) addrc0 = (char*)px - (char*)py;
	faddd	%f38,K1,%f38		! (3_1) dtmp0 += K1;

	faddd	%f40,K8,%f40		! (0_0) dtmp0 += K8;
	and	%l6,%g5,%o2		! (2_0) addrc0 &= ldiff0;
	fmuld	%f16,%f22,%f16		! (5_1) dtmp0 *= x20;

	lda	[%i1+%o2]0x82,%f0	! (2_0) fy0 = *(float*)((char*)py + addrc0);
	sub	%i3,%o2,%o4		! (2_0) (char*)px - addrc0;
	add	%o1,stridez,%o2		! pz += stridez
	faddd	%f18,K3,%f18		! (4_1) dtmp0 += K3;

	lda	[%o4]0x82,%f2		! (2_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%o0,3,%o0		! (3_1) cmul0_ind = ldiff0 << 3;

	fmuld	%f38,%f24,%f38		! (3_1) dtmp0 *= x20;
	cmp	%o5,_0x7f800000		! (2_0) b0 ? 0x7f800000
	bge,pn	%icc,.update7		! (2_0) if ( b0 > 0x7f800000 )
	faddd	%f30,%f8,%f24		! (2_1) dtmp0 = cadd0 + dtmp0;
.cont7:
	fmuld	%f40,%f20,%f30		! (0_0) dtmp0 *= x20;
	sll	%g5,5,%l6		! (2_0) ltmp0 = ldiff0 << 5;
	add	%i1,stridey,%i1		! py += stridey
	fstod	%f0,%f40		! (2_0) y0 = (double)fy0;

	faddd	%f16,K5,%f8		! (5_1) dtmp0 += K5;
	sra	%l3,27,%o5		! (2_0) signx0 = ux0 >> 27;
	add	%i3,stridex,%i3		! px += stridex
	fmuld	%f18,%f4,%f18		! (4_1) dtmp0 *= x20;

	fstod	%f2,%f2			! (2_0) x0 = (double)fx0;
	sra	%l4,28,%o4		! (2_0) signy0 = uy0 >> 28;
	add	%l6,cadd_arr,%l6	! (2_0) ltmp0 += (char*)cadd_arr;
.d7:
	lda	[%i1]0x82,%l3		! (3_0) uy0 = *(int*)py;
	and	%o5,-16,%o5		! (2_0) signx0 &= -16;
	faddd	%f30,K7,%f30		! (0_0) dtmp0 += K7;

	lda	[%i3]0x82,%l4		! (3_0) ux0 = *(int*)px;
	fmuld	%f8,%f22,%f16		! (5_1) dtmp0 *= x20;
	faddd	%f38,K0,%f38		! (3_1) dtmp0 += K0;

	fdivd	%f40,%f2,%f8		! (2_0) x0 = y0 / x0;
	faddd	%f18,K2,%f40		! (4_1) dtmp0 += K2;

	fmuld	%f10,%f10,%f18		! (1_0) x20 = x0 * x0;
	fdtos	%f24,%f1		! (2_1) ftmp0 = (float)dtmp0;
	st	%f1,[%o1]		! (2_1) *pz = ftmp0;

	subcc	counter,1,counter
	bneg,a,pn %icc,.begin
	or	%g0,%o2,%o4

	ldd	[cmul_arr+%o0],%f2	! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
	add	%l6,%o5,%o1		! (2_0) ltmp0 += signx0;
	and	%o4,-8,%o4		! (2_0) signy0 &= -8;
	fmuld	%f30,%f20,%f30		! (0_0) dtmp0 *= x20;

	fmuld	%f38,%f6,%f6		! (3_1) x0 = dtmp0 * x0;
	and	%l4,_0x7fffffff,%l6	! (3_0) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f16,K4,%f24		! (5_1) dtmp0 += K4;

	and	%l3,_0x7fffffff,%o0	! (3_0) ay0 = uy0 & 0x7fffffff;
	cmp	%l6,%o5
	bl,pn	%icc,.u32
	fmuld	%f40,%f4,%f38		! (4_1) dtmp0 *= x20;
.c32:
	cmp	%o0,%o5
	bl,pn	%icc,.u33
	fmuld	K9,%f18,%f40		! (1_0) dtmp0 = K9 * x20;
.c33:
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.u34
	faddd	%f30,K6,%f16		! (0_0) dtmp0 += K6;
.c34:
	ldd	[%o1+%o4],%f30		! (2_0) cadd0 = *(double*)(ltmp0 + signy0);
	cmp	%o0,_0x7f800000
	bge,pn	%icc,.u35
	fmuld	%f24,%f22,%f24		! (5_1) dtmp0 *= x20;
.c35:
	sub	%l6,%o0,%o1		! (3_0) ldiff0 = ax0 - ay0;

	fmuld	%f2,%f6,%f6		! (3_1) dtmp0 = cmul0 * x0;
	sra	%o1,31,%o0		! (3_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (3_0) addrc0 = (char*)px - (char*)py;
	faddd	%f38,K1,%f38		! (4_1) dtmp0 += K1;

	faddd	%f40,K8,%f40		! (1_0) dtmp0 += K8;
	and	%l6,%o0,%o1		! (3_0) addrc0 &= ldiff0;
	fmuld	%f16,%f20,%f16		! (0_0) dtmp0 *= x20;

	lda	[%i1+%o1]0x82,%f0	! (3_0) fy0 = *(float*)((char*)py + addrc0);
	sub	%i3,%o1,%o4		! (3_0) (char*)px - addrc0;
	add	%o2,stridez,%o1		! pz += stridez
	faddd	%f24,K3,%f24		! (5_1) dtmp0 += K3;

	lda	[%o4]0x82,%f1		! (3_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%l5,3,%l5		! (4_1) cmul0_ind = ldiff0 << 3;

	fmuld	%f38,%f4,%f38		! (4_1) dtmp0 *= x20;
	cmp	%o5,_0x7f800000		! (3_0) b0 ? 0x7f800000
	bge,pn	%icc,.update8		! (3_0) if ( b0 > 0x7f800000 )
	faddd	%f28,%f6,%f4		! (3_1) dtmp0 = cadd0 + dtmp0;
.cont8:
	fmuld	%f40,%f18,%f28		! (1_0) dtmp0 *= x20;
	sll	%o0,5,%l6		! (3_0) ltmp0 = ldiff0 << 5;
	add	%i1,stridey,%i1		! py += stridey
	fstod	%f0,%f40		! (3_0) y0 = (double)fy0;

	faddd	%f16,K5,%f2		! (0_0) dtmp0 += K5;
	sra	%l4,27,%o5		! (3_0) signx0 = ux0 >> 27;
	add	%i3,stridex,%i3		! px += stridex
	fmuld	%f24,%f22,%f24		! (5_1) dtmp0 *= x20;

	sra	%l3,28,%o4		! (3_0) signy0 = uy0 >> 28;
	fstod	%f1,%f16		! (3_0) x0 = (double)fx0;
.d8:
	faddd	%f28,K7,%f28		! (1_0) dtmp0 += K7;
	add	%l6,cadd_arr,%l6	! (3_0) ltmp0 += (char*)cadd_arr;
	and	%o5,-16,%o5		! (3_0) signx0 &= -16;

	lda	[%i1]0x82,%l4		! (4_0) uy0 = *(int*)py;
	fmuld	%f2,%f20,%f2		! (0_0) dtmp0 *= x20;
	faddd	%f38,K0,%f38		! (4_1) dtmp0 += K0;

	lda	[%i3]0x82,%l3		! (4_0) ux0 = *(int*)px;
	fdivd	%f40,%f16,%f6		! (3_0) x0 = y0 / x0;
	faddd	%f24,K2,%f24		! (5_1) dtmp0 += K2;

	fdtos	%f4,%f1			! (3_1) ftmp0 = (float)dtmp0;
	and	%o4,-8,%o4		! (3_0) signy0 &= -8;
	st	%f1,[%o2]		! (3_1) *pz = ftmp0;
	fmuld	%f8,%f8,%f16		! (2_0) x20 = x0 * x0;

	subcc	counter,1,counter
	bneg,a,pn %icc,.begin
	or	%g0,%o1,%o4

	ldd	[cmul_arr+%l5],%f0	! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
	add	%l6,%o5,%o2		! (3_0) ltmp0 += signx0;
	fmuld	%f28,%f18,%f28		! (1_0) dtmp0 *= x20;

	fmuld	%f38,%f62,%f62		! (4_1) x0 = dtmp0 * x0;
	and	%l3,_0x7fffffff,%l6	! (4_0) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f2,K4,%f2		! (0_0) dtmp0 += K4;

	and	%l4,_0x7fffffff,%l5	! (4_0) ay0 = uy0 & 0x7fffffff;
	cmp	%l6,%o5
	bl,pn	%icc,.u36
	fmuld	%f24,%f22,%f38		! (5_1) dtmp0 *= x20;
.c36:
	cmp	%l5,%o5
	bl,pn	%icc,.u37
	fmuld	K9,%f16,%f40		! (2_0) dtmp0 = K9 * x20;
.c37:
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.u38
	faddd	%f28,K6,%f4		! (1_0) dtmp0 += K6;
.c38:
	ldd	[%o2+%o4],%f28		! (3_0) cadd0 = *(double*)(ltmp0 + signy0);
	cmp	%l5,_0x7f800000
	bge,pn	%icc,.u39
	fmuld	%f2,%f20,%f24		! (0_0) dtmp0 *= x20;
.c39:
	sub	%l6,%l5,%o2		! (4_0) ldiff0 = ax0 - ay0;

	fmuld	%f0,%f62,%f62		! (4_1) dtmp0 = cmul0 * x0;
	sra	%o2,31,%l5		! (4_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (4_0) addrc0 = (char*)px - (char*)py;
	faddd	%f38,K1,%f38		! (5_1) dtmp0 += K1;

	faddd	%f40,K8,%f40		! (2_0) dtmp0 += K8;
	and	%l6,%l5,%o2		! (4_0) addrc0 &= ldiff0;
	fmuld	%f4,%f18,%f4		! (1_0) dtmp0 *= x20;

	lda	[%i1+%o2]0x82,%f0	! (4_0) fy0 = *(float*)((char*)py + addrc0);
	sub	%i3,%o2,%o4		! (4_0) (char*)px - addrc0;
	add	%o1,stridez,%o2		! pz += stridez
	faddd	%f24,K3,%f24		! (0_0) dtmp0 += K3;

	lda	[%o4]0x82,%f2		! (4_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%o7,3,%o7		! (5_1) cmul0_ind = ldiff0 << 3;

	fmuld	%f38,%f22,%f38		! (5_1) dtmp0 *= x20;
	cmp	%o5,_0x7f800000		! (4_0) b0 ? 0x7f800000
	bge,pn	%icc,.update9		! (4_0) if ( b0 > 0x7f800000 )
	faddd	%f26,%f62,%f22		! (4_1) dtmp0 = cadd0 + dtmp0;
.cont9:
	fmuld	%f40,%f16,%f26		! (2_0) dtmp0 *= x20;
	sll	%l5,5,%l6		! (4_0) ltmp0 = ldiff0 << 5;
	add	%i1,stridey,%i1		! py += stridey
	fstod	%f0,%f40		! (4_0) y0 = (double)fy0;

	faddd	%f4,K5,%f62		! (1_0) dtmp0 += K5;
	sra	%l3,27,%o5		! (4_0) signx0 = ux0 >> 27;
	add	%i3,stridex,%i3		! px += stridex
	fmuld	%f24,%f20,%f24		! (0_0) dtmp0 *= x20;

	fstod	%f2,%f2			! (4_0) x0 = (double)fx0;
	sra	%l4,28,%o4		! (4_0) signy0 = uy0 >> 28;
.d9:
	lda	[%i1]0x82,%l3		! (5_0) uy0 = *(int*)py;
	add	%l6,cadd_arr,%l6	! (4_0) ltmp0 += (char*)cadd_arr;
	faddd	%f26,K7,%f26		! (2_0) dtmp0 += K7;

	fmuld	%f62,%f18,%f4		! (1_0) dtmp0 *= x20;
	and	%o5,-16,%o5		! (4_0) signx0 &= -16;
	faddd	%f38,K0,%f38		! (5_1) dtmp0 += K0;

	subcc	counter,5,counter
	bneg,pn	%icc,.tail
	nop

	ba	.main_loop
	nop

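! Steady state of the six-way software pipeline: each pass below
! processes six elements, overlapping the long-latency fdivd of the
! newer elements with the polynomial evaluation and stores of older
! ones.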
	.align	16
.main_loop:
	lda	[%i3]0x82,%l4		! (5_1) ux0 = *(int*)px;
	nop
	fdivd	%f40,%f2,%f62		! (4_1) x0 = y0 / x0;
	faddd	%f24,K2,%f40		! (0_1) dtmp0 += K2;

	fdtos	%f22,%f22		! (4_2) ftmp0 = (float)dtmp0;
	and	%o4,-8,%o4		! (4_1) signy0 &= -8;
	st	%f22,[%o1]		! (4_2) *pz = ftmp0;
	fmuld	%f6,%f6,%f24		! (3_1) x20 = x0 * x0;

	ldd	[cmul_arr+%o7],%f0	! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
	add	%l6,%o5,%o1		! (4_1) ltmp0 += signx0;
	fmuld	%f26,%f16,%f26		! (2_1) dtmp0 *= x20;

	fmuld	%f38,%f14,%f14		! (5_2) x0 = dtmp0 * x0;
	and	%l4,_0x7fffffff,%l6	! (5_1) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f4,K4,%f4		! (1_1) dtmp0 += K4;

	and	%l3,_0x7fffffff,%o7	! (5_1) ay0 = uy0 & 0x7fffffff;
	fmuld	%f40,%f20,%f38		! (0_1) dtmp0 *= x20;

	cmp	%l6,%o5
	bl,pn	%icc,.up0
	fmuld	K9,%f24,%f40		! (3_1) dtmp0 = K9 * x20;
.co0:
	nop
	cmp	%o7,%o5
	bl,pn	%icc,.up1
	faddd	%f26,K6,%f22		! (2_1) dtmp0 += K6;
.co1:
	ldd	[%o1+%o4],%f26		! (4_1) cadd0 = *(double*)(ltmp0 + signy0);
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.up2
	fmuld	%f4,%f18,%f4		! (1_1) dtmp0 *= x20;
.co2:
	sub	%l6,%o7,%o1		! (5_1) ldiff0 = ax0 - ay0;
	cmp	%o7,_0x7f800000
	bge,pn	%icc,.up3

	fmuld	%f0,%f14,%f14		! (5_2) dtmp0 = cmul0 * x0;
.co3:
	sra	%o1,31,%o7		! (5_1) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (5_1) addrc0 = (char*)px - (char*)py;
	faddd	%f38,K1,%f38		! (0_1) dtmp0 += K1;

	faddd	%f40,K8,%f40		! (3_1) dtmp0 += K8;
	and	%l6,%o7,%o1		! (5_1) addrc0 &= ldiff0;
	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;

	lda	[%i1+%o1]0x82,%f0	! (5_1) fy0 = *(float*)((char*)py + addrc0);
	sll	%o7,5,%l6		! (5_1) ltmp0 = ldiff0 << 5;
	sub	%i3,%o1,%o4		! (5_1) (char*)px - addrc0;
	faddd	%f4,K3,%f4		! (1_1) dtmp0 += K3;

	lda	[%o4]0x82,%f2		! (5_1) fx0 = *(float*)((char*)px - addrc0);

	fmuld	%f38,%f20,%f38		! (0_1) dtmp0 *= x20;
	cmp	%o5,_0x7f800000		! (5_1) b0 ? 0x7f800000
	bge,pn	%icc,.update10		! (5_1) if ( b0 > 0x7f800000 )
	faddd	%f36,%f14,%f20		! (5_2) dtmp0 = cadd0 + dtmp0;
.cont10:
	fmuld	%f40,%f24,%f36		! (3_1) dtmp0 *= x20;
	nop
	fstod	%f0,%f40		! (5_1) y0 = (double)fy0;

	faddd	%f22,K5,%f14		! (2_1) dtmp0 += K5;
	add	%o2,stridez,%o1		! pz += stridez
	fmuld	%f4,%f18,%f4		! (1_1) dtmp0 *= x20;

	sll	%l7,3,%l7		! (0_1) cmul0_ind = ldiff0 << 3;
	add	%i3,stridex,%i3		! px += stridex
	fstod	%f2,%f2			! (5_1) x0 = (double)fx0;
.den0:
	sra	%l3,28,%o4		! (5_1) signy0 = uy0 >> 28;
	add	%i1,stridey,%i1		! py += stridey

	faddd	%f36,K7,%f36		! (3_1) dtmp0 += K7;
	sra	%l4,27,%o5		! (5_1) signx0 = ux0 >> 27;

	lda	[%i1]0x82,%l4		! (0_0) uy0 = *(int*)py;
	add	%l6,cadd_arr,%l6	! (5_1) ltmp0 += (char*)cadd_arr;
	fmuld	%f14,%f16,%f22		! (2_1) dtmp0 *= x20;
	faddd	%f38,K0,%f38		! (0_1) dtmp0 += K0;

	lda	[%i3]0x82,%l3		! (0_0) ux0 = *(int*)px;
	and	%o5,-16,%o5		! (5_1) signx0 &= -16;
	fdivd	%f40,%f2,%f14		! (5_1) x0 = y0 / x0;
	faddd	%f4,K2,%f40		! (1_1) dtmp0 += K2;

	fdtos	%f20,%f2		! (5_2) ftmp0 = (float)dtmp0;
	st	%f2,[%o2]		! (5_2) *pz = ftmp0;
	fmuld	%f62,%f62,%f4		! (4_1) x20 = x0 * x0;

	ldd	[cmul_arr+%l7],%f0	! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
	add	%l6,%o5,%o2		! (5_1) ltmp0 += signx0;
	and	%o4,-8,%o4		! (5_1) signy0 &= -8;
	fmuld	%f36,%f24,%f36		! (3_1) dtmp0 *= x20;

	fmuld	%f38,%f12,%f12		! (0_1) x0 = dtmp0 * x0;
	and	%l4,_0x7fffffff,%l7	! (0_0) ay0 = uy0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f22,K4,%f22		! (2_1) dtmp0 += K4;

	and	%l3,_0x7fffffff,%l6	! (0_0) ax0 = ux0 & 0x7fffffff;
	fmuld	%f40,%f18,%f38		! (1_1) dtmp0 *= x20;

	cmp	%l7,%o5
	bl,pn	%icc,.up4
	fmuld	K9,%f4,%f40		! (4_1) dtmp0 = K9 * x20;
.co4:
	nop
	cmp	%l6,%o5
	bl,pn	%icc,.up5
	faddd	%f36,K6,%f20		! (3_1) dtmp0 += K6;
.co5:
	ldd	[%o2+%o4],%f36		! (5_1) cadd0 = *(double*)(ltmp0 + signy0);
	cmp	%l7,_0x7f800000
	bge,pn	%icc,.up6
	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;
.co6:
	sub	%l6,%l7,%o2		! (0_0) ldiff0 = ax0 - ay0;
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.up7

	fmuld	%f0,%f12,%f12		! (0_1) dtmp0 = cmul0 * x0;
.co7:
	sra	%o2,31,%l7		! (0_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (0_0) addrc0 = (char*)px - (char*)py;
	faddd	%f38,K1,%f38		! (1_1) dtmp0 += K1;

	faddd	%f40,K8,%f40		! (4_1) dtmp0 += K8;
	and	%l6,%l7,%o2		! (0_0) addrc0 &= ldiff0;
	fmuld	%f20,%f24,%f20		! (3_1) dtmp0 *= x20;

	lda	[%i1+%o2]0x82,%f0	! (0_0) fy0 = *(float*)((char*)py + addrc0);
	sll	%g1,3,%g1		! (1_1) cmul0_ind = ldiff0 << 3;
	sub	%i3,%o2,%o4		! (0_0) (char*)px - addrc0
	faddd	%f22,K3,%f22		! (2_1) dtmp0 += K3;

	lda	[%o4]0x82,%f2		! (0_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%l7,5,%l6		! (0_0) ltmp0 = ldiff0 << 5;
	add	%o1,stridez,%o2		! pz += stridez

	fmuld	%f38,%f18,%f38		! (1_1) dtmp0 *= x20;
	cmp	%o5,_0x7f800000		! (0_0) b0 ? 0x7f800000
	bge,pn	%icc,.update11		! (0_0) if ( b0 > 0x7f800000 )
	faddd	%f34,%f12,%f18		! (0_1) dtmp0 = cadd0 + dtmp0;
.cont11:
	fmuld	%f40,%f4,%f34		! (4_1) dtmp0 *= x20;
	sra	%l3,27,%o5		! (0_0) signx0 = ux0 >> 27;
	add	%i3,stridex,%i3		! px += stridex
	fstod	%f0,%f40		! (0_0) y0 = (double)fy0;

	faddd	%f20,K5,%f12		! (3_1) dtmp0 += K5;
	add	%i1,stridey,%i1		! py += stridey
	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;

	lda	[%i1]0x82,%l3		! (1_0) uy0 = *(int*)py;
	sra	%l4,28,%o4		! (0_0) signy0 = uy0 >> 28;
	add	%l6,cadd_arr,%l6	! (0_0) ltmp0 += (char*)cadd_arr;
	fstod	%f2,%f2			! (0_0) x0 = (double)fx0;
.den1:
	lda	[%i3]0x82,%l4		! (1_0) ux0 = *(int*)px;
	and	%o5,-16,%o5		! (0_0) signx0 &= -16;
	faddd	%f34,K7,%f34		! (4_1) dtmp0 += K7;

	fmuld	%f12,%f24,%f20		! (3_1) dtmp0 *= x20;
	and	%o4,-8,%o4		! (0_0) signy0 &= -8;
	faddd	%f38,K0,%f38		! (1_1) dtmp0 += K0;

	fdivd	%f40,%f2,%f12		! (0_0) x0 = y0 / x0;
	faddd	%f22,K2,%f40		! (2_1) dtmp0 += K2;

	fdtos	%f18,%f2		! (0_1) ftmp0 = (float)dtmp0;
	nop
	st	%f2,[%o1]		! (0_1) *pz = ftmp0
	fmuld	%f14,%f14,%f22		! (5_1) x20 = x0 * x0;

	ldd	[cmul_arr+%g1],%f0	! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
	add	%l6,%o5,%o1		! (0_0) ltmp0 += signx0;
	fmuld	%f34,%f4,%f34		! (4_1) dtmp0 *= x20;

	fmuld	%f38,%f10,%f10		! (1_1) x0 = dtmp0 * x0;
	and	%l4,_0x7fffffff,%l6	! (1_0) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f20,K4,%f20		! (3_1) dtmp0 += K4;

	and	%l3,_0x7fffffff,%g1	! (1_0) ay0 = uy0 & 0x7fffffff;
	fmuld	%f40,%f16,%f38		! (2_1) dtmp0 *= x20;

	cmp	%l6,%o5
	bl,pn	%icc,.up8
	fmuld	K9,%f22,%f40		! (5_1) dtmp0 = K9 * x20;
.co8:
	nop
	cmp	%g1,%o5
	bl,pn	%icc,.up9
	faddd	%f34,K6,%f18		! (4_1) dtmp0 += K6;
.co9:
	ldd	[%o1+%o4],%f34		! (0_0) cadd0 = *(double*)(ltmp0 + signy0);
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.up10
	fmuld	%f20,%f24,%f20		! (3_1) dtmp0 *= x20;
.co10:
	sub	%l6,%g1,%o1		! (1_0) ldiff0 = ax0 - ay0;
	cmp	%g1,_0x7f800000
	bge,pn	%icc,.up11

	fmuld	%f0,%f10,%f10		! (1_1) dtmp0 = cmul0 * x0;
.co11:
	sra	%o1,31,%g1		! (1_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (1_0) addrc0 = (char*)px - (char*)py;
	faddd	%f38,K1,%f38		! (2_1) dtmp0 += K1;

	faddd	%f40,K8,%f40		! (5_1) dtmp0 += K8;
	and	%l6,%g1,%o1		! (1_0) addrc0 &= ldiff0;
	fmuld	%f18,%f4,%f18		! (4_1) dtmp0 *= x20;

	lda	[%i1+%o1]0x82,%f0	! (1_0) fy0 = *(float*)((char*)py + addrc0);
	sll	%g5,3,%g5		! (2_1) cmul0_ind = ldiff0 << 3;
	sub	%i3,%o1,%o4		! (1_0) (char*)px - addrc0;
	faddd	%f20,K3,%f20		! (3_1) dtmp0 += K3;

	lda	[%o4]0x82,%f2		! (1_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%g1,5,%l6		! (1_0) ltmp0 = ldiff0 << 5;
	add	%o2,stridez,%o1		! pz += stridez

	fmuld	%f38,%f16,%f38		! (2_1) dtmp0 *= x20;
	cmp	%o5,_0x7f800000		! (1_0) b0 ? 0x7f800000
	bge,pn	%icc,.update12		! (1_0) if ( b0 > 0x7f800000 )
	faddd	%f32,%f10,%f16		! (1_1) dtmp0 = cadd0 + dtmp0;
.cont12:
	fmuld	%f40,%f22,%f32		! (5_1) dtmp0 *= x20;
	add	%i1,stridey,%i1		! py += stridey
	nop
	fstod	%f0,%f40		! (1_0) y0 = (double)fy0;

	faddd	%f18,K5,%f10		! (4_1) dtmp0 += K5;
	sra	%l4,27,%o5		! (1_0) signx0 = ux0 >> 27;
	add	%i3,stridex,%i3		! px += stridex
	fmuld	%f20,%f24,%f20		! (3_1) dtmp0 *= x20;

	sra	%l3,28,%o4		! (1_0) signy0 = uy0 >> 28;
	add	%l6,cadd_arr,%l6	! (1_0) ltmp0 += (char*)cadd_arr;
	fstod	%f2,%f2			! (1_0) x0 = (double)fx0;
.den2:
	faddd	%f32,K7,%f32		! (5_1) dtmp0 += K7;
	and	%o5,-16,%o5		! (1_0) signx0 &= -16;
	and	%o4,-8,%o4		! (1_0) signy0 &= -8;

	lda	[%i1]0x82,%l4		! (2_0) uy0 = *(int*)py;
	fmuld	%f10,%f4,%f18		! (4_1) dtmp0 *= x20;
	faddd	%f38,K0,%f38		! (2_1) dtmp0 += K0;

	lda	[%i3]0x82,%l3		! (2_0) ux0 = *(int*)px;
	fdivd	%f40,%f2,%f10		! (1_0) x0 = y0 / x0;
	faddd	%f20,K2,%f40		! (3_1) dtmp0 += K2;

	fdtos	%f16,%f2		! (1_1) ftmp0 = (float)dtmp0;
	nop
	st	%f2,[%o2]		! (1_1) *pz = ftmp0;
	fmuld	%f12,%f12,%f20		! (0_0) x20 = x0 * x0;

	ldd	[cmul_arr+%g5],%f0	! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
	add	%l6,%o5,%o2		! (1_0) ltmp0 += signx0;
	fmuld	%f32,%f22,%f32		! (5_1) dtmp0 *= x20;

	fmuld	%f38,%f8,%f8		! (2_1) x0 = dtmp0 * x0;
	and	%l3,_0x7fffffff,%l6	! (2_0) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f18,K4,%f18		! (4_1) dtmp0 += K4;

	and	%l4,_0x7fffffff,%g5	! (2_0) ay0 = uy0 & 0x7fffffff;
	fmuld	%f40,%f24,%f38		! (3_1) dtmp0 *= x20;

	cmp	%l6,%o5
	bl,pn	%icc,.up12
	fmuld	K9,%f20,%f40		! (0_0) dtmp0 = K9 * x20;
.co12:
	nop
	cmp	%g5,%o5
	bl,pn	%icc,.up13
	faddd	%f32,K6,%f16		! (5_1) dtmp0 += K6;
.co13:
	ldd	[%o2+%o4],%f32		! (1_0) cadd0 = *(double*)(ltmp0 + signy0);
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.up14
	fmuld	%f18,%f4,%f18		! (4_1) dtmp0 *= x20;
.co14:
	sub	%l6,%g5,%o2		! (2_0) ldiff0 = ax0 - ay0;
	cmp	%g5,_0x7f800000
	bge,pn	%icc,.up15

	fmuld	%f0,%f8,%f8		! (2_1) dtmp0 = cmul0 * x0;
.co15:
	sra	%o2,31,%g5		! (2_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (2_0) addrc0 = (char*)px - (char*)py;
	faddd	%f38,K1,%f38		! (3_1) dtmp0 += K1;

	faddd	%f40,K8,%f40		! (0_0) dtmp0 += K8;
	and	%l6,%g5,%o2		! (2_0) addrc0 &= ldiff0;
	fmuld	%f16,%f22,%f16		! (5_1) dtmp0 *= x20;

	lda	[%i1+%o2]0x82,%f0	! (2_0) fy0 = *(float*)((char*)py + addrc0);
	sub	%i3,%o2,%o4		! (2_0) (char*)px - addrc0;
	add	%o1,stridez,%o2		! pz += stridez
	faddd	%f18,K3,%f18		! (4_1) dtmp0 += K3;

	lda	[%o4]0x82,%f2		! (2_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%o0,3,%o0		! (3_1) cmul0_ind = ldiff0 << 3;
	add	%i3,stridex,%i3		! px += stridex

	fmuld	%f38,%f24,%f38		! (3_1) dtmp0 *= x20;
	cmp	%o5,_0x7f800000		! (2_0) b0 ? 0x7f800000
	bge,pn	%icc,.update13		! (2_0) if ( b0 > 0x7f800000 )
	faddd	%f30,%f8,%f24		! (2_1) dtmp0 = cadd0 + dtmp0;
.cont13:
	fmuld	%f40,%f20,%f30		! (0_0) dtmp0 *= x20;
	sll	%g5,5,%l6		! (2_0) ltmp0 = ldiff0 << 5;
	add	%i1,stridey,%i1		! py += stridey
	fstod	%f0,%f40		! (2_0) y0 = (double)fy0;

	faddd	%f16,K5,%f8		! (5_1) dtmp0 += K5;
	sra	%l3,27,%o5		! (2_0) signx0 = ux0 >> 27;
	fmuld	%f18,%f4,%f18		! (4_1) dtmp0 *= x20;

	fstod	%f2,%f2			! (2_0) x0 = (double)fx0;
	sra	%l4,28,%o4		! (2_0) signy0 = uy0 >> 28;
	add	%l6,cadd_arr,%l6	! (2_0) ltmp0 += (char*)cadd_arr;
.den3:
	lda	[%i1]0x82,%l3		! (3_0) uy0 = *(int*)py;
	and	%o5,-16,%o5		! (2_0) signx0 &= -16;
	faddd	%f30,K7,%f30		! (0_0) dtmp0 += K7;

	lda	[%i3]0x82,%l4		! (3_0) ux0 = *(int*)px;
	fmuld	%f8,%f22,%f16		! (5_1) dtmp0 *= x20;
	faddd	%f38,K0,%f38		! (3_1) dtmp0 += K0;

	fdivd	%f40,%f2,%f8		! (2_0) x0 = y0 / x0;
	faddd	%f18,K2,%f40		! (4_1) dtmp0 += K2;

	fdtos	%f24,%f1		! (2_1) ftmp0 = (float)dtmp0;
	st	%f1,[%o1]		! (2_1) *pz = ftmp0;
	fmuld	%f10,%f10,%f18		! (1_0) x20 = x0 * x0;

	ldd	[cmul_arr+%o0],%f2	! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
	add	%l6,%o5,%o1		! (2_0) ltmp0 += signx0;
	and	%o4,-8,%o4		! (2_0) signy0 &= -8;
	fmuld	%f30,%f20,%f30		! (0_0) dtmp0 *= x20;

	fmuld	%f38,%f6,%f6		! (3_1) x0 = dtmp0 * x0;
	and	%l4,_0x7fffffff,%l6	! (3_0) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f16,K4,%f24		! (5_1) dtmp0 += K4;

	and	%l3,_0x7fffffff,%o0	! (3_0) ay0 = uy0 & 0x7fffffff;
	fmuld	%f40,%f4,%f38		! (4_1) dtmp0 *= x20;

	cmp	%l6,%o5
	bl,pn	%icc,.up16
	fmuld	K9,%f18,%f40		! (1_0) dtmp0 = K9 * x20;
.co16:
	nop
	cmp	%o0,%o5
	bl,pn	%icc,.up17
	faddd	%f30,K6,%f16		! (0_0) dtmp0 += K6;
.co17:
	ldd	[%o1+%o4],%f30		! (2_0) cadd0 = *(double*)(ltmp0 + signy0);
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.up18
	fmuld	%f24,%f22,%f24		! (5_1) dtmp0 *= x20;
.co18:
	sub	%l6,%o0,%o1		! (3_0) ldiff0 = ax0 - ay0;
	cmp	%o0,_0x7f800000
	bge,pn	%icc,.up19

	fmuld	%f2,%f6,%f6		! (3_1) dtmp0 = cmul0 * x0;
.co19:
	sra	%o1,31,%o0		! (3_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (3_0) addrc0 = (char*)px - (char*)py;
	faddd	%f38,K1,%f38		! (4_1) dtmp0 += K1;

	faddd	%f40,K8,%f40		! (1_0) dtmp0 += K8;
	and	%l6,%o0,%o1		! (3_0) addrc0 &= ldiff0;
	fmuld	%f16,%f20,%f16		! (0_0) dtmp0 *= x20;

	lda	[%i1+%o1]0x82,%f0	! (3_0) fy0 = *(float*)((char*)py + addrc0);
	sub	%i3,%o1,%o4		! (3_0) (char*)px - addrc0;
	add	%o2,stridez,%o1		! pz += stridez
	faddd	%f24,K3,%f24		! (5_1) dtmp0 += K3;

	lda	[%o4]0x82,%f1		! (3_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%l5,3,%l5		! (4_1) cmul0_ind = ldiff0 << 3;
	add	%i3,stridex,%i3		! px += stridex

	fmuld	%f38,%f4,%f38		! (4_1) dtmp0 *= x20;
	cmp	%o5,_0x7f800000		! (3_0) b0 ? 0x7f800000
	bge,pn	%icc,.update14		! (3_0) if ( b0 > 0x7f800000 )
	faddd	%f28,%f6,%f4		! (3_1) dtmp0 = cadd0 + dtmp0;
.cont14:
	fmuld	%f40,%f18,%f28		! (1_0) dtmp0 *= x20;
	sll	%o0,5,%l6		! (3_0) ltmp0 = ldiff0 << 5;
	add	%i1,stridey,%i1		! py += stridey
	fstod	%f0,%f40		! (3_0) y0 = (double)fy0;

	faddd	%f16,K5,%f2		! (0_0) dtmp0 += K5;
	sra	%l4,27,%o5		! (3_0) signx0 = ux0 >> 27;
	fmuld	%f24,%f22,%f24		! (5_1) dtmp0 *= x20;

	sra	%l3,28,%o4		! (3_0) signy0 = uy0 >> 28;
	fstod	%f1,%f16		! (3_0) x0 = (double)fx0;
.den4:
	faddd	%f28,K7,%f28		! (1_0) dtmp0 += K7;
	add	%l6,cadd_arr,%l6	! (3_0) ltmp0 += (char*)cadd_arr;
	and	%o5,-16,%o5		! (3_0) signx0 &= -16;

	lda	[%i1]0x82,%l4		! (4_0) uy0 = *(int*)py;
	fmuld	%f2,%f20,%f2		! (0_0) dtmp0 *= x20;
	faddd	%f38,K0,%f38		! (4_1) dtmp0 += K0;

	lda	[%i3]0x82,%l3		! (4_0) ux0 = *(int*)px;
	fdivd	%f40,%f16,%f6		! (3_0) x0 = y0 / x0;
	faddd	%f24,K2,%f24		! (5_1) dtmp0 += K2;

	fdtos	%f4,%f1			! (3_1) ftmp0 = (float)dtmp0;
	and	%o4,-8,%o4		! (3_0) signy0 &= -8;
	st	%f1,[%o2]		! (3_1) *pz = ftmp0;
	fmuld	%f8,%f8,%f16		! (2_0) x20 = x0 * x0;

	ldd	[cmul_arr+%l5],%f0	! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
	add	%l6,%o5,%o2		! (3_0) ltmp0 += signx0;
	fmuld	%f28,%f18,%f28		! (1_0) dtmp0 *= x20;

	fmuld	%f38,%f62,%f62		! (4_1) x0 = dtmp0 * x0;
	and	%l3,_0x7fffffff,%l6	! (4_0) ax0 = ux0 & 0x7fffffff;
	sethi	%hi(0x00800000),%o5
	faddd	%f2,K4,%f2		! (0_0) dtmp0 += K4;

	and	%l4,_0x7fffffff,%l5	! (4_0) ay0 = uy0 & 0x7fffffff;
	fmuld	%f24,%f22,%f38		! (5_1) dtmp0 *= x20;

	cmp	%l6,%o5
	bl,pn	%icc,.up20
	fmuld	K9,%f16,%f40		! (2_0) dtmp0 = K9 * x20;
.co20:
	nop
	cmp	%l5,%o5
	bl,pn	%icc,.up21
	faddd	%f28,K6,%f4		! (1_0) dtmp0 += K6;
.co21:
	ldd	[%o2+%o4],%f28		! (3_0) cadd0 = *(double*)(ltmp0 + signy0);
	cmp	%l6,_0x7f800000
	bge,pn	%icc,.up22
	fmuld	%f2,%f20,%f24		! (0_0) dtmp0 *= x20;
.co22:
	sub	%l6,%l5,%o2		! (4_0) ldiff0 = ax0 - ay0;
	cmp	%l5,_0x7f800000
	bge,pn	%icc,.up23

	fmuld	%f0,%f62,%f62		! (4_1) dtmp0 = cmul0 * x0;
.co23:
	sra	%o2,31,%l5		! (4_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (4_0) addrc0 = (char*)px - (char*)py;
	faddd	%f38,K1,%f38		! (5_1) dtmp0 += K1;

	faddd	%f40,K8,%f40		! (2_0) dtmp0 += K8;
	and	%l6,%l5,%o2		! (4_0) addrc0 &= ldiff0;
	fmuld	%f4,%f18,%f4		! (1_0) dtmp0 *= x20;

	lda	[%i1+%o2]0x82,%f0	! (4_0) fy0 = *(float*)((char*)py + addrc0);
	sub	%i3,%o2,%o4		! (4_0) (char*)px - addrc0;
	add	%o1,stridez,%o2		! pz += stridez
	faddd	%f24,K3,%f24		! (0_0) dtmp0 += K3;

	lda	[%o4]0x82,%f2		! (4_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%o7,3,%o7		! (5_1) cmul0_ind = ldiff0 << 3;
	add	%i3,stridex,%i3		! px += stridex

	fmuld	%f38,%f22,%f38		! (5_1) dtmp0 *= x20;
	cmp	%o5,_0x7f800000		! (4_0) b0 ? 0x7f800000
	bge,pn	%icc,.update15		! (4_0) if ( b0 > 0x7f800000 )
	faddd	%f26,%f62,%f22		! (4_1) dtmp0 = cadd0 + dtmp0;
.cont15:
	fmuld	%f40,%f16,%f26		! (2_0) dtmp0 *= x20;
	sll	%l5,5,%l6		! (4_0) ltmp0 = ldiff0 << 5;
	add	%i1,stridey,%i1		! py += stridey
	fstod	%f0,%f40		! (4_0) y0 = (double)fy0;

	faddd	%f4,K5,%f62		! (1_0) dtmp0 += K5;
	sra	%l3,27,%o5		! (4_0) signx0 = ux0 >> 27;
	fmuld	%f24,%f20,%f24		! (0_0) dtmp0 *= x20;

	fstod	%f2,%f2			! (4_0) x0 = (double)fx0;
	sra	%l4,28,%o4		! (4_0) signy0 = uy0 >> 28;
.den5:
	lda	[%i1]0x82,%l3		! (5_0) uy0 = *(int*)py;
	subcc	counter,6,counter	! counter?
	add	%l6,cadd_arr,%l6	! (4_0) ltmp0 += (char*)cadd_arr;
	faddd	%f26,K7,%f26		! (2_0) dtmp0 += K7;

	fmuld	%f62,%f18,%f4		! (1_0) dtmp0 *= x20;
	and	%o5,-16,%o5		! (4_0) signx0 &= -16;
	bpos,pt	%icc,.main_loop
	faddd	%f38,K0,%f38		! (5_1) dtmp0 += K0;

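! Pipeline drain: fewer than six elements remain, so finish the results
! still in flight one element at a time, returning to .begin whenever
! the count runs out.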
.tail:
	addcc	counter,5,counter
	bneg,a,pn %icc,.begin
	or	%g0,%o1,%o4

	faddd	%f24,K2,%f40		! (0_1) dtmp0 += K2;

	fdtos	%f22,%f22		! (4_2) ftmp0 = (float)dtmp0;
	st	%f22,[%o1]		! (4_2) *pz = ftmp0;

	subcc	counter,1,counter
	bneg,a,pn %icc,.begin
	or	%g0,%o2,%o4

	ldd	[cmul_arr+%o7],%f0	! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
	fmuld	%f26,%f16,%f26		! (2_1) dtmp0 *= x20;

	fmuld	%f38,%f14,%f14		! (5_2) x0 = dtmp0 * x0;
	faddd	%f4,K4,%f4		! (1_1) dtmp0 += K4;

	fmuld	%f40,%f20,%f38		! (0_1) dtmp0 *= x20;


	faddd	%f26,K6,%f22		! (2_1) dtmp0 += K6;

	fmuld	%f4,%f18,%f4		! (1_1) dtmp0 *= x20;

	fmuld	%f0,%f14,%f14		! (5_2) dtmp0 = cmul0 * x0;
	faddd	%f38,K1,%f38		! (0_1) dtmp0 += K1;

	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;

	faddd	%f4,K3,%f4		! (1_1) dtmp0 += K3;

	fmuld	%f38,%f20,%f38		! (0_1) dtmp0 *= x20;
	faddd	%f36,%f14,%f20		! (5_2) dtmp0 = cadd0 + dtmp0;

	faddd	%f22,K5,%f14		! (2_1) dtmp0 += K5;
	add	%o2,stridez,%o1		! pz += stridez
	fmuld	%f4,%f18,%f4		! (1_1) dtmp0 *= x20;

	sll	%l7,3,%l7		! (0_1) cmul0_ind = ldiff0 << 3;

	fmuld	%f14,%f16,%f22		! (2_1) dtmp0 *= x20;
	faddd	%f38,K0,%f38		! (0_1) dtmp0 += K0;

	faddd	%f4,K2,%f40		! (1_1) dtmp0 += K2;

	fdtos	%f20,%f2		! (5_2) ftmp0 = (float)dtmp0;
	st	%f2,[%o2]		! (5_2) *pz = ftmp0;

	subcc	counter,1,counter
	bneg,a,pn %icc,.begin
	or	%g0,%o1,%o4

	ldd	[cmul_arr+%l7],%f0	! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);

	fmuld	%f38,%f12,%f12		! (0_1) x0 = dtmp0 * x0;
	faddd	%f22,K4,%f22		! (2_1) dtmp0 += K4;

	fmuld	%f40,%f18,%f38		! (1_1) dtmp0 *= x20;

	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;

	fmuld	%f0,%f12,%f12		! (0_1) dtmp0 = cmul0 * x0;
	faddd	%f38,K1,%f38		! (1_1) dtmp0 += K1;

	sll	%g1,3,%g1		! (1_1) cmul0_ind = ldiff0 << 3;
	faddd	%f22,K3,%f22		! (2_1) dtmp0 += K3;

	add	%o1,stridez,%o2		! pz += stridez

	fmuld	%f38,%f18,%f38		! (1_1) dtmp0 *= x20;
	faddd	%f34,%f12,%f18		! (0_1) dtmp0 = cadd0 + dtmp0;

	fmuld	%f22,%f16,%f22		! (2_1) dtmp0 *= x20;

	faddd	%f38,K0,%f38		! (1_1) dtmp0 += K0;

	faddd	%f22,K2,%f40		! (2_1) dtmp0 += K2;

	fdtos	%f18,%f2		! (0_1) ftmp0 = (float)dtmp0;
	st	%f2,[%o1]		! (0_1) *pz = ftmp0

	subcc	counter,1,counter
	bneg,a,pn %icc,.begin
	or	%g0,%o2,%o4

	ldd	[cmul_arr+%g1],%f0	! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);

	fmuld	%f38,%f10,%f10		! (1_1) x0 = dtmp0 * x0;

	fmuld	%f40,%f16,%f38		! (2_1) dtmp0 *= x20;

	fmuld	%f0,%f10,%f10		! (1_1) dtmp0 = cmul0 * x0;
	faddd	%f38,K1,%f38		! (2_1) dtmp0 += K1;

	sll	%g5,3,%g5		! (2_1) cmul0_ind = ldiff0 << 3;

	add	%o2,stridez,%o1		! pz += stridez

	fmuld	%f38,%f16,%f38		! (2_1) dtmp0 *= x20;
	faddd	%f32,%f10,%f16		! (1_1) dtmp0 = cadd0 + dtmp0;

	faddd	%f38,K0,%f38		! (2_1) dtmp0 += K0;

	fdtos	%f16,%f2		! (1_1) ftmp0 = (float)dtmp0;
	st	%f2,[%o2]		! (1_1) *pz = ftmp0;

	subcc	counter,1,counter
	bneg,a,pn %icc,.begin
	or	%g0,%o1,%o4

	ldd	[cmul_arr+%g5],%f0	! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);

	fmuld	%f38,%f8,%f8		! (2_1) x0 = dtmp0 * x0;

	fmuld	%f0,%f8,%f8		! (2_1) dtmp0 = cmul0 * x0;

	add	%o1,stridez,%o2		! pz += stridez

	faddd	%f30,%f8,%f24		! (2_1) dtmp0 = cadd0 + dtmp0;

	fdtos	%f24,%f1		! (2_1) ftmp0 = (float)dtmp0;
	st	%f1,[%o1]		! (2_1) *pz = ftmp0;

	ba	.begin
	or	%g0,%o2,%o4

	.align	16
.spec0:
	cmp	%l6,_0x7f800000		! ax0 ? 0x7f800000
	bg	2f			! if ( ax0 > 0x7f800000 )
	srl	%l3,30,%l3		! signx0 = (unsigned)ux0 >> 30;

	cmp	%l7,_0x7f800000		! ay0 ? 0x7f800000
	bg	2f			! if ( ay0 > 0x7f800000 )
	and	%l3,2,%l3		! signx0 &= 2;

	sra	%l4,31,%l4		! signy0 = uy0 >> 31;
	bne,a	1f			! if (ay0 != 0x7f800000)
	add	%l3,%l3,%l3		! signx0 += signx0;

	cmp	%l6,_0x7f800000		! ax0 ? 0x7f800000
	bne,a	1f			! if ( ax0 != 0x7f800000 )
	add	%g0,2,%l3		! signx0 = 2

	add	%l3,1,%l3		! signx0 ++;
1:
	sll	%l4,3,%l4		! signy0 <<= 3;
	st	%l3,[%fp+tmp_pz]	! STORE signx0

	ldd	[cmul_arr+88],%f0	! LOAD M_PI_4

	ld	[%fp+tmp_pz],%f2	!