/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * This file is mostly a result of compiling the mont_mulf.c file to generate an * assembly output and then hand-editing that output to replace the * compiler-generated loop for the 512-bit case (nlen == 16) in the * mont_mulf_noconv routine with a hand-crafted version. This file also * has big_savefp() and big_restorefp() routines added by hand. */ #include #include #include #include #include #include #include #include #include .section ".text",#alloc,#execinstr .file "mont_mulf.s" .section ".bss",#alloc,#write Bbss.bss: .section ".data",#alloc,#write Ddata.data: .section ".rodata",#alloc ! ! CONSTANT POOL ! Drodata.rodata: .global TwoTo16 .align 8 ! ! CONSTANT POOL ! .global TwoTo16 TwoTo16: .word 1089470464 .word 0 .type TwoTo16,#object .size TwoTo16,8 .global TwoToMinus16 ! ! CONSTANT POOL ! .global TwoToMinus16 TwoToMinus16: .word 1055916032 .word 0 .type TwoToMinus16,#object .size TwoToMinus16,8 .global Zero ! ! CONSTANT POOL ! .global Zero Zero: .word 0 .word 0 .type Zero,#object .size Zero,8 .global TwoTo32 ! ! CONSTANT POOL ! 
.global TwoTo32 TwoTo32: .word 1106247680 .word 0 .type TwoTo32,#object .size TwoTo32,8 /* TwoTo32 above is the word pair 0x41F00000,0: the IEEE-754 double 2^32 (65536.0 * 65536.0 in the quoted C source). */ .global TwoToMinus32 ! ! CONSTANT POOL ! .global TwoToMinus32 TwoToMinus32: .word 1039138816 .word 0 .type TwoToMinus32,#object .size TwoToMinus32,8 /* TwoToMinus32 above is the word pair 0x3DF00000,0: the IEEE-754 double 2^-32. */ .section ".text",#alloc,#execinstr /* 000000 0 */ .register %g3,#scratch /* 000000 */ .register %g2,#scratch /* 000000 0 */ .align 32 ! FILE mont_mulf.c ! 1 !/* ! 2 ! * Copyright 2005 Sun Microsystems, Inc. All rights reserved. ! 3 ! * Use is subject to license terms. ! 4 ! */ ! 6 !#pragma ident "@(#)mont_mulf.c 1.2 01/09/24 SMI" ! 9 !/* ! 10 ! * If compiled without -DRF_INLINE_MACROS then needs -lm at link time ! 11 ! * If compiled with -DRF_INLINE_MACROS then needs conv.il at compile time ! 12 ! * (i.e. cc -DRF_INLINE_MACROS conv.il mont_mulf.c ) ! 13 ! */ ! 15 !#include ! 16 !#include ! 18 !static const double TwoTo16 = 65536.0; ! 19 !static const double TwoToMinus16 = 1.0/65536.0; ! 20 !static const double Zero = 0.0; ! 21 !static const double TwoTo32 = 65536.0 * 65536.0; ! 22 !static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0); ! 24 !#ifdef RF_INLINE_MACROS ! 26 !double upper32(double); ! 27 !double lower32(double, double); ! 28 !double mod(double, double, double); ! 30 !#else ! 32 !static double ! 33 !upper32(double x) ! 34 !{ ! 35 ! return (floor(x * TwoToMinus32)); ! 36 !} ! 39 !/* ARGSUSED */ ! 40 !static double ! 41 !lower32(double x, double y) ! 42 !{ ! 43 ! return (x - TwoTo32 * floor(x * TwoToMinus32)); ! 44 !} ! 46 !static double ! 47 !mod(double x, double oneoverm, double m) ! 48 !{ ! 49 ! return (x - m * floor(x * oneoverm)); ! 50 !} ! 52 !#endif ! 55 !static void ! 56 !cleanup(double *dt, int from, int tlen) ! 57 !{ ! ! SUBROUTINE cleanup ! ! OFFSET SOURCE LINE LABEL INSTRUCTION /* cleanup(dt, from, tlen): leaf routine (no save), file-local (no .global). %o0 = dt, %o1 = from, %o2 = tlen. The entry code sign-extends the two int arguments and forms the loop bound 2 * tlen in %g5. */ cleanup: /* 000000 57 */ sra %o1,0,%o4 /* 0x0004 */ sra %o2,0,%o5 ! 58 ! int i; ! 59 ! double tmp, tmp1, x, x1; ! 61 ! tmp = tmp1 = Zero; /* 0x0008 61 */ sll %o5,1,%g5 ! 63 ! 
for (i = 2 * from; i < 2 * tlen; i += 2) { /* 0x000c 63 */ sll %o4,1,%g3 /* 0x0010 */ cmp %g3,%g5 /* 0x0014 */ bge,pn %icc,.L77000188 /* 0x0018 0 */ sethi %hi(Zero),%o3 /* Loop preheader: %f8 = Zero, %g1 = address of dt[2 * from], %g2 = 2 * tlen - 1; the carries tmp (%f18) and tmp1 (%f16) start at Zero. */ .L77000197: /* 0x001c 63 */ ldd [%o3+%lo(Zero)],%f8 /* 0x0020 */ sra %g3,0,%o1 /* 0x0024 */ sub %g5,1,%g2 /* 0x0028 */ sllx %o1,3,%g4 ! 64 ! x = dt[i]; /* 0x002c 64 */ ldd [%g4+%o0],%f10 /* 0x0030 63 */ add %g4,%o0,%g1 /* 0x0034 */ fmovd %f8,%f18 /* 0x0038 */ fmovd %f8,%f16 ! 65 ! x1 = dt[i + 1]; ! 66 ! dt[i] = lower32(x, Zero) + tmp; /* Loop body: each pass rewrites dt[i] and dt[i + 1], then advances %g3 by 2 and %g1 by 16 bytes. The closing branch is annulled, so the delay-slot ldd of the next dt[i] only executes when the loop repeats. */ .L900000110: /* 0x003c 66 */ fdtox %f10,%f0 /* 0x0040 65 */ ldd [%g1+8],%f12 ! 67 ! dt[i + 1] = lower32(x1, Zero) + tmp1; ! 68 ! tmp = upper32(x); ! 69 ! tmp1 = upper32(x1); /* 0x0044 69 */ add %g3,2,%g3 /* 0x0048 */ cmp %g3,%g2 /* 0x004c 67 */ fdtox %f12,%f2 /* 0x0050 68 */ fmovd %f0,%f4 /* 0x0054 66 */ fmovs %f8,%f0 /* 0x0058 67 */ fmovs %f8,%f2 /* 0x005c 66 */ fxtod %f0,%f0 /* 0x0060 67 */ fxtod %f2,%f2 /* 0x0064 69 */ fdtox %f12,%f6 /* 0x0068 66 */ faddd %f0,%f18,%f10 /* 0x006c */ std %f10,[%g1] /* 0x0070 67 */ faddd %f2,%f16,%f14 /* 0x0074 */ std %f14,[%g1+8] /* 0x0078 68 */ fitod %f4,%f18 /* 0x007c 69 */ add %g1,16,%g1 /* 0x0080 */ fitod %f6,%f16 /* 0x0084 */ ble,a,pt %icc,.L900000110 /* 0x0088 64 */ ldd [%g1],%f10 /* Common exit; also reached directly from the entry test when 2 * from >= 2 * tlen. */ .L77000188: /* 0x008c 69 */ retl ! Result = /* 0x0090 */ nop /* 0x0094 0 */ .type cleanup,2 /* 0x0094 0 */ .size cleanup,(.-cleanup) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 8 /* 000000 */ .skip 24 /* 0x0018 */ .align 32 ! 70 ! } ! 71 !} ! 75 !#ifdef _KERNEL ! 76 !/* ! 77 ! * This only works if 0 <= d < 2^53 ! 78 ! */ ! 79 !uint64_t ! 80 !double2uint64_t(double* d) ! 81 !{ ! 82 ! uint64_t x; ! 83 ! uint64_t exp; ! 84 ! uint64_t man; ! 86 ! x = *((uint64_t *)d); ! ! SUBROUTINE double2uint64_t ! ! OFFSET SOURCE LINE LABEL INSTRUCTION /* double2uint64_t(d): leaf routine; %o0 = d. The first instruction loads the raw 64-bit image of *d into %o2 with an integer load. */ .global double2uint64_t double2uint64_t: /* 000000 86 */ ldx [%o0],%o2 ! 87 ! 
if (x == 0) { /* 0x0004 87 */ cmp %o2,0 /* 0x0008 */ bne,pn %xcc,.L900000206 /* 0x000c 94 */ sethi %hi(0xfff00000),%o5 /* x == 0: return 0; the or in the retl delay slot clears %o0. */ .L77000202: /* 0x0010 94 */ retl ! Result = %o0 ! 88 ! return (0ULL); /* 0x0014 88 */ or %g0,0,%o0 ! 89 ! } ! 90 ! exp = (x >> 52) - 1023; ! 91 ! man = (x & 0xfffffffffffffULL) | 0x10000000000000ULL; ! 92 ! x = man >> (52 - exp); ! 94 ! return (x); /* Nonzero path: %o3 = ~(0xfff00000 << 32) is the 52-bit mantissa mask, %g4 = 0x40000000 << 22 = 2^52 is the implicit leading bit, and the shift count %g1 = (1023 - (x >> 52)) + 52 equals 52 - exp. The final srlx sits in the retl delay slot and leaves the result in %o0. */ .L900000206: /* 0x0018 94 */ sllx %o5,32,%o4 /* 0x001c */ srlx %o2,52,%o0 /* 0x0020 */ sethi %hi(0x40000000),%o1 /* 0x0024 */ or %g0,1023,%g5 /* 0x0028 */ sllx %o1,22,%g4 /* 0x002c */ xor %o4,-1,%o3 /* 0x0030 */ sub %g5,%o0,%g3 /* 0x0034 */ and %o2,%o3,%g2 /* 0x0038 */ or %g2,%g4,%o5 /* 0x003c */ add %g3,52,%g1 /* 0x0040 */ retl ! Result = %o0 /* 0x0044 */ srlx %o5,%g1,%o0 /* 0x0048 0 */ .type double2uint64_t,2 /* 0x0048 0 */ .size double2uint64_t,(.-double2uint64_t) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 8 /* 000000 */ .skip 24 /* 0x0018 */ .align 32 ! 95 !} ! 96 !#else ! 97 !/* ! 98 ! * This only works if 0 <= d < 2^63 ! 99 ! */ ! 100 !uint64_t ! 101 !double2uint64_t(double* d) ! 102 !{ ! 103 ! return ((int64_t)(*d)); ! 104 !} ! 105 !#endif ! 107 !/* ARGSUSED */ ! 108 !void ! 109 !conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen) ! 110 !{ ! ! SUBROUTINE conv_d16_to_i32 ! ! OFFSET SOURCE LINE LABEL INSTRUCTION /* conv_d16_to_i32(i32, d16, tmp, ilen): non-leaf style entry; the save opens a 176-byte frame, after which the four arguments are in %i0..%i3. */ .global conv_d16_to_i32 conv_d16_to_i32: /* 000000 110 */ save %sp,-176,%sp ! 111 ! int i; ! 112 ! int64_t t, t1, /* using int64_t and not uint64_t */ ! 113 ! a, b, c, d; /* because more efficient code is */ ! 114 ! /* generated this way, and there */ ! 115 ! /* is no overflow */ ! 116 ! t1 = 0; ! 117 ! 
a = double2uint64_t(&(d16[0])); /* The double2uint64_t calls are expanded inline: %o0 and %i2 receive the raw bit images of d16[0] and d16[1]; a ends up in %i4 and b in %i2. The incoming tmp argument in %i2 is overwritten without being read. */ /* 0x0004 117 */ ldx [%i1],%o0 /* 0x0008 118 */ ldx [%i1+8],%i2 /* 0x000c 117 */ cmp %o0,0 /* 0x0010 */ bne,pn %xcc,.L77000216 /* 0x0014 */ or %g0,0,%i4 .L77000215: /* 0x0018 117 */ ba .L900000316 /* 0x001c 118 */ cmp %i2,0 /* d16[0] != 0: mantissa mask, implicit 2^52 bit and right shift by 52 - exp, the same sequence as the standalone double2uint64_t. */ .L77000216: /* 0x0020 117 */ srlx %o0,52,%o5 /* 0x0024 */ sethi %hi(0xfff00000),%i4 /* 0x0028 */ sllx %i4,32,%o2 /* 0x002c */ sethi %hi(0x40000000),%o7 /* 0x0030 */ sllx %o7,22,%o3 /* 0x0034 */ or %g0,1023,%o4 /* 0x0038 */ xor %o2,-1,%g5 /* 0x003c */ sub %o4,%o5,%l0 /* 0x0040 */ and %o0,%g5,%o1 /* 0x0044 */ add %l0,52,%l1 /* 0x0048 */ or %o1,%o3,%g4 ! 118 ! b = double2uint64_t(&(d16[1])); /* 0x004c 118 */ cmp %i2,0 /* 0x0050 117 */ srlx %g4,%l1,%i4 /* Both paths arrive here with the flags set from cmp %i2,0; the delay slot computes %l3 = ilen - 1 on either outcome. */ .L900000316: /* 0x0054 118 */ bne,pn %xcc,.L77000222 /* 0x0058 134 */ sub %i3,1,%l3 .L77000221: /* 0x005c 118 */ or %g0,0,%i2 /* 0x0060 */ ba .L900000315 /* 0x0064 116 */ or %g0,0,%o3 .L77000222: /* 0x0068 118 */ srlx %i2,52,%l6 /* 0x006c */ sethi %hi(0xfff00000),%g4 /* 0x0070 */ sllx %g4,32,%i5 /* 0x0074 */ sethi %hi(0x40000000),%l5 /* 0x0078 */ xor %i5,-1,%l4 /* 0x007c */ or %g0,1023,%l2 /* 0x0080 */ and %i2,%l4,%l7 /* 0x0084 */ sllx %l5,22,%i2 /* 0x0088 */ sub %l2,%l6,%g1 /* 0x008c */ or %l7,%i2,%g3 /* 0x0090 */ add %g1,52,%g2 /* 0x0094 116 */ or %g0,0,%o3 /* 0x0098 118 */ srlx %g3,%g2,%i2 ! 119 ! for (i = 0; i < ilen - 1; i++) { /* Loop guard: the loop is skipped when ilen - 1 <= 0. %l4 is the loop counter i and %o3 carries t1 (zero on entry). */ .L900000315: /* 0x009c 119 */ cmp %l3,0 /* 0x00a0 */ ble,pn %icc,.L77000210 /* 0x00a4 */ or %g0,0,%l4 /* Loop-invariant setup: %l7 = 0x000fffffffffffff (mantissa mask), %l6 = 0xffffffff, %l3 = 2^52, %l2 = 0xffff, %l5 = ilen - 2 (last value of i that repeats the loop), %g2 = index of the next d16 element (starts at 2), %g1 = store pointer into i32. */ .L77000245: /* 0x00a8 118 */ sethi %hi(0xfff00000),%l7 /* 0x00ac */ or %g0,-1,%l6 /* 0x00b0 */ sllx %l7,32,%l3 /* 0x00b4 */ srl %l6,0,%l6 /* 0x00b8 */ sethi %hi(0x40000000),%l1 /* 0x00bc */ sethi %hi(0xfc00),%l2 /* 0x00c0 */ xor %l3,-1,%l7 /* 0x00c4 */ sllx %l1,22,%l3 /* 0x00c8 */ sub %i3,2,%l5 /* 0x00cc */ add %l2,1023,%l2 /* 0x00d0 */ or %g0,2,%g2 /* 0x00d4 */ or %g0,%i0,%g1 ! 120 ! c = double2uint64_t(&(d16[2 * i + 2])); /* Loop top: %g2 indexes d16[2 * i + 2]; its byte offset is formed in %i3. */ .L77000208: /* 0x00d8 120 */ sra %g2,0,%g3 /* 0x00dc 123 */ add %g2,1,%o2 /* 0x00e0 120 */ sllx %g3,3,%i3 ! 121 ! 
t1 += a & 0xffffffff; ! 122 ! t = (a >> 32); ! 123 ! d = double2uint64_t(&(d16[2 * i + 3])); /* 0x00e4 123 */ sra %o2,0,%g5 /* 0x00e8 120 */ ldx [%i1+%i3],%o5 /* 0x00ec 123 */ sllx %g5,3,%o0 /* 0x00f0 121 */ and %i4,%l6,%g4 /* 0x00f4 123 */ ldx [%i1+%o0],%i3 /* 0x00f8 120 */ cmp %o5,0 /* 0x00fc */ bne,pn %xcc,.L77000228 /* 0x0100 124 */ and %i2,%l2,%i5 .L77000227: /* 0x0104 120 */ or %g0,0,%l1 /* 0x0108 */ ba .L900000314 /* 0x010c 121 */ add %o3,%g4,%o0 .L77000228: /* 0x0110 120 */ srlx %o5,52,%o7 /* 0x0114 */ and %o5,%l7,%o5 /* 0x0118 */ or %g0,52,%l0 /* 0x011c */ sub %o7,1023,%o4 /* 0x0120 */ or %o5,%l3,%l1 /* 0x0124 */ sub %l0,%o4,%o1 /* 0x0128 */ srlx %l1,%o1,%l1 /* 0x012c 121 */ add %o3,%g4,%o0 .L900000314: /* 0x0130 122 */ srax %i4,32,%g3 /* 0x0134 123 */ cmp %i3,0 /* 0x0138 */ bne,pn %xcc,.L77000234 /* 0x013c 124 */ sllx %i5,16,%g5 .L77000233: /* 0x0140 123 */ or %g0,0,%o2 /* 0x0144 */ ba .L900000313 /* 0x0148 124 */ add %o0,%g5,%o7 .L77000234: /* 0x014c 123 */ srlx %i3,52,%o2 /* 0x0150 */ and %i3,%l7,%i4 /* 0x0154 */ sub %o2,1023,%o1 /* 0x0158 */ or %g0,52,%g4 /* 0x015c */ sub %g4,%o1,%i5 /* 0x0160 */ or %i4,%l3,%i3 /* 0x0164 */ srlx %i3,%i5,%o2 ! 124 ! t1 += (b & 0xffff) << 16; /* 0x0168 124 */ add %o0,%g5,%o7 ! 125 ! t += (b >> 16) + (t1 >> 32); .L900000313: /* 0x016c 125 */ srax %i2,16,%l0 /* 0x0170 */ srax %o7,32,%o4 /* 0x0174 */ add %l0,%o4,%o3 ! 126 ! i32[i] = t1 & 0xffffffff; ! 127 ! t1 = t; ! 128 ! a = c; ! 129 ! b = d; /* 0x0178 129 */ add %l4,1,%l4 /* 0x017c 126 */ and %o7,%l6,%o5 /* 0x0180 125 */ add %g3,%o3,%o3 /* 0x0184 126 */ st %o5,[%g1] /* 0x0188 128 */ or %g0,%l1,%i4 /* 0x018c 129 */ or %g0,%o2,%i2 /* 0x0190 */ add %g2,2,%g2 /* 0x0194 */ cmp %l4,%l5 /* 0x0198 */ ble,pt %icc,.L77000208 /* 0x019c */ add %g1,4,%g1 ! 130 ! } ! 131 ! t1 += a & 0xffffffff; ! 132 ! t = (a >> 32); ! 133 ! t1 += (b & 0xffff) << 16; ! 134 ! 
i32[i] = t1 & 0xffffffff; /* Tail of conv_d16_to_i32: forms t1 + a + ((b & 0xffff) << 16) and writes it to i32[i] with a 32-bit st, so only the low 32 bits are stored. */ .L77000210: /* 0x01a0 134 */ sra %l4,0,%l4 /* 0x01a4 */ sethi %hi(0xfc00),%i1 /* 0x01a8 */ add %o3,%i4,%l2 /* 0x01ac */ add %i1,1023,%i5 /* 0x01b0 */ and %i2,%i5,%l5 /* 0x01b4 */ sllx %l4,2,%i2 /* 0x01b8 */ sllx %l5,16,%l6 /* 0x01bc */ add %l2,%l6,%l7 /* 0x01c0 */ st %l7,[%i0+%i2] /* 0x01c4 129 */ ret ! Result = /* 0x01c8 */ restore %g0,%g0,%g0 /* 0x01cc 0 */ .type conv_d16_to_i32,2 /* 0x01cc 0 */ .size conv_d16_to_i32,(.-conv_d16_to_i32) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 8 ! ! CONSTANT POOL ! /* Constant pool read by conv_i32_to_d32: the doubleword 0x43300000,0 (2^52 as an IEEE-754 double) followed by a lone high word 0x43300000. The routine pairs that high word with a 32-bit integer in the low half of a double register, giving 2^52 + n, and subtracts 2^52 to obtain n as a double. */ ___const_seg_900000401: /* 000000 0 */ .word 1127219200,0 /* 0x0008 */ .word 1127219200 /* 0x000c 0 */ .type ___const_seg_900000401,1 /* 0x000c 0 */ .size ___const_seg_900000401,(.-___const_seg_900000401) /* 0x000c 0 */ .align 8 /* 0x0010 */ .skip 24 /* 0x0028 */ .align 32 ! 135 !} ! 138 !void ! 139 !conv_i32_to_d32(double *d32, uint32_t *i32, int len) ! 140 !{ ! ! SUBROUTINE conv_i32_to_d32 ! ! OFFSET SOURCE LINE LABEL INSTRUCTION /* conv_i32_to_d32(d32, i32, len): leaf routine (no save); %o0 = d32, %o1 = i32, %o2 = len. Returns immediately when len <= 0; the branch delay slot sets %o3 = len - 1, the last index. */ .global conv_i32_to_d32 conv_i32_to_d32: /* 000000 140 */ orcc %g0,%o2,%o2 ! 141 ! int i; ! 143 !#pragma pipeloop(0) ! 144 ! for (i = 0; i < len; i++) /* 0x0004 144 */ ble,pn %icc,.L77000254 /* 0x0008 */ sub %o2,1,%o3 .L77000263: /* 0x000c 140 */ or %g0,%o0,%o2 ! 145 ! 
d32[i] = (double)(i32[i]); /* 0x0010 145 */ add %o3,1,%o5 /* 0x0014 144 */ or %g0,0,%g5 /* 0x0018 145 */ cmp %o5,10 /* 0x001c */ bl,pn %icc,.L77000261 /* 0x0020 */ sethi %hi(___const_seg_900000401),%g4 .L900000407: /* 0x0024 145 */ prefetch [%o1],0 /* 0x0028 */ prefetch [%o0],22 /* 0x002c */ sethi %hi(___const_seg_900000401+8),%o4 /* 0x0030 */ or %g0,%o0,%o2 /* 0x0034 */ prefetch [%o1+64],0 /* 0x0038 */ add %o1,8,%o0 /* 0x003c */ sub %o3,7,%o5 /* 0x0040 */ prefetch [%o2+64],22 /* 0x0044 */ or %g0,2,%g5 /* 0x0048 */ prefetch [%o2+128],22 /* 0x004c */ prefetch [%o2+192],22 /* 0x0050 */ prefetch [%o1+128],0 /* 0x0054 */ ld [%o4+%lo(___const_seg_900000401+8)],%f2 /* 0x0058 */ ldd [%g4+%lo(___const_seg_900000401)],%f16 /* 0x005c */ fmovs %f2,%f0 /* 0x0060 */ prefetch [%o2+256],22 /* 0x0064 */ prefetch [%o2+320],22 /* 0x0068 */ ld [%o1],%f3 /* 0x006c */ prefetch [%o1+192],0 /* 0x0070 */ ld [%o1+4],%f1 .L900000405: /* 0x0074 145 */ prefetch [%o0+188],0 /* 0x0078 */ fsubd %f2,%f16,%f22 /* 0x007c */ add %g5,8,%g5 /* 0x0080 */ add %o0,32,%o0 /* 0x0084 */ ld [%o4+%lo(___const_seg_900000401+8)],%f4 /* 0x0088 */ std %f22,[%o2] /* 0x008c */ cmp %g5,%o5 /* 0x0090 */ ld [%o0-32],%f5 /* 0x0094 */ fsubd %f0,%f16,%f24 /* 0x0098 */ add %o2,64,%o2 /* 0x009c */ fmovs %f4,%f0 /* 0x00a0 */ std %f24,[%o2-56] /* 0x00a4 */ ld [%o0-28],%f1 /* 0x00a8 */ fsubd %f4,%f16,%f26 /* 0x00ac */ fmovs %f0,%f6 /* 0x00b0 */ prefetch [%o2+312],22 /* 0x00b4 */ std %f26,[%o2-48] /* 0x00b8 */ ld [%o0-24],%f7 /* 0x00bc */ fsubd %f0,%f16,%f28 /* 0x00c0 */ fmovs %f6,%f8 /* 0x00c4 */ std %f28,[%o2-40] /* 0x00c8 */ ld [%o0-20],%f9 /* 0x00cc */ fsubd %f6,%f16,%f30 /* 0x00d0 */ fmovs %f8,%f10 /* 0x00d4 */ std %f30,[%o2-32] /* 0x00d8 */ ld [%o0-16],%f11 /* 0x00dc */ prefetch [%o2+344],22 /* 0x00e0 */ fsubd %f8,%f16,%f48 /* 0x00e4 */ fmovs %f10,%f12 /* 0x00e8 */ std %f48,[%o2-24] /* 0x00ec */ ld [%o0-12],%f13 /* 0x00f0 */ fsubd %f10,%f16,%f50 /* 0x00f4 */ fmovs %f12,%f2 /* 0x00f8 */ std %f50,[%o2-16] /* 0x00fc */ ld 
[%o0-8],%f3 /* 0x0100 */ fsubd %f12,%f16,%f52 /* 0x0104 */ fmovs %f2,%f0 /* 0x0108 */ std %f52,[%o2-8] /* 0x010c */ ble,pt %icc,.L900000405 /* 0x0110 */ ld [%o0-4],%f1 /* Drain of the unrolled conv_i32_to_d32 loop: converts and stores the two values still pending in %f2 and %f0, then exits if the index %g5 has passed the last index %o3. */ .L900000408: /* 0x0114 145 */ fsubd %f2,%f16,%f18 /* 0x0118 */ add %o2,16,%o2 /* 0x011c */ cmp %g5,%o3 /* 0x0120 */ std %f18,[%o2-16] /* 0x0124 */ fsubd %f0,%f16,%f20 /* 0x0128 */ or %g0,%o0,%o1 /* 0x012c */ bg,pn %icc,.L77000254 /* 0x0130 */ std %f20,[%o2-8] /* One-element-per-pass loop of conv_i32_to_d32: handles short inputs and whatever the unrolled loop left over, repeating while %g5 <= %o3. Each pass loads one word from [%o1] into the low half of the %f14 pair and subtracts the 2^52 constant in %f16. */ .L77000261: /* 0x0134 145 */ ld [%o1],%f15 .L900000409: /* 0x0138 145 */ sethi %hi(___const_seg_900000401+8),%o4 /* 0x013c */ ldd [%g4+%lo(___const_seg_900000401)],%f16 /* 0x0140 */ add %g5,1,%g5 /* 0x0144 */ ld [%o4+%lo(___const_seg_900000401+8)],%f14 /* 0x0148 */ add %o1,4,%o1 /* 0x014c */ cmp %g5,%o3 /* 0x0150 */ fsubd %f14,%f16,%f54 /* 0x0154 */ std %f54,[%o2] /* 0x0158 */ add %o2,8,%o2 /* 0x015c */ ble,a,pt %icc,.L900000409 /* 0x0160 */ ld [%o1],%f15 .L77000254: /* 0x0164 145 */ retl ! Result = /* 0x0168 */ nop /* 0x016c 0 */ .type conv_i32_to_d32,2 /* 0x016c 0 */ .size conv_i32_to_d32,(.-conv_i32_to_d32) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 8 ! ! CONSTANT POOL ! /* Constant pool referenced by conv_i32_to_d16: the doubleword 0x43300000,0 (2^52 as an IEEE-754 double) plus a lone 0x43300000 high word. */ ___const_seg_900000501: /* 000000 0 */ .word 1127219200,0 /* 0x0008 */ .word 1127219200 /* 0x000c 0 */ .type ___const_seg_900000501,1 /* 0x000c 0 */ .size ___const_seg_900000501,(.-___const_seg_900000501) /* 0x000c 0 */ .align 8 /* 0x0010 */ .skip 24 /* 0x0028 */ .align 32 ! 146 !} ! 149 !void ! 150 !conv_i32_to_d16(double *d16, uint32_t *i32, int len) ! 151 !{ ! ! SUBROUTINE conv_i32_to_d16 ! ! OFFSET SOURCE LINE LABEL INSTRUCTION /* conv_i32_to_d16(d16, i32, len): the save opens a 368-byte frame, after which %i0 = d16, %i1 = i32, %i2 = len. Returns at once when len <= 0; the branch delay slot sets %l6 = len - 1. */ .global conv_i32_to_d16 conv_i32_to_d16: /* 000000 151 */ save %sp,-368,%sp /* 0x0004 */ orcc %g0,%i2,%i2 ! 152 ! int i; ! 153 ! uint32_t a; ! 155 !#pragma pipeloop(0) ! 156 ! for (i = 0; i < len; i++) { /* 0x0008 156 */ ble,pn %icc,.L77000272 /* 0x000c */ sub %i2,1,%l6 .L77000281: /* 0x0010 156 */ sethi %hi(0xfc00),%i3 ! 157 ! 
a = i32[i]; /* 0x0014 157 */ or %g0,%i2,%l1 /* 0x0018 156 */ add %i3,1023,%i4 /* 0x001c 157 */ cmp %i2,4 /* 0x0020 151 */ or %g0,%i1,%l7 /* 0x0024 */ or %g0,%i0,%i2 /* 0x0028 156 */ or %g0,0,%i5 /* 0x002c */ or %g0,0,%i3 /* 0x0030 157 */ bl,pn %icc,.L77000279 /* 0x0034 0 */ sethi %hi(___const_seg_900000501),%i1 .L900000508: /* 0x0038 157 */ prefetch [%i0+8],22 /* 0x003c */ prefetch [%i0+72],22 /* 0x0040 */ or %g0,%i0,%l2 ! 158 ! d16[2 * i] = (double)(a & 0xffff); /* 0x0044 158 */ sethi %hi(___const_seg_900000501+8),%l1 /* 0x0048 157 */ prefetch [%i0+136],22 /* 0x004c */ sub %l6,1,%i0 /* 0x0050 */ or %g0,0,%i3 /* 0x0054 */ prefetch [%i2+200],22 /* 0x0058 */ or %g0,2,%i5 /* 0x005c */ prefetch [%i2+264],22 /* 0x0060 */ prefetch [%i2+328],22 /* 0x0064 */ prefetch [%i2+392],22 /* 0x0068 */ ld [%l7],%l3 /* 0x006c */ ld [%l7+4],%l4 /* 0x0070 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20 ! 159 ! d16[2 * i + 1] = (double)(a >> 16); /* 0x0074 159 */ srl %l3,16,%o1 /* 0x0078 158 */ and %l3,%i4,%o3 /* 0x007c */ st %o3,[%sp+2335] /* 0x0080 159 */ srl %l4,16,%g4 /* 0x0084 158 */ and %l4,%i4,%o0 /* 0x0088 */ st %o0,[%sp+2303] /* 0x008c 159 */ add %l7,8,%l7 /* 0x0090 */ st %o1,[%sp+2271] /* 0x0094 */ st %g4,[%sp+2239] /* 0x0098 157 */ prefetch [%i2+456],22 /* 0x009c */ prefetch [%i2+520],22 .L900000506: /* 0x00a0 157 */ prefetch [%l2+536],22 /* 0x00a4 159 */ add %i5,2,%i5 /* 0x00a8 157 */ add %l2,32,%l2 /* 0x00ac */ ld [%l7],%g2 /* 0x00b0 159 */ cmp %i5,%i0 /* 0x00b4 */ add %l7,8,%l7 /* 0x00b8 158 */ ld [%sp+2335],%f9 /* 0x00bc 159 */ add %i3,4,%i3 /* 0x00c0 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f8 /* 0x00c4 159 */ ld [%sp+2271],%f11 /* 0x00c8 158 */ and %g2,%i4,%g3 /* 0x00cc 159 */ fmovs %f8,%f10 /* 0x00d0 158 */ st %g3,[%sp+2335] /* 0x00d4 */ fsubd %f8,%f20,%f28 /* 0x00d8 */ std %f28,[%l2-32] /* 0x00dc 159 */ srl %g2,16,%g1 /* 0x00e0 */ st %g1,[%sp+2271] /* 0x00e4 */ fsubd %f10,%f20,%f30 /* 0x00e8 */ std %f30,[%l2-24] /* 0x00ec 157 */ ld [%l7-4],%l0 /* 0x00f0 158 */ 
ld [%sp+2303],%f13 /* 0x00f4 */ ld [%l1+%lo(___const_seg_900000501+8)],%f12 /* 0x00f8 159 */ ld [%sp+2239],%f15 /* 0x00fc 158 */ and %l0,%i4,%l5 /* 0x0100 159 */ fmovs %f12,%f14 /* 0x0104 158 */ st %l5,[%sp+2303] /* 0x0108 */ fsubd %f12,%f20,%f44 /* 0x010c */ std %f44,[%l2-16] /* 0x0110 159 */ srl %l0,16,%o5 /* 0x0114 */ st %o5,[%sp+2239] /* 0x0118 */ fsubd %f14,%f20,%f46 /* 0x011c */ ble,pt %icc,.L900000506 /* 0x0120 */ std %f46,[%l2-8] .L900000509: /* 0x0124 158 */ ld [%l1+%lo(___const_seg_900000501+8)],%f0 /* 0x0128 159 */ cmp %i5,%l6 /* 0x012c */ add %i3,4,%i3 /* 0x0130 158 */ ld [%sp+2335],%f1 /* 0x0134 */ ld [%sp+2303],%f5 /* 0x0138 159 */ fmovs %f0,%f2 /* 0x013c */ ld [%sp+2271],%f3 /* 0x0140 158 */ fmovs %f0,%f4 /* 0x0144 159 */ ld [%sp+2239],%f7 /* 0x0148 */ fmovs %f0,%f6 /* 0x014c 158 */ fsubd %f0,%f20,%f22 /* 0x0150 */ std %f22,[%l2] /* 0x0154 159 */ fsubd %f2,%f20,%f24 /* 0x0158 */ std %f24,[%l2+8] /* 0x015c 158 */ fsubd %f4,%f20,%f26 /* 0x0160 */ std %f26,[%l2+16] /* 0x0164 159 */ fsubd %f6,%f20,%f20 /* 0x0168 */ bg,pn %icc,.L77000272 /* 0x016c */ std %f20,[%l2+24] .L77000279: /* 0x0170 157 */ ld [%l7],%l2 .L900000510: /* 0x0174 158 */ and %l2,%i4,%o4 /* 0x0178 */ st %o4,[%sp+2399] /* 0x017c 159 */ srl %l2,16,%o2 /* 0x0180 */ st %o2,[%sp+2367] /* 0x0184 158 */ sethi %hi(___const_seg_900000501+8),%l1 /* 0x0188 */ sra %i3,0,%i0 /* 0x018c */ ld [%l1+%lo(___const_seg_900000501+8)],%f16 /* 0x0190 */ sllx %i0,3,%o1 /* 0x0194 159 */ add %i3,1,%o3 /* 0x0198 158 */ ldd [%i1+%lo(___const_seg_900000501)],%f20 /* 0x019c 159 */ sra %o3,0,%l3 /* 0x01a0 */ add %i5,1,%i5 /* 0x01a4 158 */ ld [%sp+2399],%f17 /* 0x01a8 159 */ sllx %l3,3,%o0 /* 0x01ac */ add %l7,4,%l7 /* 0x01b0 */ fmovs %f16,%f18 /* 0x01b4 */ cmp %i5,%l6 /* 0x01b8 */ add %i3,2,%i3 /* 0x01bc 158 */ fsubd %f16,%f20,%f48 /* 0x01c0 */ std %f48,[%i2+%o1] /* 0x01c4 159 */ ld [%sp+2367],%f19 /* 0x01c8 */ fsubd %f18,%f20,%f50 /* 0x01cc */ std %f50,[%i2+%o0] /* 0x01d0 */ ble,a,pt %icc,.L900000510 /* 0x01d4 157 */ 
ld [%l7],%l2 .L77000272: /* 0x01d8 159 */ ret ! Result = /* 0x01dc */ restore %g0,%g0,%g0 /* 0x01e0 0 */ .type conv_i32_to_d16,2 /* 0x01e0 0 */ .size conv_i32_to_d16,(.-conv_i32_to_d16) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 8 ! ! CONSTANT POOL ! ___const_seg_900000601: /* 000000 0 */ .word 1127219200,0 /* 0x0008 */ .word 1127219200 /* 0x000c 0 */ .type ___const_seg_900000601,1 /* 0x000c 0 */ .size ___const_seg_900000601,(.-___const_seg_900000601) /* 0x000c 0 */ .align 8 /* 0x0010 */ .skip 24 /* 0x0028 */ .align 32 ! 160 ! } ! 161 !} ! 163 !#ifdef RF_INLINE_MACROS ! 165 !void ! 166 !i16_to_d16_and_d32x4(const double *, /* 1/(2^16) */ ! 167 ! const double *, /* 2^16 */ ! 168 ! const double *, /* 0 */ ! 169 ! double *, /* result16 */ ! 170 ! double *, /* result32 */ ! 171 ! float *); /* source - should be unsigned int* */ ! 172 ! /* converted to float* */ ! 174 !#else ! 177 !/* ARGSUSED */ ! 178 !static void ! 179 !i16_to_d16_and_d32x4(const double *dummy1, /* 1/(2^16) */ ! 180 ! const double *dummy2, /* 2^16 */ ! 181 ! const double *dummy3, /* 0 */ ! 182 ! double *result16, ! 183 ! double *result32, ! 184 ! float *src) /* source - should be unsigned int* */ ! 185 ! /* converted to float* */ ! 186 !{ ! 187 ! uint32_t *i32; ! 188 ! uint32_t a, b, c, d; ! 190 ! i32 = (uint32_t *)src; ! 191 ! a = i32[0]; ! 192 ! b = i32[1]; ! 193 ! c = i32[2]; ! 194 ! d = i32[3]; ! 195 ! result16[0] = (double)(a & 0xffff); ! 196 ! result16[1] = (double)(a >> 16); ! 197 ! result32[0] = (double)a; ! 198 ! result16[2] = (double)(b & 0xffff); ! 199 ! result16[3] = (double)(b >> 16); ! 200 ! result32[1] = (double)b; ! 201 ! result16[4] = (double)(c & 0xffff); ! 202 ! result16[5] = (double)(c >> 16); ! 203 ! result32[2] = (double)c; ! 204 ! result16[6] = (double)(d & 0xffff); ! 205 ! result16[7] = (double)(d >> 16); ! 206 ! result32[3] = (double)d; ! 207 !} ! 209 !#endif ! 212 !void ! 213 !conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len) ! 214 !{ ! ! 
SUBROUTINE conv_i32_to_d32_and_d16 ! ! OFFSET SOURCE LINE LABEL INSTRUCTION /* conv_i32_to_d32_and_d16(d32, d16, i32, len): the save opens a 368-byte frame, after which %i0 = d32, %i1 = d16, %i2 = i32, %i3 = len. */ .global conv_i32_to_d32_and_d16 conv_i32_to_d32_and_d16: /* 000000 214 */ save %sp,-368,%sp ! 215 ! int i; ! 216 ! uint32_t a; ! 218 !#pragma pipeloop(0) ! 219 ! for (i = 0; i < len - 3; i += 4) { ! 220 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero, ! 221 ! &(d16[2*i]), &(d32[i]), ! 222 ! (float *)(&(i32[i]))); ! 223 ! } ! 224 ! for (; i < len; i++) { ! 225 ! a = i32[i]; ! 226 ! d32[i] = (double)(i32[i]); ! 227 ! d16[2 * i] = (double)(a & 0xffff); ! 228 ! d16[2 * i + 1] = (double)(a >> 16); /* The four-words-per-pass loop is skipped when len - 3 <= 0; %i5 is the index i and starts at 0 on either path. */ /* 0x0004 228 */ sub %i3,3,%i4 /* 0x0008 219 */ cmp %i4,0 /* 0x000c */ ble,pn %icc,.L77000289 /* 0x0010 */ or %g0,0,%i5 /* Setup for the four-at-a-time loop: %f2 = Zero, %o4 = len - 4 (loop bound), %o3 = running d16 element index, %l6 = d32 store pointer, %l5 = i32 load pointer; %g2 and %o5 hold the high address parts of TwoToMinus16 and TwoTo16. */ .L77000306: /* 0x0014 222 */ sethi %hi(Zero),%g3 /* 0x0018 */ sethi %hi(TwoToMinus16),%g2 /* 0x001c */ sethi %hi(TwoTo16),%o5 /* 0x0020 */ ldd [%g3+%lo(Zero)],%f2 /* 0x0024 219 */ sub %i3,4,%o4 /* 0x0028 */ or %g0,0,%o3 /* 0x002c */ or %g0,%i0,%l6 /* 0x0030 */ or %g0,%i2,%l5 /* Loop body (inlined i16_to_d16_and_d32x4): each 32-bit word is loaded into the low half of a double register whose high half holds Zero, and fxtod converts that 64-bit integer to the double stored into d32. Multiplying by TwoToMinus16 and truncating through fdtox/fxtod gives the upper 16-bit digit; the lower digit is the value minus that digit times TwoTo16. */ .L900000615: /* 0x0034 222 */ fmovd %f2,%f26 /* 0x0038 */ ld [%l5],%f27 /* 0x003c */ sra %o3,0,%o0 /* 0x0040 */ add %i5,4,%i5 /* 0x0044 */ fmovd %f2,%f28 /* 0x0048 */ ld [%l5+4],%f29 /* 0x004c */ sllx %o0,3,%g5 /* 0x0050 */ cmp %i5,%o4 /* 0x0054 */ fmovd %f2,%f30 /* 0x0058 */ ld [%l5+8],%f31 /* 0x005c */ add %i1,%g5,%g4 /* 0x0060 */ add %o3,8,%o3 /* 0x0064 */ ld [%l5+12],%f3 /* 0x0068 */ fxtod %f26,%f26 /* 0x006c */ ldd [%g2+%lo(TwoToMinus16)],%f32 /* 0x0070 */ fxtod %f28,%f28 /* 0x0074 */ add %l5,16,%l5 /* 0x0078 */ fxtod %f30,%f30 /* 0x007c */ ldd [%o5+%lo(TwoTo16)],%f34 /* 0x0080 */ fxtod %f2,%f2 /* 0x0084 */ std %f2,[%l6+24] /* 0x0088 */ fmuld %f32,%f26,%f36 /* 0x008c */ std %f26,[%l6] /* 0x0090 */ fmuld %f32,%f28,%f38 /* 0x0094 */ std %f28,[%l6+8] /* 0x0098 */ fmuld %f32,%f30,%f40 /* 0x009c */ std %f30,[%l6+16] /* 0x00a0 */ fmuld %f32,%f2,%f42 /* 0x00a4 */ add %l6,32,%l6 /* 0x00a8 */ fdtox %f36,%f36 /* 0x00ac */ fdtox %f38,%f38 /* 0x00b0 */ fdtox %f40,%f40 /* 0x00b4 */ fdtox %f42,%f42 /* 0x00b8 */ fxtod 
%f36,%f36 /* 0x00bc */ std %f36,[%g4+8] /* 0x00c0 */ fxtod %f38,%f38 /* 0x00c4 */ std %f38,[%g4+24] /* 0x00c8 */ fxtod %f40,%f40 /* 0x00cc */ std %f40,[%g4+40] /* 0x00d0 */ fxtod %f42,%f42 /* 0x00d4 */ std %f42,[%g4+56] /* 0x00d8 */ fmuld %f36,%f34,%f36 /* 0x00dc */ fmuld %f38,%f34,%f38 /* 0x00e0 */ fmuld %f40,%f34,%f40 /* 0x00e4 */ fmuld %f42,%f34,%f42 /* 0x00e8 */ fsubd %f26,%f36,%f36 /* 0x00ec */ std %f36,[%i1+%g5] /* 0x00f0 */ fsubd %f28,%f38,%f38 /* 0x00f4 */ std %f38,[%g4+16] /* 0x00f8 */ fsubd %f30,%f40,%f40 /* 0x00fc */ std %f40,[%g4+32] /* 0x0100 */ fsubd %f2,%f42,%f42 /* 0x0104 */ std %f42,[%g4+48] /* 0x0108 */ ble,a,pt %icc,.L900000615 /* 0x010c */ ldd [%g3+%lo(Zero)],%f2 .L77000289: /* 0x0110 224 */ cmp %i5,%i3 /* 0x0114 */ bge,pn %icc,.L77000294 /* 0x0118 */ sethi %hi(0xfc00),%l0 .L77000307: /* 0x011c 224 */ sra %i5,0,%l2 /* 0x0120 */ sll %i5,1,%i4 /* 0x0124 */ sllx %l2,3,%l1 /* 0x0128 */ sllx %l2,2,%o1 /* 0x012c 225 */ sub %i3,%i5,%l3 /* 0x0130 224 */ add %l0,1023,%l0 /* 0x0134 */ add %l1,%i0,%l1 /* 0x0138 */ add %o1,%i2,%i2 /* 0x013c 225 */ cmp %l3,5 /* 0x0140 */ bl,pn %icc,.L77000291 /* 0x0144 0 */ sethi %hi(___const_seg_900000601),%l7 .L900000612: /* 0x0148 225 */ prefetch [%l1],22 /* 0x014c */ prefetch [%l1+64],22 /* 0x0150 */ sra %i4,0,%l6 /* 0x0154 226 */ sethi %hi(___const_seg_900000601+8),%l2 /* 0x0158 225 */ prefetch [%l1+128],22 /* 0x015c */ add %l6,-2,%l5 /* 0x0160 */ sub %i3,3,%i0 /* 0x0164 */ prefetch [%l1+192],22 /* 0x0168 */ sllx %l5,3,%o4 /* 0x016c 228 */ add %i5,1,%i5 /* 0x0170 225 */ add %i1,%o4,%o3 /* 0x0174 */ or %g0,%i3,%g1 /* 0x0178 */ ld [%i2],%l4 /* 0x017c */ prefetch [%o3+16],22 /* 0x0180 */ add %o3,16,%l3 /* 0x0184 228 */ add %i2,4,%i2 /* 0x0188 225 */ prefetch [%o3+80],22 /* 0x018c 228 */ srl %l4,16,%o1 /* 0x0190 227 */ and %l4,%l0,%o0 /* 0x0194 225 */ prefetch [%o3+144],22 /* 0x0198 228 */ st %o1,[%sp+2271] /* 0x019c 227 */ st %o0,[%sp+2239] /* 0x01a0 226 */ ldd [%l7+%lo(___const_seg_900000601)],%f32 /* 0x01a4 228 */ ld 
[%l2+%lo(___const_seg_900000601+8)],%f0 /* 0x01a8 225 */ prefetch [%o3+208],22 /* 0x01ac */ prefetch [%o3+272],22 /* 0x01b0 */ prefetch [%o3+336],22 .L900000610: /* 0x01b4 225 */ prefetch [%l1+192],22 /* 0x01b8 228 */ add %i5,4,%i5 /* 0x01bc 225 */ add %l3,64,%l3 /* 0x01c0 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f8 /* 0x01c4 228 */ cmp %i5,%i0 /* 0x01c8 225 */ ld [%i2],%g5 /* 0x01cc 228 */ add %i2,16,%i2 /* 0x01d0 */ add %l1,32,%l1 /* 0x01d4 */ add %i4,8,%i4 /* 0x01d8 226 */ ld [%i2-20],%f7 /* 0x01dc 228 */ srl %g5,16,%i3 /* 0x01e0 226 */ fmovs %f8,%f6 /* 0x01e4 228 */ st %i3,[%sp+2335] /* 0x01e8 227 */ and %g5,%l0,%g4 /* 0x01ec */ st %g4,[%sp+2303] /* 0x01f0 226 */ fsubd %f6,%f32,%f40 /* 0x01f4 227 */ ld [%sp+2239],%f9 /* 0x01f8 228 */ ld [%sp+2271],%f1 /* 0x01fc */ fmovs %f8,%f12 /* 0x0200 226 */ std %f40,[%l1-32] /* 0x0204 227 */ fsubd %f8,%f32,%f42 /* 0x0208 */ std %f42,[%l3-64] /* 0x020c 228 */ fsubd %f0,%f32,%f44 /* 0x0210 */ std %f44,[%l3-56] /* 0x0214 227 */ fmovs %f12,%f10 /* 0x0218 225 */ ld [%i2-12],%g2 /* 0x021c 226 */ ld [%i2-16],%f1 /* 0x0220 228 */ srl %g2,16,%g3 /* 0x0224 226 */ fmovs %f12,%f0 /* 0x0228 225 */ prefetch [%l3+320],22 /* 0x022c 228 */ st %g3,[%sp+2271] /* 0x0230 227 */ and %g2,%l0,%l6 /* 0x0234 */ st %l6,[%sp+2239] /* 0x0238 226 */ fsubd %f0,%f32,%f46 /* 0x023c 227 */ ld [%sp+2303],%f11 /* 0x0240 228 */ ld [%sp+2335],%f13 /* 0x0244 */ fmovs %f12,%f18 /* 0x0248 226 */ std %f46,[%l1-24] /* 0x024c 227 */ fsubd %f10,%f32,%f48 /* 0x0250 */ std %f48,[%l3-48] /* 0x0254 228 */ fsubd %f12,%f32,%f50 /* 0x0258 */ std %f50,[%l3-40] /* 0x025c 227 */ fmovs %f18,%f16 /* 0x0260 225 */ ld [%i2-8],%o5 /* 0x0264 226 */ ld [%i2-12],%f15 /* 0x0268 228 */ srl %o5,16,%l5 /* 0x026c 226 */ fmovs %f18,%f14 /* 0x0270 228 */ st %l5,[%sp+2335] /* 0x0274 227 */ and %o5,%l0,%o4 /* 0x0278 */ st %o4,[%sp+2303] /* 0x027c 226 */ fsubd %f14,%f32,%f52 /* 0x0280 227 */ ld [%sp+2239],%f17 /* 0x0284 228 */ ld [%sp+2271],%f19 /* 0x0288 225 */ prefetch [%l3+352],22 /* 
0x028c 228 */ fmovs %f18,%f24 /* 0x0290 226 */ std %f52,[%l1-16] /* 0x0294 227 */ fsubd %f16,%f32,%f54 /* 0x0298 */ std %f54,[%l3-32] /* 0x029c 228 */ fsubd %f18,%f32,%f56 /* 0x02a0 */ std %f56,[%l3-24] /* 0x02a4 227 */ fmovs %f24,%f22 /* 0x02a8 225 */ ld [%i2-4],%l4 /* 0x02ac 226 */ ld [%i2-8],%f21 /* 0x02b0 228 */ srl %l4,16,%o3 /* 0x02b4 226 */ fmovs %f24,%f20 /* 0x02b8 228 */ st %o3,[%sp+2271] /* 0x02bc 227 */ and %l4,%l0,%o2 /* 0x02c0 */ st %o2,[%sp+2239] /* 0x02c4 226 */ fsubd %f20,%f32,%f58 /* 0x02c8 227 */ ld [%sp+2303],%f23 /* 0x02cc 228 */ ld [%sp+2335],%f25 /* 0x02d0 */ fmovs %f24,%f0 /* 0x02d4 226 */ std %f58,[%l1-8] /* 0x02d8 227 */ fsubd %f22,%f32,%f60 /* 0x02dc */ std %f60,[%l3-16] /* 0x02e0 228 */ fsubd %f24,%f32,%f62 /* 0x02e4 */ bl,pt %icc,.L900000610 /* 0x02e8 */ std %f62,[%l3-8] .L900000613: /* 0x02ec 227 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4 /* 0x02f0 228 */ add %l1,8,%l1 /* 0x02f4 */ cmp %i5,%g1 /* 0x02f8 226 */ ld [%i2-4],%f3 /* 0x02fc 225 */ or %g0,%g1,%i3 /* 0x0300 228 */ add %i4,2,%i4 /* 0x0304 227 */ ld [%sp+2239],%f5 /* 0x0308 226 */ fmovs %f4,%f2 /* 0x030c 228 */ ld [%sp+2271],%f1 /* 0x0310 226 */ fsubd %f2,%f32,%f34 /* 0x0314 */ std %f34,[%l1-8] /* 0x0318 227 */ fsubd %f4,%f32,%f36 /* 0x031c */ std %f36,[%l3] /* 0x0320 228 */ fsubd %f0,%f32,%f38 /* 0x0324 */ bge,pn %icc,.L77000294 /* 0x0328 */ std %f38,[%l3+8] .L77000291: /* 0x032c 225 */ ld [%i2],%o2 .L900000614: /* 0x0330 226 */ ldd [%l7+%lo(___const_seg_900000601)],%f32 /* 0x0334 228 */ srl %o2,16,%l3 /* 0x0338 227 */ sra %i4,0,%i0 /* 0x033c 228 */ st %l3,[%sp+2367] /* 0x0340 227 */ and %o2,%l0,%g1 /* 0x0344 226 */ sethi %hi(___const_seg_900000601+8),%l2 /* 0x0348 227 */ st %g1,[%sp+2399] /* 0x034c */ sllx %i0,3,%o0 /* 0x0350 228 */ add %i4,1,%l4 /* 0x0354 226 */ ld [%l2+%lo(___const_seg_900000601+8)],%f4 /* 0x0358 228 */ sra %l4,0,%o1 /* 0x035c */ add %i5,1,%i5 /* 0x0360 226 */ ld [%i2],%f5 /* 0x0364 228 */ sllx %o1,3,%g5 /* 0x0368 */ cmp %i5,%i3 /* 0x036c */ ld 
[%sp+2367],%f9 /* 0x0370 */ add %i2,4,%i2 /* 0x0374 */ add %i4,2,%i4 /* 0x0378 227 */ fmovs %f4,%f6 /* 0x037c 226 */ fsubd %f4,%f32,%f44 /* 0x0380 */ std %f44,[%l1] /* 0x0384 227 */ ld [%sp+2399],%f7 /* 0x0388 228 */ fmovs %f6,%f8 /* 0x038c */ add %l1,8,%l1 /* 0x0390 */ fsubd %f8,%f32,%f48 /* 0x0394 227 */ fsubd %f6,%f32,%f46 /* 0x0398 */ std %f46,[%i1+%o0] /* 0x039c 228 */ std %f48,[%i1+%g5] /* 0x03a0 */ bl,a,pt %icc,.L900000614 /* 0x03a4 225 */ ld [%i2],%o2 .L77000294: /* 0x03a8 222 */ ret ! Result = /* 0x03ac */ restore %g0,%g0,%g0 /* 0x03b0 0 */ .type conv_i32_to_d32_and_d16,2 /* 0x03b0 0 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 32 ! 229 ! } ! 230 !} ! 232 !extern long long c1, c2, c3, c4; ! 234 !static void ! 235 !adjust_montf_result(uint32_t *i32, uint32_t *nint, int len) ! 236 !{ ! ! SUBROUTINE adjust_montf_result ! ! OFFSET SOURCE LINE LABEL INSTRUCTION /* adjust_montf_result: leaf routine (no save; returns via retl). In: %o0 = i32, %o1 = nint, %o2 = len. If i32[len] is non-zero, or the words i32[len-1..0] compare greater than or equal to nint[len-1..0] (unsigned, scanned from the top word down), nint is subtracted from i32 in place over words 0..len-1, with the borrow carried in a 64-bit accumulator; otherwise i32 is left unchanged. i32[len] is read but never written. Working registers: %g2 = len (later reused as acc), %g3 = len-1, %o4 = i32, %o5 = nint. */ adjust_montf_result: /* 000000 236 */ sra %o2,0,%g2 /* 0x0004 */ or %g0,%o0,%o4 ! 237 ! int64_t acc; ! 238 ! int i; ! 240 ! if (i32[len] > 0) { /* 0x0008 240 */ sllx %g2,2,%g3 /* 0x000c */ ld [%o0+%g3],%o0 /* 0x0010 */ cmp %o0,0 /* 0x0014 */ bleu,pn %icc,.L77000316 /* 0x0018 236 */ or %g0,%o1,%o5 ! 241 ! i = -1; /* i32[len] != 0: set %g3 = len-1 and go straight to the subtraction test at .L900000712 (the cmp executes in the delay slot) */ .L77000315: /* 0x001c 241 */ sub %g2,1,%g3 /* 0x0020 */ ba .L900000712 /* 0x0024 249 */ cmp %g2,0 ! 242 ! } else { ! 243 ! for (i = len - 1; i >= 0; i--) { /* i32[len] == 0: scan from i = len-1 downwards; %o3 holds i, and a negative start index skips the scan */ .L77000316: /* 0x0028 243 */ subcc %g2,1,%g3 /* 0x002c */ bneg,pn %icc,.L77000340 /* 0x0030 */ or %g0,%g3,%o3 .L77000348: /* 0x0034 243 */ sra %g3,0,%o1 /* 0x0038 */ sllx %o1,2,%g1 ! 244 !
if (i32[i] != nint[i]) break; /* 0x003c 244 */ ld [%g1+%o5],%g4 /* 0x0040 243 */ add %g1,%o4,%o2 /* 0x0044 */ add %g1,%o5,%o1 /* compare loop: %o2 walks i32 and %o1 walks nint downwards, %g4 holds nint[i] */ .L900000713: /* 0x0048 244 */ ld [%o2],%o0 /* 0x004c */ cmp %o0,%g4 /* 0x0050 */ bne,pn %icc,.L77000324 /* 0x0054 */ sub %o2,4,%o2 .L77000320: /* 0x0058 244 */ sub %o1,4,%o1 /* 0x005c */ subcc %o3,1,%o3 /* 0x0060 */ bpos,a,pt %icc,.L900000713 /* 0x0064 */ ld [%o1],%g4 /* all len words were equal: go to the subtraction */ .L900000706: /* 0x0068 244 */ ba .L900000712 /* 0x006c 249 */ cmp %g2,0 /* first differing word is at index %o3: return without changes if i32[i] <= nint[i] (unsigned), else fall through to the subtraction */ .L77000324: /* 0x0070 244 */ sra %o3,0,%o0 /* 0x0074 */ sllx %o0,2,%g1 /* 0x0078 */ ld [%o5+%g1],%o3 /* 0x007c */ ld [%o4+%g1],%g5 /* 0x0080 */ cmp %g5,%o3 /* 0x0084 */ bleu,pt %icc,.L77000332 /* 0x0088 */ nop ! 245 ! } ! 246 ! } ! 247 ! if ((i < 0) || (i32[i] > nint[i])) { ! 248 ! acc = 0; ! 249 ! for (i = 0; i < len; i++) { .L77000340: /* 0x008c 249 */ cmp %g2,0 /* subtraction entry: return if len <= 0; the delay slot copies len into %o3 */ .L900000712: /* 0x0090 249 */ ble,pn %icc,.L77000332 /* 0x0094 250 */ or %g0,%g2,%o3 /* i = 0 in %o0 and acc = 0 in %g2; fewer than 10 words go to the one-word-per-pass loop at .L77000341 */ .L77000347: /* 0x0098 249 */ or %g0,0,%o0 ! 250 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]); /* 0x009c 250 */ cmp %o3,10 /* 0x00a0 */ bl,pn %icc,.L77000341 /* 0x00a4 249 */ or %g0,0,%g2 /* prologue for the unrolled loop: computes word 0, preloads i32[1] into %g5, and sets %o5 = len-8 as the loop bound (nint has already been copied to %o1) */ .L900000709: /* 0x00a8 250 */ prefetch [%o4],22 /* 0x00ac */ prefetch [%o4+64],22 ! 251 ! i32[i] = acc & 0xffffffff; ! 252 !
acc = acc >> 32; /* 0x00b0 252 */ add %o5,4,%o1 /* 0x00b4 */ add %o4,8,%o2 /* 0x00b8 250 */ prefetch [%o4+128],22 /* 0x00bc */ sub %o3,8,%o5 /* 0x00c0 */ or %g0,2,%o0 /* 0x00c4 */ prefetch [%o4+192],22 /* 0x00c8 */ prefetch [%o4+256],22 /* 0x00cc */ prefetch [%o4+320],22 /* 0x00d0 */ prefetch [%o4+384],22 /* 0x00d4 */ ld [%o2-4],%g5 /* 0x00d8 */ prefetch [%o2+440],22 /* 0x00dc */ prefetch [%o2+504],22 /* 0x00e0 */ ld [%o4],%g4 /* 0x00e4 */ ld [%o1-4],%o4 /* 0x00e8 */ sub %g4,%o4,%o3 /* 0x00ec 251 */ st %o3,[%o2-8] /* 0x00f0 252 */ srax %o3,32,%g4 /* unrolled loop: eight result words are stored per pass, both pointers advance by 32 bytes, and the next i32 word is left preloaded in %g5; repeats while %o0 <= len-8 */ .L900000707: /* 0x00f4 252 */ add %o0,8,%o0 /* 0x00f8 */ add %o2,32,%o2 /* 0x00fc 250 */ ld [%o1],%g1 /* 0x0100 */ prefetch [%o2+496],22 /* 0x0104 252 */ cmp %o0,%o5 /* 0x0108 */ add %o1,32,%o1 /* 0x010c 250 */ sub %g5,%g1,%g5 /* 0x0110 */ add %g5,%g4,%o4 /* 0x0114 */ ld [%o2-32],%g4 /* 0x0118 251 */ st %o4,[%o2-36] /* 0x011c 252 */ srax %o4,32,%g1 /* 0x0120 250 */ ld [%o1-28],%o3 /* 0x0124 */ sub %g4,%o3,%g2 /* 0x0128 */ add %g2,%g1,%g5 /* 0x012c */ ld [%o2-28],%o3 /* 0x0130 251 */ st %g5,[%o2-32] /* 0x0134 252 */ srax %g5,32,%g4 /* 0x0138 250 */ ld [%o1-24],%o4 /* 0x013c */ sub %o3,%o4,%g1 /* 0x0140 */ add %g1,%g4,%g2 /* 0x0144 */ ld [%o2-24],%o3 /* 0x0148 251 */ st %g2,[%o2-28] /* 0x014c 252 */ srax %g2,32,%g5 /* 0x0150 250 */ ld [%o1-20],%o4 /* 0x0154 */ sub %o3,%o4,%g4 /* 0x0158 */ add %g4,%g5,%g1 /* 0x015c */ ld [%o2-20],%o4 /* 0x0160 251 */ st %g1,[%o2-24] /* 0x0164 252 */ srax %g1,32,%o3 /* 0x0168 250 */ ld [%o1-16],%g2 /* 0x016c */ sub %o4,%g2,%g5 /* 0x0170 */ add %g5,%o3,%g1 /* 0x0174 */ ld [%o2-16],%g4 /* 0x0178 251 */ st %g1,[%o2-20] /* 0x017c 252 */ srax %g1,32,%o4 /* 0x0180 250 */ ld [%o1-12],%g2 /* 0x0184 */ sub %g4,%g2,%o3 /* 0x0188 */ add %o3,%o4,%g5 /* 0x018c */ ld [%o2-12],%g2 /* 0x0190 251 */ st %g5,[%o2-16] /* 0x0194 252 */ srax %g5,32,%g4 /* 0x0198 250 */ ld [%o1-8],%g1 /* 0x019c */ sub %g2,%g1,%o4 /* 0x01a0 */ add %o4,%g4,%o3 /* 0x01a4 */ ld [%o2-8],%g2 /* 0x01a8 251 */ st %o3,[%o2-12] /* 0x01ac 252
*/ srax %o3,32,%g5 /* 0x01b0 250 */ ld [%o1-4],%g1 /* 0x01b4 */ sub %g2,%g1,%g4 /* 0x01b8 */ add %g4,%g5,%o4 /* 0x01bc */ ld [%o2-4],%g5 /* 0x01c0 251 */ st %o4,[%o2-8] /* 0x01c4 252 */ ble,pt %icc,.L900000707 /* 0x01c8 */ srax %o4,32,%g4 /* unrolled loop exit: finish the word preloaded in %g5, re-establish %o4 and %o5 as the i32 and nint cursors, then return if %o0 > len-1, else continue with the scalar loop (acc is placed in %g2 by the delay slot) */ .L900000710: /* 0x01cc 250 */ ld [%o1],%o3 /* 0x01d0 252 */ add %o1,4,%o5 /* 0x01d4 250 */ or %g0,%o2,%o4 /* 0x01d8 252 */ cmp %o0,%g3 /* 0x01dc 250 */ sub %g5,%o3,%g2 /* 0x01e0 */ add %g2,%g4,%g1 /* 0x01e4 251 */ st %g1,[%o2-4] /* 0x01e8 252 */ bg,pn %icc,.L77000332 /* 0x01ec */ srax %g1,32,%g2 /* scalar loop, one word per pass: %g2 = acc, %o4 and %o5 point at the current i32 and nint words; the low 32 bits of the sum are stored and acc becomes the sum shifted right arithmetically by 32; repeats while the incremented %o0 <= len-1 */ .L77000341: /* 0x01f0 250 */ ld [%o4],%g5 .L900000711: /* 0x01f4 250 */ ld [%o5],%o2 /* 0x01f8 */ add %g2,%g5,%g4 /* 0x01fc 252 */ add %o0,1,%o0 /* 0x0200 */ cmp %o0,%g3 /* 0x0204 */ add %o5,4,%o5 /* 0x0208 250 */ sub %g4,%o2,%o1 /* 0x020c 251 */ st %o1,[%o4] /* 0x0210 252 */ srax %o1,32,%g2 /* 0x0214 */ add %o4,4,%o4 /* 0x0218 */ ble,a,pt %icc,.L900000711 /* 0x021c 250 */ ld [%o4],%g5 /* common exit for every path */ .L77000332: /* 0x0220 252 */ retl ! Result = /* 0x0224 */ nop /* 0x0228 0 */ .type adjust_montf_result,2 /* 0x0228 0 */ .size adjust_montf_result,(.-adjust_montf_result) .section ".text",#alloc,#execinstr /* 000000 0 */ .align 32 ! 253 ! } ! 254 ! } ! 255 !} ! 257 !/************* ! 258 !static void ! 259 !adjust_montf_result_bad(uint32_t *i32, uint32_t *nint, int len) ! 260 !{ ! 261 ! int64_t acc; ! 262 ! int i; ! 264 ! c4++; ! 265 ! ! 266 ! if (i32[len] > 0) { ! 267 ! i = -1; ! 268 ! c1++; ! 269 ! } else { ! 270 ! for (i = len - 1; i >= 0; i++) { ! 271 ! if (i32[i] != nint[i]) break; ! 272 ! c2++; ! 273 ! } ! 274 ! } ! 275 ! if ((i < 0) || (i32[i] > nint[i])) { ! 276 ! c3++; ! 277 ! acc = 0; ! 278 ! for (i = 0; i < len; i++) { ! 279 ! acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]); ! 280 ! i32[i] = acc & 0xffffffff; ! 281 ! acc = acc >> 32; ! 282 ! } ! 283 ! } ! 284 !} ! 285 !uint32_t saveresult[1000]; ! 286 !void printarray(char *name, uint32_t *arr, int len) ! 287 !{ ! 288 ! int i, j; ! 289 ! uint64_t tmp; ! 291 !
printf("uint64_t %s[%d] =\n{\n",name,(len+1)/2); ! 292 ! for(i=j=0; i