1*25c28e83SPiotr Jasiukajtis /* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis 22*25c28e83SPiotr Jasiukajtis /* 23*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24*25c28e83SPiotr Jasiukajtis */ 25*25c28e83SPiotr Jasiukajtis /* 26*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 28*25c28e83SPiotr Jasiukajtis */ 29*25c28e83SPiotr Jasiukajtis 30*25c28e83SPiotr Jasiukajtis #ifdef __RESTRICT 31*25c28e83SPiotr Jasiukajtis #define restrict _Restrict 32*25c28e83SPiotr Jasiukajtis #else 33*25c28e83SPiotr Jasiukajtis #define restrict 34*25c28e83SPiotr Jasiukajtis #endif 35*25c28e83SPiotr Jasiukajtis 36*25c28e83SPiotr Jasiukajtis /* float logf(float x) 37*25c28e83SPiotr Jasiukajtis * 38*25c28e83SPiotr Jasiukajtis * Method : 39*25c28e83SPiotr Jasiukajtis * 1. Special cases: 40*25c28e83SPiotr Jasiukajtis * for x is negative, -Inf => QNaN + invalid; 41*25c28e83SPiotr Jasiukajtis * for x = 0 => -Inf + divide-by-zero; 42*25c28e83SPiotr Jasiukajtis * for x = +Inf => Inf; 43*25c28e83SPiotr Jasiukajtis * for x = NaN => QNaN. 44*25c28e83SPiotr Jasiukajtis * 2. Computes logarithm from: 45*25c28e83SPiotr Jasiukajtis * x = m * 2**n => log(x) = n * log(2) + log(m), 46*25c28e83SPiotr Jasiukajtis * m = [1, 2). 47*25c28e83SPiotr Jasiukajtis * Let m = m0 + dm, where m0 = 1 + k / 32, 48*25c28e83SPiotr Jasiukajtis * k = [0, 32], 49*25c28e83SPiotr Jasiukajtis * dm = [-1/64, 1/64]. 50*25c28e83SPiotr Jasiukajtis * Then log(m) = log(m0 + dm) = log(m0) + log(1+y), 51*25c28e83SPiotr Jasiukajtis * where y = dm*(1/m0), y = [-1/66, 1/64]. 52*25c28e83SPiotr Jasiukajtis * Then 53*25c28e83SPiotr Jasiukajtis * 1/m0 is looked up in a table of 1, 1/(1+1/32), ..., 1/(1+32/32); 54*25c28e83SPiotr Jasiukajtis * log(m0) is looked up in a table of log(1), log(1+1/32), 55*25c28e83SPiotr Jasiukajtis * ..., log(1+32/32). 56*25c28e83SPiotr Jasiukajtis * log(1+y) is computed using approximation: 57*25c28e83SPiotr Jasiukajtis * log(1+y) = ((a3*y + a2)*y + a1)*y*y + y. 58*25c28e83SPiotr Jasiukajtis * Accuracy: 59*25c28e83SPiotr Jasiukajtis * The maximum relative error for the approximating 60*25c28e83SPiotr Jasiukajtis * polynomial is 2**(-28.41). All calculations are of 61*25c28e83SPiotr Jasiukajtis * double precision. 62*25c28e83SPiotr Jasiukajtis * Maximum error observed: less than 0.545 ulp for the 63*25c28e83SPiotr Jasiukajtis * whole float type range. 64*25c28e83SPiotr Jasiukajtis */ 65*25c28e83SPiotr Jasiukajtis 66*25c28e83SPiotr Jasiukajtis static const double __TBL_logf[] = { 67*25c28e83SPiotr Jasiukajtis /* __TBL_logf[2*i] = log(1+i/32), i = [0, 32] */ 68*25c28e83SPiotr Jasiukajtis /* __TBL_logf[2*i+1] = 2**(-23)/(1+i/32), i = [0, 32] */ 69*25c28e83SPiotr Jasiukajtis 0.000000000000000000e+00, 1.192092895507812500e-07, 3.077165866675368733e-02, 70*25c28e83SPiotr Jasiukajtis 1.155968868371212153e-07, 6.062462181643483994e-02, 1.121969784007352926e-07, 71*25c28e83SPiotr Jasiukajtis 8.961215868968713805e-02, 1.089913504464285680e-07, 1.177830356563834557e-01, 72*25c28e83SPiotr Jasiukajtis 1.059638129340277719e-07, 1.451820098444978890e-01, 1.030999260979729787e-07, 73*25c28e83SPiotr Jasiukajtis 1.718502569266592284e-01, 1.003867701480263102e-07, 1.978257433299198675e-01, 74*25c28e83SPiotr Jasiukajtis 9.781275040064102225e-08, 2.231435513142097649e-01, 9.536743164062500529e-08, 75*25c28e83SPiotr Jasiukajtis 2.478361639045812692e-01, 9.304139672256097884e-08, 2.719337154836417580e-01, 76*25c28e83SPiotr Jasiukajtis 9.082612537202380448e-08, 2.954642128938358980e-01, 8.871388989825581272e-08, 77*25c28e83SPiotr Jasiukajtis 3.184537311185345887e-01, 8.669766512784091150e-08, 3.409265869705931928e-01, 78*25c28e83SPiotr Jasiukajtis 8.477105034722222546e-08, 3.629054936893684746e-01, 8.292820142663043248e-08, 79*25c28e83SPiotr Jasiukajtis 3.844116989103320559e-01, 8.116377160904255122e-08, 4.054651081081643849e-01, 80*25c28e83SPiotr Jasiukajtis 7.947285970052082892e-08, 4.260843953109000881e-01, 7.785096460459183052e-08, 81*25c28e83SPiotr Jasiukajtis 4.462871026284195297e-01, 7.629394531250000159e-08, 4.660897299245992387e-01, 82*25c28e83SPiotr Jasiukajtis 7.479798560049019504e-08, 4.855078157817008244e-01, 7.335956280048077330e-08, 83*25c28e83SPiotr Jasiukajtis 5.045560107523953119e-01, 7.197542010613207272e-08, 5.232481437645478684e-01, 84*25c28e83SPiotr Jasiukajtis 7.064254195601851460e-08, 5.415972824327444091e-01, 6.935813210227272390e-08, 85*25c28e83SPiotr Jasiukajtis 5.596157879354226594e-01, 6.811959402901785336e-08, 5.773153650348236132e-01, 86*25c28e83SPiotr Jasiukajtis 6.692451343201754014e-08, 5.947071077466927758e-01, 6.577064251077586116e-08, 87*25c28e83SPiotr Jasiukajtis 6.118015411059929409e-01, 6.465588585805084723e-08, 6.286086594223740942e-01, 88*25c28e83SPiotr Jasiukajtis 6.357828776041666578e-08, 6.451379613735847007e-01, 6.253602074795082293e-08, 89*25c28e83SPiotr Jasiukajtis 6.613984822453650159e-01, 6.152737525201612732e-08, 6.773988235918061429e-01, 90*25c28e83SPiotr Jasiukajtis 6.055075024801586965e-08, 6.931471805599452862e-01, 5.960464477539062500e-08 91*25c28e83SPiotr Jasiukajtis }; 92*25c28e83SPiotr Jasiukajtis 93*25c28e83SPiotr Jasiukajtis static const double 94*25c28e83SPiotr Jasiukajtis K3 = -2.49887584306188944706e-01, 95*25c28e83SPiotr Jasiukajtis K2 = 3.33368809981254554946e-01, 96*25c28e83SPiotr Jasiukajtis K1 = -5.00000008402474976565e-01; 97*25c28e83SPiotr Jasiukajtis 98*25c28e83SPiotr Jasiukajtis static const union { 99*25c28e83SPiotr Jasiukajtis int i; 100*25c28e83SPiotr Jasiukajtis float f; 101*25c28e83SPiotr Jasiukajtis } inf = { 0x7f800000 }; 102*25c28e83SPiotr Jasiukajtis 103*25c28e83SPiotr Jasiukajtis #define INF inf.f 104*25c28e83SPiotr Jasiukajtis 105*25c28e83SPiotr Jasiukajtis #define PROCESS(N) \ 106*25c28e83SPiotr Jasiukajtis iy##N = ival##N & 0x007fffff; \ 107*25c28e83SPiotr Jasiukajtis ival##N = (iy##N + 0x20000) & 0xfffc0000; \ 108*25c28e83SPiotr Jasiukajtis i##N = ival##N >> 17; \ 109*25c28e83SPiotr Jasiukajtis iy##N = iy##N - ival##N; \ 110*25c28e83SPiotr Jasiukajtis ty##N = LN2 * (double) exp##N + __TBL_logf[i##N]; \ 111*25c28e83SPiotr Jasiukajtis yy##N = (double) iy##N * __TBL_logf[i##N + 1]; \ 112*25c28e83SPiotr Jasiukajtis yy##N = ((K3 * yy##N + K2) * yy##N + K1) * yy##N * yy##N + yy##N; \ 113*25c28e83SPiotr Jasiukajtis y[0] = (float)(yy##N + ty##N); \ 114*25c28e83SPiotr Jasiukajtis y += stridey; 115*25c28e83SPiotr Jasiukajtis 116*25c28e83SPiotr Jasiukajtis #define PREPROCESS(N, index, label) \ 117*25c28e83SPiotr Jasiukajtis ival##N = *(int*)x; \ 118*25c28e83SPiotr Jasiukajtis value = x[0]; \ 119*25c28e83SPiotr Jasiukajtis x += stridex; \ 120*25c28e83SPiotr Jasiukajtis exp##N = (ival##N >> 23) - 127; \ 121*25c28e83SPiotr Jasiukajtis if ((ival##N & 0x7fffffff) >= 0x7f800000) /* X = NaN or Inf */ \ 122*25c28e83SPiotr Jasiukajtis { \ 123*25c28e83SPiotr Jasiukajtis y[index] = value + INF; \ 124*25c28e83SPiotr Jasiukajtis goto label; \ 125*25c28e83SPiotr Jasiukajtis } \ 126*25c28e83SPiotr Jasiukajtis if (ival##N < 0x00800000) \ 127*25c28e83SPiotr Jasiukajtis { \ 128*25c28e83SPiotr Jasiukajtis if (ival##N > 0) /* X = denormal */ \ 129*25c28e83SPiotr Jasiukajtis { \ 130*25c28e83SPiotr Jasiukajtis value = (float) ival##N; \ 131*25c28e83SPiotr Jasiukajtis ival##N = *(int*) &value; \ 132*25c28e83SPiotr Jasiukajtis exp##N = (ival##N >> 23) - (127 + 149); \ 133*25c28e83SPiotr Jasiukajtis } \ 134*25c28e83SPiotr Jasiukajtis else \ 135*25c28e83SPiotr Jasiukajtis { \ 136*25c28e83SPiotr Jasiukajtis value = 0.0f; \ 137*25c28e83SPiotr Jasiukajtis y[index] = ((ival##N & 0x7fffffff) == 0) ? \ 138*25c28e83SPiotr Jasiukajtis -1.0f / value : value / value; \ 139*25c28e83SPiotr Jasiukajtis goto label; \ 140*25c28e83SPiotr Jasiukajtis } \ 141*25c28e83SPiotr Jasiukajtis } 142*25c28e83SPiotr Jasiukajtis 143*25c28e83SPiotr Jasiukajtis void 144*25c28e83SPiotr Jasiukajtis __vlogf(int n, float * restrict x, int stridex, float * restrict y, 145*25c28e83SPiotr Jasiukajtis int stridey) 146*25c28e83SPiotr Jasiukajtis { 147*25c28e83SPiotr Jasiukajtis double LN2 = __TBL_logf[64]; /* log(2) = 0.6931471805599453094 */ 148*25c28e83SPiotr Jasiukajtis double yy0, yy1, yy2, yy3, yy4; 149*25c28e83SPiotr Jasiukajtis double ty0, ty1, ty2, ty3, ty4; 150*25c28e83SPiotr Jasiukajtis float value; 151*25c28e83SPiotr Jasiukajtis int i0, i1, i2, i3, i4; 152*25c28e83SPiotr Jasiukajtis int ival0, ival1, ival2, ival3, ival4; 153*25c28e83SPiotr Jasiukajtis int exp0, exp1, exp2, exp3, exp4; 154*25c28e83SPiotr Jasiukajtis int iy0, iy1, iy2, iy3, iy4; 155*25c28e83SPiotr Jasiukajtis 156*25c28e83SPiotr Jasiukajtis y -= stridey; 157*25c28e83SPiotr Jasiukajtis 158*25c28e83SPiotr Jasiukajtis for (; ;) 159*25c28e83SPiotr Jasiukajtis { 160*25c28e83SPiotr Jasiukajtis begin: 161*25c28e83SPiotr Jasiukajtis y += stridey; 162*25c28e83SPiotr Jasiukajtis 163*25c28e83SPiotr Jasiukajtis if (--n < 0) 164*25c28e83SPiotr Jasiukajtis break; 165*25c28e83SPiotr Jasiukajtis 166*25c28e83SPiotr Jasiukajtis PREPROCESS(0, 0, begin) 167*25c28e83SPiotr Jasiukajtis 168*25c28e83SPiotr Jasiukajtis if (--n < 0) 169*25c28e83SPiotr Jasiukajtis goto process1; 170*25c28e83SPiotr Jasiukajtis 171*25c28e83SPiotr Jasiukajtis PREPROCESS(1, stridey, process1) 172*25c28e83SPiotr Jasiukajtis 173*25c28e83SPiotr Jasiukajtis if (--n < 0) 174*25c28e83SPiotr Jasiukajtis goto process2; 175*25c28e83SPiotr Jasiukajtis 176*25c28e83SPiotr Jasiukajtis PREPROCESS(2, (stridey << 1), process2) 177*25c28e83SPiotr Jasiukajtis 178*25c28e83SPiotr Jasiukajtis if (--n < 0) 179*25c28e83SPiotr Jasiukajtis goto process3; 180*25c28e83SPiotr Jasiukajtis 181*25c28e83SPiotr Jasiukajtis PREPROCESS(3, (stridey << 1) + stridey, process3) 182*25c28e83SPiotr Jasiukajtis 183*25c28e83SPiotr Jasiukajtis if (--n < 0) 184*25c28e83SPiotr Jasiukajtis goto process4; 185*25c28e83SPiotr Jasiukajtis 186*25c28e83SPiotr Jasiukajtis PREPROCESS(4, (stridey << 2), process4) 187*25c28e83SPiotr Jasiukajtis 188*25c28e83SPiotr Jasiukajtis iy0 = ival0 & 0x007fffff; 189*25c28e83SPiotr Jasiukajtis iy1 = ival1 & 0x007fffff; 190*25c28e83SPiotr Jasiukajtis iy2 = ival2 & 0x007fffff; 191*25c28e83SPiotr Jasiukajtis iy3 = ival3 & 0x007fffff; 192*25c28e83SPiotr Jasiukajtis iy4 = ival4 & 0x007fffff; 193*25c28e83SPiotr Jasiukajtis 194*25c28e83SPiotr Jasiukajtis ival0 = (iy0 + 0x20000) & 0xfffc0000; 195*25c28e83SPiotr Jasiukajtis ival1 = (iy1 + 0x20000) & 0xfffc0000; 196*25c28e83SPiotr Jasiukajtis ival2 = (iy2 + 0x20000) & 0xfffc0000; 197*25c28e83SPiotr Jasiukajtis ival3 = (iy3 + 0x20000) & 0xfffc0000; 198*25c28e83SPiotr Jasiukajtis ival4 = (iy4 + 0x20000) & 0xfffc0000; 199*25c28e83SPiotr Jasiukajtis 200*25c28e83SPiotr Jasiukajtis i0 = ival0 >> 17; 201*25c28e83SPiotr Jasiukajtis i1 = ival1 >> 17; 202*25c28e83SPiotr Jasiukajtis i2 = ival2 >> 17; 203*25c28e83SPiotr Jasiukajtis i3 = ival3 >> 17; 204*25c28e83SPiotr Jasiukajtis i4 = ival4 >> 17; 205*25c28e83SPiotr Jasiukajtis 206*25c28e83SPiotr Jasiukajtis iy0 = iy0 - ival0; 207*25c28e83SPiotr Jasiukajtis iy1 = iy1 - ival1; 208*25c28e83SPiotr Jasiukajtis iy2 = iy2 - ival2; 209*25c28e83SPiotr Jasiukajtis iy3 = iy3 - ival3; 210*25c28e83SPiotr Jasiukajtis iy4 = iy4 - ival4; 211*25c28e83SPiotr Jasiukajtis 212*25c28e83SPiotr Jasiukajtis ty0 = LN2 * (double) exp0 + __TBL_logf[i0]; 213*25c28e83SPiotr Jasiukajtis ty1 = LN2 * (double) exp1 + __TBL_logf[i1]; 214*25c28e83SPiotr Jasiukajtis ty2 = LN2 * (double) exp2 + __TBL_logf[i2]; 215*25c28e83SPiotr Jasiukajtis ty3 = LN2 * (double) exp3 + __TBL_logf[i3]; 216*25c28e83SPiotr Jasiukajtis ty4 = LN2 * (double) exp4 + __TBL_logf[i4]; 217*25c28e83SPiotr Jasiukajtis 218*25c28e83SPiotr Jasiukajtis yy0 = (double) iy0 * __TBL_logf[i0 + 1]; 219*25c28e83SPiotr Jasiukajtis yy1 = (double) iy1 * __TBL_logf[i1 + 1]; 220*25c28e83SPiotr Jasiukajtis yy2 = (double) iy2 * __TBL_logf[i2 + 1]; 221*25c28e83SPiotr Jasiukajtis yy3 = (double) iy3 * __TBL_logf[i3 + 1]; 222*25c28e83SPiotr Jasiukajtis yy4 = (double) iy4 * __TBL_logf[i4 + 1]; 223*25c28e83SPiotr Jasiukajtis 224*25c28e83SPiotr Jasiukajtis yy0 = ((K3 * yy0 + K2) * yy0 + K1) * yy0 * yy0 + yy0; 225*25c28e83SPiotr Jasiukajtis yy1 = ((K3 * yy1 + K2) * yy1 + K1) * yy1 * yy1 + yy1; 226*25c28e83SPiotr Jasiukajtis yy2 = ((K3 * yy2 + K2) * yy2 + K1) * yy2 * yy2 + yy2; 227*25c28e83SPiotr Jasiukajtis yy3 = ((K3 * yy3 + K2) * yy3 + K1) * yy3 * yy3 + yy3; 228*25c28e83SPiotr Jasiukajtis yy4 = ((K3 * yy4 + K2) * yy4 + K1) * yy4 * yy4 + yy4; 229*25c28e83SPiotr Jasiukajtis 230*25c28e83SPiotr Jasiukajtis y[0] = (float)(yy0 + ty0); 231*25c28e83SPiotr Jasiukajtis y += stridey; 232*25c28e83SPiotr Jasiukajtis y[0] = (float)(yy1 + ty1); 233*25c28e83SPiotr Jasiukajtis y += stridey; 234*25c28e83SPiotr Jasiukajtis y[0] = (float)(yy2 + ty2); 235*25c28e83SPiotr Jasiukajtis y += stridey; 236*25c28e83SPiotr Jasiukajtis y[0] = (float)(yy3 + ty3); 237*25c28e83SPiotr Jasiukajtis y += stridey; 238*25c28e83SPiotr Jasiukajtis y[0] = (float)(yy4 + ty4); 239*25c28e83SPiotr Jasiukajtis continue; 240*25c28e83SPiotr Jasiukajtis 241*25c28e83SPiotr Jasiukajtis process1: 242*25c28e83SPiotr Jasiukajtis PROCESS(0) 243*25c28e83SPiotr Jasiukajtis continue; 244*25c28e83SPiotr Jasiukajtis 245*25c28e83SPiotr Jasiukajtis process2: 246*25c28e83SPiotr Jasiukajtis PROCESS(0) 247*25c28e83SPiotr Jasiukajtis PROCESS(1) 248*25c28e83SPiotr Jasiukajtis continue; 249*25c28e83SPiotr Jasiukajtis 250*25c28e83SPiotr Jasiukajtis process3: 251*25c28e83SPiotr Jasiukajtis PROCESS(0) 252*25c28e83SPiotr Jasiukajtis PROCESS(1) 253*25c28e83SPiotr Jasiukajtis PROCESS(2) 254*25c28e83SPiotr Jasiukajtis continue; 255*25c28e83SPiotr Jasiukajtis 256*25c28e83SPiotr Jasiukajtis process4: 257*25c28e83SPiotr Jasiukajtis PROCESS(0) 258*25c28e83SPiotr Jasiukajtis PROCESS(1) 259*25c28e83SPiotr Jasiukajtis PROCESS(2) 260*25c28e83SPiotr Jasiukajtis PROCESS(3) 261*25c28e83SPiotr Jasiukajtis } 262*25c28e83SPiotr Jasiukajtis } 263