xref: /illumos-gate/usr/src/lib/libmvec/common/__vlogf.c (revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8)
1*25c28e83SPiotr Jasiukajtis /*
2*25c28e83SPiotr Jasiukajtis  * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis  *
4*25c28e83SPiotr Jasiukajtis  * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis  * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis  * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis  *
8*25c28e83SPiotr Jasiukajtis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis  * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis  * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis  * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis  *
13*25c28e83SPiotr Jasiukajtis  * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis  * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis  * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis  * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis  *
19*25c28e83SPiotr Jasiukajtis  * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis  */
21*25c28e83SPiotr Jasiukajtis 
22*25c28e83SPiotr Jasiukajtis /*
23*25c28e83SPiotr Jasiukajtis  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24*25c28e83SPiotr Jasiukajtis  */
25*25c28e83SPiotr Jasiukajtis /*
26*25c28e83SPiotr Jasiukajtis  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27*25c28e83SPiotr Jasiukajtis  * Use is subject to license terms.
28*25c28e83SPiotr Jasiukajtis  */
29*25c28e83SPiotr Jasiukajtis 
30*25c28e83SPiotr Jasiukajtis #ifdef __RESTRICT
31*25c28e83SPiotr Jasiukajtis #define restrict _Restrict
32*25c28e83SPiotr Jasiukajtis #else
33*25c28e83SPiotr Jasiukajtis #define restrict
34*25c28e83SPiotr Jasiukajtis #endif
35*25c28e83SPiotr Jasiukajtis 
36*25c28e83SPiotr Jasiukajtis /* float logf(float x)
37*25c28e83SPiotr Jasiukajtis  *
38*25c28e83SPiotr Jasiukajtis  * Method :
39*25c28e83SPiotr Jasiukajtis  *	1. Special cases:
40*25c28e83SPiotr Jasiukajtis  *		for x is negative, -Inf => QNaN + invalid;
41*25c28e83SPiotr Jasiukajtis  *		for x = 0		=> -Inf + divide-by-zero;
42*25c28e83SPiotr Jasiukajtis  *		for x = +Inf		=> Inf;
43*25c28e83SPiotr Jasiukajtis  *		for x = NaN		=> QNaN.
44*25c28e83SPiotr Jasiukajtis  *	2. Computes logarithm from:
45*25c28e83SPiotr Jasiukajtis  *		x = m * 2**n => log(x) = n * log(2) + log(m),
46*25c28e83SPiotr Jasiukajtis  *		m = [1, 2).
47*25c28e83SPiotr Jasiukajtis  *	Let m = m0 + dm, where m0 = 1 + k / 32,
48*25c28e83SPiotr Jasiukajtis  *		k = [0, 32],
49*25c28e83SPiotr Jasiukajtis  *		dm = [-1/64, 1/64].
50*25c28e83SPiotr Jasiukajtis  *	Then log(m) = log(m0 + dm) = log(m0) + log(1+y),
51*25c28e83SPiotr Jasiukajtis  *		where y = dm*(1/m0), y = [-1/66, 1/64].
52*25c28e83SPiotr Jasiukajtis  *	Then
53*25c28e83SPiotr Jasiukajtis  *		1/m0 is looked up in a table of 1, 1/(1+1/32), ..., 1/(1+32/32);
54*25c28e83SPiotr Jasiukajtis  *		log(m0) is looked up in a table of log(1), log(1+1/32),
55*25c28e83SPiotr Jasiukajtis  *		..., log(1+32/32).
56*25c28e83SPiotr Jasiukajtis  *		log(1+y) is computed using approximation:
57*25c28e83SPiotr Jasiukajtis  *		log(1+y) = ((a3*y + a2)*y + a1)*y*y + y.
58*25c28e83SPiotr Jasiukajtis  * Accuracy:
59*25c28e83SPiotr Jasiukajtis  *	The maximum relative error for the approximating
60*25c28e83SPiotr Jasiukajtis  *	polynomial is 2**(-28.41).  All calculations are of
61*25c28e83SPiotr Jasiukajtis  *	double precision.
62*25c28e83SPiotr Jasiukajtis  *	Maximum error observed: less than 0.545 ulp for the
63*25c28e83SPiotr Jasiukajtis  *	whole float type range.
64*25c28e83SPiotr Jasiukajtis  */
65*25c28e83SPiotr Jasiukajtis 
66*25c28e83SPiotr Jasiukajtis static const double __TBL_logf[] = {
67*25c28e83SPiotr Jasiukajtis 	/* __TBL_logf[2*i] = log(1+i/32), i = [0, 32] */
68*25c28e83SPiotr Jasiukajtis 	/* __TBL_logf[2*i+1] = 2**(-23)/(1+i/32), i = [0, 32] */
69*25c28e83SPiotr Jasiukajtis 0.000000000000000000e+00, 1.192092895507812500e-07, 3.077165866675368733e-02,
70*25c28e83SPiotr Jasiukajtis 1.155968868371212153e-07, 6.062462181643483994e-02, 1.121969784007352926e-07,
71*25c28e83SPiotr Jasiukajtis 8.961215868968713805e-02, 1.089913504464285680e-07, 1.177830356563834557e-01,
72*25c28e83SPiotr Jasiukajtis 1.059638129340277719e-07, 1.451820098444978890e-01, 1.030999260979729787e-07,
73*25c28e83SPiotr Jasiukajtis 1.718502569266592284e-01, 1.003867701480263102e-07, 1.978257433299198675e-01,
74*25c28e83SPiotr Jasiukajtis 9.781275040064102225e-08, 2.231435513142097649e-01, 9.536743164062500529e-08,
75*25c28e83SPiotr Jasiukajtis 2.478361639045812692e-01, 9.304139672256097884e-08, 2.719337154836417580e-01,
76*25c28e83SPiotr Jasiukajtis 9.082612537202380448e-08, 2.954642128938358980e-01, 8.871388989825581272e-08,
77*25c28e83SPiotr Jasiukajtis 3.184537311185345887e-01, 8.669766512784091150e-08, 3.409265869705931928e-01,
78*25c28e83SPiotr Jasiukajtis 8.477105034722222546e-08, 3.629054936893684746e-01, 8.292820142663043248e-08,
79*25c28e83SPiotr Jasiukajtis 3.844116989103320559e-01, 8.116377160904255122e-08, 4.054651081081643849e-01,
80*25c28e83SPiotr Jasiukajtis 7.947285970052082892e-08, 4.260843953109000881e-01, 7.785096460459183052e-08,
81*25c28e83SPiotr Jasiukajtis 4.462871026284195297e-01, 7.629394531250000159e-08, 4.660897299245992387e-01,
82*25c28e83SPiotr Jasiukajtis 7.479798560049019504e-08, 4.855078157817008244e-01, 7.335956280048077330e-08,
83*25c28e83SPiotr Jasiukajtis 5.045560107523953119e-01, 7.197542010613207272e-08, 5.232481437645478684e-01,
84*25c28e83SPiotr Jasiukajtis 7.064254195601851460e-08, 5.415972824327444091e-01, 6.935813210227272390e-08,
85*25c28e83SPiotr Jasiukajtis 5.596157879354226594e-01, 6.811959402901785336e-08, 5.773153650348236132e-01,
86*25c28e83SPiotr Jasiukajtis 6.692451343201754014e-08, 5.947071077466927758e-01, 6.577064251077586116e-08,
87*25c28e83SPiotr Jasiukajtis 6.118015411059929409e-01, 6.465588585805084723e-08, 6.286086594223740942e-01,
88*25c28e83SPiotr Jasiukajtis 6.357828776041666578e-08, 6.451379613735847007e-01, 6.253602074795082293e-08,
89*25c28e83SPiotr Jasiukajtis 6.613984822453650159e-01, 6.152737525201612732e-08, 6.773988235918061429e-01,
90*25c28e83SPiotr Jasiukajtis 6.055075024801586965e-08, 6.931471805599452862e-01, 5.960464477539062500e-08
91*25c28e83SPiotr Jasiukajtis };
92*25c28e83SPiotr Jasiukajtis 
93*25c28e83SPiotr Jasiukajtis static const double
94*25c28e83SPiotr Jasiukajtis 	K3 = -2.49887584306188944706e-01,
95*25c28e83SPiotr Jasiukajtis 	K2 =  3.33368809981254554946e-01,
96*25c28e83SPiotr Jasiukajtis 	K1 = -5.00000008402474976565e-01;
97*25c28e83SPiotr Jasiukajtis 
98*25c28e83SPiotr Jasiukajtis static const union {
99*25c28e83SPiotr Jasiukajtis 	int	i;
100*25c28e83SPiotr Jasiukajtis 	float	f;
101*25c28e83SPiotr Jasiukajtis } inf = { 0x7f800000 };
102*25c28e83SPiotr Jasiukajtis 
103*25c28e83SPiotr Jasiukajtis #define INF	inf.f
104*25c28e83SPiotr Jasiukajtis 
105*25c28e83SPiotr Jasiukajtis #define PROCESS(N)								\
106*25c28e83SPiotr Jasiukajtis 	iy##N = ival##N & 0x007fffff;						\
107*25c28e83SPiotr Jasiukajtis 	ival##N = (iy##N + 0x20000) & 0xfffc0000;				\
108*25c28e83SPiotr Jasiukajtis 	i##N  = ival##N >> 17;							\
109*25c28e83SPiotr Jasiukajtis 	iy##N = iy##N - ival##N;						\
110*25c28e83SPiotr Jasiukajtis 	ty##N = LN2 * (double) exp##N + __TBL_logf[i##N];			\
111*25c28e83SPiotr Jasiukajtis 	yy##N = (double) iy##N * __TBL_logf[i##N + 1];				\
112*25c28e83SPiotr Jasiukajtis 	yy##N = ((K3 * yy##N + K2) * yy##N + K1) * yy##N * yy##N + yy##N;	\
113*25c28e83SPiotr Jasiukajtis 	y[0] = (float)(yy##N + ty##N);						\
114*25c28e83SPiotr Jasiukajtis 	y += stridey;
115*25c28e83SPiotr Jasiukajtis 
116*25c28e83SPiotr Jasiukajtis #define PREPROCESS(N, index, label)						\
117*25c28e83SPiotr Jasiukajtis 	ival##N = *(int*)x;							\
118*25c28e83SPiotr Jasiukajtis 	value = x[0];								\
119*25c28e83SPiotr Jasiukajtis 	x += stridex;								\
120*25c28e83SPiotr Jasiukajtis 	exp##N = (ival##N >> 23) - 127;						\
121*25c28e83SPiotr Jasiukajtis 	if ((ival##N & 0x7fffffff) >= 0x7f800000) /* X = NaN or Inf */	\
122*25c28e83SPiotr Jasiukajtis 	{									\
123*25c28e83SPiotr Jasiukajtis 		y[index] = value + INF;						\
124*25c28e83SPiotr Jasiukajtis 		goto label;							\
125*25c28e83SPiotr Jasiukajtis 	}									\
126*25c28e83SPiotr Jasiukajtis 	if (ival##N < 0x00800000)						\
127*25c28e83SPiotr Jasiukajtis 	{									\
128*25c28e83SPiotr Jasiukajtis 		if (ival##N > 0)	/* X = denormal */			\
129*25c28e83SPiotr Jasiukajtis 		{								\
130*25c28e83SPiotr Jasiukajtis 			value = (float) ival##N;				\
131*25c28e83SPiotr Jasiukajtis 			ival##N = *(int*) &value;				\
132*25c28e83SPiotr Jasiukajtis 			exp##N = (ival##N >> 23) - (127 + 149);			\
133*25c28e83SPiotr Jasiukajtis 		}								\
134*25c28e83SPiotr Jasiukajtis 		else								\
135*25c28e83SPiotr Jasiukajtis 		{								\
136*25c28e83SPiotr Jasiukajtis 			value = 0.0f;						\
137*25c28e83SPiotr Jasiukajtis 			y[index] = ((ival##N & 0x7fffffff) == 0) ?		\
138*25c28e83SPiotr Jasiukajtis 				-1.0f / value : value / value;			\
139*25c28e83SPiotr Jasiukajtis 			goto label;						\
140*25c28e83SPiotr Jasiukajtis 		}								\
141*25c28e83SPiotr Jasiukajtis 	}
142*25c28e83SPiotr Jasiukajtis 
143*25c28e83SPiotr Jasiukajtis void
144*25c28e83SPiotr Jasiukajtis __vlogf(int n, float * restrict x, int stridex, float * restrict y,
145*25c28e83SPiotr Jasiukajtis 	int stridey)
146*25c28e83SPiotr Jasiukajtis {
147*25c28e83SPiotr Jasiukajtis 	double	LN2 = __TBL_logf[64];		/* log(2) = 0.6931471805599453094 	*/
148*25c28e83SPiotr Jasiukajtis 	double	yy0, yy1, yy2, yy3, yy4;
149*25c28e83SPiotr Jasiukajtis 	double	ty0, ty1, ty2, ty3, ty4;
150*25c28e83SPiotr Jasiukajtis 	float	value;
151*25c28e83SPiotr Jasiukajtis 	int	i0, i1, i2, i3, i4;
152*25c28e83SPiotr Jasiukajtis 	int	ival0, ival1, ival2, ival3, ival4;
153*25c28e83SPiotr Jasiukajtis 	int	exp0, exp1, exp2, exp3, exp4;
154*25c28e83SPiotr Jasiukajtis 	int	iy0, iy1, iy2, iy3, iy4;
155*25c28e83SPiotr Jasiukajtis 
156*25c28e83SPiotr Jasiukajtis 	y -= stridey;
157*25c28e83SPiotr Jasiukajtis 
158*25c28e83SPiotr Jasiukajtis 	for (; ;)
159*25c28e83SPiotr Jasiukajtis 	{
160*25c28e83SPiotr Jasiukajtis begin:
161*25c28e83SPiotr Jasiukajtis 		y += stridey;
162*25c28e83SPiotr Jasiukajtis 
163*25c28e83SPiotr Jasiukajtis 		if (--n < 0)
164*25c28e83SPiotr Jasiukajtis 			break;
165*25c28e83SPiotr Jasiukajtis 
166*25c28e83SPiotr Jasiukajtis 		PREPROCESS(0, 0, begin)
167*25c28e83SPiotr Jasiukajtis 
168*25c28e83SPiotr Jasiukajtis 		if (--n < 0)
169*25c28e83SPiotr Jasiukajtis 			goto process1;
170*25c28e83SPiotr Jasiukajtis 
171*25c28e83SPiotr Jasiukajtis 		PREPROCESS(1, stridey, process1)
172*25c28e83SPiotr Jasiukajtis 
173*25c28e83SPiotr Jasiukajtis 		if (--n < 0)
174*25c28e83SPiotr Jasiukajtis 			goto process2;
175*25c28e83SPiotr Jasiukajtis 
176*25c28e83SPiotr Jasiukajtis 		PREPROCESS(2, (stridey << 1), process2)
177*25c28e83SPiotr Jasiukajtis 
178*25c28e83SPiotr Jasiukajtis 		if (--n < 0)
179*25c28e83SPiotr Jasiukajtis 			goto process3;
180*25c28e83SPiotr Jasiukajtis 
181*25c28e83SPiotr Jasiukajtis 		PREPROCESS(3, (stridey << 1) + stridey, process3)
182*25c28e83SPiotr Jasiukajtis 
183*25c28e83SPiotr Jasiukajtis 		if (--n < 0)
184*25c28e83SPiotr Jasiukajtis 			goto process4;
185*25c28e83SPiotr Jasiukajtis 
186*25c28e83SPiotr Jasiukajtis 		PREPROCESS(4, (stridey << 2), process4)
187*25c28e83SPiotr Jasiukajtis 
188*25c28e83SPiotr Jasiukajtis 		iy0 = ival0 & 0x007fffff;
189*25c28e83SPiotr Jasiukajtis 		iy1 = ival1 & 0x007fffff;
190*25c28e83SPiotr Jasiukajtis 		iy2 = ival2 & 0x007fffff;
191*25c28e83SPiotr Jasiukajtis 		iy3 = ival3 & 0x007fffff;
192*25c28e83SPiotr Jasiukajtis 		iy4 = ival4 & 0x007fffff;
193*25c28e83SPiotr Jasiukajtis 
194*25c28e83SPiotr Jasiukajtis 		ival0 = (iy0 + 0x20000) & 0xfffc0000;
195*25c28e83SPiotr Jasiukajtis 		ival1 = (iy1 + 0x20000) & 0xfffc0000;
196*25c28e83SPiotr Jasiukajtis 		ival2 = (iy2 + 0x20000) & 0xfffc0000;
197*25c28e83SPiotr Jasiukajtis 		ival3 = (iy3 + 0x20000) & 0xfffc0000;
198*25c28e83SPiotr Jasiukajtis 		ival4 = (iy4 + 0x20000) & 0xfffc0000;
199*25c28e83SPiotr Jasiukajtis 
200*25c28e83SPiotr Jasiukajtis 		i0 = ival0 >> 17;
201*25c28e83SPiotr Jasiukajtis 		i1 = ival1 >> 17;
202*25c28e83SPiotr Jasiukajtis 		i2 = ival2 >> 17;
203*25c28e83SPiotr Jasiukajtis 		i3 = ival3 >> 17;
204*25c28e83SPiotr Jasiukajtis 		i4 = ival4 >> 17;
205*25c28e83SPiotr Jasiukajtis 
206*25c28e83SPiotr Jasiukajtis 		iy0 = iy0 - ival0;
207*25c28e83SPiotr Jasiukajtis 		iy1 = iy1 - ival1;
208*25c28e83SPiotr Jasiukajtis 		iy2 = iy2 - ival2;
209*25c28e83SPiotr Jasiukajtis 		iy3 = iy3 - ival3;
210*25c28e83SPiotr Jasiukajtis 		iy4 = iy4 - ival4;
211*25c28e83SPiotr Jasiukajtis 
212*25c28e83SPiotr Jasiukajtis 		ty0 = LN2 * (double) exp0 + __TBL_logf[i0];
213*25c28e83SPiotr Jasiukajtis 		ty1 = LN2 * (double) exp1 + __TBL_logf[i1];
214*25c28e83SPiotr Jasiukajtis 		ty2 = LN2 * (double) exp2 + __TBL_logf[i2];
215*25c28e83SPiotr Jasiukajtis 		ty3 = LN2 * (double) exp3 + __TBL_logf[i3];
216*25c28e83SPiotr Jasiukajtis 		ty4 = LN2 * (double) exp4 + __TBL_logf[i4];
217*25c28e83SPiotr Jasiukajtis 
218*25c28e83SPiotr Jasiukajtis 		yy0 = (double) iy0 * __TBL_logf[i0 + 1];
219*25c28e83SPiotr Jasiukajtis 		yy1 = (double) iy1 * __TBL_logf[i1 + 1];
220*25c28e83SPiotr Jasiukajtis 		yy2 = (double) iy2 * __TBL_logf[i2 + 1];
221*25c28e83SPiotr Jasiukajtis 		yy3 = (double) iy3 * __TBL_logf[i3 + 1];
222*25c28e83SPiotr Jasiukajtis 		yy4 = (double) iy4 * __TBL_logf[i4 + 1];
223*25c28e83SPiotr Jasiukajtis 
224*25c28e83SPiotr Jasiukajtis 		yy0 = ((K3 * yy0 + K2) * yy0 + K1) * yy0 * yy0 + yy0;
225*25c28e83SPiotr Jasiukajtis 		yy1 = ((K3 * yy1 + K2) * yy1 + K1) * yy1 * yy1 + yy1;
226*25c28e83SPiotr Jasiukajtis 		yy2 = ((K3 * yy2 + K2) * yy2 + K1) * yy2 * yy2 + yy2;
227*25c28e83SPiotr Jasiukajtis 		yy3 = ((K3 * yy3 + K2) * yy3 + K1) * yy3 * yy3 + yy3;
228*25c28e83SPiotr Jasiukajtis 		yy4 = ((K3 * yy4 + K2) * yy4 + K1) * yy4 * yy4 + yy4;
229*25c28e83SPiotr Jasiukajtis 
230*25c28e83SPiotr Jasiukajtis 		y[0] = (float)(yy0 + ty0);
231*25c28e83SPiotr Jasiukajtis 		y += stridey;
232*25c28e83SPiotr Jasiukajtis 		y[0] = (float)(yy1 + ty1);
233*25c28e83SPiotr Jasiukajtis 		y += stridey;
234*25c28e83SPiotr Jasiukajtis 		y[0] = (float)(yy2 + ty2);
235*25c28e83SPiotr Jasiukajtis 		y += stridey;
236*25c28e83SPiotr Jasiukajtis 		y[0] = (float)(yy3 + ty3);
237*25c28e83SPiotr Jasiukajtis 		y += stridey;
238*25c28e83SPiotr Jasiukajtis 		y[0] = (float)(yy4 + ty4);
239*25c28e83SPiotr Jasiukajtis 		continue;
240*25c28e83SPiotr Jasiukajtis 
241*25c28e83SPiotr Jasiukajtis process1:
242*25c28e83SPiotr Jasiukajtis 		PROCESS(0)
243*25c28e83SPiotr Jasiukajtis 		continue;
244*25c28e83SPiotr Jasiukajtis 
245*25c28e83SPiotr Jasiukajtis process2:
246*25c28e83SPiotr Jasiukajtis 		PROCESS(0)
247*25c28e83SPiotr Jasiukajtis 		PROCESS(1)
248*25c28e83SPiotr Jasiukajtis 		continue;
249*25c28e83SPiotr Jasiukajtis 
250*25c28e83SPiotr Jasiukajtis process3:
251*25c28e83SPiotr Jasiukajtis 		PROCESS(0)
252*25c28e83SPiotr Jasiukajtis 		PROCESS(1)
253*25c28e83SPiotr Jasiukajtis 		PROCESS(2)
254*25c28e83SPiotr Jasiukajtis 		continue;
255*25c28e83SPiotr Jasiukajtis 
256*25c28e83SPiotr Jasiukajtis process4:
257*25c28e83SPiotr Jasiukajtis 		PROCESS(0)
258*25c28e83SPiotr Jasiukajtis 		PROCESS(1)
259*25c28e83SPiotr Jasiukajtis 		PROCESS(2)
260*25c28e83SPiotr Jasiukajtis 		PROCESS(3)
261*25c28e83SPiotr Jasiukajtis 	}
262*25c28e83SPiotr Jasiukajtis }
263