125c28e8Piotr Jasiukajtis/*
225c28e8Piotr Jasiukajtis * CDDL HEADER START
325c28e8Piotr Jasiukajtis *
425c28e8Piotr Jasiukajtis * The contents of this file are subject to the terms of the
525c28e8Piotr Jasiukajtis * Common Development and Distribution License (the "License").
625c28e8Piotr Jasiukajtis * You may not use this file except in compliance with the License.
725c28e8Piotr Jasiukajtis *
825c28e8Piotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
925c28e8Piotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
1025c28e8Piotr Jasiukajtis * See the License for the specific language governing permissions
1125c28e8Piotr Jasiukajtis * and limitations under the License.
1225c28e8Piotr Jasiukajtis *
1325c28e8Piotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
1425c28e8Piotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1525c28e8Piotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
1625c28e8Piotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
1725c28e8Piotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
1825c28e8Piotr Jasiukajtis *
1925c28e8Piotr Jasiukajtis * CDDL HEADER END
2025c28e8Piotr Jasiukajtis */
2125c28e8Piotr Jasiukajtis
2225c28e8Piotr Jasiukajtis/*
2325c28e8Piotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
2425c28e8Piotr Jasiukajtis */
2525c28e8Piotr Jasiukajtis/*
2625c28e8Piotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
2725c28e8Piotr Jasiukajtis * Use is subject to license terms.
2825c28e8Piotr Jasiukajtis */
2925c28e8Piotr Jasiukajtis
3025c28e8Piotr Jasiukajtis#ifdef __RESTRICT
3125c28e8Piotr Jasiukajtis#define restrict _Restrict
3225c28e8Piotr Jasiukajtis#else
3325c28e8Piotr Jasiukajtis#define restrict
3425c28e8Piotr Jasiukajtis#endif
3525c28e8Piotr Jasiukajtis
3625c28e8Piotr Jasiukajtis/* float logf(float x)
3725c28e8Piotr Jasiukajtis *
3825c28e8Piotr Jasiukajtis * Method :
3925c28e8Piotr Jasiukajtis *	1. Special cases:
4025c28e8Piotr Jasiukajtis *		for x is negative, -Inf => QNaN + invalid;
4125c28e8Piotr Jasiukajtis *		for x = 0		=> -Inf + divide-by-zero;
4225c28e8Piotr Jasiukajtis *		for x = +Inf		=> Inf;
4325c28e8Piotr Jasiukajtis *		for x = NaN		=> QNaN.
4425c28e8Piotr Jasiukajtis *	2. Computes logarithm from:
4525c28e8Piotr Jasiukajtis *		x = m * 2**n => log(x) = n * log(2) + log(m),
4625c28e8Piotr Jasiukajtis *		m = [1, 2).
4725c28e8Piotr Jasiukajtis *	Let m = m0 + dm, where m0 = 1 + k / 32,
4825c28e8Piotr Jasiukajtis *		k = [0, 32],
4925c28e8Piotr Jasiukajtis *		dm = [-1/64, 1/64].
5025c28e8Piotr Jasiukajtis *	Then log(m) = log(m0 + dm) = log(m0) + log(1+y),
5125c28e8Piotr Jasiukajtis *		where y = dm*(1/m0), y = [-1/66, 1/64].
5225c28e8Piotr Jasiukajtis *	Then
5325c28e8Piotr Jasiukajtis *		1/m0 is looked up in a table of 1, 1/(1+1/32), ..., 1/(1+32/32);
5425c28e8Piotr Jasiukajtis *		log(m0) is looked up in a table of log(1), log(1+1/32),
5525c28e8Piotr Jasiukajtis *		..., log(1+32/32).
5625c28e8Piotr Jasiukajtis *		log(1+y) is computed using approximation:
5725c28e8Piotr Jasiukajtis *		log(1+y) = ((a3*y + a2)*y + a1)*y*y + y.
5825c28e8Piotr Jasiukajtis * Accuracy:
5925c28e8Piotr Jasiukajtis *	The maximum relative error for the approximating
6025c28e8Piotr Jasiukajtis *	polynomial is 2**(-28.41).  All calculations are of
6125c28e8Piotr Jasiukajtis *	double precision.
6225c28e8Piotr Jasiukajtis *	Maximum error observed: less than 0.545 ulp for the
6325c28e8Piotr Jasiukajtis *	whole float type range.
6425c28e8Piotr Jasiukajtis */
6525c28e8Piotr Jasiukajtis
6625c28e8Piotr Jasiukajtisstatic const double __TBL_logf[] = {
6725c28e8Piotr Jasiukajtis	/* __TBL_logf[2*i] = log(1+i/32), i = [0, 32] */
6825c28e8Piotr Jasiukajtis	/* __TBL_logf[2*i+1] = 2**(-23)/(1+i/32), i = [0, 32] */
6925c28e8Piotr Jasiukajtis0.000000000000000000e+00, 1.192092895507812500e-07, 3.077165866675368733e-02,
7025c28e8Piotr Jasiukajtis1.155968868371212153e-07, 6.062462181643483994e-02, 1.121969784007352926e-07,
7125c28e8Piotr Jasiukajtis8.961215868968713805e-02, 1.089913504464285680e-07, 1.177830356563834557e-01,
7225c28e8Piotr Jasiukajtis1.059638129340277719e-07, 1.451820098444978890e-01, 1.030999260979729787e-07,
7325c28e8Piotr Jasiukajtis1.718502569266592284e-01, 1.003867701480263102e-07, 1.978257433299198675e-01,
7425c28e8Piotr Jasiukajtis9.781275040064102225e-08, 2.231435513142097649e-01, 9.536743164062500529e-08,
7525c28e8Piotr Jasiukajtis2.478361639045812692e-01, 9.304139672256097884e-08, 2.719337154836417580e-01,
7625c28e8Piotr Jasiukajtis9.082612537202380448e-08, 2.954642128938358980e-01, 8.871388989825581272e-08,
7725c28e8Piotr Jasiukajtis3.184537311185345887e-01, 8.669766512784091150e-08, 3.409265869705931928e-01,
7825c28e8Piotr Jasiukajtis8.477105034722222546e-08, 3.629054936893684746e-01, 8.292820142663043248e-08,
7925c28e8Piotr Jasiukajtis3.844116989103320559e-01, 8.116377160904255122e-08, 4.054651081081643849e-01,
8025c28e8Piotr Jasiukajtis7.947285970052082892e-08, 4.260843953109000881e-01, 7.785096460459183052e-08,
8125c28e8Piotr Jasiukajtis4.462871026284195297e-01, 7.629394531250000159e-08, 4.660897299245992387e-01,
8225c28e8Piotr Jasiukajtis7.479798560049019504e-08, 4.855078157817008244e-01, 7.335956280048077330e-08,
8325c28e8Piotr Jasiukajtis5.045560107523953119e-01, 7.197542010613207272e-08, 5.232481437645478684e-01,
8425c28e8Piotr Jasiukajtis7.064254195601851460e-08, 5.415972824327444091e-01, 6.935813210227272390e-08,
8525c28e8Piotr Jasiukajtis5.596157879354226594e-01, 6.811959402901785336e-08, 5.773153650348236132e-01,
8625c28e8Piotr Jasiukajtis6.692451343201754014e-08, 5.947071077466927758e-01, 6.577064251077586116e-08,
8725c28e8Piotr Jasiukajtis6.118015411059929409e-01, 6.465588585805084723e-08, 6.286086594223740942e-01,
8825c28e8Piotr Jasiukajtis6.357828776041666578e-08, 6.451379613735847007e-01, 6.253602074795082293e-08,
8925c28e8Piotr Jasiukajtis6.613984822453650159e-01, 6.152737525201612732e-08, 6.773988235918061429e-01,
9025c28e8Piotr Jasiukajtis6.055075024801586965e-08, 6.931471805599452862e-01, 5.960464477539062500e-08
9125c28e8Piotr Jasiukajtis};
9225c28e8Piotr Jasiukajtis
9325c28e8Piotr Jasiukajtisstatic const double
9425c28e8Piotr Jasiukajtis	K3 = -2.49887584306188944706e-01,
9525c28e8Piotr Jasiukajtis	K2 =  3.33368809981254554946e-01,
9625c28e8Piotr Jasiukajtis	K1 = -5.00000008402474976565e-01;
9725c28e8Piotr Jasiukajtis
9825c28e8Piotr Jasiukajtisstatic const union {
9925c28e8Piotr Jasiukajtis	int	i;
10025c28e8Piotr Jasiukajtis	float	f;
10125c28e8Piotr Jasiukajtis} inf = { 0x7f800000 };
10225c28e8Piotr Jasiukajtis
10325c28e8Piotr Jasiukajtis#define INF	inf.f
10425c28e8Piotr Jasiukajtis
10525c28e8Piotr Jasiukajtis#define PROCESS(N)								\
10625c28e8Piotr Jasiukajtis	iy##N = ival##N & 0x007fffff;						\
10725c28e8Piotr Jasiukajtis	ival##N = (iy##N + 0x20000) & 0xfffc0000;				\
10825c28e8Piotr Jasiukajtis	i##N  = ival##N >> 17;							\
10925c28e8Piotr Jasiukajtis	iy##N = iy##N - ival##N;						\
11025c28e8Piotr Jasiukajtis	ty##N = LN2 * (double) exp##N + __TBL_logf[i##N];			\
11125c28e8Piotr Jasiukajtis	yy##N = (double) iy##N * __TBL_logf[i##N + 1];				\
11225c28e8Piotr Jasiukajtis	yy##N = ((K3 * yy##N + K2) * yy##N + K1) * yy##N * yy##N + yy##N;	\
11325c28e8Piotr Jasiukajtis	y[0] = (float)(yy##N + ty##N);						\
11425c28e8Piotr Jasiukajtis	y += stridey;
11525c28e8Piotr Jasiukajtis
11625c28e8Piotr Jasiukajtis#define PREPROCESS(N, index, label)						\
11725c28e8Piotr Jasiukajtis	ival##N = *(int*)x;							\
11825c28e8Piotr Jasiukajtis	value = x[0];								\
11925c28e8Piotr Jasiukajtis	x += stridex;								\
12025c28e8Piotr Jasiukajtis	exp##N = (ival##N >> 23) - 127;						\
12125c28e8Piotr Jasiukajtis	if ((ival##N & 0x7fffffff) >= 0x7f800000) /* X = NaN or Inf */	\
12225c28e8Piotr Jasiukajtis	{									\
12325c28e8Piotr Jasiukajtis		y[index] = value + INF;						\
12425c28e8Piotr Jasiukajtis		goto label;							\
12525c28e8Piotr Jasiukajtis	}									\
12625c28e8Piotr Jasiukajtis	if (ival##N < 0x00800000)						\
12725c28e8Piotr Jasiukajtis	{									\
12825c28e8Piotr Jasiukajtis		if (ival##N > 0)	/* X = denormal */			\
12925c28e8Piotr Jasiukajtis		{								\
13025c28e8Piotr Jasiukajtis			value = (float) ival##N;				\
13125c28e8Piotr Jasiukajtis			ival##N = *(int*) &value;				\
13225c28e8Piotr Jasiukajtis			exp##N = (ival##N >> 23) - (127 + 149);			\
13325c28e8Piotr Jasiukajtis		}								\
13425c28e8Piotr Jasiukajtis		else								\
13525c28e8Piotr Jasiukajtis		{								\
13625c28e8Piotr Jasiukajtis			value = 0.0f;						\
13725c28e8Piotr Jasiukajtis			y[index] = ((ival##N & 0x7fffffff) == 0) ?		\
13825c28e8Piotr Jasiukajtis				-1.0f / value : value / value;			\
13925c28e8Piotr Jasiukajtis			goto label;						\
14025c28e8Piotr Jasiukajtis		}								\
14125c28e8Piotr Jasiukajtis	}
14225c28e8Piotr Jasiukajtis
14325c28e8Piotr Jasiukajtisvoid
14425c28e8Piotr Jasiukajtis__vlogf(int n, float * restrict x, int stridex, float * restrict y,
14525c28e8Piotr Jasiukajtis	int stridey)
14625c28e8Piotr Jasiukajtis{
14725c28e8Piotr Jasiukajtis	double	LN2 = __TBL_logf[64];		/* log(2) = 0.6931471805599453094 	*/
14825c28e8Piotr Jasiukajtis	double	yy0, yy1, yy2, yy3, yy4;
14925c28e8Piotr Jasiukajtis	double	ty0, ty1, ty2, ty3, ty4;
15025c28e8Piotr Jasiukajtis	float	value;
15125c28e8Piotr Jasiukajtis	int	i0, i1, i2, i3, i4;
15225c28e8Piotr Jasiukajtis	int	ival0, ival1, ival2, ival3, ival4;
15325c28e8Piotr Jasiukajtis	int	exp0, exp1, exp2, exp3, exp4;
15425c28e8Piotr Jasiukajtis	int	iy0, iy1, iy2, iy3, iy4;
15525c28e8Piotr Jasiukajtis
15625c28e8Piotr Jasiukajtis	y -= stridey;
15725c28e8Piotr Jasiukajtis
15825c28e8Piotr Jasiukajtis	for (; ;)
15925c28e8Piotr Jasiukajtis	{
16025c28e8Piotr Jasiukajtisbegin:
16125c28e8Piotr Jasiukajtis		y += stridey;
16225c28e8Piotr Jasiukajtis
16325c28e8Piotr Jasiukajtis		if (--n < 0)
16425c28e8Piotr Jasiukajtis			break;
16525c28e8Piotr Jasiukajtis
16625c28e8Piotr Jasiukajtis		PREPROCESS(0, 0, begin)
16725c28e8Piotr Jasiukajtis
16825c28e8Piotr Jasiukajtis		if (--n < 0)
16925c28e8Piotr Jasiukajtis			goto process1;
17025c28e8Piotr Jasiukajtis
17125c28e8Piotr Jasiukajtis		PREPROCESS(1, stridey, process1)
17225c28e8Piotr Jasiukajtis
17325c28e8Piotr Jasiukajtis		if (--n < 0)
17425c28e8Piotr Jasiukajtis			goto process2;
17525c28e8Piotr Jasiukajtis
17625c28e8Piotr Jasiukajtis		PREPROCESS(2, (stridey << 1), process2)
17725c28e8Piotr Jasiukajtis
17825c28e8Piotr Jasiukajtis		if (--n < 0)
17925c28e8Piotr Jasiukajtis			goto process3;
18025c28e8Piotr Jasiukajtis
18125c28e8Piotr Jasiukajtis		PREPROCESS(3, (stridey << 1) + stridey, process3)
18225c28e8Piotr Jasiukajtis
18325c28e8Piotr Jasiukajtis		if (--n < 0)
18425c28e8Piotr Jasiukajtis			goto process4;
18525c28e8Piotr Jasiukajtis
18625c28e8Piotr Jasiukajtis		PREPROCESS(4, (stridey << 2), process4)
18725c28e8Piotr Jasiukajtis
18825c28e8Piotr Jasiukajtis		iy0 = ival0 & 0x007fffff;
18925c28e8Piotr Jasiukajtis		iy1 = ival1 & 0x007fffff;
19025c28e8Piotr Jasiukajtis		iy2 = ival2 & 0x007fffff;
19125c28e8Piotr Jasiukajtis		iy3 = ival3 & 0x007fffff;
19225c28e8Piotr Jasiukajtis		iy4 = ival4 & 0x007fffff;
19325c28e8Piotr Jasiukajtis
19425c28e8Piotr Jasiukajtis		ival0 = (iy0 + 0x20000) & 0xfffc0000;
19525c28e8Piotr Jasiukajtis		ival1 = (iy1 + 0x20000) & 0xfffc0000;
19625c28e8Piotr Jasiukajtis		ival2 = (iy2 + 0x20000) & 0xfffc0000;
19725c28e8Piotr Jasiukajtis		ival3 = (iy3 + 0x20000) & 0xfffc0000;
19825c28e8Piotr Jasiukajtis		ival4 = (iy4 + 0x20000) & 0xfffc0000;
19925c28e8Piotr Jasiukajtis
20025c28e8Piotr Jasiukajtis		i0 = ival0 >> 17;
20125c28e8Piotr Jasiukajtis		i1 = ival1 >> 17;
20225c28e8Piotr Jasiukajtis		i2 = ival2 >> 17;
20325c28e8Piotr Jasiukajtis		i3 = ival3 >> 17;
20425c28e8Piotr Jasiukajtis		i4 = ival4 >> 17;
20525c28e8Piotr Jasiukajtis
20625c28e8Piotr Jasiukajtis		iy0 = iy0 - ival0;
20725c28e8Piotr Jasiukajtis		iy1 = iy1 - ival1;
20825c28e8Piotr Jasiukajtis		iy2 = iy2 - ival2;
20925c28e8Piotr Jasiukajtis		iy3 = iy3 - ival3;
21025c28e8Piotr Jasiukajtis		iy4 = iy4 - ival4;
21125c28e8Piotr Jasiukajtis
21225c28e8Piotr Jasiukajtis		ty0 = LN2 * (double) exp0 + __TBL_logf[i0];
21325c28e8Piotr Jasiukajtis		ty1 = LN2 * (double) exp1 + __TBL_logf[i1];
21425c28e8Piotr Jasiukajtis		ty2 = LN2 * (double) exp2 + __TBL_logf[i2];
21525c28e8Piotr Jasiukajtis		ty3 = LN2 * (double) exp3 + __TBL_logf[i3];
21625c28e8Piotr Jasiukajtis		ty4 = LN2 * (double) exp4 + __TBL_logf[i4];
21725c28e8Piotr Jasiukajtis
21825c28e8Piotr Jasiukajtis		yy0 = (double) iy0 * __TBL_logf[i0 + 1];
21925c28e8Piotr Jasiukajtis		yy1 = (double) iy1 * __TBL_logf[i1 + 1];
22025c28e8Piotr Jasiukajtis		yy2 = (double) iy2 * __TBL_logf[i2 + 1];
22125c28e8Piotr Jasiukajtis		yy3 = (double) iy3 * __TBL_logf[i3 + 1];
22225c28e8Piotr Jasiukajtis		yy4 = (double) iy4 * __TBL_logf[i4 + 1];
22325c28e8Piotr Jasiukajtis
22425c28e8Piotr Jasiukajtis		yy0 = ((K3 * yy0 + K2) * yy0 + K1) * yy0 * yy0 + yy0;
22525c28e8Piotr Jasiukajtis		yy1 = ((K3 * yy1 + K2) * yy1 + K1) * yy1 * yy1 + yy1;
22625c28e8Piotr Jasiukajtis		yy2 = ((K3 * yy2 + K2) * yy2 + K1) * yy2 * yy2 + yy2;
22725c28e8Piotr Jasiukajtis		yy3 = ((K3 * yy3 + K2) * yy3 + K1) * yy3 * yy3 + yy3;
22825c28e8Piotr Jasiukajtis		yy4 = ((K3 * yy4 + K2) * yy4 + K1) * yy4 * yy4 + yy4;
22925c28e8Piotr Jasiukajtis
23025c28e8Piotr Jasiukajtis		y[0] = (float)(yy0 + ty0);
23125c28e8Piotr Jasiukajtis		y += stridey;
23225c28e8Piotr Jasiukajtis		y[0] = (float)(yy1 + ty1);
23325c28e8Piotr Jasiukajtis		y += stridey;
23425c28e8Piotr Jasiukajtis		y[0] = (float)(yy2 + ty2);
23525c28e8Piotr Jasiukajtis		y += stridey;
23625c28e8Piotr Jasiukajtis		y[0] = (float)(yy3 + ty3);
23725c28e8Piotr Jasiukajtis		y += stridey;
23825c28e8Piotr Jasiukajtis		y[0] = (float)(yy4 + ty4);
23925c28e8Piotr Jasiukajtis		continue;
24025c28e8Piotr Jasiukajtis
24125c28e8Piotr Jasiukajtisprocess1:
24225c28e8Piotr Jasiukajtis		PROCESS(0)
24325c28e8Piotr Jasiukajtis		continue;
24425c28e8Piotr Jasiukajtis
24525c28e8Piotr Jasiukajtisprocess2:
24625c28e8Piotr Jasiukajtis		PROCESS(0)
24725c28e8Piotr Jasiukajtis		PROCESS(1)
24825c28e8Piotr Jasiukajtis		continue;
24925c28e8Piotr Jasiukajtis
25025c28e8Piotr Jasiukajtisprocess3:
25125c28e8Piotr Jasiukajtis		PROCESS(0)
25225c28e8Piotr Jasiukajtis		PROCESS(1)
25325c28e8Piotr Jasiukajtis		PROCESS(2)
25425c28e8Piotr Jasiukajtis		continue;
25525c28e8Piotr Jasiukajtis
25625c28e8Piotr Jasiukajtisprocess4:
25725c28e8Piotr Jasiukajtis		PROCESS(0)
25825c28e8Piotr Jasiukajtis		PROCESS(1)
25925c28e8Piotr Jasiukajtis		PROCESS(2)
26025c28e8Piotr Jasiukajtis		PROCESS(3)
26125c28e8Piotr Jasiukajtis	}
26225c28e8Piotr Jasiukajtis}
263