xref: /illumos-gate/usr/src/lib/libm/common/LD/erfl.c (revision ddc0e0b5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24  */
25 /*
26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 /* long double function erf,erfc (long double x)
31  * K.C. Ng, September, 1989.
32  *			     x
33  *		      2      |\
34  *     erf(x)  =  ---------  | exp(-t*t)dt
35  *	 	   sqrt(pi) \|
36  *			     0
37  *
38  *     erfc(x) =  1-erf(x)
39  *
40  * method:
41  * 	Since erf(-x) = -erf(x), we assume x>=0.
42  *	For x near 0, we have the expansion
43  *
44  *     	    erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....).
45  *
46  * 	Since 2/sqrt(pi) = 1.128379167095512573896158903121545171688,
47  *	we use x + x*P(x^2) to approximate erf(x). This formula will
48  *	guarantee the error less than one ulp where x is not too far
49  *	away from 0. We note that erf(x)=x at x = 0.6174...... After
50  *	some experiment, we choose the following approximation on
51  *	interval [0,0.84375].
52  *
53  *	For x in [0,0.84375]
54  *		   2		    2        4		     40
55  *	   P = 	P(x ) = (p0 + p1 * x + p2 * x + ... + p20 * x  )
56  *
57  *	   erf(x)  = x + x*P
58  *	   erfc(x) = 1 - erf(x) 	  if x<=0.25
59  *		   = 0.5 + ((0.5-x)-x*P)  if x in [0.25,0.84375]
60  *	precision: |P(x^2)-(erf(x)-x)/x| <= 2**-122.50
61  *
62  *	For x in [0.84375,1.25], let s = x - 1, and
63  *	c = 0.84506291151 rounded to single (24 bits)
64  *	   erf(x)  = c  + P1(s)/Q1(s)
65  *	   erfc(x) = (1-c)  - P1(s)/Q1(s)
66  *	precision: |P1/Q1 - (erf(x)-c)| <= 2**-118.41
67  *
68  *
69  *	For x in [1.25,1.75], let s = x - 1.5, and
70  *	c = 0.95478588343 rounded to single (24 bits)
71  *	   erf(x)  = c  + P2(s)/Q2(s)
72  *	   erfc(x) = (1-c)  - P2(s)/Q2(s)
73  *	precision: |P1/Q1 - (erf(x)-c)| <= 2**-123.83
74  *
75  *
76  *	For x in [1.75,16/3]
77  *	   erfc(x) = exp(-x*x)*(1/x)*R1(1/x)/S1(1/x)
78  *	   erf(x)  = 1 - erfc(x)
79  *	precision: absolute error of R1/S1 is bounded by 2**-124.03
80  *
81  *	For x in [16/3,107]
82  *	   erfc(x) = exp(-x*x)*(1/x)*R2(1/x)/S2(1/x)
83  *	   erf(x)  = 1 - erfc(x) (if x>=9 simple return erf(x)=1 with inexact)
84  *	precision: absolute error of R2/S2 is bounded by 2**-120.07
85  *
86  *	Else if inf > x >= 107
87  *	   erf(x)  = 1 with inexact
88  *	   erfc(x) = 0 with underflow
89  *
90  *	Special case:
91  *	   erf(inf)  = 1
92  *	   erfc(inf) = 0
93  */
94 
95 #pragma weak __erfl = erfl
96 #pragma weak __erfcl = erfcl
97 
98 #include "libm.h"
99 #include "longdouble.h"
100 
101 static long double
102 tiny	    = 1e-40L,
103 nearunfl    = 1e-4000L,
104 half	    = 0.5L,
105 one	    = 1.0L,
106 onehalf	    = 1.5L,
107 L16_3	    = 16.0L/3.0L;
108 /*
109  * Coefficients for even polynomial P for erf(x)=x+x*P(x^2) on [0,0.84375]
110  */
111 static long double P[] = { 	/* 21 coeffs */
112    1.283791670955125738961589031215451715556e-0001L,
113   -3.761263890318375246320529677071815594603e-0001L,
114    1.128379167095512573896158903121205899135e-0001L,
115   -2.686617064513125175943235483344625046092e-0002L,
116    5.223977625442187842111846652980454568389e-0003L,
117   -8.548327023450852832546626271083862724358e-0004L,
118    1.205533298178966425102164715902231976672e-0004L,
119   -1.492565035840625097674944905027897838996e-0005L,
120    1.646211436588924733604648849172936692024e-0006L,
121   -1.636584469123491976815834704799733514987e-0007L,
122    1.480719281587897445302529007144770739305e-0008L,
123   -1.229055530170782843046467986464722047175e-0009L,
124    9.422759064320307357553954945760654341633e-0011L,
125   -6.711366846653439036162105104991433380926e-0012L,
126    4.463224090341893165100275380693843116240e-0013L,
127   -2.783513452582658245422635662559779162312e-0014L,
128    1.634227412586960195251346878863754661546e-0015L,
129   -9.060782672889577722765711455623117802795e-0017L,
130    4.741341801266246873412159213893613602354e-0018L,
131   -2.272417596497826188374846636534317381203e-0019L,
132    8.069088733716068462496835658928566920933e-0021L,
133 };
134 
135 /*
136  * Rational erf(x) = ((float)0.84506291151) + P1(x-1)/Q1(x-1) on [0.84375,1.25]
137  */
138 static long double C1   = (long double)((float)0.84506291151);
139 static long double P1[] = { 	/*  12 top coeffs */
140   -2.362118560752659955654364917390741930316e-0003L,
141    4.129623379624420034078926610650759979146e-0001L,
142   -3.973857505403547283109417923182669976904e-0002L,
143    4.357503184084022439763567513078036755183e-0002L,
144    8.015593623388421371247676683754171456950e-0002L,
145   -1.034459310403352486685467221776778474602e-0002L,
146    5.671850295381046679675355719017720821383e-0003L,
147    1.219262563232763998351452194968781174318e-0003L,
148    5.390833481581033423020320734201065475098e-0004L,
149   -1.978853912815115495053119023517805528300e-0004L,
150    6.184234513953600118335017885706420552487e-0005L,
151   -5.331802711697810861017518515816271808286e-0006L,
152 };
153 static long double Q1[] = { 	/*  12 bottom coeffs with leading 1.0 hidden */
154    9.081506296064882195280178373107623196655e-0001L,
155    6.821049531968204097604392183650687642520e-0001L,
156    4.067869178233539502315055970743271822838e-0001L,
157    1.702332233546316765818144723063881095577e-0001L,
158    7.498098377690553934266423088708614219356e-0002L,
159    2.050154396918178697056927234366372760310e-0002L,
160    7.012988534031999899054782333851905939379e-0003L,
161    1.149904787014400354649843451234570731076e-0003L,
162    3.185620255011299476196039491205159718620e-0004L,
163    1.273405072153008775426376193374105840517e-0005L,
164    4.753866999959432971956781228148402971454e-0006L,
165   -1.002287602111660026053981728549540200683e-0006L,
166 };
167 /*
168  * Rational erf(x) = ((float)0.95478588343) + P2(x-1.5)/Q2(x-1.5)
169  * on [1.25,1.75]
170  */
171 static long double C2   = (long double)((float)0.95478588343);
172 static long double P2[] = { 	/*  12 top coeffs */
173    1.131926304864446730135126164594785863512e-0002L,
174    1.273617996967754151544330055186210322832e-0001L,
175   -8.169980734667512519897816907190281143423e-0002L,
176    9.512267486090321197833634271787944271746e-0002L,
177   -2.394251569804872160005274999735914368170e-0002L,
178    1.108768660227528667525252333184520222905e-0002L,
179    3.527435492933902414662043314373277494221e-0004L,
180    4.946116273341953463584319006669474625971e-0004L,
181   -4.289851942513144714600285769022420962418e-0005L,
182    8.304719841341952705874781636002085119978e-0005L,
183   -1.040460226177309338781902252282849903189e-0005L,
184    2.122913331584921470381327583672044434087e-0006L,
185 };
186 static long double Q2[] = { 	/*  13 bottom coeffs with leading 1.0 hidden */
187    7.448815737306992749168727691042003832150e-0001L,
188    7.161813850236008294484744312430122188043e-0001L,
189    3.603134756584225766144922727405641236121e-0001L,
190    1.955811609133766478080550795194535852653e-0001L,
191    7.253059963716225972479693813787810711233e-0002L,
192    2.752391253757421424212770221541238324978e-0002L,
193    7.677654852085240257439050673446546828005e-0003L,
194    2.141102244555509687346497060326630061069e-0003L,
195    4.342123013830957093949563339130674364271e-0004L,
196    8.664587895570043348530991997272212150316e-0005L,
197    1.109201582511752087060167429397033701988e-0005L,
198    1.357834375781831062713347000030984364311e-0006L,
199    4.957746280594384997273090385060680016451e-0008L,
200 };
201 /*
202  * erfc(x) = exp(-x*x)/x * R1(1/x)/S1(1/x) on [1.75, 16/3]
203  */
204 static long double R1[] = { 	/*  14 top coeffs */
205    4.630195122654315016370705767621550602948e+0006L,
206    1.257949521746494830700654204488675713628e+0007L,
207    1.704153822720260272814743497376181625707e+0007L,
208    1.502600568706061872381577539537315739943e+0007L,
209    9.543710793431995284827024445387333922861e+0006L,
210    4.589344808584091011652238164935949522427e+0006L,
211    1.714660662941745791190907071920671844289e+0006L,
212    5.034802147768798894307672256192466283867e+0005L,
213    1.162286400443554670553152110447126850725e+0005L,
214    2.086643834548901681362757308058660399137e+0004L,
215    2.839793161868140305907004392890348777338e+0003L,
216    2.786687241658423601778258694498655680778e+0002L,
217    1.779177837102695602425897452623985786464e+0001L,
218    5.641895835477470769043614623819144434731e-0001L,
219 };
220 static long double S1[] = { 	/* 15 bottom coeffs with leading 1.0 hidden */
221    4.630195122654331529595606896287596843110e+0006L,
222    1.780411093345512024324781084220509055058e+0007L,
223    3.250113097051800703707108623715776848283e+0007L,
224    3.737857099176755050912193712123489115755e+0007L,
225    3.029787497516578821459174055870781168593e+0007L,
226    1.833850619965384765005769632103205777227e+0007L,
227    8.562719999736915722210391222639186586498e+0006L,
228    3.139684562074658971315545539760008136973e+0006L,
229    9.106421313731384880027703627454366930945e+0005L,
230    2.085108342384266508613267136003194920001e+0005L,
231    3.723126272693120340730491416449539290600e+0004L,
232    5.049169878567344046145695360784436929802e+0003L,
233    4.944274532748010767670150730035392093899e+0002L,
234    3.153510608818213929982940249162268971412e+0001L,
235    1.0e00L,
236 };
237 
238 /*
239  * erfc(x) = exp(-x*x)/x * R2(1/x)/S2(1/x) on [16/3, 107]
240  */
241 static long double R2[] = { 	/*  15 top coeffs in reverse order!!*/
242    2.447288012254302966796326587537136931669e+0005L,
243    8.768592567189861896653369912716538739016e+0005L,
244    1.552293152581780065761497908005779524953e+0006L,
245    1.792075924835942935864231657504259926729e+0006L,
246    1.504001463155897344947500222052694835875e+0006L,
247    9.699485556326891411801230186016013019935e+0005L,
248    4.961449933661807969863435013364796037700e+0005L,
249    2.048726544693474028061176764716228273791e+0005L,
250    6.891532964330949722479061090551896886635e+0004L,
251    1.888014709010307507771964047905823237985e+0004L,
252    4.189692064988957745054734809642495644502e+0003L,
253    7.362346487427048068212968889642741734621e+0002L,
254    9.980359714211411423007641056580813116207e+0001L,
255    9.426910895135379181107191962193485174159e+0000L,
256    5.641895835477562869480794515623601280429e-0001L,
257 };
258 static long double S2[] = { 	/* 16 coefficients */
259    2.447282203601902971246004716790604686880e+0005L,
260    1.153009852759385309367759460934808489833e+0006L,
261    2.608580649612639131548966265078663384849e+0006L,
262    3.766673917346623308850202792390569025740e+0006L,
263    3.890566255138383910789924920541335370691e+0006L,
264    3.052882073900746207613166259994150527732e+0006L,
265    1.885574519970380988460241047248519418407e+0006L,
266    9.369722034759943185851450846811445012922e+0005L,
267    3.792278350536686111444869752624492443659e+0005L,
268    1.257750606950115799965366001773094058720e+0005L,
269    3.410830600242369370645608634643620355058e+0004L,
270    7.513984469742343134851326863175067271240e+0003L,
271    1.313296320593190002554779998138695507840e+0003L,
272    1.773972700887629157006326333696896516769e+0002L,
273    1.670876451822586800422009013880457094162e+0001L,
274    1.000L,
275 };
276 
erfl(x)277 long double erfl(x)
278 long double x;
279 {
280 	long double erfcl(long double),s,y,t;
281 
282 	if (!finitel(x)) {
283 	    if (x != x) return x+x; 	/* NaN */
284 	    return copysignl(one,x);	/* return +-1.0 is x=Inf */
285 	}
286 
287 	y = fabsl(x);
288 	if (y <= 0.84375L) {
289 	    if (y<=tiny) return x+P[0]*x;
290 	    s = y*y;
291 	    t = __poly_libmq(s,21,P);
292 	    return  x+x*t;
293 	}
294 	if (y<=1.25L) {
295 	    s = y-one;
296 	    t = C1+__poly_libmq(s,12,P1)/(one+s*__poly_libmq(s,12,Q1));
297 	    return (signbitl(x))? -t: t;
298 	} else if (y<=1.75L) {
299 	    s = y-onehalf;
300 	    t = C2+__poly_libmq(s,12,P2)/(one+s*__poly_libmq(s,13,Q2));
301 	    return (signbitl(x))? -t: t;
302 	}
303 	if (y<=9.0L) t = erfcl(y); else t = tiny;
304 	return (signbitl(x))? t-one: one-t;
305 }
306 
erfcl(x)307 long double erfcl(x)
308 long double x;
309 {
310 	long double erfl(long double),s,y,t;
311 
312 	if (!finitel(x)) {
313 	    if (x != x) return x+x; 	/* NaN */
314 	    				/* return 2.0 if x= -inf
315 						  0.0 if x= +inf */
316 	    if (x<0.0L) return 2.0L; else return 0.0L;
317 	}
318 
319 	if (x <= 0.84375L) {
320 	    if (x<=0.25) return one-erfl(x);
321 	    s = x*x;
322 	    t = half-x;
323 	    t = t - x*__poly_libmq(s,21,P);
324 	    return  half+t;
325 	}
326 	if (x<=1.25L) {
327 	    s = x-one;
328 	    t = one-C1;
329 	    return t - __poly_libmq(s,12,P1)/(one+s*__poly_libmq(s,12,Q1));
330 	} else if (x<=1.75L) {
331 	    s = x-onehalf;
332 	    t = one-C2;
333 	    return t - __poly_libmq(s,12,P2)/(one+s*__poly_libmq(s,13,Q2));
334 	}
335 	if (x>=107.0L) return nearunfl*nearunfl;		/* underflow */
336 	else if (x >= L16_3) {
337 	    y = __poly_libmq(x,15,R2);
338 	    t = y/__poly_libmq(x,16,S2);
339 	} else {
340 	    y = __poly_libmq(x,14,R1);
341 	    t = y/__poly_libmq(x,15,S1);
342 	}
343 	/* see comment in ../Q/erfl.c */
344 	y = x;
345 	*(int*)&y = 0;
346 	t *= expl(-y*y)*expl(-(x-y)*(x+y));
347 	return  t;
348 }
349