1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24 */
25/*
26 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27 * Use is subject to license terms.
28 */
29
30#pragma weak __sqrtl = sqrtl
31
32#include "libm.h"
33#include "longdouble.h"
34
35extern int __swapTE(int);
36extern int __swapEX(int);
37extern enum fp_direction_type __swapRD(enum fp_direction_type);
38
39/*
40 * in struct longdouble, msw consists of
41 *	unsigned short	sgn:1;
42 *	unsigned short	exp:15;
43 *	unsigned short	frac1:16;
44 */
45
46#ifdef __LITTLE_ENDIAN
47
48/* array indices used to access words within a double */
49#define	HIWORD	1
50#define	LOWORD	0
51
52/* structure used to access words within a quad */
53union longdouble {
54	struct {
55		unsigned int	frac4;
56		unsigned int	frac3;
57		unsigned int	frac2;
58		unsigned int	msw;
59	} l;
60	long double	d;
61};
62
63/* default NaN returned for sqrt(neg) */
64static const union longdouble
65	qnan = { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff };
66
67/* signalling NaN used to raise invalid */
68static const union {
69	unsigned u[2];
70	double d;
71} snan = { 0, 0x7ff00001 };
72
73#else
74
75/* array indices used to access words within a double */
76#define	HIWORD	0
77#define	LOWORD	1
78
79/* structure used to access words within a quad */
80union longdouble {
81	struct {
82		unsigned int	msw;
83		unsigned int	frac2;
84		unsigned int	frac3;
85		unsigned int	frac4;
86	} l;
87	long double	d;
88};
89
90/* default NaN returned for sqrt(neg) */
91static const union longdouble
92	qnan = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff };
93
94/* signalling NaN used to raise invalid */
95static const union {
96	unsigned u[2];
97	double d;
98} snan = { 0x7ff00001, 0 };
99
100#endif /* __LITTLE_ENDIAN */
101
102
103static const double
104	zero = 0.0,
105	half = 0.5,
106	one = 1.0,
107	huge = 1.0e300,
108	tiny = 1.0e-300,
109	two36 = 6.87194767360000000000e+10,
110	two30 = 1.07374182400000000000e+09,
111	two6 = 6.40000000000000000000e+01,
112	two4 = 1.60000000000000000000e+01,
113	twom18 = 3.81469726562500000000e-06,
114	twom28 = 3.72529029846191406250e-09,
115	twom42 = 2.27373675443232059479e-13,
116	twom60 = 8.67361737988403547206e-19,
117	twom62 = 2.16840434497100886801e-19,
118	twom66 = 1.35525271560688054251e-20,
119	twom90 = 8.07793566946316088742e-28,
120	twom113 = 9.62964972193617926528e-35,
121	twom124 = 4.70197740328915003187e-38;
122
123
124/*
125*	Extract the exponent and normalized significand (represented as
126*	an array of five doubles) from a finite, nonzero quad.
127*/
128static int
129__q_unpack(const union longdouble *x, double *s)
130{
131	union {
132		double			d;
133		unsigned int	l[2];
134	} u;
135	double			b;
136	unsigned int	lx, w[3];
137	int				ex;
138
139	/* get the normalized significand and exponent */
140	ex = (int) ((x->l.msw & 0x7fffffff) >> 16);
141	lx = x->l.msw & 0xffff;
142	if (ex)
143	{
144		lx |= 0x10000;
145		w[0] = x->l.frac2;
146		w[1] = x->l.frac3;
147		w[2] = x->l.frac4;
148	}
149	else
150	{
151		if (lx | (x->l.frac2 & 0xfffe0000))
152		{
153			w[0] = x->l.frac2;
154			w[1] = x->l.frac3;
155			w[2] = x->l.frac4;
156			ex = 1;
157		}
158		else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000))
159		{
160			lx = x->l.frac2;
161			w[0] = x->l.frac3;
162			w[1] = x->l.frac4;
163			w[2] = 0;
164			ex = -31;
165		}
166		else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000))
167		{
168			lx = x->l.frac3;
169			w[0] = x->l.frac4;
170			w[1] = w[2] = 0;
171			ex = -63;
172		}
173		else
174		{
175			lx = x->l.frac4;
176			w[0] = w[1] = w[2] = 0;
177			ex = -95;
178		}
179		while ((lx & 0x10000) == 0)
180		{
181			lx = (lx << 1) | (w[0] >> 31);
182			w[0] = (w[0] << 1) | (w[1] >> 31);
183			w[1] = (w[1] << 1) | (w[2] >> 31);
184			w[2] <<= 1;
185			ex--;
186		}
187	}
188
189	/* extract the significand into five doubles */
190	u.l[HIWORD] = 0x42300000;
191	u.l[LOWORD] = 0;
192	b = u.d;
193	u.l[LOWORD] = lx;
194	s[0] = u.d - b;
195
196	u.l[HIWORD] = 0x40300000;
197	u.l[LOWORD] = 0;
198	b = u.d;
199	u.l[LOWORD] = w[0] & 0xffffff00;
200	s[1] = u.d - b;
201
202	u.l[HIWORD] = 0x3e300000;
203	u.l[LOWORD] = 0;
204	b = u.d;
205	u.l[HIWORD] |= w[0] & 0xff;
206	u.l[LOWORD] = w[1] & 0xffff0000;
207	s[2] = u.d - b;
208
209	u.l[HIWORD] = 0x3c300000;
210	u.l[LOWORD] = 0;
211	b = u.d;
212	u.l[HIWORD] |= w[1] & 0xffff;
213	u.l[LOWORD] = w[2] & 0xff000000;
214	s[3] = u.d - b;
215
216	u.l[HIWORD] = 0x3c300000;
217	u.l[LOWORD] = 0;
218	b = u.d;
219	u.l[LOWORD] = w[2] & 0xffffff;
220	s[4] = u.d - b;
221
222	return ex - 0x3fff;
223}
224
225
226/*
227*	Pack an exponent and array of three doubles representing a finite,
228*	nonzero number into a quad.  Assume the sign is already there and
229*	the rounding mode has been fudged accordingly.
230*/
231static void
232__q_pack(const double *z, int exp, enum fp_direction_type rm,
233	union longdouble *x, int *inexact)
234{
235	union {
236		double			d;
237		unsigned int	l[2];
238	} u;
239	double			s[3], t, t2;
240	unsigned int	msw, frac2, frac3, frac4;
241
242	/* bias exponent and strip off integer bit */
243	exp += 0x3fff;
244	s[0] = z[0] - one;
245	s[1] = z[1];
246	s[2] = z[2];
247
248	/*
249	 * chop the significand to obtain the fraction;
250	 * use round-to-minus-infinity to ensure chopping
251	 */
252	(void) __swapRD(fp_negative);
253
254	/* extract the first eighty bits of fraction */
255	t = s[1] + s[2];
256	u.d = two36 + (s[0] + t);
257	msw = u.l[LOWORD];
258	s[0] -= (u.d - two36);
259
260	u.d = two4 + (s[0] + t);
261	frac2 = u.l[LOWORD];
262	s[0] -= (u.d - two4);
263
264	u.d = twom28 + (s[0] + t);
265	frac3 = u.l[LOWORD];
266	s[0] -= (u.d - twom28);
267
268	/* condense the remaining fraction; errors here won't matter */
269	t = s[0] + s[1];
270	s[1] = ((s[0] - t) + s[1]) + s[2];
271	s[0] = t;
272
273	/* get the last word of fraction */
274	u.d = twom60 + (s[0] + s[1]);
275	frac4 = u.l[LOWORD];
276	s[0] -= (u.d - twom60);
277
278	/*
279	 * keep track of what's left for rounding; note that
280	 * t2 will be non-negative due to rounding mode
281	 */
282	t = s[0] + s[1];
283	t2 = (s[0] - t) + s[1];
284
285	if (t != zero)
286	{
287		*inexact = 1;
288
289		/* decide whether to round the fraction up */
290		if (rm == fp_positive || (rm == fp_nearest && (t > twom113 ||
291			(t == twom113 && (t2 != zero || frac4 & 1)))))
292		{
293			/* round up and renormalize if necessary */
294			if (++frac4 == 0)
295				if (++frac3 == 0)
296					if (++frac2 == 0)
297						if (++msw == 0x10000)
298						{
299							msw = 0;
300							exp++;
301						}
302		}
303	}
304
305	/* assemble the result */
306	x->l.msw |= msw | (exp << 16);
307	x->l.frac2 = frac2;
308	x->l.frac3 = frac3;
309	x->l.frac4 = frac4;
310}
311
312
313/*
314*	Compute the square root of x and place the TP result in s.
315*/
316static void
317__q_tp_sqrt(const double *x, double *s)
318{
319	double	c, rr, r[3], tt[3], t[5];
320
321	/* approximate the divisor for the Newton iteration */
322	c = sqrt((x[0] + x[1]) + x[2]);
323	rr = half / c;
324
325	/* compute the first five "digits" of the square root */
326	t[0] = (c + two30) - two30;
327	tt[0] = t[0] + t[0];
328	r[0] = ((x[0] - t[0] * t[0]) + x[1]) + x[2];
329
330	t[1] = (rr * (r[0] + x[3]) + two6) - two6;
331	tt[1] = t[1] + t[1];
332	r[0] -= tt[0] * t[1];
333	r[1] = x[3] - t[1] * t[1];
334	c = (r[1] + twom18) - twom18;
335	r[0] += c;
336	r[1] = (r[1] - c) + x[4];
337
338	t[2] = (rr * (r[0] + r[1]) + twom18) - twom18;
339	tt[2] = t[2] + t[2];
340	r[0] -= tt[0] * t[2];
341	r[1] -= tt[1] * t[2];
342	c = (r[1] + twom42) - twom42;
343	r[0] += c;
344	r[1] = (r[1] - c) - t[2] * t[2];
345
346	t[3] = (rr * (r[0] + r[1]) + twom42) - twom42;
347	r[0] = ((r[0] - tt[0] * t[3]) + r[1]) - tt[1] * t[3];
348	r[1] = -tt[2] * t[3];
349	c = (r[1] + twom90) - twom90;
350	r[0] += c;
351	r[1] = (r[1] - c) - t[3] * t[3];
352
353	t[4] = (rr * (r[0] + r[1]) + twom66) - twom66;
354
355	/* here we just need to get the sign of the remainder */
356	c = (((((r[0] - tt[0] * t[4]) - tt[1] * t[4]) + r[1])
357		- tt[2] * t[4]) - (t[3] + t[3]) * t[4]) - t[4] * t[4];
358
359	/* reduce to three doubles */
360	t[0] += t[1];
361	t[1] = t[2] + t[3];
362	t[2] = t[4];
363
364	/* if the third term might lie on a rounding boundary, perturb it */
365	if (c != zero && t[2] == (twom62 + t[2]) - twom62)
366	{
367		if (c < zero)
368			t[2] -= twom124;
369		else
370			t[2] += twom124;
371	}
372
373	/* condense the square root */
374	c = t[1] + t[2];
375	t[2] += (t[1] - c);
376	t[1] = c;
377	c = t[0] + t[1];
378	s[1] = t[1] + (t[0] - c);
379	s[0] = c;
380	if (s[1] == zero)
381	{
382		c = s[0] + t[2];
383		s[1] = t[2] + (s[0] - c);
384		s[0] = c;
385		s[2] = zero;
386	}
387	else
388	{
389		c = s[1] + t[2];
390		s[2] = t[2] + (s[1] - c);
391		s[1] = c;
392	}
393}
394
395
396long double
397sqrtl(long double ldx)
398{
399	union	longdouble		x;
400	volatile double			t;
401	double					xx[5], zz[3];
402	enum fp_direction_type	rm;
403	int				ex, inexact, exc, traps;
404
405	/* clear cexc */
406	t = zero;
407	t -= zero;
408
409	/* check for zero operand */
410	x.d = ldx;
411	if (!((x.l.msw & 0x7fffffff) | x.l.frac2 | x.l.frac3 | x.l.frac4))
412		return ldx;
413
414	/* handle nan and inf cases */
415	if ((x.l.msw & 0x7fffffff) >= 0x7fff0000)
416	{
417		if ((x.l.msw & 0xffff) | x.l.frac2 | x.l.frac3 | x.l.frac4)
418		{
419			if (!(x.l.msw & 0x8000))
420			{
421				/* snan, signal invalid */
422				t += snan.d;
423			}
424			x.l.msw |= 0x8000;
425			return x.d;
426		}
427		if (x.l.msw & 0x80000000)
428		{
429			/* sqrt(-inf), signal invalid */
430			t = -one;
431			t = sqrt(t);
432			return qnan.d;
433		}
434		/* sqrt(inf), return inf */
435		return x.d;
436	}
437
438	/* handle negative numbers */
439	if (x.l.msw & 0x80000000)
440	{
441		t = -one;
442		t = sqrt(t);
443		return qnan.d;
444	}
445
446	/* now x is finite, positive */
447
448	traps = __swapTE(0);
449	exc = __swapEX(0);
450	rm = __swapRD(fp_nearest);
451
452	ex = __q_unpack(&x, xx);
453	if (ex & 1)
454	{
455		/* make exponent even */
456		xx[0] += xx[0];
457		xx[1] += xx[1];
458		xx[2] += xx[2];
459		xx[3] += xx[3];
460		xx[4] += xx[4];
461		ex--;
462	}
463	__q_tp_sqrt(xx, zz);
464
465	/* put everything together */
466	x.l.msw = 0;
467	inexact = 0;
468	__q_pack(zz, ex >> 1, rm, &x, &inexact);
469
470	(void) __swapRD(rm);
471	(void) __swapEX(exc);
472	(void) __swapTE(traps);
473	if (inexact)
474	{
475		t = huge;
476		t += tiny;
477	}
478	return x.d;
479}
480