1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#include "lint.h"
30#include "base_conversion.h"
31#include <sys/types.h>
32#include <malloc.h>
33#include <memory.h>
34#include <stdlib.h>
35#include <errno.h>
36
37/*
38 * Multiply a _big_float by a power of two or ten
39 */
40
41/* see comment in double_decim.c */
42static unsigned int
43__quorem10000(unsigned int x, unsigned short *pr)
44{
45	*pr = x % 10000;
46	return (x / 10000);
47}
48
49/*
50 * Multiply a base-2**16 significand by multiplier.  Extend length as
51 * necessary to accommodate carries.
52 */
53static void
54__multiply_base_two(_big_float *pbf, unsigned short multiplier)
55{
56	unsigned int	p, carry;
57	int		j, length = pbf->blength;
58
59	carry = 0;
60	for (j = 0; j < length; j++) {
61		p = (unsigned int)pbf->bsignificand[j] * multiplier + carry;
62		pbf->bsignificand[j] = p & 0xffff;
63		carry = p >> 16;
64	}
65	if (carry != 0)
66		pbf->bsignificand[j++] = carry;
67	pbf->blength = j;
68}
69
70/*
71 * Multiply a base-10**4 significand by multiplier.  Extend length as
72 * necessary to accommodate carries.
73 */
74static void
75__multiply_base_ten(_big_float *pbf, unsigned short multiplier)
76{
77	unsigned int	p, carry;
78	int		j, length = pbf->blength;
79
80	carry = 0;
81	for (j = 0; j < length; j++) {
82		p = (unsigned int)pbf->bsignificand[j] * multiplier + carry;
83		carry = __quorem10000(p, &pbf->bsignificand[j]);
84	}
85	while (carry != 0) {
86		carry = __quorem10000(carry, &pbf->bsignificand[j]);
87		j++;
88	}
89	pbf->blength = j;
90}
91
92/*
93 * Multiply a base-10**4 significand by 2**multiplier.  Extend length
94 * as necessary to accommodate carries.
95 */
96static void
97__multiply_base_ten_by_two(_big_float *pbf, unsigned short multiplier)
98{
99	unsigned int	p, carry;
100	int		j, length = pbf->blength;
101
102	carry = 0;
103	for (j = 0; j < length; j++) {
104		p = ((unsigned int)pbf->bsignificand[j] << multiplier) + carry;
105		carry = __quorem10000(p, &pbf->bsignificand[j]);
106	}
107	while (carry != 0) {
108		carry = __quorem10000(carry, &pbf->bsignificand[j]);
109		j++;
110	}
111	pbf->blength = j;
112}
113
114/*
115 * Propagate carries in a base-2**16 significand.
116 */
117static void
118__carry_propagate_two(unsigned int carry, unsigned short *psignificand)
119{
120	unsigned int	p;
121	int		j;
122
123	j = 0;
124	while (carry != 0) {
125		p = psignificand[j] + carry;
126		psignificand[j++] = p & 0xffff;
127		carry = p >> 16;
128	}
129}
130
131/*
132 * Propagate carries in a base-10**4 significand.
133 */
134static void
135__carry_propagate_ten(unsigned int carry, unsigned short *psignificand)
136{
137	unsigned int	p;
138	int		j;
139
140	j = 0;
141	while (carry != 0) {
142		p = psignificand[j] + carry;
143		carry = __quorem10000(p, &psignificand[j]);
144		j++;
145	}
146}
147
148/*
149 * Given x[] and y[], base-2**16 vectors of length n, compute the
150 * dot product
151 *
152 * sum (i=0,n-1) of x[i]*y[n-1-i]
153 *
154 * The product may fill as many as three base-2**16 digits; product[0]
155 * is the least significant, product[2] the most.
156 */
157static void
158__multiply_base_two_vector(unsigned short n, unsigned short *px,
159    unsigned short *py, unsigned short *product)
160{
161
162	unsigned int	acc, p;
163	unsigned short	carry;
164	int		i;
165
166	acc = 0;
167	carry = 0;
168	for (i = 0; i < (int)n; i++) {
169		p = px[i] * py[n - 1 - i] + acc;
170		if (p < acc)
171			carry++;
172		acc = p;
173	}
174	product[0] = acc & 0xffff;
175	product[1] = acc >> 16;
176	product[2] = carry;
177}
178
179/*
180 * Given x[] and y[], base-10**4 vectors of length n, compute the
181 * dot product
182 *
183 * sum (i=0,n-1) of x[i]*y[n-1-i]
184 *
185 * The product may fill as many as three base-10**4 digits; product[0]
186 * is the least significant, product[2] the most.
187 */
188#define	ABASE	3000000000u	/* base of accumulator */
189
190static void
191__multiply_base_ten_vector(unsigned short n, unsigned short *px,
192    unsigned short *py, unsigned short *product)
193{
194
195	unsigned int	acc;
196	unsigned short	carry;
197	int		i;
198
199	acc = 0;
200	carry = 0;
201	for (i = 0; i < (int)n; i++) {
202		acc = px[i] * py[n - 1 - i] + acc;
203		if (acc >= ABASE) {
204			carry++;
205			acc -= ABASE;
206		}
207	}
208	product[0] = acc % 10000;
209	acc = acc / 10000;
210	product[1] = acc % 10000;
211	acc = acc / 10000;
212	product[2] = acc + (ABASE / 100000000) * carry;
213}
214
215/*
216 * Multiply *pbf by the n-th power of mult, which must be two or
217 * ten.  If mult is two, *pbf is assumed to be base 10**4; if mult
218 * is ten, *pbf is assumed to be base 2**16.  precision specifies
219 * the number of significant bits or decimal digits required in the
220 * result.  (The product may have more or fewer digits than this,
221 * but it will be accurate to at least this many.)
222 *
223 * On exit, if the product is small enough, it overwrites *pbf, and
224 * *pnewbf is set to pbf.  If the product is too large to fit in *pbf,
225 * this routine calls malloc(3M) to allocate storage and sets *pnewbf
226 * to point to this area; it is the caller's responsibility to free
227 * this storage when it is no longer needed.  Note that *pbf may be
228 * modified even when the routine allocates new storage.
229 *
230 * If n is too large, we set errno to ERANGE and call abort(3C).
231 * If an attempted malloc fails, we set errno to ENOMEM and call
232 * abort(3C).
233 */
234void
235__big_float_times_power(_big_float *pbf, int mult, int n, int precision,
236    _big_float **pnewbf)
237{
238	int		base, needed_precision, productsize;
239	int		i, j, itlast, trailing_zeros_to_delete;
240	int		tablepower[3], length[3];
241	int		lengthx, lengthp, istart, istop;
242	int		excess_check;
243	unsigned short	*pp, *table[3], canquit;
244	unsigned short	multiplier, product[3];
245
246	if (pbf->blength == 0) {
247		*pnewbf = pbf;
248		return;
249	}
250
251	if (mult == 2) {
252		/*
253		 * Handle small n cases that don't require extra
254		 * storage quickly.
255		 */
256		if (n <= 16 && pbf->blength + 2 < pbf->bsize) {
257			__multiply_base_ten_by_two(pbf, n);
258			*pnewbf = pbf;
259			return;
260		}
261
262		/* *pbf is base 10**4 */
263		base = 10;
264
265		/*
266		 * Convert precision (base ten digits) to needed_precision
267		 * (base 10**4 digits), allowing an additional digit at
268		 * each end.
269		 */
270		needed_precision = 2 + (precision >> 2);
271
272		/*
273		 * Set up pointers to the table entries and compute their
274		 * lengths.
275		 */
276		if (n < __TBL_2_SMALL_SIZE) {
277			itlast = 0;
278			tablepower[0] = n;
279			tablepower[1] = tablepower[2] = 0;
280		} else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE)) {
281			itlast = 1;
282			tablepower[0] = n % __TBL_2_SMALL_SIZE;
283			tablepower[1] = n / __TBL_2_SMALL_SIZE;
284			tablepower[2] = 0;
285		} else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE *
286		    __TBL_2_HUGE_SIZE)) {
287			itlast = 2;
288			tablepower[0] = n % __TBL_2_SMALL_SIZE;
289			n /= __TBL_2_SMALL_SIZE;
290			tablepower[1] = n % __TBL_2_BIG_SIZE;
291			tablepower[2] = n / __TBL_2_BIG_SIZE;
292		} else {
293			errno = ERANGE;
294			abort();
295		}
296		pp = (unsigned short *)__tbl_2_small_start + tablepower[0];
297		table[0] = (unsigned short *)__tbl_2_small_digits + pp[0];
298		length[0] = pp[1] - pp[0];
299		pp = (unsigned short *)__tbl_2_big_start + tablepower[1];
300		table[1] = (unsigned short *)__tbl_2_big_digits + pp[0];
301		length[1] = pp[1] - pp[0];
302		pp = (unsigned short *)__tbl_2_huge_start + tablepower[2];
303		table[2] = (unsigned short *)__tbl_2_huge_digits + pp[0];
304		length[2] = pp[1] - pp[0];
305	} else {
306		if (n <= 4 && pbf->blength + 1 < pbf->bsize) {
307			pbf->bexponent += (short)n;
308			__multiply_base_two(pbf, __tbl_10_small_digits[n]);
309			*pnewbf = pbf;
310			return;
311		}
312
313		/* *pbf is base 2**16 */
314		base = 2;
315		pbf->bexponent += (short)n; /* now need to multiply by 5**n */
316		needed_precision = 2 + (precision >> 4);
317		if (n < __TBL_10_SMALL_SIZE) {
318			itlast = 0;
319			tablepower[0] = n;
320			tablepower[1] = tablepower[2] = 0;
321		} else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE)) {
322			itlast = 1;
323			tablepower[0] = n % __TBL_10_SMALL_SIZE;
324			tablepower[1] = n / __TBL_10_SMALL_SIZE;
325			tablepower[2] = 0;
326		} else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE *
327		    __TBL_10_HUGE_SIZE)) {
328			itlast = 2;
329			tablepower[0] = n % __TBL_10_SMALL_SIZE;
330			n /= __TBL_10_SMALL_SIZE;
331			tablepower[1] = n % __TBL_10_BIG_SIZE;
332			tablepower[2] = n / __TBL_10_BIG_SIZE;
333		} else {
334			errno = ERANGE;
335			abort();
336		}
337		pp = (unsigned short *)__tbl_10_small_start + tablepower[0];
338		table[0] = (unsigned short *)__tbl_10_small_digits + pp[0];
339		length[0] = pp[1] - pp[0];
340		pp = (unsigned short *)__tbl_10_big_start + tablepower[1];
341		table[1] = (unsigned short *)__tbl_10_big_digits + pp[0];
342		length[1] = pp[1] - pp[0];
343		pp = (unsigned short *)__tbl_10_huge_start + tablepower[2];
344		table[2] = (unsigned short *)__tbl_10_huge_digits + pp[0];
345		length[2] = pp[1] - pp[0];
346	}
347
348	/* compute an upper bound on the size of the product */
349	productsize = pbf->blength;
350	for (i = 0; i <= itlast; i++)
351		productsize += length[i];
352
353	if (productsize < needed_precision)
354		needed_precision = productsize;
355
356	if (productsize <= pbf->bsize) {
357		*pnewbf = pbf;
358	} else {
359		i = sizeof (_big_float) + sizeof (unsigned short) *
360		    (productsize - _BIG_FLOAT_SIZE);
361		if ((*pnewbf = malloc(i)) == NULL) {
362			errno = ENOMEM;
363			abort();
364		}
365		(void) memcpy((*pnewbf)->bsignificand, pbf->bsignificand,
366		    pbf->blength * sizeof (unsigned short));
367		(*pnewbf)->blength = pbf->blength;
368		(*pnewbf)->bexponent = pbf->bexponent;
369		pbf = *pnewbf;
370		pbf->bsize = productsize;
371	}
372
373	/*
374	 * Now pbf points to the input and the output.  Step through
375	 * each level of the tables.
376	 */
377	for (i = 0; i <= itlast; i++) {
378		if (tablepower[i] == 0)
379			continue;
380
381		lengthp = length[i];
382		if (lengthp == 1) {
383			/* short multiplier (<= 10**4 or 2**13) */
384			if (base == 10) {
385				/* left shift by tablepower[i] */
386				__multiply_base_ten_by_two(pbf, tablepower[i]);
387			} else {
388				__multiply_base_two(pbf, (table[i])[0]);
389			}
390			continue;
391		}
392
393		lengthx = pbf->blength;
394		if (lengthx == 1) {
395			/* short multiplicand */
396			multiplier = pbf->bsignificand[0];
397			(void) memcpy(pbf->bsignificand, table[i],
398			    lengthp * sizeof (unsigned short));
399			pbf->blength = lengthp;
400			if (base == 10)
401				__multiply_base_ten(pbf, multiplier);
402			else
403				__multiply_base_two(pbf, multiplier);
404			continue;
405		}
406
407		/* keep track of trailing zeroes */
408		trailing_zeros_to_delete = 0;
409
410		/* initialize for carry propagation */
411		pbf->bsignificand[lengthx + lengthp - 1] = 0;
412
413		/*
414		 * General case - the result will be accumulated in *pbf
415		 * from most significant digit to least significant.
416		 */
417		for (j = lengthx + lengthp - 2; j >= 0; j--) {
418			istart = j - lengthp + 1;
419			if (istart < 0)
420				istart = 0;
421
422			istop = lengthx - 1;
423			if (istop > j)
424				istop = j;
425
426			pp = table[i];
427			if (base == 2) {
428				__multiply_base_two_vector(istop - istart + 1,
429				    &(pbf->bsignificand[istart]),
430				    &(pp[j - istop]), product);
431				if (product[2] != 0)
432					__carry_propagate_two(
433					    (unsigned int)product[2],
434					    &(pbf->bsignificand[j + 2]));
435				if (product[1] != 0)
436					__carry_propagate_two(
437					    (unsigned int)product[1],
438					    &(pbf->bsignificand[j + 1]));
439			} else {
440				__multiply_base_ten_vector(istop - istart + 1,
441				    &(pbf->bsignificand[istart]),
442				    &(pp[j - istop]), product);
443				if (product[2] != 0)
444					__carry_propagate_ten(
445					    (unsigned int)product[2],
446					    &(pbf->bsignificand[j + 2]));
447				if (product[1] != 0)
448					__carry_propagate_ten(
449					    (unsigned int)product[1],
450					    &(pbf->bsignificand[j + 1]));
451			}
452			pbf->bsignificand[j] = product[0];
453			if (i < itlast || j > lengthx + lengthp - 4
454			    - needed_precision)
455				continue;
456
457			/*
458			 * On the last multiplication, it's not necessary
459			 * to develop the entire product if further digits
460			 * can't possibly affect significant digits.  But
461			 * note that further digits can affect the product
462			 * in one of two ways: (i) the sum of digits beyond
463			 * the significant ones can cause a carry that would
464			 * propagate into the significant digits, or (ii) no
465			 * carry will occur, but there may be more nonzero
466			 * digits that will need to be recorded in a sticky
467			 * bit.
468			 */
469			excess_check = lengthx + lengthp;
470			if (pbf->bsignificand[excess_check - 1] == 0)
471				excess_check--;
472			excess_check -= needed_precision + 4;
473			canquit = ((base == 2)? 65535 : 9999) -
474			    ((lengthx < lengthp)? lengthx : lengthp);
475			/*
476			 * If j <= excess_check, then we have all the
477			 * significant digits.  If the (j + 1)-st digit
478			 * is no larger than canquit, then the sum of the
479			 * digits not yet computed can't carry into the
480			 * significant digits.  If the j-th and (j + 1)-st
481			 * digits are not both zero, then we know we are
482			 * discarding nonzero digits.  (If both of these
483			 * digits are zero, we need to keep forming more
484			 * of the product to see whether or not there are
485			 * any more nonzero digits.)
486			 */
487			if (j <= excess_check &&
488			    pbf->bsignificand[j + 1] <= canquit &&
489			    (pbf->bsignificand[j + 1] | pbf->bsignificand[j])
490			    != 0) {
491				/* can discard j+1, j, ... 0 */
492				trailing_zeros_to_delete = j + 2;
493
494				/* set sticky bit */
495				pbf->bsignificand[j + 2] |= 1;
496				break;
497			}
498		}
499
500		/* if the product didn't carry, delete the leading zero */
501		pbf->blength = lengthx + lengthp;
502		if (pbf->bsignificand[pbf->blength - 1] == 0)
503			pbf->blength--;
504
505		/* look for additional trailing zeros to delete */
506		for (; pbf->bsignificand[trailing_zeros_to_delete] == 0;
507		    trailing_zeros_to_delete++)
508			continue;
509
510		if (trailing_zeros_to_delete > 0) {
511			for (j = 0; j < (int)pbf->blength -
512			    trailing_zeros_to_delete; j++) {
513				pbf->bsignificand[j] = pbf->bsignificand[j
514				    + trailing_zeros_to_delete];
515			}
516			pbf->blength -= trailing_zeros_to_delete;
517			pbf->bexponent += trailing_zeros_to_delete <<
518			    ((base == 2)? 4 : 2);
519		}
520	}
521}
522