xref: /illumos-gate/usr/src/lib/libc/port/fp/__x_power.c (revision 7257d1b4d25bfac0c802847390e98a464fd787ac)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "lint.h"
30 #include "base_conversion.h"
31 #include <sys/types.h>
32 #include <malloc.h>
33 #include <memory.h>
34 #include <stdlib.h>
35 #include <errno.h>
36 
37 /*
38  * Multiply a _big_float by a power of two or ten
39  */
40 
41 /* see comment in double_decim.c */
42 static unsigned int
43 __quorem10000(unsigned int x, unsigned short *pr)
44 {
45 	*pr = x % 10000;
46 	return (x / 10000);
47 }
48 
49 /*
50  * Multiply a base-2**16 significand by multiplier.  Extend length as
51  * necessary to accommodate carries.
52  */
53 static void
54 __multiply_base_two(_big_float *pbf, unsigned short multiplier)
55 {
56 	unsigned int	p, carry;
57 	int		j, length = pbf->blength;
58 
59 	carry = 0;
60 	for (j = 0; j < length; j++) {
61 		p = (unsigned int)pbf->bsignificand[j] * multiplier + carry;
62 		pbf->bsignificand[j] = p & 0xffff;
63 		carry = p >> 16;
64 	}
65 	if (carry != 0)
66 		pbf->bsignificand[j++] = carry;
67 	pbf->blength = j;
68 }
69 
70 /*
71  * Multiply a base-10**4 significand by multiplier.  Extend length as
72  * necessary to accommodate carries.
73  */
74 static void
75 __multiply_base_ten(_big_float *pbf, unsigned short multiplier)
76 {
77 	unsigned int	p, carry;
78 	int		j, length = pbf->blength;
79 
80 	carry = 0;
81 	for (j = 0; j < length; j++) {
82 		p = (unsigned int)pbf->bsignificand[j] * multiplier + carry;
83 		carry = __quorem10000(p, &pbf->bsignificand[j]);
84 	}
85 	while (carry != 0) {
86 		carry = __quorem10000(carry, &pbf->bsignificand[j]);
87 		j++;
88 	}
89 	pbf->blength = j;
90 }
91 
92 /*
93  * Multiply a base-10**4 significand by 2**multiplier.  Extend length
94  * as necessary to accommodate carries.
95  */
96 static void
97 __multiply_base_ten_by_two(_big_float *pbf, unsigned short multiplier)
98 {
99 	unsigned int	p, carry;
100 	int		j, length = pbf->blength;
101 
102 	carry = 0;
103 	for (j = 0; j < length; j++) {
104 		p = ((unsigned int)pbf->bsignificand[j] << multiplier) + carry;
105 		carry = __quorem10000(p, &pbf->bsignificand[j]);
106 	}
107 	while (carry != 0) {
108 		carry = __quorem10000(carry, &pbf->bsignificand[j]);
109 		j++;
110 	}
111 	pbf->blength = j;
112 }
113 
114 /*
115  * Propagate carries in a base-2**16 significand.
116  */
117 static void
118 __carry_propagate_two(unsigned int carry, unsigned short *psignificand)
119 {
120 	unsigned int	p;
121 	int		j;
122 
123 	j = 0;
124 	while (carry != 0) {
125 		p = psignificand[j] + carry;
126 		psignificand[j++] = p & 0xffff;
127 		carry = p >> 16;
128 	}
129 }
130 
131 /*
132  * Propagate carries in a base-10**4 significand.
133  */
134 static void
135 __carry_propagate_ten(unsigned int carry, unsigned short *psignificand)
136 {
137 	unsigned int	p;
138 	int		j;
139 
140 	j = 0;
141 	while (carry != 0) {
142 		p = psignificand[j] + carry;
143 		carry = __quorem10000(p, &psignificand[j]);
144 		j++;
145 	}
146 }
147 
148 /*
149  * Given x[] and y[], base-2**16 vectors of length n, compute the
150  * dot product
151  *
152  * sum (i=0,n-1) of x[i]*y[n-1-i]
153  *
154  * The product may fill as many as three base-2**16 digits; product[0]
155  * is the least significant, product[2] the most.
156  */
157 static void
158 __multiply_base_two_vector(unsigned short n, unsigned short *px,
159     unsigned short *py, unsigned short *product)
160 {
161 
162 	unsigned int	acc, p;
163 	unsigned short	carry;
164 	int		i;
165 
166 	acc = 0;
167 	carry = 0;
168 	for (i = 0; i < (int)n; i++) {
169 		p = px[i] * py[n - 1 - i] + acc;
170 		if (p < acc)
171 			carry++;
172 		acc = p;
173 	}
174 	product[0] = acc & 0xffff;
175 	product[1] = acc >> 16;
176 	product[2] = carry;
177 }
178 
179 /*
180  * Given x[] and y[], base-10**4 vectors of length n, compute the
181  * dot product
182  *
183  * sum (i=0,n-1) of x[i]*y[n-1-i]
184  *
185  * The product may fill as many as three base-10**4 digits; product[0]
186  * is the least significant, product[2] the most.
187  */
188 #define	ABASE	3000000000u	/* base of accumulator */
189 
190 static void
191 __multiply_base_ten_vector(unsigned short n, unsigned short *px,
192     unsigned short *py, unsigned short *product)
193 {
194 
195 	unsigned int	acc;
196 	unsigned short	carry;
197 	int		i;
198 
199 	acc = 0;
200 	carry = 0;
201 	for (i = 0; i < (int)n; i++) {
202 		acc = px[i] * py[n - 1 - i] + acc;
203 		if (acc >= ABASE) {
204 			carry++;
205 			acc -= ABASE;
206 		}
207 	}
208 	product[0] = acc % 10000;
209 	acc = acc / 10000;
210 	product[1] = acc % 10000;
211 	acc = acc / 10000;
212 	product[2] = acc + (ABASE / 100000000) * carry;
213 }
214 
215 /*
216  * Multiply *pbf by the n-th power of mult, which must be two or
217  * ten.  If mult is two, *pbf is assumed to be base 10**4; if mult
218  * is ten, *pbf is assumed to be base 2**16.  precision specifies
219  * the number of significant bits or decimal digits required in the
220  * result.  (The product may have more or fewer digits than this,
221  * but it will be accurate to at least this many.)
222  *
223  * On exit, if the product is small enough, it overwrites *pbf, and
224  * *pnewbf is set to pbf.  If the product is too large to fit in *pbf,
225  * this routine calls malloc(3M) to allocate storage and sets *pnewbf
226  * to point to this area; it is the caller's responsibility to free
227  * this storage when it is no longer needed.  Note that *pbf may be
228  * modified even when the routine allocates new storage.
229  *
230  * If n is too large, we set errno to ERANGE and call abort(3C).
231  * If an attempted malloc fails, we set errno to ENOMEM and call
232  * abort(3C).
233  */
234 void
235 __big_float_times_power(_big_float *pbf, int mult, int n, int precision,
236     _big_float **pnewbf)
237 {
238 	int		base, needed_precision, productsize;
239 	int		i, j, itlast, trailing_zeros_to_delete;
240 	int		tablepower[3], length[3];
241 	int		lengthx, lengthp, istart, istop;
242 	int		excess_check;
243 	unsigned short	*pp, *table[3], canquit;
244 	unsigned short	multiplier, product[3];
245 
246 	if (pbf->blength == 0) {
247 		*pnewbf = pbf;
248 		return;
249 	}
250 
251 	if (mult == 2) {
252 		/*
253 		 * Handle small n cases that don't require extra
254 		 * storage quickly.
255 		 */
256 		if (n <= 16 && pbf->blength + 2 < pbf->bsize) {
257 			__multiply_base_ten_by_two(pbf, n);
258 			*pnewbf = pbf;
259 			return;
260 		}
261 
262 		/* *pbf is base 10**4 */
263 		base = 10;
264 
265 		/*
266 		 * Convert precision (base ten digits) to needed_precision
267 		 * (base 10**4 digits), allowing an additional digit at
268 		 * each end.
269 		 */
270 		needed_precision = 2 + (precision >> 2);
271 
272 		/*
273 		 * Set up pointers to the table entries and compute their
274 		 * lengths.
275 		 */
276 		if (n < __TBL_2_SMALL_SIZE) {
277 			itlast = 0;
278 			tablepower[0] = n;
279 			tablepower[1] = tablepower[2] = 0;
280 		} else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE)) {
281 			itlast = 1;
282 			tablepower[0] = n % __TBL_2_SMALL_SIZE;
283 			tablepower[1] = n / __TBL_2_SMALL_SIZE;
284 			tablepower[2] = 0;
285 		} else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE *
286 		    __TBL_2_HUGE_SIZE)) {
287 			itlast = 2;
288 			tablepower[0] = n % __TBL_2_SMALL_SIZE;
289 			n /= __TBL_2_SMALL_SIZE;
290 			tablepower[1] = n % __TBL_2_BIG_SIZE;
291 			tablepower[2] = n / __TBL_2_BIG_SIZE;
292 		} else {
293 			errno = ERANGE;
294 			abort();
295 		}
296 		pp = (unsigned short *)__tbl_2_small_start + tablepower[0];
297 		table[0] = (unsigned short *)__tbl_2_small_digits + pp[0];
298 		length[0] = pp[1] - pp[0];
299 		pp = (unsigned short *)__tbl_2_big_start + tablepower[1];
300 		table[1] = (unsigned short *)__tbl_2_big_digits + pp[0];
301 		length[1] = pp[1] - pp[0];
302 		pp = (unsigned short *)__tbl_2_huge_start + tablepower[2];
303 		table[2] = (unsigned short *)__tbl_2_huge_digits + pp[0];
304 		length[2] = pp[1] - pp[0];
305 	} else {
306 		if (n <= 4 && pbf->blength + 1 < pbf->bsize) {
307 			pbf->bexponent += (short)n;
308 			__multiply_base_two(pbf, __tbl_10_small_digits[n]);
309 			*pnewbf = pbf;
310 			return;
311 		}
312 
313 		/* *pbf is base 2**16 */
314 		base = 2;
315 		pbf->bexponent += (short)n; /* now need to multiply by 5**n */
316 		needed_precision = 2 + (precision >> 4);
317 		if (n < __TBL_10_SMALL_SIZE) {
318 			itlast = 0;
319 			tablepower[0] = n;
320 			tablepower[1] = tablepower[2] = 0;
321 		} else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE)) {
322 			itlast = 1;
323 			tablepower[0] = n % __TBL_10_SMALL_SIZE;
324 			tablepower[1] = n / __TBL_10_SMALL_SIZE;
325 			tablepower[2] = 0;
326 		} else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE *
327 		    __TBL_10_HUGE_SIZE)) {
328 			itlast = 2;
329 			tablepower[0] = n % __TBL_10_SMALL_SIZE;
330 			n /= __TBL_10_SMALL_SIZE;
331 			tablepower[1] = n % __TBL_10_BIG_SIZE;
332 			tablepower[2] = n / __TBL_10_BIG_SIZE;
333 		} else {
334 			errno = ERANGE;
335 			abort();
336 		}
337 		pp = (unsigned short *)__tbl_10_small_start + tablepower[0];
338 		table[0] = (unsigned short *)__tbl_10_small_digits + pp[0];
339 		length[0] = pp[1] - pp[0];
340 		pp = (unsigned short *)__tbl_10_big_start + tablepower[1];
341 		table[1] = (unsigned short *)__tbl_10_big_digits + pp[0];
342 		length[1] = pp[1] - pp[0];
343 		pp = (unsigned short *)__tbl_10_huge_start + tablepower[2];
344 		table[2] = (unsigned short *)__tbl_10_huge_digits + pp[0];
345 		length[2] = pp[1] - pp[0];
346 	}
347 
348 	/* compute an upper bound on the size of the product */
349 	productsize = pbf->blength;
350 	for (i = 0; i <= itlast; i++)
351 		productsize += length[i];
352 
353 	if (productsize < needed_precision)
354 		needed_precision = productsize;
355 
356 	if (productsize <= pbf->bsize) {
357 		*pnewbf = pbf;
358 	} else {
359 		i = sizeof (_big_float) + sizeof (unsigned short) *
360 		    (productsize - _BIG_FLOAT_SIZE);
361 		if ((*pnewbf = malloc(i)) == NULL) {
362 			errno = ENOMEM;
363 			abort();
364 		}
365 		(void) memcpy((*pnewbf)->bsignificand, pbf->bsignificand,
366 		    pbf->blength * sizeof (unsigned short));
367 		(*pnewbf)->blength = pbf->blength;
368 		(*pnewbf)->bexponent = pbf->bexponent;
369 		pbf = *pnewbf;
370 		pbf->bsize = productsize;
371 	}
372 
373 	/*
374 	 * Now pbf points to the input and the output.  Step through
375 	 * each level of the tables.
376 	 */
377 	for (i = 0; i <= itlast; i++) {
378 		if (tablepower[i] == 0)
379 			continue;
380 
381 		lengthp = length[i];
382 		if (lengthp == 1) {
383 			/* short multiplier (<= 10**4 or 2**13) */
384 			if (base == 10) {
385 				/* left shift by tablepower[i] */
386 				__multiply_base_ten_by_two(pbf, tablepower[i]);
387 			} else {
388 				__multiply_base_two(pbf, (table[i])[0]);
389 			}
390 			continue;
391 		}
392 
393 		lengthx = pbf->blength;
394 		if (lengthx == 1) {
395 			/* short multiplicand */
396 			multiplier = pbf->bsignificand[0];
397 			(void) memcpy(pbf->bsignificand, table[i],
398 			    lengthp * sizeof (unsigned short));
399 			pbf->blength = lengthp;
400 			if (base == 10)
401 				__multiply_base_ten(pbf, multiplier);
402 			else
403 				__multiply_base_two(pbf, multiplier);
404 			continue;
405 		}
406 
407 		/* keep track of trailing zeroes */
408 		trailing_zeros_to_delete = 0;
409 
410 		/* initialize for carry propagation */
411 		pbf->bsignificand[lengthx + lengthp - 1] = 0;
412 
413 		/*
414 		 * General case - the result will be accumulated in *pbf
415 		 * from most significant digit to least significant.
416 		 */
417 		for (j = lengthx + lengthp - 2; j >= 0; j--) {
418 			istart = j - lengthp + 1;
419 			if (istart < 0)
420 				istart = 0;
421 
422 			istop = lengthx - 1;
423 			if (istop > j)
424 				istop = j;
425 
426 			pp = table[i];
427 			if (base == 2) {
428 				__multiply_base_two_vector(istop - istart + 1,
429 				    &(pbf->bsignificand[istart]),
430 				    &(pp[j - istop]), product);
431 				if (product[2] != 0)
432 					__carry_propagate_two(
433 					    (unsigned int)product[2],
434 					    &(pbf->bsignificand[j + 2]));
435 				if (product[1] != 0)
436 					__carry_propagate_two(
437 					    (unsigned int)product[1],
438 					    &(pbf->bsignificand[j + 1]));
439 			} else {
440 				__multiply_base_ten_vector(istop - istart + 1,
441 				    &(pbf->bsignificand[istart]),
442 				    &(pp[j - istop]), product);
443 				if (product[2] != 0)
444 					__carry_propagate_ten(
445 					    (unsigned int)product[2],
446 					    &(pbf->bsignificand[j + 2]));
447 				if (product[1] != 0)
448 					__carry_propagate_ten(
449 					    (unsigned int)product[1],
450 					    &(pbf->bsignificand[j + 1]));
451 			}
452 			pbf->bsignificand[j] = product[0];
453 			if (i < itlast || j > lengthx + lengthp - 4
454 			    - needed_precision)
455 				continue;
456 
457 			/*
458 			 * On the last multiplication, it's not necessary
459 			 * to develop the entire product if further digits
460 			 * can't possibly affect significant digits.  But
461 			 * note that further digits can affect the product
462 			 * in one of two ways: (i) the sum of digits beyond
463 			 * the significant ones can cause a carry that would
464 			 * propagate into the significant digits, or (ii) no
465 			 * carry will occur, but there may be more nonzero
466 			 * digits that will need to be recorded in a sticky
467 			 * bit.
468 			 */
469 			excess_check = lengthx + lengthp;
470 			if (pbf->bsignificand[excess_check - 1] == 0)
471 				excess_check--;
472 			excess_check -= needed_precision + 4;
473 			canquit = ((base == 2)? 65535 : 9999) -
474 			    ((lengthx < lengthp)? lengthx : lengthp);
475 			/*
476 			 * If j <= excess_check, then we have all the
477 			 * significant digits.  If the (j + 1)-st digit
478 			 * is no larger than canquit, then the sum of the
479 			 * digits not yet computed can't carry into the
480 			 * significant digits.  If the j-th and (j + 1)-st
481 			 * digits are not both zero, then we know we are
482 			 * discarding nonzero digits.  (If both of these
483 			 * digits are zero, we need to keep forming more
484 			 * of the product to see whether or not there are
485 			 * any more nonzero digits.)
486 			 */
487 			if (j <= excess_check &&
488 			    pbf->bsignificand[j + 1] <= canquit &&
489 			    (pbf->bsignificand[j + 1] | pbf->bsignificand[j])
490 			    != 0) {
491 				/* can discard j+1, j, ... 0 */
492 				trailing_zeros_to_delete = j + 2;
493 
494 				/* set sticky bit */
495 				pbf->bsignificand[j + 2] |= 1;
496 				break;
497 			}
498 		}
499 
500 		/* if the product didn't carry, delete the leading zero */
501 		pbf->blength = lengthx + lengthp;
502 		if (pbf->bsignificand[pbf->blength - 1] == 0)
503 			pbf->blength--;
504 
505 		/* look for additional trailing zeros to delete */
506 		for (; pbf->bsignificand[trailing_zeros_to_delete] == 0;
507 		    trailing_zeros_to_delete++)
508 			continue;
509 
510 		if (trailing_zeros_to_delete > 0) {
511 			for (j = 0; j < (int)pbf->blength -
512 			    trailing_zeros_to_delete; j++) {
513 				pbf->bsignificand[j] = pbf->bsignificand[j
514 				    + trailing_zeros_to_delete];
515 			}
516 			pbf->blength -= trailing_zeros_to_delete;
517 			pbf->bexponent += trailing_zeros_to_delete <<
518 			    ((base == 2)? 4 : 2);
519 		}
520 	}
521 }
522