xref: /illumos-gate/usr/src/lib/libc/port/fp/__x_power.c (revision 1da57d55)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include "lint.h"
28 #include "base_conversion.h"
29 #include <sys/types.h>
30 #include <malloc.h>
31 #include <memory.h>
32 #include <stdlib.h>
33 #include <errno.h>
34 
35 /*
36  * Multiply a _big_float by a power of two or ten
37  */
38 
39 /* see comment in double_decim.c */
40 static unsigned int
__quorem10000(unsigned int x,unsigned short * pr)41 __quorem10000(unsigned int x, unsigned short *pr)
42 {
43 	*pr = x % 10000;
44 	return (x / 10000);
45 }
46 
47 /*
48  * Multiply a base-2**16 significand by multiplier.  Extend length as
49  * necessary to accommodate carries.
50  */
51 static void
__multiply_base_two(_big_float * pbf,unsigned short multiplier)52 __multiply_base_two(_big_float *pbf, unsigned short multiplier)
53 {
54 	unsigned int	p, carry;
55 	int		j, length = pbf->blength;
56 
57 	carry = 0;
58 	for (j = 0; j < length; j++) {
59 		p = (unsigned int)pbf->bsignificand[j] * multiplier + carry;
60 		pbf->bsignificand[j] = p & 0xffff;
61 		carry = p >> 16;
62 	}
63 	if (carry != 0)
64 		pbf->bsignificand[j++] = carry;
65 	pbf->blength = j;
66 }
67 
68 /*
69  * Multiply a base-10**4 significand by multiplier.  Extend length as
70  * necessary to accommodate carries.
71  */
72 static void
__multiply_base_ten(_big_float * pbf,unsigned short multiplier)73 __multiply_base_ten(_big_float *pbf, unsigned short multiplier)
74 {
75 	unsigned int	p, carry;
76 	int		j, length = pbf->blength;
77 
78 	carry = 0;
79 	for (j = 0; j < length; j++) {
80 		p = (unsigned int)pbf->bsignificand[j] * multiplier + carry;
81 		carry = __quorem10000(p, &pbf->bsignificand[j]);
82 	}
83 	while (carry != 0) {
84 		carry = __quorem10000(carry, &pbf->bsignificand[j]);
85 		j++;
86 	}
87 	pbf->blength = j;
88 }
89 
90 /*
91  * Multiply a base-10**4 significand by 2**multiplier.  Extend length
92  * as necessary to accommodate carries.
93  */
94 static void
__multiply_base_ten_by_two(_big_float * pbf,unsigned short multiplier)95 __multiply_base_ten_by_two(_big_float *pbf, unsigned short multiplier)
96 {
97 	unsigned int	p, carry;
98 	int		j, length = pbf->blength;
99 
100 	carry = 0;
101 	for (j = 0; j < length; j++) {
102 		p = ((unsigned int)pbf->bsignificand[j] << multiplier) + carry;
103 		carry = __quorem10000(p, &pbf->bsignificand[j]);
104 	}
105 	while (carry != 0) {
106 		carry = __quorem10000(carry, &pbf->bsignificand[j]);
107 		j++;
108 	}
109 	pbf->blength = j;
110 }
111 
112 /*
113  * Propagate carries in a base-2**16 significand.
114  */
115 static void
__carry_propagate_two(unsigned int carry,unsigned short * psignificand)116 __carry_propagate_two(unsigned int carry, unsigned short *psignificand)
117 {
118 	unsigned int	p;
119 	int		j;
120 
121 	j = 0;
122 	while (carry != 0) {
123 		p = psignificand[j] + carry;
124 		psignificand[j++] = p & 0xffff;
125 		carry = p >> 16;
126 	}
127 }
128 
129 /*
130  * Propagate carries in a base-10**4 significand.
131  */
132 static void
__carry_propagate_ten(unsigned int carry,unsigned short * psignificand)133 __carry_propagate_ten(unsigned int carry, unsigned short *psignificand)
134 {
135 	unsigned int	p;
136 	int		j;
137 
138 	j = 0;
139 	while (carry != 0) {
140 		p = psignificand[j] + carry;
141 		carry = __quorem10000(p, &psignificand[j]);
142 		j++;
143 	}
144 }
145 
146 /*
147  * Given x[] and y[], base-2**16 vectors of length n, compute the
148  * dot product
149  *
150  * sum (i=0,n-1) of x[i]*y[n-1-i]
151  *
152  * The product may fill as many as three base-2**16 digits; product[0]
153  * is the least significant, product[2] the most.
154  */
155 static void
__multiply_base_two_vector(unsigned short n,unsigned short * px,unsigned short * py,unsigned short * product)156 __multiply_base_two_vector(unsigned short n, unsigned short *px,
157     unsigned short *py, unsigned short *product)
158 {
159 
160 	unsigned int	acc, p;
161 	unsigned short	carry;
162 	int		i;
163 
164 	acc = 0;
165 	carry = 0;
166 	for (i = 0; i < (int)n; i++) {
167 		p = px[i] * py[n - 1 - i] + acc;
168 		if (p < acc)
169 			carry++;
170 		acc = p;
171 	}
172 	product[0] = acc & 0xffff;
173 	product[1] = acc >> 16;
174 	product[2] = carry;
175 }
176 
177 /*
178  * Given x[] and y[], base-10**4 vectors of length n, compute the
179  * dot product
180  *
181  * sum (i=0,n-1) of x[i]*y[n-1-i]
182  *
183  * The product may fill as many as three base-10**4 digits; product[0]
184  * is the least significant, product[2] the most.
185  */
186 #define	ABASE	3000000000u	/* base of accumulator */
187 
188 static void
__multiply_base_ten_vector(unsigned short n,unsigned short * px,unsigned short * py,unsigned short * product)189 __multiply_base_ten_vector(unsigned short n, unsigned short *px,
190     unsigned short *py, unsigned short *product)
191 {
192 
193 	unsigned int	acc;
194 	unsigned short	carry;
195 	int		i;
196 
197 	acc = 0;
198 	carry = 0;
199 	for (i = 0; i < (int)n; i++) {
200 		acc = px[i] * py[n - 1 - i] + acc;
201 		if (acc >= ABASE) {
202 			carry++;
203 			acc -= ABASE;
204 		}
205 	}
206 	product[0] = acc % 10000;
207 	acc = acc / 10000;
208 	product[1] = acc % 10000;
209 	acc = acc / 10000;
210 	product[2] = acc + (ABASE / 100000000) * carry;
211 }
212 
213 /*
214  * Multiply *pbf by the n-th power of mult, which must be two or
215  * ten.  If mult is two, *pbf is assumed to be base 10**4; if mult
216  * is ten, *pbf is assumed to be base 2**16.  precision specifies
217  * the number of significant bits or decimal digits required in the
218  * result.  (The product may have more or fewer digits than this,
219  * but it will be accurate to at least this many.)
220  *
221  * On exit, if the product is small enough, it overwrites *pbf, and
222  * *pnewbf is set to pbf.  If the product is too large to fit in *pbf,
223  * this routine calls malloc(3M) to allocate storage and sets *pnewbf
224  * to point to this area; it is the caller's responsibility to free
225  * this storage when it is no longer needed.  Note that *pbf may be
226  * modified even when the routine allocates new storage.
227  *
228  * If n is too large, we set errno to ERANGE and call abort(3C).
229  * If an attempted malloc fails, we set errno to ENOMEM and call
230  * abort(3C).
231  */
232 void
__big_float_times_power(_big_float * pbf,int mult,int n,int precision,_big_float ** pnewbf)233 __big_float_times_power(_big_float *pbf, int mult, int n, int precision,
234     _big_float **pnewbf)
235 {
236 	int		base, needed_precision, productsize;
237 	int		i, j, itlast, trailing_zeros_to_delete;
238 	int		tablepower[3], length[3];
239 	int		lengthx, lengthp, istart, istop;
240 	int		excess_check;
241 	unsigned short	*pp, *table[3], canquit;
242 	unsigned short	multiplier, product[3];
243 
244 	if (pbf->blength == 0) {
245 		*pnewbf = pbf;
246 		return;
247 	}
248 
249 	if (mult == 2) {
250 		/*
251 		 * Handle small n cases that don't require extra
252 		 * storage quickly.
253 		 */
254 		if (n <= 16 && pbf->blength + 2 < pbf->bsize) {
255 			__multiply_base_ten_by_two(pbf, n);
256 			*pnewbf = pbf;
257 			return;
258 		}
259 
260 		/* *pbf is base 10**4 */
261 		base = 10;
262 
263 		/*
264 		 * Convert precision (base ten digits) to needed_precision
265 		 * (base 10**4 digits), allowing an additional digit at
266 		 * each end.
267 		 */
268 		needed_precision = 2 + (precision >> 2);
269 
270 		/*
271 		 * Set up pointers to the table entries and compute their
272 		 * lengths.
273 		 */
274 		if (n < __TBL_2_SMALL_SIZE) {
275 			itlast = 0;
276 			tablepower[0] = n;
277 			tablepower[1] = tablepower[2] = 0;
278 		} else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE)) {
279 			itlast = 1;
280 			tablepower[0] = n % __TBL_2_SMALL_SIZE;
281 			tablepower[1] = n / __TBL_2_SMALL_SIZE;
282 			tablepower[2] = 0;
283 		} else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE *
284 		    __TBL_2_HUGE_SIZE)) {
285 			itlast = 2;
286 			tablepower[0] = n % __TBL_2_SMALL_SIZE;
287 			n /= __TBL_2_SMALL_SIZE;
288 			tablepower[1] = n % __TBL_2_BIG_SIZE;
289 			tablepower[2] = n / __TBL_2_BIG_SIZE;
290 		} else {
291 			errno = ERANGE;
292 			abort();
293 		}
294 		pp = (unsigned short *)__tbl_2_small_start + tablepower[0];
295 		table[0] = (unsigned short *)__tbl_2_small_digits + pp[0];
296 		length[0] = pp[1] - pp[0];
297 		pp = (unsigned short *)__tbl_2_big_start + tablepower[1];
298 		table[1] = (unsigned short *)__tbl_2_big_digits + pp[0];
299 		length[1] = pp[1] - pp[0];
300 		pp = (unsigned short *)__tbl_2_huge_start + tablepower[2];
301 		table[2] = (unsigned short *)__tbl_2_huge_digits + pp[0];
302 		length[2] = pp[1] - pp[0];
303 	} else {
304 		if (n <= 4 && pbf->blength + 1 < pbf->bsize) {
305 			pbf->bexponent += (short)n;
306 			__multiply_base_two(pbf, __tbl_10_small_digits[n]);
307 			*pnewbf = pbf;
308 			return;
309 		}
310 
311 		/* *pbf is base 2**16 */
312 		base = 2;
313 		pbf->bexponent += (short)n; /* now need to multiply by 5**n */
314 		needed_precision = 2 + (precision >> 4);
315 		if (n < __TBL_10_SMALL_SIZE) {
316 			itlast = 0;
317 			tablepower[0] = n;
318 			tablepower[1] = tablepower[2] = 0;
319 		} else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE)) {
320 			itlast = 1;
321 			tablepower[0] = n % __TBL_10_SMALL_SIZE;
322 			tablepower[1] = n / __TBL_10_SMALL_SIZE;
323 			tablepower[2] = 0;
324 		} else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE *
325 		    __TBL_10_HUGE_SIZE)) {
326 			itlast = 2;
327 			tablepower[0] = n % __TBL_10_SMALL_SIZE;
328 			n /= __TBL_10_SMALL_SIZE;
329 			tablepower[1] = n % __TBL_10_BIG_SIZE;
330 			tablepower[2] = n / __TBL_10_BIG_SIZE;
331 		} else {
332 			errno = ERANGE;
333 			abort();
334 		}
335 		pp = (unsigned short *)__tbl_10_small_start + tablepower[0];
336 		table[0] = (unsigned short *)__tbl_10_small_digits + pp[0];
337 		length[0] = pp[1] - pp[0];
338 		pp = (unsigned short *)__tbl_10_big_start + tablepower[1];
339 		table[1] = (unsigned short *)__tbl_10_big_digits + pp[0];
340 		length[1] = pp[1] - pp[0];
341 		pp = (unsigned short *)__tbl_10_huge_start + tablepower[2];
342 		table[2] = (unsigned short *)__tbl_10_huge_digits + pp[0];
343 		length[2] = pp[1] - pp[0];
344 	}
345 
346 	/* compute an upper bound on the size of the product */
347 	productsize = pbf->blength;
348 	for (i = 0; i <= itlast; i++)
349 		productsize += length[i];
350 
351 	if (productsize < needed_precision)
352 		needed_precision = productsize;
353 
354 	if (productsize <= pbf->bsize) {
355 		*pnewbf = pbf;
356 	} else {
357 		i = sizeof (_big_float) + sizeof (unsigned short) *
358 		    (productsize - _BIG_FLOAT_SIZE);
359 		if ((*pnewbf = malloc(i)) == NULL) {
360 			errno = ENOMEM;
361 			abort();
362 		}
363 		(void) memcpy((*pnewbf)->bsignificand, pbf->bsignificand,
364 		    pbf->blength * sizeof (unsigned short));
365 		(*pnewbf)->blength = pbf->blength;
366 		(*pnewbf)->bexponent = pbf->bexponent;
367 		pbf = *pnewbf;
368 		pbf->bsize = productsize;
369 	}
370 
371 	/*
372 	 * Now pbf points to the input and the output.  Step through
373 	 * each level of the tables.
374 	 */
375 	for (i = 0; i <= itlast; i++) {
376 		if (tablepower[i] == 0)
377 			continue;
378 
379 		lengthp = length[i];
380 		if (lengthp == 1) {
381 			/* short multiplier (<= 10**4 or 2**13) */
382 			if (base == 10) {
383 				/* left shift by tablepower[i] */
384 				__multiply_base_ten_by_two(pbf, tablepower[i]);
385 			} else {
386 				__multiply_base_two(pbf, (table[i])[0]);
387 			}
388 			continue;
389 		}
390 
391 		lengthx = pbf->blength;
392 		if (lengthx == 1) {
393 			/* short multiplicand */
394 			multiplier = pbf->bsignificand[0];
395 			(void) memcpy(pbf->bsignificand, table[i],
396 			    lengthp * sizeof (unsigned short));
397 			pbf->blength = lengthp;
398 			if (base == 10)
399 				__multiply_base_ten(pbf, multiplier);
400 			else
401 				__multiply_base_two(pbf, multiplier);
402 			continue;
403 		}
404 
405 		/* keep track of trailing zeroes */
406 		trailing_zeros_to_delete = 0;
407 
408 		/* initialize for carry propagation */
409 		pbf->bsignificand[lengthx + lengthp - 1] = 0;
410 
411 		/*
412 		 * General case - the result will be accumulated in *pbf
413 		 * from most significant digit to least significant.
414 		 */
415 		for (j = lengthx + lengthp - 2; j >= 0; j--) {
416 			istart = j - lengthp + 1;
417 			if (istart < 0)
418 				istart = 0;
419 
420 			istop = lengthx - 1;
421 			if (istop > j)
422 				istop = j;
423 
424 			pp = table[i];
425 			if (base == 2) {
426 				__multiply_base_two_vector(istop - istart + 1,
427 				    &(pbf->bsignificand[istart]),
428 				    &(pp[j - istop]), product);
429 				if (product[2] != 0)
430 					__carry_propagate_two(
431 					    (unsigned int)product[2],
432 					    &(pbf->bsignificand[j + 2]));
433 				if (product[1] != 0)
434 					__carry_propagate_two(
435 					    (unsigned int)product[1],
436 					    &(pbf->bsignificand[j + 1]));
437 			} else {
438 				__multiply_base_ten_vector(istop - istart + 1,
439 				    &(pbf->bsignificand[istart]),
440 				    &(pp[j - istop]), product);
441 				if (product[2] != 0)
442 					__carry_propagate_ten(
443 					    (unsigned int)product[2],
444 					    &(pbf->bsignificand[j + 2]));
445 				if (product[1] != 0)
446 					__carry_propagate_ten(
447 					    (unsigned int)product[1],
448 					    &(pbf->bsignificand[j + 1]));
449 			}
450 			pbf->bsignificand[j] = product[0];
451 			if (i < itlast || j > lengthx + lengthp - 4
452 			    - needed_precision)
453 				continue;
454 
455 			/*
456 			 * On the last multiplication, it's not necessary
457 			 * to develop the entire product if further digits
458 			 * can't possibly affect significant digits.  But
459 			 * note that further digits can affect the product
460 			 * in one of two ways: (i) the sum of digits beyond
461 			 * the significant ones can cause a carry that would
462 			 * propagate into the significant digits, or (ii) no
463 			 * carry will occur, but there may be more nonzero
464 			 * digits that will need to be recorded in a sticky
465 			 * bit.
466 			 */
467 			excess_check = lengthx + lengthp;
468 			if (pbf->bsignificand[excess_check - 1] == 0)
469 				excess_check--;
470 			excess_check -= needed_precision + 4;
471 			canquit = ((base == 2)? 65535 : 9999) -
472 			    ((lengthx < lengthp)? lengthx : lengthp);
473 			/*
474 			 * If j <= excess_check, then we have all the
475 			 * significant digits.  If the (j + 1)-st digit
476 			 * is no larger than canquit, then the sum of the
477 			 * digits not yet computed can't carry into the
478 			 * significant digits.  If the j-th and (j + 1)-st
479 			 * digits are not both zero, then we know we are
480 			 * discarding nonzero digits.  (If both of these
481 			 * digits are zero, we need to keep forming more
482 			 * of the product to see whether or not there are
483 			 * any more nonzero digits.)
484 			 */
485 			if (j <= excess_check &&
486 			    pbf->bsignificand[j + 1] <= canquit &&
487 			    (pbf->bsignificand[j + 1] | pbf->bsignificand[j])
488 			    != 0) {
489 				/* can discard j+1, j, ... 0 */
490 				trailing_zeros_to_delete = j + 2;
491 
492 				/* set sticky bit */
493 				pbf->bsignificand[j + 2] |= 1;
494 				break;
495 			}
496 		}
497 
498 		/* if the product didn't carry, delete the leading zero */
499 		pbf->blength = lengthx + lengthp;
500 		if (pbf->bsignificand[pbf->blength - 1] == 0)
501 			pbf->blength--;
502 
503 		/* look for additional trailing zeros to delete */
504 		for (; pbf->bsignificand[trailing_zeros_to_delete] == 0;
505 		    trailing_zeros_to_delete++)
506 			continue;
507 
508 		if (trailing_zeros_to_delete > 0) {
509 			for (j = 0; j < (int)pbf->blength -
510 			    trailing_zeros_to_delete; j++) {
511 				pbf->bsignificand[j] = pbf->bsignificand[j
512 				    + trailing_zeros_to_delete];
513 			}
514 			pbf->blength -= trailing_zeros_to_delete;
515 			pbf->bexponent += trailing_zeros_to_delete <<
516 			    ((base == 2)? 4 : 2);
517 		}
518 	}
519 }
520