1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include "lint.h" 30 #include "base_conversion.h" 31 #include <sys/types.h> 32 #include <malloc.h> 33 #include <memory.h> 34 #include <stdlib.h> 35 #include <errno.h> 36 37 /* 38 * Multiply a _big_float by a power of two or ten 39 */ 40 41 /* see comment in double_decim.c */ 42 static unsigned int 43 __quorem10000(unsigned int x, unsigned short *pr) 44 { 45 *pr = x % 10000; 46 return (x / 10000); 47 } 48 49 /* 50 * Multiply a base-2**16 significand by multiplier. Extend length as 51 * necessary to accommodate carries. 52 */ 53 static void 54 __multiply_base_two(_big_float *pbf, unsigned short multiplier) 55 { 56 unsigned int p, carry; 57 int j, length = pbf->blength; 58 59 carry = 0; 60 for (j = 0; j < length; j++) { 61 p = (unsigned int)pbf->bsignificand[j] * multiplier + carry; 62 pbf->bsignificand[j] = p & 0xffff; 63 carry = p >> 16; 64 } 65 if (carry != 0) 66 pbf->bsignificand[j++] = carry; 67 pbf->blength = j; 68 } 69 70 /* 71 * Multiply a base-10**4 significand by multiplier. Extend length as 72 * necessary to accommodate carries. 73 */ 74 static void 75 __multiply_base_ten(_big_float *pbf, unsigned short multiplier) 76 { 77 unsigned int p, carry; 78 int j, length = pbf->blength; 79 80 carry = 0; 81 for (j = 0; j < length; j++) { 82 p = (unsigned int)pbf->bsignificand[j] * multiplier + carry; 83 carry = __quorem10000(p, &pbf->bsignificand[j]); 84 } 85 while (carry != 0) { 86 carry = __quorem10000(carry, &pbf->bsignificand[j]); 87 j++; 88 } 89 pbf->blength = j; 90 } 91 92 /* 93 * Multiply a base-10**4 significand by 2**multiplier. Extend length 94 * as necessary to accommodate carries. 95 */ 96 static void 97 __multiply_base_ten_by_two(_big_float *pbf, unsigned short multiplier) 98 { 99 unsigned int p, carry; 100 int j, length = pbf->blength; 101 102 carry = 0; 103 for (j = 0; j < length; j++) { 104 p = ((unsigned int)pbf->bsignificand[j] << multiplier) + carry; 105 carry = __quorem10000(p, &pbf->bsignificand[j]); 106 } 107 while (carry != 0) { 108 carry = __quorem10000(carry, &pbf->bsignificand[j]); 109 j++; 110 } 111 pbf->blength = j; 112 } 113 114 /* 115 * Propagate carries in a base-2**16 significand. 116 */ 117 static void 118 __carry_propagate_two(unsigned int carry, unsigned short *psignificand) 119 { 120 unsigned int p; 121 int j; 122 123 j = 0; 124 while (carry != 0) { 125 p = psignificand[j] + carry; 126 psignificand[j++] = p & 0xffff; 127 carry = p >> 16; 128 } 129 } 130 131 /* 132 * Propagate carries in a base-10**4 significand. 133 */ 134 static void 135 __carry_propagate_ten(unsigned int carry, unsigned short *psignificand) 136 { 137 unsigned int p; 138 int j; 139 140 j = 0; 141 while (carry != 0) { 142 p = psignificand[j] + carry; 143 carry = __quorem10000(p, &psignificand[j]); 144 j++; 145 } 146 } 147 148 /* 149 * Given x[] and y[], base-2**16 vectors of length n, compute the 150 * dot product 151 * 152 * sum (i=0,n-1) of x[i]*y[n-1-i] 153 * 154 * The product may fill as many as three base-2**16 digits; product[0] 155 * is the least significant, product[2] the most. 156 */ 157 static void 158 __multiply_base_two_vector(unsigned short n, unsigned short *px, 159 unsigned short *py, unsigned short *product) 160 { 161 162 unsigned int acc, p; 163 unsigned short carry; 164 int i; 165 166 acc = 0; 167 carry = 0; 168 for (i = 0; i < (int)n; i++) { 169 p = px[i] * py[n - 1 - i] + acc; 170 if (p < acc) 171 carry++; 172 acc = p; 173 } 174 product[0] = acc & 0xffff; 175 product[1] = acc >> 16; 176 product[2] = carry; 177 } 178 179 /* 180 * Given x[] and y[], base-10**4 vectors of length n, compute the 181 * dot product 182 * 183 * sum (i=0,n-1) of x[i]*y[n-1-i] 184 * 185 * The product may fill as many as three base-10**4 digits; product[0] 186 * is the least significant, product[2] the most. 187 */ 188 #define ABASE 3000000000u /* base of accumulator */ 189 190 static void 191 __multiply_base_ten_vector(unsigned short n, unsigned short *px, 192 unsigned short *py, unsigned short *product) 193 { 194 195 unsigned int acc; 196 unsigned short carry; 197 int i; 198 199 acc = 0; 200 carry = 0; 201 for (i = 0; i < (int)n; i++) { 202 acc = px[i] * py[n - 1 - i] + acc; 203 if (acc >= ABASE) { 204 carry++; 205 acc -= ABASE; 206 } 207 } 208 product[0] = acc % 10000; 209 acc = acc / 10000; 210 product[1] = acc % 10000; 211 acc = acc / 10000; 212 product[2] = acc + (ABASE / 100000000) * carry; 213 } 214 215 /* 216 * Multiply *pbf by the n-th power of mult, which must be two or 217 * ten. If mult is two, *pbf is assumed to be base 10**4; if mult 218 * is ten, *pbf is assumed to be base 2**16. precision specifies 219 * the number of significant bits or decimal digits required in the 220 * result. (The product may have more or fewer digits than this, 221 * but it will be accurate to at least this many.) 222 * 223 * On exit, if the product is small enough, it overwrites *pbf, and 224 * *pnewbf is set to pbf. If the product is too large to fit in *pbf, 225 * this routine calls malloc(3M) to allocate storage and sets *pnewbf 226 * to point to this area; it is the caller's responsibility to free 227 * this storage when it is no longer needed. Note that *pbf may be 228 * modified even when the routine allocates new storage. 229 * 230 * If n is too large, we set errno to ERANGE and call abort(3C). 231 * If an attempted malloc fails, we set errno to ENOMEM and call 232 * abort(3C). 233 */ 234 void 235 __big_float_times_power(_big_float *pbf, int mult, int n, int precision, 236 _big_float **pnewbf) 237 { 238 int base, needed_precision, productsize; 239 int i, j, itlast, trailing_zeros_to_delete; 240 int tablepower[3], length[3]; 241 int lengthx, lengthp, istart, istop; 242 int excess_check; 243 unsigned short *pp, *table[3], canquit; 244 unsigned short multiplier, product[3]; 245 246 if (pbf->blength == 0) { 247 *pnewbf = pbf; 248 return; 249 } 250 251 if (mult == 2) { 252 /* 253 * Handle small n cases that don't require extra 254 * storage quickly. 255 */ 256 if (n <= 16 && pbf->blength + 2 < pbf->bsize) { 257 __multiply_base_ten_by_two(pbf, n); 258 *pnewbf = pbf; 259 return; 260 } 261 262 /* *pbf is base 10**4 */ 263 base = 10; 264 265 /* 266 * Convert precision (base ten digits) to needed_precision 267 * (base 10**4 digits), allowing an additional digit at 268 * each end. 269 */ 270 needed_precision = 2 + (precision >> 2); 271 272 /* 273 * Set up pointers to the table entries and compute their 274 * lengths. 275 */ 276 if (n < __TBL_2_SMALL_SIZE) { 277 itlast = 0; 278 tablepower[0] = n; 279 tablepower[1] = tablepower[2] = 0; 280 } else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE)) { 281 itlast = 1; 282 tablepower[0] = n % __TBL_2_SMALL_SIZE; 283 tablepower[1] = n / __TBL_2_SMALL_SIZE; 284 tablepower[2] = 0; 285 } else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE * 286 __TBL_2_HUGE_SIZE)) { 287 itlast = 2; 288 tablepower[0] = n % __TBL_2_SMALL_SIZE; 289 n /= __TBL_2_SMALL_SIZE; 290 tablepower[1] = n % __TBL_2_BIG_SIZE; 291 tablepower[2] = n / __TBL_2_BIG_SIZE; 292 } else { 293 errno = ERANGE; 294 abort(); 295 } 296 pp = (unsigned short *)__tbl_2_small_start + tablepower[0]; 297 table[0] = (unsigned short *)__tbl_2_small_digits + pp[0]; 298 length[0] = pp[1] - pp[0]; 299 pp = (unsigned short *)__tbl_2_big_start + tablepower[1]; 300 table[1] = (unsigned short *)__tbl_2_big_digits + pp[0]; 301 length[1] = pp[1] - pp[0]; 302 pp = (unsigned short *)__tbl_2_huge_start + tablepower[2]; 303 table[2] = (unsigned short *)__tbl_2_huge_digits + pp[0]; 304 length[2] = pp[1] - pp[0]; 305 } else { 306 if (n <= 4 && pbf->blength + 1 < pbf->bsize) { 307 pbf->bexponent += (short)n; 308 __multiply_base_two(pbf, __tbl_10_small_digits[n]); 309 *pnewbf = pbf; 310 return; 311 } 312 313 /* *pbf is base 2**16 */ 314 base = 2; 315 pbf->bexponent += (short)n; /* now need to multiply by 5**n */ 316 needed_precision = 2 + (precision >> 4); 317 if (n < __TBL_10_SMALL_SIZE) { 318 itlast = 0; 319 tablepower[0] = n; 320 tablepower[1] = tablepower[2] = 0; 321 } else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE)) { 322 itlast = 1; 323 tablepower[0] = n % __TBL_10_SMALL_SIZE; 324 tablepower[1] = n / __TBL_10_SMALL_SIZE; 325 tablepower[2] = 0; 326 } else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE * 327 __TBL_10_HUGE_SIZE)) { 328 itlast = 2; 329 tablepower[0] = n % __TBL_10_SMALL_SIZE; 330 n /= __TBL_10_SMALL_SIZE; 331 tablepower[1] = n % __TBL_10_BIG_SIZE; 332 tablepower[2] = n / __TBL_10_BIG_SIZE; 333 } else { 334 errno = ERANGE; 335 abort(); 336 } 337 pp = (unsigned short *)__tbl_10_small_start + tablepower[0]; 338 table[0] = (unsigned short *)__tbl_10_small_digits + pp[0]; 339 length[0] = pp[1] - pp[0]; 340 pp = (unsigned short *)__tbl_10_big_start + tablepower[1]; 341 table[1] = (unsigned short *)__tbl_10_big_digits + pp[0]; 342 length[1] = pp[1] - pp[0]; 343 pp = (unsigned short *)__tbl_10_huge_start + tablepower[2]; 344 table[2] = (unsigned short *)__tbl_10_huge_digits + pp[0]; 345 length[2] = pp[1] - pp[0]; 346 } 347 348 /* compute an upper bound on the size of the product */ 349 productsize = pbf->blength; 350 for (i = 0; i <= itlast; i++) 351 productsize += length[i]; 352 353 if (productsize < needed_precision) 354 needed_precision = productsize; 355 356 if (productsize <= pbf->bsize) { 357 *pnewbf = pbf; 358 } else { 359 i = sizeof (_big_float) + sizeof (unsigned short) * 360 (productsize - _BIG_FLOAT_SIZE); 361 if ((*pnewbf = malloc(i)) == NULL) { 362 errno = ENOMEM; 363 abort(); 364 } 365 (void) memcpy((*pnewbf)->bsignificand, pbf->bsignificand, 366 pbf->blength * sizeof (unsigned short)); 367 (*pnewbf)->blength = pbf->blength; 368 (*pnewbf)->bexponent = pbf->bexponent; 369 pbf = *pnewbf; 370 pbf->bsize = productsize; 371 } 372 373 /* 374 * Now pbf points to the input and the output. Step through 375 * each level of the tables. 376 */ 377 for (i = 0; i <= itlast; i++) { 378 if (tablepower[i] == 0) 379 continue; 380 381 lengthp = length[i]; 382 if (lengthp == 1) { 383 /* short multiplier (<= 10**4 or 2**13) */ 384 if (base == 10) { 385 /* left shift by tablepower[i] */ 386 __multiply_base_ten_by_two(pbf, tablepower[i]); 387 } else { 388 __multiply_base_two(pbf, (table[i])[0]); 389 } 390 continue; 391 } 392 393 lengthx = pbf->blength; 394 if (lengthx == 1) { 395 /* short multiplicand */ 396 multiplier = pbf->bsignificand[0]; 397 (void) memcpy(pbf->bsignificand, table[i], 398 lengthp * sizeof (unsigned short)); 399 pbf->blength = lengthp; 400 if (base == 10) 401 __multiply_base_ten(pbf, multiplier); 402 else 403 __multiply_base_two(pbf, multiplier); 404 continue; 405 } 406 407 /* keep track of trailing zeroes */ 408 trailing_zeros_to_delete = 0; 409 410 /* initialize for carry propagation */ 411 pbf->bsignificand[lengthx + lengthp - 1] = 0; 412 413 /* 414 * General case - the result will be accumulated in *pbf 415 * from most significant digit to least significant. 416 */ 417 for (j = lengthx + lengthp - 2; j >= 0; j--) { 418 istart = j - lengthp + 1; 419 if (istart < 0) 420 istart = 0; 421 422 istop = lengthx - 1; 423 if (istop > j) 424 istop = j; 425 426 pp = table[i]; 427 if (base == 2) { 428 __multiply_base_two_vector(istop - istart + 1, 429 &(pbf->bsignificand[istart]), 430 &(pp[j - istop]), product); 431 if (product[2] != 0) 432 __carry_propagate_two( 433 (unsigned int)product[2], 434 &(pbf->bsignificand[j + 2])); 435 if (product[1] != 0) 436 __carry_propagate_two( 437 (unsigned int)product[1], 438 &(pbf->bsignificand[j + 1])); 439 } else { 440 __multiply_base_ten_vector(istop - istart + 1, 441 &(pbf->bsignificand[istart]), 442 &(pp[j - istop]), product); 443 if (product[2] != 0) 444 __carry_propagate_ten( 445 (unsigned int)product[2], 446 &(pbf->bsignificand[j + 2])); 447 if (product[1] != 0) 448 __carry_propagate_ten( 449 (unsigned int)product[1], 450 &(pbf->bsignificand[j + 1])); 451 } 452 pbf->bsignificand[j] = product[0]; 453 if (i < itlast || j > lengthx + lengthp - 4 454 - needed_precision) 455 continue; 456 457 /* 458 * On the last multiplication, it's not necessary 459 * to develop the entire product if further digits 460 * can't possibly affect significant digits. But 461 * note that further digits can affect the product 462 * in one of two ways: (i) the sum of digits beyond 463 * the significant ones can cause a carry that would 464 * propagate into the significant digits, or (ii) no 465 * carry will occur, but there may be more nonzero 466 * digits that will need to be recorded in a sticky 467 * bit. 468 */ 469 excess_check = lengthx + lengthp; 470 if (pbf->bsignificand[excess_check - 1] == 0) 471 excess_check--; 472 excess_check -= needed_precision + 4; 473 canquit = ((base == 2)? 65535 : 9999) - 474 ((lengthx < lengthp)? lengthx : lengthp); 475 /* 476 * If j <= excess_check, then we have all the 477 * significant digits. If the (j + 1)-st digit 478 * is no larger than canquit, then the sum of the 479 * digits not yet computed can't carry into the 480 * significant digits. If the j-th and (j + 1)-st 481 * digits are not both zero, then we know we are 482 * discarding nonzero digits. (If both of these 483 * digits are zero, we need to keep forming more 484 * of the product to see whether or not there are 485 * any more nonzero digits.) 486 */ 487 if (j <= excess_check && 488 pbf->bsignificand[j + 1] <= canquit && 489 (pbf->bsignificand[j + 1] | pbf->bsignificand[j]) 490 != 0) { 491 /* can discard j+1, j, ... 0 */ 492 trailing_zeros_to_delete = j + 2; 493 494 /* set sticky bit */ 495 pbf->bsignificand[j + 2] |= 1; 496 break; 497 } 498 } 499 500 /* if the product didn't carry, delete the leading zero */ 501 pbf->blength = lengthx + lengthp; 502 if (pbf->bsignificand[pbf->blength - 1] == 0) 503 pbf->blength--; 504 505 /* look for additional trailing zeros to delete */ 506 for (; pbf->bsignificand[trailing_zeros_to_delete] == 0; 507 trailing_zeros_to_delete++) 508 continue; 509 510 if (trailing_zeros_to_delete > 0) { 511 for (j = 0; j < (int)pbf->blength - 512 trailing_zeros_to_delete; j++) { 513 pbf->bsignificand[j] = pbf->bsignificand[j 514 + trailing_zeros_to_delete]; 515 } 516 pbf->blength -= trailing_zeros_to_delete; 517 pbf->bexponent += trailing_zeros_to_delete << 518 ((base == 2)? 4 : 2); 519 } 520 } 521 } 522