port/fp/__x_power.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "lint.h"
#include "base_conversion.h"
#include <sys/types.h>
#include <malloc.h>
#include <memory.h>
#include <stdlib.h>
#include <errno.h>

/*
 * Multiply a _big_float by a power of two or ten
 */

/* see comment in double_decim.c */
static unsigned int
__quorem10000(unsigned int x, unsigned short *pr)
{
	*pr = x % 10000;
	return (x / 10000);
}

/*
 * Multiply a base-2**16 significand by multiplier.  Extend length as
 * necessary to accommodate carries.
 */
static void
__multiply_base_two(_big_float *pbf, unsigned short multiplier)
{
	unsigned int	p, carry;
	int		j, length = pbf->blength;

	carry = 0;
	for (j = 0; j < length; j++) {
		p = (unsigned int)pbf->bsignificand[j] * multiplier + carry;
		pbf->bsignificand[j] = p & 0xffff;
		carry = p >> 16;
	}
	if (carry != 0)
		pbf->bsignificand[j++] = carry;
	pbf->blength = j;
}

/*
 * Multiply a base-10**4 significand by multiplier.  Extend length as
 * necessary to accommodate carries.
 */
static void
__multiply_base_ten(_big_float *pbf, unsigned short multiplier)
{
	unsigned int	p, carry;
	int		j, length = pbf->blength;

	carry = 0;
	for (j = 0; j < length; j++) {
		p = (unsigned int)pbf->bsignificand[j] * multiplier + carry;
		carry = __quorem10000(p, &pbf->bsignificand[j]);
	}
	while (carry != 0) {
		carry = __quorem10000(carry, &pbf->bsignificand[j]);
		j++;
	}
	pbf->blength = j;
}

/*
 * Multiply a base-10**4 significand by 2**multiplier.  Extend length
 * as necessary to accommodate carries.
 */
static void
__multiply_base_ten_by_two(_big_float *pbf, unsigned short multiplier)
{
	unsigned int	p, carry;
	int		j, length = pbf->blength;

	carry = 0;
	for (j = 0; j < length; j++) {
		p = ((unsigned int)pbf->bsignificand[j] << multiplier) + carry;
		carry = __quorem10000(p, &pbf->bsignificand[j]);
	}
	while (carry != 0) {
		carry = __quorem10000(carry, &pbf->bsignificand[j]);
		j++;
	}
	pbf->blength = j;
}

/*
 * Propagate carries in a base-2**16 significand.
 */
static void
__carry_propagate_two(unsigned int carry, unsigned short *psignificand)
{
	unsigned int	p;
	int		j;

	j = 0;
	while (carry != 0) {
		p = psignificand[j] + carry;
		psignificand[j++] = p & 0xffff;
		carry = p >> 16;
	}
}

/*
 * Propagate carries in a base-10**4 significand.
 */
static void
__carry_propagate_ten(unsigned int carry, unsigned short *psignificand)
{
	unsigned int	p;
	int		j;

	j = 0;
	while (carry != 0) {
		p = psignificand[j] + carry;
		carry = __quorem10000(p, &psignificand[j]);
		j++;
	}
}

/*
 * Given x[] and y[], base-2**16 vectors of length n, compute the
 * dot product
 *
 * sum (i=0,n-1) of x[i]*y[n-1-i]
 *
 * The product may fill as many as three base-2**16 digits; product[0]
 * is the least significant, product[2] the most.
 */
static void
__multiply_base_two_vector(unsigned short n, unsigned short *px,
    unsigned short *py, unsigned short *product)
{

	unsigned int	acc, p;
	unsigned short	carry;
	int		i;

	acc = 0;
	carry = 0;
	for (i = 0; i < (int)n; i++) {
		p = px[i] * py[n - 1 - i] + acc;
		if (p < acc)
			carry++;
		acc = p;
	}
	product[0] = acc & 0xffff;
	product[1] = acc >> 16;
	product[2] = carry;
}

/*
 * Given x[] and y[], base-10**4 vectors of length n, compute the
 * dot product
 *
 * sum (i=0,n-1) of x[i]*y[n-1-i]
 *
 * The product may fill as many as three base-10**4 digits; product[0]
 * is the least significant, product[2] the most.
 */
#define	ABASE	3000000000u	/* base of accumulator */

static void
__multiply_base_ten_vector(unsigned short n, unsigned short *px,
    unsigned short *py, unsigned short *product)
{

	unsigned int	acc;
	unsigned short	carry;
	int		i;

	acc = 0;
	carry = 0;
	for (i = 0; i < (int)n; i++) {
		acc = px[i] * py[n - 1 - i] + acc;
		if (acc >= ABASE) {
			carry++;
			acc -= ABASE;
		}
	}
	product[0] = acc % 10000;
	acc = acc / 10000;
	product[1] = acc % 10000;
	acc = acc / 10000;
	product[2] = acc + (ABASE / 100000000) * carry;
}

/*
 * Multiply *pbf by the n-th power of mult, which must be two or
 * ten.  If mult is two, *pbf is assumed to be base 10**4; if mult
 * is ten, *pbf is assumed to be base 2**16.  precision specifies
 * the number of significant bits or decimal digits required in the
 * result.  (The product may have more or fewer digits than this,
 * but it will be accurate to at least this many.)
 *
 * On exit, if the product is small enough, it overwrites *pbf, and
 * *pnewbf is set to pbf.  If the product is too large to fit in *pbf,
 * this routine calls malloc(3M) to allocate storage and sets *pnewbf
 * to point to this area; it is the caller's responsibility to free
 * this storage when it is no longer needed.  Note that *pbf may be
 * modified even when the routine allocates new storage.
 *
 * If n is too large, we set errno to ERANGE and call abort(3C).
 * If an attempted malloc fails, we set errno to ENOMEM and call
 * abort(3C).
 */
void
__big_float_times_power(_big_float *pbf, int mult, int n, int precision,
    _big_float **pnewbf)
{
	int		base, needed_precision, productsize;
	int		i, j, itlast, trailing_zeros_to_delete;
	int		tablepower[3], length[3];
	int		lengthx, lengthp, istart, istop;
	int		excess_check;
	unsigned short	*pp, *table[3], canquit;
	unsigned short	multiplier, product[3];

	if (pbf->blength == 0) {
		*pnewbf = pbf;
		return;
	}

	if (mult == 2) {
		/*
		 * Handle small n cases that don't require extra
		 * storage quickly.
		 */
		if (n <= 16 && pbf->blength + 2 < pbf->bsize) {
			__multiply_base_ten_by_two(pbf, n);
			*pnewbf = pbf;
			return;
		}

		/* *pbf is base 10**4 */
		base = 10;

		/*
		 * Convert precision (base ten digits) to needed_precision
		 * (base 10**4 digits), allowing an additional digit at
		 * each end.
		 */
		needed_precision = 2 + (precision >> 2);

		/*
		 * Set up pointers to the table entries and compute their
		 * lengths.
		 */
		if (n < __TBL_2_SMALL_SIZE) {
			itlast = 0;
			tablepower[0] = n;
			tablepower[1] = tablepower[2] = 0;
		} else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE)) {
			itlast = 1;
			tablepower[0] = n % __TBL_2_SMALL_SIZE;
			tablepower[1] = n / __TBL_2_SMALL_SIZE;
			tablepower[2] = 0;
		} else if (n < (__TBL_2_SMALL_SIZE * __TBL_2_BIG_SIZE *
		    __TBL_2_HUGE_SIZE)) {
			itlast = 2;
			tablepower[0] = n % __TBL_2_SMALL_SIZE;
			n /= __TBL_2_SMALL_SIZE;
			tablepower[1] = n % __TBL_2_BIG_SIZE;
			tablepower[2] = n / __TBL_2_BIG_SIZE;
		} else {
			errno = ERANGE;
			abort();
		}
		pp = (unsigned short *)__tbl_2_small_start + tablepower[0];
		table[0] = (unsigned short *)__tbl_2_small_digits + pp[0];
		length[0] = pp[1] - pp[0];
		pp = (unsigned short *)__tbl_2_big_start + tablepower[1];
		table[1] = (unsigned short *)__tbl_2_big_digits + pp[0];
		length[1] = pp[1] - pp[0];
		pp = (unsigned short *)__tbl_2_huge_start + tablepower[2];
		table[2] = (unsigned short *)__tbl_2_huge_digits + pp[0];
		length[2] = pp[1] - pp[0];
	} else {
		if (n <= 4 && pbf->blength + 1 < pbf->bsize) {
			pbf->bexponent += (short)n;
			__multiply_base_two(pbf, __tbl_10_small_digits[n]);
			*pnewbf = pbf;
			return;
		}

		/* *pbf is base 2**16 */
		base = 2;
		pbf->bexponent += (short)n; /* now need to multiply by 5**n */
		needed_precision = 2 + (precision >> 4);
		if (n < __TBL_10_SMALL_SIZE) {
			itlast = 0;
			tablepower[0] = n;
			tablepower[1] = tablepower[2] = 0;
		} else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE)) {
			itlast = 1;
			tablepower[0] = n % __TBL_10_SMALL_SIZE;
			tablepower[1] = n / __TBL_10_SMALL_SIZE;
			tablepower[2] = 0;
		} else if (n < (__TBL_10_SMALL_SIZE * __TBL_10_BIG_SIZE *
		    __TBL_10_HUGE_SIZE)) {
			itlast = 2;
			tablepower[0] = n % __TBL_10_SMALL_SIZE;
			n /= __TBL_10_SMALL_SIZE;
			tablepower[1] = n % __TBL_10_BIG_SIZE;
			tablepower[2] = n / __TBL_10_BIG_SIZE;
		} else {
			errno = ERANGE;
			abort();
		}
		pp = (unsigned short *)__tbl_10_small_start + tablepower[0];
		table[0] = (unsigned short *)__tbl_10_small_digits + pp[0];
		length[0] = pp[1] - pp[0];
		pp = (unsigned short *)__tbl_10_big_start + tablepower[1];
		table[1] = (unsigned short *)__tbl_10_big_digits + pp[0];
		length[1] = pp[1] - pp[0];
		pp = (unsigned short *)__tbl_10_huge_start + tablepower[2];
		table[2] = (unsigned short *)__tbl_10_huge_digits + pp[0];
		length[2] = pp[1] - pp[0];
	}

	/* compute an upper bound on the size of the product */
	productsize = pbf->blength;
	for (i = 0; i <= itlast; i++)
		productsize += length[i];

	if (productsize < needed_precision)
		needed_precision = productsize;

	if (productsize <= pbf->bsize) {
		*pnewbf = pbf;
	} else {
		i = sizeof (_big_float) + sizeof (unsigned short) *
		    (productsize - _BIG_FLOAT_SIZE);
		if ((*pnewbf = malloc(i)) == NULL) {
			errno = ENOMEM;
			abort();
		}
		(void) memcpy((*pnewbf)->bsignificand, pbf->bsignificand,
		    pbf->blength * sizeof (unsigned short));
		(*pnewbf)->blength = pbf->blength;
		(*pnewbf)->bexponent = pbf->bexponent;
		pbf = *pnewbf;
		pbf->bsize = productsize;
	}

	/*
	 * Now pbf points to the input and the output.  Step through
	 * each level of the tables.
	 */
	for (i = 0; i <= itlast; i++) {
		if (tablepower[i] == 0)
			continue;

		lengthp = length[i];
		if (lengthp == 1) {
			/* short multiplier (<= 10**4 or 2**13) */
			if (base == 10) {
				/* left shift by tablepower[i] */
				__multiply_base_ten_by_two(pbf, tablepower[i]);
			} else {
				__multiply_base_two(pbf, (table[i])[0]);
			}
			continue;
		}

		lengthx = pbf->blength;
		if (lengthx == 1) {
			/* short multiplicand */
			multiplier = pbf->bsignificand[0];
			(void) memcpy(pbf->bsignificand, table[i],
			    lengthp * sizeof (unsigned short));
			pbf->blength = lengthp;
			if (base == 10)
				__multiply_base_ten(pbf, multiplier);
			else
				__multiply_base_two(pbf, multiplier);
			continue;
		}

		/* keep track of trailing zeroes */
		trailing_zeros_to_delete = 0;

		/* initialize for carry propagation */
		pbf->bsignificand[lengthx + lengthp - 1] = 0;

		/*
		 * General case - the result will be accumulated in *pbf
		 * from most significant digit to least significant.
		 */
		for (j = lengthx + lengthp - 2; j >= 0; j--) {
			istart = j - lengthp + 1;
			if (istart < 0)
				istart = 0;

			istop = lengthx - 1;
			if (istop > j)
				istop = j;

			pp = table[i];
			if (base == 2) {
				__multiply_base_two_vector(istop - istart + 1,
				    &(pbf->bsignificand[istart]),
				    &(pp[j - istop]), product);
				if (product[2] != 0)
					__carry_propagate_two(
					    (unsigned int)product[2],
					    &(pbf->bsignificand[j + 2]));
				if (product[1] != 0)
					__carry_propagate_two(
					    (unsigned int)product[1],
					    &(pbf->bsignificand[j + 1]));
			} else {
				__multiply_base_ten_vector(istop - istart + 1,
				    &(pbf->bsignificand[istart]),
				    &(pp[j - istop]), product);
				if (product[2] != 0)
					__carry_propagate_ten(
					    (unsigned int)product[2],
					    &(pbf->bsignificand[j + 2]));
				if (product[1] != 0)
					__carry_propagate_ten(
					    (unsigned int)product[1],
					    &(pbf->bsignificand[j + 1]));
			}
			pbf->bsignificand[j] = product[0];
			if (i < itlast || j > lengthx + lengthp - 4
			    - needed_precision)
				continue;

			/*
			 * On the last multiplication, it's not necessary
			 * to develop the entire product if further digits
			 * can't possibly affect significant digits.  But
			 * note that further digits can affect the product
			 * in one of two ways: (i) the sum of digits beyond
			 * the significant ones can cause a carry that would
			 * propagate into the significant digits, or (ii) no
			 * carry will occur, but there may be more nonzero
			 * digits that will need to be recorded in a sticky
			 * bit.
			 */
			excess_check = lengthx + lengthp;
			if (pbf->bsignificand[excess_check - 1] == 0)
				excess_check--;
			excess_check -= needed_precision + 4;
			canquit = ((base == 2)? 65535 : 9999) -
			    ((lengthx < lengthp)? lengthx : lengthp);
			/*
			 * If j <= excess_check, then we have all the
			 * significant digits.  If the (j + 1)-st digit
			 * is no larger than canquit, then the sum of the
			 * digits not yet computed can't carry into the
			 * significant digits.  If the j-th and (j + 1)-st
			 * digits are not both zero, then we know we are
			 * discarding nonzero digits.  (If both of these
			 * digits are zero, we need to keep forming more
			 * of the product to see whether or not there are
			 * any more nonzero digits.)
			 */
			if (j <= excess_check &&
			    pbf->bsignificand[j + 1] <= canquit &&
			    (pbf->bsignificand[j + 1] | pbf->bsignificand[j])
			    != 0) {
				/* can discard j+1, j, ... 0 */
				trailing_zeros_to_delete = j + 2;

				/* set sticky bit */
				pbf->bsignificand[j + 2] |= 1;
				break;
			}
		}

		/* if the product didn't carry, delete the leading zero */
		pbf->blength = lengthx + lengthp;
		if (pbf->bsignificand[pbf->blength - 1] == 0)
			pbf->blength--;

		/* look for additional trailing zeros to delete */
		for (; pbf->bsignificand[trailing_zeros_to_delete] == 0;
		    trailing_zeros_to_delete++)
			continue;

		if (trailing_zeros_to_delete > 0) {
			for (j = 0; j < (int)pbf->blength -
			    trailing_zeros_to_delete; j++) {
				pbf->bsignificand[j] = pbf->bsignificand[j
				    + trailing_zeros_to_delete];
			}
			pbf->blength -= trailing_zeros_to_delete;
			pbf->bexponent += trailing_zeros_to_delete <<
			    ((base == 2)? 4 : 2);
		}
	}
}