/* port/fp/char_to_decimal.h */

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * This file contains the common part of the functions string_to_decimal,
 * func_to_decimal, and file_to_decimal.  Much of this code has been
 * duplicated in wstring_to_decimal (see wstod.c) with some simplifications
 * and appropriate modifications for wide characters.  DO NOT fix a bug here
 * without fixing the same bug in wstring_to_decimal, if it applies.
 *
 * The code below makes the following assumptions.
 *
 * 1. The first six parameters to the function are declared with the
 *    following names and types:
 *
 *    char **ppc;
 *    int nmax;
 *    int fortran_conventions;
 *    decimal_record *pd;
 *    enum decimal_string_form *pform;
 *    char **pechar;
 *
 * 2. Before this file is #included, the following variables have been
 *    defined and initialized as shown:
 *
 *    char *cp;
 *    char *good = *ppc - 1;
 *    int current;
 *    int nread;
 *
 *    If the first character can be read successfully, then current is set
 *    to the value of the first character, cp is set to *ppc, (char)current
 *    is stored at *cp, and nread = 1.  If the first character cannot be
 *    read successfully, then current = EOF and nread = 0.
 *
 * 3. The macro NEXT is defined to expand to code that implements
 *    the following logic:
 *
 *      if (nread < nmax) {
 *          current = <next character>;
 *          if (current != EOF) {
 *             *++cp = (char)current;
 *             nread++;
 *          }
 *      } else
 *          current = EOF;
 *
 *    Note that nread always reflects the number of characters successfully
 *    read, the buffer pointed to by *ppc gets filled only with characters
 *    that have been successfully read, and cp always points to the location
 *    in the buffer that was filled by the last character successfully read.
 *    current == EOF if and only if we can't read any more, either because
 *    we've reached the end of the input file or the buffer is full (i.e.,
 *    we've read nmax characters).
 *
 * 4. After this file is #included, the following variables may be used
 *    and will have the specified values:
 *
 *    *ppc, *pd, *pform, and *pechar will be set as documented in the
 *      manual page;
 *    nmax and fortran_conventions will be unchanged;
 *    nread will be the number of characters actually read;
 *    cp will point to the last character actually read, provided at least
 *      one character was read successfully (in which case cp >= *ppc).
 */

/*
 * UCASE(c) maps an ASCII lower-case letter to its upper-case counterpart
 * (by subtracting 32) and yields every other value, including EOF,
 * unchanged.  The argument is evaluated more than once, so it must not
 * have side effects.
 */
#define	UCASE(c) ((('a' <= (c)) && ((c) <= 'z'))? (c) - 32 : (c))

/*
 * NZDIGIT(c) is true if c is a nonzero decimal digit or, when the
 * variable "form" in the enclosing scope is negative (the marker used
 * for the hex floating point form), a hex letter a-f or A-F.  The
 * argument is evaluated more than once, so it must not have side effects.
 */
#define	NZDIGIT(c)	(('1' <= (c) && (c) <= '9') || ((int)form < 0 && \
			(('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))))

/*
 * Common parser body.  Starting from the character in "current", this
 * block scans optional white space, an optional sign, and then one of:
 * a decimal number, a hex floating point number (only attempted when
 * fortran_conventions < 0), "inf"/"infinity", or "nan"/"nan(...)", in any
 * letter case.  Significant digits are collected in pd->ds, the scale in
 * pd->exponent, and the recognized syntax in *pform.
 *
 * Throughout, "good" points to the last character known to belong to a
 * valid token; on exit *ppc is advanced to good + 1, or the result is
 * reset to an invalid signaling NaN if nothing was accepted.
 *
 * sigfound records parsing progress: -1 = no digits found yet,
 * 0 = only zeros found so far, 1 = significant digits found.
 */
{
	static const char    *infstring = "INFINITY";
	static const char    *nanstring = "NAN";

	int	sigfound, spacefound = 0;
	int	ids = 0;	/* next free index in pd->ds */
	int	i, agree;
	int	nzbp = 0; /* number of zeros before point */
	int	nzap = 0; /* number of zeros after point */
	char	decpt;
	int	nfast, nfastlimit;
	char	*pfast;
	int	e, esign;
	/*
	 * Digit counts are shifted left by expshift before being applied
	 * to pd->exponent: 0 for decimal forms, 2 (i.e., a factor of 4 per
	 * digit) once a hex form has been recognized.
	 */
	int	expshift = 0;
	enum decimal_string_form	form;

	/*
	 * This routine assumes that the radix point is a single
	 * ASCII character, so that following this assignment, the
	 * condition (current == decpt) will correctly detect it.
	 */
	if (fortran_conventions > 0)
		decpt = '.';
	else
		decpt = *(localeconv()->decimal_point);

	/* input is invalid until we find something */
	pd->fpclass = fp_signaling;
	pd->sign = 0;
	pd->exponent = 0;
	pd->ds[0] = '\0';
	pd->more = 0;
	pd->ndigits = 0;
	*pform = form = invalid_form;
	*pechar = NULL;

	/* skip white space */
	while (isspace(current)) {
		spacefound = 1;
		NEXT;
	}

	if (fortran_conventions >= 2 && spacefound) {
		/*
		 * We found at least one white space character.  For
		 * Fortran formatted input, accept this; if we don't
		 * find anything else, we'll interpret it as a valid zero.
		 */
		pd->fpclass = fp_zero;
		form = whitespace_form;
		sigfound = 0;		/* 0 = only zeros found so far */
		if (current == EOF) {
			good = cp;
			goto done;
		} else {
			good = cp - 1;
		}
	} else {
		sigfound = -1;		/* -1 = no digits found yet */
	}

	/* look for optional leading sign */
	if (current == '+') {
		NEXT;
	} else if (current == '-') {
		pd->sign = 1;
		NEXT;
	}

	/*
	 * Admissible first non-white-space, non-sign characters are
	 * 0-9, i, I, n, N, or the radix point (and, when
	 * fortran_conventions >= 2, a blank, which is treated like a
	 * leading zero).
	 */
	if ('1' <= current && current <= '9') {
		good = cp;
		pd->fpclass = fp_normal;
		form = fixed_int_form;
		sigfound = 1;		/* 1 = significant digits found */
		pd->ds[ids++] = (char)current;
		NEXT;
	} else {
		switch (current) {
		case ' ':
			if (fortran_conventions < 2)
				goto done;
			/*
			 * When fortran_conventions >= 2, treat leading
			 * blanks the same as leading zeroes.
			 */
			/*FALLTHRU*/

		case '0':
			/*
			 * Accept the leading zero and set pd->fpclass
			 * accordingly, but don't set sigfound until we
			 * determine that this isn't a "fake" hex string
			 * (i.e., 0x.p...).
			 */
			good = cp;
			pd->fpclass = fp_zero;
			if (fortran_conventions < 0) {
				/* look for a hex fp string */
				NEXT;
				if (current == 'X' || current == 'x') {
					/*
					 * Assume hex fp form.  A negative
					 * form value is the marker tested
					 * by NZDIGIT and by the exponent
					 * delimiter checks below.
					 */
					form = (enum decimal_string_form)-1;
					expshift = 2;
					NEXT;
					/*
					 * Only a digit or radix point can
					 * follow "0x".
					 */
					if (NZDIGIT(current)) {
						pd->fpclass = fp_normal;
						good = cp;
						sigfound = 1;
						pd->ds[ids++] = (char)current;
						NEXT;
						break;
					} else if (current == decpt) {
						NEXT;
						goto afterpoint;
					} else if (current != '0') {
						/* not hex fp after all */
						form = fixed_int_form;
						expshift = 0;
						goto done;
					}
				} else {
					form = fixed_int_form;
				}
			} else {
				form = fixed_int_form;
			}

			/* skip all leading zeros */
			while (current == '0' || (current == ' ' &&
			    fortran_conventions >= 2)) {
				NEXT;
			}
			sigfound = 0;	/* 0 = only zeros found so far */
			if (current == EOF) {
				good = cp;
				goto done;
			} else {
				good = cp - 1;
			}
			break;

		case 'i':
		case 'I':
			/* look for inf or infinity */
			NEXT;
			agree = 1;
			while (agree <= 7 &&
			    UCASE(current) == infstring[agree]) {
				NEXT;
				agree++;
			}
			if (agree < 3)
				goto done;
			/* found valid infinity */
			pd->fpclass = fp_infinity;
			sigfound = 1;
			__inf_read = 1;
			if (agree < 8) {
				/*
				 * Only "inf" matched in full; back good up
				 * to the 'f', discarding any partial match
				 * of the remaining "inity".
				 */
				good = (current == EOF)? cp + 3 - agree :
				    cp + 2 - agree;
				form = inf_form;
			} else {
				good = (current == EOF)? cp : cp - 1;
				form = infinity_form;
			}
			/*
			 * Accept trailing blanks if no extra characters
			 * intervene.
			 */
			if (fortran_conventions >= 2 && (agree == 3 ||
			    agree == 8)) {
				while (current == ' ') {
					NEXT;
				}
				good = (current == EOF)? cp : cp - 1;
			}
			goto done;

		case 'n':
		case 'N':
			/* look for nan or nan(string) */
			NEXT;
			agree = 1;
			while (agree <= 2 &&
			    UCASE(current) == nanstring[agree]) {
				NEXT;
				agree++;
			}
			if (agree < 3)
				goto done;
			/* found valid NaN */
			good = (current == EOF)? cp : cp - 1;
			pd->fpclass = fp_quiet;
			form = nan_form;
			sigfound = 1;
			__nan_read = 1;
			if (current == '(') {
				/*
				 * Accept parenthesized string.  Characters
				 * that fit are copied to pd->ds; if the
				 * string is longer, the excess is skipped
				 * and pd->more is set.
				 */
				NEXT;
				if (fortran_conventions < 0) {
					/* only alphanumerics and '_' */
					while ((isalnum(current) ||
					    current == '_') &&
					    ids < DECIMAL_STRING_LENGTH - 1) {
						pd->ds[ids++] = (char)current;
						NEXT;
					}
					while (isalnum(current) ||
					    current == '_') {
						pd->more = 1;
						NEXT;
					}
				} else {
					/* anything up to ')' */
					while (current > 0 && current != ')' &&
					    ids < DECIMAL_STRING_LENGTH - 1) {
						pd->ds[ids++] = (char)current;
						NEXT;
					}
					while (current > 0 && current != ')') {
						pd->more = 1;
						NEXT;
					}
				}
				/*
				 * Without the closing parenthesis, only the
				 * plain "nan" accepted above is kept.
				 */
				if (current != ')')
					goto done;
				good = cp;
				form = nanstring_form;
				/* prepare for loop below */
				if (fortran_conventions >= 2) {
					NEXT;
				}
			}
			/* accept trailing blanks */
			if (fortran_conventions >= 2) {
				while (current == ' ') {
					NEXT;
				}
				good = (current == EOF)? cp : cp - 1;
			}
			goto done;

		default:
			if (current == decpt) {
				/*
				 * Don't accept the radix point just yet;
				 * we need to see at least one digit.
				 */
				NEXT;
				goto afterpoint;
			}
			goto done;
		}
	}

nextnumber:
	/*
	 * Admissible characters after the first digit are a valid digit,
	 * an exponent delimiter (E or e for any decimal form; +, -, D, d,
	 * Q, or q when fortran_conventions > 0; P or p for hex form),
	 * or the radix point.  (Note that we can't get here unless we've
	 * already found a digit.)
	 */
	if (NZDIGIT(current)) {
		/*
		 * Found another nonzero digit.  If there's enough room
		 * in pd->ds, store any intervening zeros we've found so far
		 * and then store this digit.  Otherwise, stop storing
		 * digits in pd->ds and set pd->more.
		 */
		if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) {
			for (i = 0; i < nzbp; i++)
				pd->ds[ids++] = '0';
			pd->ds[ids++] = (char)current;
		} else {
			/*
			 * The dropped zeros and this dropped digit all
			 * lie before the point, so they scale the value.
			 */
			pd->exponent += (nzbp + 1) << expshift;
			pd->more = 1;
			if (ids < DECIMAL_STRING_LENGTH) {
				pd->ds[ids] = '\0';
				pd->ndigits = ids;
				/* don't store any more digits */
				ids = DECIMAL_STRING_LENGTH;
			}
		}
		pd->fpclass = fp_normal;
		sigfound = 1;
		nzbp = 0;
		NEXT;

		/*
		 * Use an optimized loop to grab a consecutive sequence
		 * of nonzero digits quickly.  (Once ids has been set to
		 * DECIMAL_STRING_LENGTH, nfastlimit is negative and the
		 * loop body never runs.)
		 */
		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
		for (nfast = 0, pfast = &(pd->ds[ids]);
		    nfast < nfastlimit && NZDIGIT(current);
		    nfast++) {
			*pfast++ = (char)current;
			NEXT;
		}
		ids += nfast;
		if (current == '0')
			goto nextnumberzero;	/* common case */
		/* advance good to the last accepted digit */
		good = (current == EOF)? cp : cp - 1;
		goto nextnumber;
	} else {
		switch (current) {
		case ' ':
			if (fortran_conventions < 2)
				goto done;
			if (fortran_conventions == 2) {
				/* blanks are accepted but ignored */
				while (current == ' ') {
					NEXT;
				}
				good = (current == EOF)? cp : cp - 1;
				goto nextnumber;
			}
			/*
			 * When fortran_conventions > 2, treat internal
			 * blanks the same as zeroes.
			 */
			/*FALLTHRU*/

		case '0':
nextnumberzero:
			/*
			 * Count zeros before the radix point.  Later we
			 * will either put these zeros into pd->ds or add
			 * nzbp to pd->exponent to account for them.
			 */
			while (current == '0' || (current == ' ' &&
			    fortran_conventions > 2)) {
				nzbp++;
				NEXT;
			}
			good = (current == EOF)? cp : cp - 1;
			goto nextnumber;

		case '+':
		case '-':
		case 'D':
		case 'd':
		case 'Q':
		case 'q':
			/*
			 * Only accept these as the start of the exponent
			 * field if fortran_conventions is positive.
			 */
			if (fortran_conventions <= 0)
				goto done;
			/*FALLTHRU*/

		case 'E':
		case 'e':
			/* decimal exponent delimiters are invalid in hex */
			if ((int)form < 0)
				goto done;
			goto exponent;

		case 'P':
		case 'p':
			/* the hex exponent delimiter is invalid in decimal */
			if ((int)form > 0)
				goto done;
			goto exponent;

		default:
			if (current == decpt) {
				/* accept the radix point */
				good = cp;
				if (form == fixed_int_form)
					form = fixed_intdot_form;
				NEXT;
				goto afterpoint;
			}
			goto done;
		}
	}

afterpoint:
	/*
	 * Admissible characters after the radix point are a valid digit
	 * or an exponent delimiter.  (Note that it is possible to get
	 * here even though we haven't found any digits yet.)
	 */
	if (NZDIGIT(current)) {
		/* found a digit after the point; revise form */
		if (form == invalid_form || form == whitespace_form)
			form = fixed_dotfrac_form;
		else if (form == fixed_intdot_form)
			form = fixed_intdotfrac_form;
		good = cp;
		if (sigfound < 1) {
			/* no significant digits found until now */
			pd->fpclass = fp_normal;
			sigfound = 1;
			pd->ds[ids++] = (char)current;
			/*
			 * Shift the (positive) digit count and then negate.
			 * Left-shifting a negative value is undefined
			 * behavior in C, so the negation must come last.
			 * NOTE: if wstring_to_decimal (wstod.c) contains
			 * the same expression, it needs the same fix.
			 */
			pd->exponent = -((nzap + 1) << expshift);
		} else {
			/* significant digits have been found */
			if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) {
				for (i = 0; i < nzbp + nzap; i++)
					pd->ds[ids++] = '0';
				pd->ds[ids++] = (char)current;
				pd->exponent -= (nzap + 1) << expshift;
			} else {
				/*
				 * No room: only the pending zeros before
				 * the point affect the scale.
				 */
				pd->exponent += nzbp << expshift;
				pd->more = 1;
				if (ids < DECIMAL_STRING_LENGTH) {
					pd->ds[ids] = '\0';
					pd->ndigits = ids;
					/* don't store any more digits */
					ids = DECIMAL_STRING_LENGTH;
				}
			}
		}
		nzbp = 0;
		nzap = 0;
		NEXT;

		/*
		 * Use an optimized loop to grab a consecutive sequence
		 * of nonzero digits quickly.  Each digit stored here lies
		 * after the point, so the exponent is reduced to match.
		 */
		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
		for (nfast = 0, pfast = &(pd->ds[ids]);
		    nfast < nfastlimit && NZDIGIT(current);
		    nfast++) {
			*pfast++ = (char)current;
			NEXT;
		}
		ids += nfast;
		pd->exponent -= nfast << expshift;
		if (current == '0')
			goto zeroafterpoint;
		/* advance good to the last accepted digit */
		good = (current == EOF)? cp : cp - 1;
		goto afterpoint;
	} else {
		switch (current) {
		case ' ':
			if (fortran_conventions < 2)
				goto done;
			if (fortran_conventions == 2) {
				/*
				 * Treat a radix point followed by blanks
				 * but no digits as zero so we'll pass FCVS.
				 */
				if (sigfound == -1) {
					pd->fpclass = fp_zero;
					sigfound = 0;
				}
				while (current == ' ') {
					NEXT;
				}
				good = (current == EOF)? cp : cp - 1;
				goto afterpoint;
			}
			/*
			 * when fortran_conventions > 2, treat internal
			 * blanks the same as zeroes
			 */
			/*FALLTHRU*/

		case '0':
			/* found a digit after the point; revise form */
			if (form == invalid_form || form == whitespace_form)
				form = fixed_dotfrac_form;
			else if (form == fixed_intdot_form)
				form = fixed_intdotfrac_form;
			if (sigfound == -1) {
				pd->fpclass = fp_zero;
				sigfound = 0;
			}
zeroafterpoint:
			/*
			 * Count zeros after the radix point.  If we find
			 * any more nonzero digits later, we will put these
			 * zeros into pd->ds and decrease pd->exponent by
			 * nzap.
			 */
			while (current == '0' || (current == ' ' &&
			    fortran_conventions > 2)) {
				nzap++;
				NEXT;
			}
			if (current == EOF) {
				good = cp;
				goto done;
			} else {
				good = cp - 1;
			}
			goto afterpoint;

		case '+':
		case '-':
		case 'D':
		case 'd':
		case 'Q':
		case 'q':
			/*
			 * Only accept these as the start of the exponent
			 * field if fortran_conventions is positive.
			 */
			if (fortran_conventions <= 0)
				goto done;
			/*FALLTHRU*/

		case 'E':
		case 'e':
			/* don't accept exponent without preceding digits */
			if (sigfound == -1 || (int)form < 0)
				goto done;
			break;

		case 'P':
		case 'p':
			/* don't accept exponent without preceding digits */
			if (sigfound == -1 || (int)form > 0)
				goto done;
			break;

		default:
			goto done;
		}
	}

exponent:
	/*
	 * Set *pechar to point to the character that looks like the
	 * beginning of the exponent field, then attempt to parse it.
	 */
	*pechar = cp;
	if (current != '+' && current != '-') {
		/* skip the exponent character and following blanks */
		NEXT;
		if (fortran_conventions >= 2 && current == ' ') {
			while (current == ' ') {
				NEXT;
			}
			if (fortran_conventions > 2)
				good = (current == EOF)? cp : cp - 1;
		}
	}

	e = 0;
	esign = 0;

	/* look for optional exponent sign */
	if (current == '+') {
		NEXT;
	} else if (current == '-') {
		esign = 1;
		NEXT;
	}

	/*
	 * Accumulate explicit exponent.  Note that if we don't find at
	 * least one digit, good won't be updated and e will remain 0.
	 * Also, we keep e from getting too large so we don't overflow
	 * the range of int (but notice that the threshold is large
	 * enough that any larger e would cause the result to underflow
	 * or overflow anyway).
	 */
	while (('0' <= current && current <= '9') || current == ' ') {
		if (current == ' ') {
			if (fortran_conventions < 2)
				break;
			if (fortran_conventions == 2) {
				/* skip the blank without accepting it */
				NEXT;
				continue;
			}
			/* fortran_conventions > 2: a blank counts as zero */
			current = '0';
		}
		good = cp;
		if (e <= 1000000)
			e = 10 * e + current - '0';
		NEXT;
		if (fortran_conventions == 2 && current == ' ') {
			/* accept trailing blanks */
			while (current == ' ') {
				NEXT;
			}
			good = (current == EOF)? cp : cp - 1;
		}
	}
	if (esign == 1)
		pd->exponent -= e;
	else
		pd->exponent += e;

	/*
	 * If we successfully parsed an exponent field, update form
	 * accordingly.  If we didn't, don't set *pechar.
	 */
	if (good >= *pechar) {
		switch (form) {
		case whitespace_form:
		case fixed_int_form:
			form = floating_int_form;
			break;

		case fixed_intdot_form:
			form = floating_intdot_form;
			break;

		case fixed_dotfrac_form:
			form = floating_dotfrac_form;
			break;

		case fixed_intdotfrac_form:
			form = floating_intdotfrac_form;
			break;

		default:
			/* any other form (e.g., hex) is left unchanged */
			break;
		}
	} else {
		*pechar = NULL;
	}

done:
	/*
	 * If we found any zeros before the radix point that were not
	 * accounted for earlier, adjust the exponent.  (This is only
	 * relevant when pd->fpclass == fp_normal, but it's harmless
	 * in all other cases.)
	 */
	pd->exponent += nzbp << expshift;

	/* terminate pd->ds if we haven't already */
	if (ids < DECIMAL_STRING_LENGTH) {
		pd->ds[ids] = '\0';
		pd->ndigits = ids;
	}

	/*
	 * If we accepted any characters, advance *ppc to point to the
	 * first character we didn't accept; otherwise, pass back a
	 * signaling nan.
	 */
	if (good >= *ppc) {
		*ppc = good + 1;
	} else {
		pd->fpclass = fp_signaling;
		pd->sign = 0;
		form = invalid_form;
	}

	*pform = form;
}