1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * This file contains the common part of the functions string_to_decimal,
31 * func_to_decimal, and file_to_decimal.  Much of this code has been dupli-
32 * cated in wstring_to_decimal (see wstod.c) with some simplifications and
33 * appropriate modifications for wide characters.  DO NOT fix a bug here
34 * without fixing the same bug in wstring_to_decimal, if it applies.
35 *
36 * The code below makes the following assumptions.
37 *
38 * 1. The first six parameters to the function are declared with the
39 *    following names and types:
40 *
41 *    char **ppc;
42 *    int nmax;
43 *    int fortran_conventions;
44 *    decimal_record *pd;
45 *    enum decimal_string_form *pform;
46 *    char **pechar;
47 *
48 * 2. Before this file is #included, the following variables have been
49 *    defined and initialized as shown:
50 *
51 *    char *cp;
52 *    char *good = *ppc - 1;
53 *    int current;
54 *    int nread;
55 *
56 *    If the first character can be read successfully, then current is set
57 *    to the value of the first character, cp is set to *ppc, (char)current
58 *    is stored at *cp, and nread = 1.  If the first character cannot be
59 *    read successfully, then current = EOF and nread = 0.
60 *
61 * 3. The macro NEXT is defined to expand to code that implements
62 *    the following logic:
63 *
64 *      if (nread < nmax) {
65 *          current = <next character>;
66 *          if (current != EOF) {
67 *             *++cp = (char)current;
68 *             nread++;
69 *          }
70 *      } else
71 *          current = EOF;
72 *
73 *    Note that nread always reflects the number of characters successfully
74 *    read, the buffer pointed to by *ppc gets filled only with characters
75 *    that have been successfully read, and cp always points to the location
76 *    in the buffer that was filled by the last character successfully read.
77 *    current == EOF if and only if we can't read any more, either because
78 *    we've reached the end of the input file or the buffer is full (i.e.,
79 *    we've read nmax characters).
80 *
81 * 4. After this file is #included, the following variables may be used
82 *    and will have the specified values:
83 *
84 *    *ppc, *pd, *pform, and *pechar will be set as documented in the
85 *      manual page;
86 *    nmax and fortran_conventions will be unchanged;
87 *    nread will be the number of characters actually read;
88 *    cp will point to the last character actually read, provided at least
89 *      one character was read successfully (in which case cp >= *ppc).
90 */
91
92#define	UCASE(c) ((('a' <= c) && (c <= 'z'))? c - 32 : c)
93
94#define	NZDIGIT(c)	(('1' <= c && c <= '9') || ((int)form < 0 && \
95			(('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))))
96
97{
98	static const char    *infstring = "INFINITY";
99	static const char    *nanstring = "NAN";
100
101	int	sigfound, spacefound = 0;
102	int	ids = 0;
103	int	i, agree;
104	int	nzbp = 0; /* number of zeros before point */
105	int	nzap = 0; /* number of zeros after point */
106	char	decpt;
107	int	nfast, nfastlimit;
108	char	*pfast;
109	int	e, esign;
110	int	expshift = 0;
111	enum decimal_string_form	form;
112
113	/*
114	 * This routine assumes that the radix point is a single
115	 * ASCII character, so that following this assignment, the
116	 * condition (current == decpt) will correctly detect it.
117	 */
118	if (fortran_conventions > 0)
119		decpt = '.';
120	else
121		decpt = *(localeconv()->decimal_point);
122
123	/* input is invalid until we find something */
124	pd->fpclass = fp_signaling;
125	pd->sign = 0;
126	pd->exponent = 0;
127	pd->ds[0] = '\0';
128	pd->more = 0;
129	pd->ndigits = 0;
130	*pform = form = invalid_form;
131	*pechar = NULL;
132
133	/* skip white space */
134	while (isspace(current)) {
135		spacefound = 1;
136		NEXT;
137	}
138
139	if (fortran_conventions >= 2 && spacefound) {
140		/*
141		 * We found at least one white space character.  For
142		 * Fortran formatted input, accept this; if we don't
143		 * find anything else, we'll interpret it as a valid zero.
144		 */
145		pd->fpclass = fp_zero;
146		form = whitespace_form;
147		sigfound = 0;		/* 0 = only zeros found so far */
148		if (current == EOF) {
149			good = cp;
150			goto done;
151		} else {
152			good = cp - 1;
153		}
154	} else {
155		sigfound = -1;		/* -1 = no digits found yet */
156	}
157
158	/* look for optional leading sign */
159	if (current == '+') {
160		NEXT;
161	} else if (current == '-') {
162		pd->sign = 1;
163		NEXT;
164	}
165
166	/*
167	 * Admissible first non-white-space, non-sign characters are
168	 * 0-9, i, I, n, N, or the radix point.
169	 */
170	if ('1' <= current && current <= '9') {
171		good = cp;
172		pd->fpclass = fp_normal;
173		form = fixed_int_form;
174		sigfound = 1;		/* 1 = significant digits found */
175		pd->ds[ids++] = (char)current;
176		NEXT;
177	} else {
178		switch (current) {
179		case ' ':
180			if (fortran_conventions < 2)
181				goto done;
182			/*
183			 * When fortran_conventions >= 2, treat leading
184			 * blanks the same as leading zeroes.
185			 */
186			/*FALLTHRU*/
187
188		case '0':
189			/*
190			 * Accept the leading zero and set pd->fpclass
191			 * accordingly, but don't set sigfound until we
192			 * determine that this isn't a "fake" hex string
193			 * (i.e., 0x.p...).
194			 */
195			good = cp;
196			pd->fpclass = fp_zero;
197			if (fortran_conventions < 0) {
198				/* look for a hex fp string */
199				NEXT;
200				if (current == 'X' || current == 'x') {
201					/* assume hex fp form */
202					form = (enum decimal_string_form)-1;
203					expshift = 2;
204					NEXT;
205					/*
206					 * Only a digit or radix point can
207					 * follow "0x".
208					 */
209					if (NZDIGIT(current)) {
210						pd->fpclass = fp_normal;
211						good = cp;
212						sigfound = 1;
213						pd->ds[ids++] = (char)current;
214						NEXT;
215						break;
216					} else if (current == decpt) {
217						NEXT;
218						goto afterpoint;
219					} else if (current != '0') {
220						/* not hex fp after all */
221						form = fixed_int_form;
222						expshift = 0;
223						goto done;
224					}
225				} else {
226					form = fixed_int_form;
227				}
228			} else {
229				form = fixed_int_form;
230			}
231
232			/* skip all leading zeros */
233			while (current == '0' || (current == ' ' &&
234			    fortran_conventions >= 2)) {
235				NEXT;
236			}
237			sigfound = 0;	/* 0 = only zeros found so far */
238			if (current == EOF) {
239				good = cp;
240				goto done;
241			} else {
242				good = cp - 1;
243			}
244			break;
245
246		case 'i':
247		case 'I':
248			/* look for inf or infinity */
249			NEXT;
250			agree = 1;
251			while (agree <= 7 &&
252			    UCASE(current) == infstring[agree]) {
253				NEXT;
254				agree++;
255			}
256			if (agree < 3)
257				goto done;
258			/* found valid infinity */
259			pd->fpclass = fp_infinity;
260			sigfound = 1;
261			__inf_read = 1;
262			if (agree < 8) {
263				good = (current == EOF)? cp + 3 - agree :
264				    cp + 2 - agree;
265				form = inf_form;
266			} else {
267				good = (current == EOF)? cp : cp - 1;
268				form = infinity_form;
269			}
270			/*
271			 * Accept trailing blanks if no extra characters
272			 * intervene.
273			 */
274			if (fortran_conventions >= 2 && (agree == 3 ||
275			    agree == 8)) {
276				while (current == ' ') {
277					NEXT;
278				}
279				good = (current == EOF)? cp : cp - 1;
280			}
281			goto done;
282
283		case 'n':
284		case 'N':
285			/* look for nan or nan(string) */
286			NEXT;
287			agree = 1;
288			while (agree <= 2 &&
289			    UCASE(current) == nanstring[agree]) {
290				NEXT;
291				agree++;
292			}
293			if (agree < 3)
294				goto done;
295			/* found valid NaN */
296			good = (current == EOF)? cp : cp - 1;
297			pd->fpclass = fp_quiet;
298			form = nan_form;
299			sigfound = 1;
300			__nan_read = 1;
301			if (current == '(') {
302				/* accept parenthesized string */
303				NEXT;
304				if (fortran_conventions < 0) {
305					while ((isalnum(current) ||
306					    current == '_') &&
307					    ids < DECIMAL_STRING_LENGTH - 1) {
308						pd->ds[ids++] = (char)current;
309						NEXT;
310					}
311					while (isalnum(current) ||
312					    current == '_') {
313						pd->more = 1;
314						NEXT;
315					}
316				} else {
317					while (current > 0 && current != ')' &&
318					    ids < DECIMAL_STRING_LENGTH - 1) {
319						pd->ds[ids++] = (char)current;
320						NEXT;
321					}
322					while (current > 0 && current != ')') {
323						pd->more = 1;
324						NEXT;
325					}
326				}
327				if (current != ')')
328					goto done;
329				good = cp;
330				form = nanstring_form;
331				/* prepare for loop below */
332				if (fortran_conventions >= 2) {
333					NEXT;
334				}
335			}
336			/* accept trailing blanks */
337			if (fortran_conventions >= 2) {
338				while (current == ' ') {
339					NEXT;
340				}
341				good = (current == EOF)? cp : cp - 1;
342			}
343			goto done;
344
345		default:
346			if (current == decpt) {
347				/*
348				 * Don't accept the radix point just yet;
349				 * we need to see at least one digit.
350				 */
351				NEXT;
352				goto afterpoint;
353			}
354			goto done;
355		}
356	}
357
358nextnumber:
359	/*
360	 * Admissible characters after the first digit are a valid digit,
361	 * an exponent delimiter (E or e for any decimal form; +, -, D, d,
362	 * Q, or q when fortran_conventions >= 2; P or p for hex form),
363	 * or the radix point.  (Note that we can't get here unless we've
364	 * already found a digit.)
365	 */
366	if (NZDIGIT(current)) {
367		/*
368		 * Found another nonzero digit.  If there's enough room
369		 * in pd->ds, store any intervening zeros we've found so far
370		 * and then store this digit.  Otherwise, stop storing
371		 * digits in pd->ds and set pd->more.
372		 */
373		if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) {
374			for (i = 0; i < nzbp; i++)
375				pd->ds[ids++] = '0';
376			pd->ds[ids++] = (char)current;
377		} else {
378			pd->exponent += (nzbp + 1) << expshift;
379			pd->more = 1;
380			if (ids < DECIMAL_STRING_LENGTH) {
381				pd->ds[ids] = '\0';
382				pd->ndigits = ids;
383				/* don't store any more digits */
384				ids = DECIMAL_STRING_LENGTH;
385			}
386		}
387		pd->fpclass = fp_normal;
388		sigfound = 1;
389		nzbp = 0;
390		NEXT;
391
392		/*
393		 * Use an optimized loop to grab a consecutive sequence
394		 * of nonzero digits quickly.
395		 */
396		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
397		for (nfast = 0, pfast = &(pd->ds[ids]);
398		    nfast < nfastlimit && NZDIGIT(current);
399		    nfast++) {
400			*pfast++ = (char)current;
401			NEXT;
402		}
403		ids += nfast;
404		if (current == '0')
405			goto nextnumberzero;	/* common case */
406		/* advance good to the last accepted digit */
407		good = (current == EOF)? cp : cp - 1;
408		goto nextnumber;
409	} else {
410		switch (current) {
411		case ' ':
412			if (fortran_conventions < 2)
413				goto done;
414			if (fortran_conventions == 2) {
415				while (current == ' ') {
416					NEXT;
417				}
418				good = (current == EOF)? cp : cp - 1;
419				goto nextnumber;
420			}
421			/*
422			 * When fortran_conventions > 2, treat internal
423			 * blanks the same as zeroes.
424			 */
425			/*FALLTHRU*/
426
427		case '0':
428nextnumberzero:
429			/*
430			 * Count zeros before the radix point.  Later we
431			 * will either put these zeros into pd->ds or add
432			 * nzbp to pd->exponent to account for them.
433			 */
434			while (current == '0' || (current == ' ' &&
435			    fortran_conventions > 2)) {
436				nzbp++;
437				NEXT;
438			}
439			good = (current == EOF)? cp : cp - 1;
440			goto nextnumber;
441
442		case '+':
443		case '-':
444		case 'D':
445		case 'd':
446		case 'Q':
447		case 'q':
448			/*
449			 * Only accept these as the start of the exponent
450			 * field if fortran_conventions is positive.
451			 */
452			if (fortran_conventions <= 0)
453				goto done;
454			/*FALLTHRU*/
455
456		case 'E':
457		case 'e':
458			if ((int)form < 0)
459				goto done;
460			goto exponent;
461
462		case 'P':
463		case 'p':
464			if ((int)form > 0)
465				goto done;
466			goto exponent;
467
468		default:
469			if (current == decpt) {
470				/* accept the radix point */
471				good = cp;
472				if (form == fixed_int_form)
473					form = fixed_intdot_form;
474				NEXT;
475				goto afterpoint;
476			}
477			goto done;
478		}
479	}
480
481afterpoint:
482	/*
483	 * Admissible characters after the radix point are a valid digit
484	 * or an exponent delimiter.  (Note that it is possible to get
485	 * here even though we haven't found any digits yet.)
486	 */
487	if (NZDIGIT(current)) {
488		/* found a digit after the point; revise form */
489		if (form == invalid_form || form == whitespace_form)
490			form = fixed_dotfrac_form;
491		else if (form == fixed_intdot_form)
492			form = fixed_intdotfrac_form;
493		good = cp;
494		if (sigfound < 1) {
495			/* no significant digits found until now */
496			pd->fpclass = fp_normal;
497			sigfound = 1;
498			pd->ds[ids++] = (char)current;
499			pd->exponent = (-(nzap + 1)) << expshift;
500		} else {
501			/* significant digits have been found */
502			if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) {
503				for (i = 0; i < nzbp + nzap; i++)
504					pd->ds[ids++] = '0';
505				pd->ds[ids++] = (char)current;
506				pd->exponent -= (nzap + 1) << expshift;
507			} else {
508				pd->exponent += nzbp << expshift;
509				pd->more = 1;
510				if (ids < DECIMAL_STRING_LENGTH) {
511					pd->ds[ids] = '\0';
512					pd->ndigits = ids;
513					/* don't store any more digits */
514					ids = DECIMAL_STRING_LENGTH;
515				}
516			}
517		}
518		nzbp = 0;
519		nzap = 0;
520		NEXT;
521
522		/*
523		 * Use an optimized loop to grab a consecutive sequence
524		 * of nonzero digits quickly.
525		 */
526		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
527		for (nfast = 0, pfast = &(pd->ds[ids]);
528		    nfast < nfastlimit && NZDIGIT(current);
529		    nfast++) {
530			*pfast++ = (char)current;
531			NEXT;
532		}
533		ids += nfast;
534		pd->exponent -= nfast << expshift;
535		if (current == '0')
536			goto zeroafterpoint;
537		/* advance good to the last accepted digit */
538		good = (current == EOF)? cp : cp - 1;
539		goto afterpoint;
540	} else {
541		switch (current) {
542		case ' ':
543			if (fortran_conventions < 2)
544				goto done;
545			if (fortran_conventions == 2) {
546				/*
547				 * Treat a radix point followed by blanks
548				 * but no digits as zero so we'll pass FCVS.
549				 */
550				if (sigfound == -1) {
551					pd->fpclass = fp_zero;
552					sigfound = 0;
553				}
554				while (current == ' ') {
555					NEXT;
556				}
557				good = (current == EOF)? cp : cp - 1;
558				goto afterpoint;
559			}
560			/*
561			 * when fortran_conventions > 2, treat internal
562			 * blanks the same as zeroes
563			 */
564			/*FALLTHRU*/
565
566		case '0':
567			/* found a digit after the point; revise form */
568			if (form == invalid_form || form == whitespace_form)
569				form = fixed_dotfrac_form;
570			else if (form == fixed_intdot_form)
571				form = fixed_intdotfrac_form;
572			if (sigfound == -1) {
573				pd->fpclass = fp_zero;
574				sigfound = 0;
575			}
576zeroafterpoint:
577			/*
578			 * Count zeros after the radix point.  If we find
579			 * any more nonzero digits later, we will put these
580			 * zeros into pd->ds and decrease pd->exponent by
581			 * nzap.
582			 */
583			while (current == '0' || (current == ' ' &&
584			    fortran_conventions > 2)) {
585				nzap++;
586				NEXT;
587			}
588			if (current == EOF) {
589				good = cp;
590				goto done;
591			} else {
592				good = cp - 1;
593			}
594			goto afterpoint;
595
596		case '+':
597		case '-':
598		case 'D':
599		case 'd':
600		case 'Q':
601		case 'q':
602			/*
603			 * Only accept these as the start of the exponent
604			 * field if fortran_conventions is positive.
605			 */
606			if (fortran_conventions <= 0)
607				goto done;
608			/*FALLTHRU*/
609
610		case 'E':
611		case 'e':
612			/* don't accept exponent without preceding digits */
613			if (sigfound == -1 || (int)form < 0)
614				goto done;
615			break;
616
617		case 'P':
618		case 'p':
619			/* don't accept exponent without preceding digits */
620			if (sigfound == -1 || (int)form > 0)
621				goto done;
622			break;
623
624		default:
625			goto done;
626		}
627	}
628
629exponent:
630	/*
631	 * Set *pechar to point to the character that looks like the
632	 * beginning of the exponent field, then attempt to parse it.
633	 */
634	*pechar = cp;
635	if (current != '+' && current != '-') {
636		/* skip the exponent character and following blanks */
637		NEXT;
638		if (fortran_conventions >= 2 && current == ' ') {
639			while (current == ' ') {
640				NEXT;
641			}
642			if (fortran_conventions > 2)
643				good = (current == EOF)? cp : cp - 1;
644		}
645	}
646
647	e = 0;
648	esign = 0;
649
650	/* look for optional exponent sign */
651	if (current == '+') {
652		NEXT;
653	} else if (current == '-') {
654		esign = 1;
655		NEXT;
656	}
657
658	/*
659	 * Accumulate explicit exponent.  Note that if we don't find at
660	 * least one digit, good won't be updated and e will remain 0.
661	 * Also, we keep e from getting too large so we don't overflow
662	 * the range of int (but notice that the threshold is large
663	 * enough that any larger e would cause the result to underflow
664	 * or overflow anyway).
665	 */
666	while (('0' <= current && current <= '9') || current == ' ') {
667		if (current == ' ') {
668			if (fortran_conventions < 2)
669				break;
670			if (fortran_conventions == 2) {
671				NEXT;
672				continue;
673			}
674			current = '0';
675		}
676		good = cp;
677		if (e <= 1000000)
678			e = 10 * e + current - '0';
679		NEXT;
680		if (fortran_conventions == 2 && current == ' ') {
681			/* accept trailing blanks */
682			while (current == ' ') {
683				NEXT;
684			}
685			good = (current == EOF)? cp : cp - 1;
686		}
687	}
688	if (esign == 1)
689		pd->exponent -= e;
690	else
691		pd->exponent += e;
692
693	/*
694	 * If we successfully parsed an exponent field, update form
695	 * accordingly.  If we didn't, don't set *pechar.
696	 */
697	if (good >= *pechar) {
698		switch (form) {
699		case whitespace_form:
700		case fixed_int_form:
701			form = floating_int_form;
702			break;
703
704		case fixed_intdot_form:
705			form = floating_intdot_form;
706			break;
707
708		case fixed_dotfrac_form:
709			form = floating_dotfrac_form;
710			break;
711
712		case fixed_intdotfrac_form:
713			form = floating_intdotfrac_form;
714			break;
715		}
716	} else {
717		*pechar = NULL;
718	}
719
720done:
721	/*
722	 * If we found any zeros before the radix point that were not
723	 * accounted for earlier, adjust the exponent.  (This is only
724	 * relevant when pd->fpclass == fp_normal, but it's harmless
725	 * in all other cases.)
726	 */
727	pd->exponent += nzbp << expshift;
728
729	/* terminate pd->ds if we haven't already */
730	if (ids < DECIMAL_STRING_LENGTH) {
731		pd->ds[ids] = '\0';
732		pd->ndigits = ids;
733	}
734
735	/*
736	 * If we accepted any characters, advance *ppc to point to the
737	 * first character we didn't accept; otherwise, pass back a
738	 * signaling nan.
739	 */
740	if (good >= *ppc) {
741		*ppc = good + 1;
742	} else {
743		pd->fpclass = fp_signaling;
744		pd->sign = 0;
745		form = invalid_form;
746	}
747
748	*pform = form;
749}
750