1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * This file contains the common part of the functions string_to_decimal,
31  * func_to_decimal, and file_to_decimal.  Much of this code has been dupli-
32  * cated in wstring_to_decimal (see wstod.c) with some simplifications and
33  * appropriate modifications for wide characters.  DO NOT fix a bug here
34  * without fixing the same bug in wstring_to_decimal, if it applies.
35  *
36  * The code below makes the following assumptions.
37  *
38  * 1. The first six parameters to the function are declared with the
39  *    following names and types:
40  *
41  *    char **ppc;
42  *    int nmax;
43  *    int fortran_conventions;
44  *    decimal_record *pd;
45  *    enum decimal_string_form *pform;
46  *    char **pechar;
47  *
48  * 2. Before this file is #included, the following variables have been
49  *    defined and initialized as shown:
50  *
51  *    char *cp;
52  *    char *good = *ppc - 1;
53  *    int current;
54  *    int nread;
55  *
56  *    If the first character can be read successfully, then current is set
57  *    to the value of the first character, cp is set to *ppc, (char)current
58  *    is stored at *cp, and nread = 1.  If the first character cannot be
59  *    read successfully, then current = EOF and nread = 0.
60  *
61  * 3. The macro NEXT is defined to expand to code that implements
62  *    the following logic:
63  *
64  *      if (nread < nmax) {
65  *          current = <next character>;
66  *          if (current != EOF) {
67  *             *++cp = (char)current;
68  *             nread++;
69  *          }
70  *      } else
71  *          current = EOF;
72  *
73  *    Note that nread always reflects the number of characters successfully
74  *    read, the buffer pointed to by *ppc gets filled only with characters
75  *    that have been successfully read, and cp always points to the location
76  *    in the buffer that was filled by the last character successfully read.
77  *    current == EOF if and only if we can't read any more, either because
78  *    we've reached the end of the input file or the buffer is full (i.e.,
79  *    we've read nmax characters).
80  *
81  * 4. After this file is #included, the following variables may be used
82  *    and will have the specified values:
83  *
84  *    *ppc, *pd, *pform, and *pechar will be set as documented in the
85  *      manual page;
86  *    nmax and fortran_conventions will be unchanged;
87  *    nread will be the number of characters actually read;
88  *    cp will point to the last character actually read, provided at least
89  *      one character was read successfully (in which case cp >= *ppc).
90  */
91 
92 #define	UCASE(c) ((('a' <= c) && (c <= 'z'))? c - 32 : c)
93 
94 #define	NZDIGIT(c)	(('1' <= c && c <= '9') || ((int)form < 0 && \
95 			(('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))))
96 
97 {
98 	static const char    *infstring = "INFINITY";
99 	static const char    *nanstring = "NAN";
100 
101 	int	sigfound, spacefound = 0;
102 	int	ids = 0;
103 	int	i, agree;
104 	int	nzbp = 0; /* number of zeros before point */
105 	int	nzap = 0; /* number of zeros after point */
106 	char	decpt;
107 	int	nfast, nfastlimit;
108 	char	*pfast;
109 	int	e, esign;
110 	int	expshift = 0;
111 	enum decimal_string_form	form;
112 
113 	/*
114 	 * This routine assumes that the radix point is a single
115 	 * ASCII character, so that following this assignment, the
116 	 * condition (current == decpt) will correctly detect it.
117 	 */
118 	if (fortran_conventions > 0)
119 		decpt = '.';
120 	else
121 		decpt = *(localeconv()->decimal_point);
122 
123 	/* input is invalid until we find something */
124 	pd->fpclass = fp_signaling;
125 	pd->sign = 0;
126 	pd->exponent = 0;
127 	pd->ds[0] = '\0';
128 	pd->more = 0;
129 	pd->ndigits = 0;
130 	*pform = form = invalid_form;
131 	*pechar = NULL;
132 
133 	/* skip white space */
134 	while (isspace(current)) {
135 		spacefound = 1;
136 		NEXT;
137 	}
138 
139 	if (fortran_conventions >= 2 && spacefound) {
140 		/*
141 		 * We found at least one white space character.  For
142 		 * Fortran formatted input, accept this; if we don't
143 		 * find anything else, we'll interpret it as a valid zero.
144 		 */
145 		pd->fpclass = fp_zero;
146 		form = whitespace_form;
147 		sigfound = 0;		/* 0 = only zeros found so far */
148 		if (current == EOF) {
149 			good = cp;
150 			goto done;
151 		} else {
152 			good = cp - 1;
153 		}
154 	} else {
155 		sigfound = -1;		/* -1 = no digits found yet */
156 	}
157 
158 	/* look for optional leading sign */
159 	if (current == '+') {
160 		NEXT;
161 	} else if (current == '-') {
162 		pd->sign = 1;
163 		NEXT;
164 	}
165 
166 	/*
167 	 * Admissible first non-white-space, non-sign characters are
168 	 * 0-9, i, I, n, N, or the radix point.
169 	 */
170 	if ('1' <= current && current <= '9') {
171 		good = cp;
172 		pd->fpclass = fp_normal;
173 		form = fixed_int_form;
174 		sigfound = 1;		/* 1 = significant digits found */
175 		pd->ds[ids++] = (char)current;
176 		NEXT;
177 	} else {
178 		switch (current) {
179 		case ' ':
180 			if (fortran_conventions < 2)
181 				goto done;
182 			/*
183 			 * When fortran_conventions >= 2, treat leading
184 			 * blanks the same as leading zeroes.
185 			 */
186 			/*FALLTHRU*/
187 
188 		case '0':
189 			/*
190 			 * Accept the leading zero and set pd->fpclass
191 			 * accordingly, but don't set sigfound until we
192 			 * determine that this isn't a "fake" hex string
193 			 * (i.e., 0x.p...).
194 			 */
195 			good = cp;
196 			pd->fpclass = fp_zero;
197 			if (fortran_conventions < 0) {
198 				/* look for a hex fp string */
199 				NEXT;
200 				if (current == 'X' || current == 'x') {
201 					/* assume hex fp form */
202 					form = (enum decimal_string_form)-1;
203 					expshift = 2;
204 					NEXT;
205 					/*
206 					 * Only a digit or radix point can
207 					 * follow "0x".
208 					 */
209 					if (NZDIGIT(current)) {
210 						pd->fpclass = fp_normal;
211 						good = cp;
212 						sigfound = 1;
213 						pd->ds[ids++] = (char)current;
214 						NEXT;
215 						break;
216 					} else if (current == decpt) {
217 						NEXT;
218 						goto afterpoint;
219 					} else if (current != '0') {
220 						/* not hex fp after all */
221 						form = fixed_int_form;
222 						expshift = 0;
223 						goto done;
224 					}
225 				} else {
226 					form = fixed_int_form;
227 				}
228 			} else {
229 				form = fixed_int_form;
230 			}
231 
232 			/* skip all leading zeros */
233 			while (current == '0' || (current == ' ' &&
234 			    fortran_conventions >= 2)) {
235 				NEXT;
236 			}
237 			sigfound = 0;	/* 0 = only zeros found so far */
238 			if (current == EOF) {
239 				good = cp;
240 				goto done;
241 			} else {
242 				good = cp - 1;
243 			}
244 			break;
245 
246 		case 'i':
247 		case 'I':
248 			/* look for inf or infinity */
249 			NEXT;
250 			agree = 1;
251 			while (agree <= 7 &&
252 			    UCASE(current) == infstring[agree]) {
253 				NEXT;
254 				agree++;
255 			}
256 			if (agree < 3)
257 				goto done;
258 			/* found valid infinity */
259 			pd->fpclass = fp_infinity;
260 			sigfound = 1;
261 			__inf_read = 1;
262 			if (agree < 8) {
263 				good = (current == EOF)? cp + 3 - agree :
264 				    cp + 2 - agree;
265 				form = inf_form;
266 			} else {
267 				good = (current == EOF)? cp : cp - 1;
268 				form = infinity_form;
269 			}
270 			/*
271 			 * Accept trailing blanks if no extra characters
272 			 * intervene.
273 			 */
274 			if (fortran_conventions >= 2 && (agree == 3 ||
275 			    agree == 8)) {
276 				while (current == ' ') {
277 					NEXT;
278 				}
279 				good = (current == EOF)? cp : cp - 1;
280 			}
281 			goto done;
282 
283 		case 'n':
284 		case 'N':
285 			/* look for nan or nan(string) */
286 			NEXT;
287 			agree = 1;
288 			while (agree <= 2 &&
289 			    UCASE(current) == nanstring[agree]) {
290 				NEXT;
291 				agree++;
292 			}
293 			if (agree < 3)
294 				goto done;
295 			/* found valid NaN */
296 			good = (current == EOF)? cp : cp - 1;
297 			pd->fpclass = fp_quiet;
298 			form = nan_form;
299 			sigfound = 1;
300 			__nan_read = 1;
301 			if (current == '(') {
302 				/* accept parenthesized string */
303 				NEXT;
304 				if (fortran_conventions < 0) {
305 					while ((isalnum(current) ||
306 					    current == '_') &&
307 					    ids < DECIMAL_STRING_LENGTH - 1) {
308 						pd->ds[ids++] = (char)current;
309 						NEXT;
310 					}
311 					while (isalnum(current) ||
312 					    current == '_') {
313 						pd->more = 1;
314 						NEXT;
315 					}
316 				} else {
317 					while (current > 0 && current != ')' &&
318 					    ids < DECIMAL_STRING_LENGTH - 1) {
319 						pd->ds[ids++] = (char)current;
320 						NEXT;
321 					}
322 					while (current > 0 && current != ')') {
323 						pd->more = 1;
324 						NEXT;
325 					}
326 				}
327 				if (current != ')')
328 					goto done;
329 				good = cp;
330 				form = nanstring_form;
331 				/* prepare for loop below */
332 				if (fortran_conventions >= 2) {
333 					NEXT;
334 				}
335 			}
336 			/* accept trailing blanks */
337 			if (fortran_conventions >= 2) {
338 				while (current == ' ') {
339 					NEXT;
340 				}
341 				good = (current == EOF)? cp : cp - 1;
342 			}
343 			goto done;
344 
345 		default:
346 			if (current == decpt) {
347 				/*
348 				 * Don't accept the radix point just yet;
349 				 * we need to see at least one digit.
350 				 */
351 				NEXT;
352 				goto afterpoint;
353 			}
354 			goto done;
355 		}
356 	}
357 
358 nextnumber:
359 	/*
360 	 * Admissible characters after the first digit are a valid digit,
361 	 * an exponent delimiter (E or e for any decimal form; +, -, D, d,
362 	 * Q, or q when fortran_conventions >= 2; P or p for hex form),
363 	 * or the radix point.  (Note that we can't get here unless we've
364 	 * already found a digit.)
365 	 */
366 	if (NZDIGIT(current)) {
367 		/*
368 		 * Found another nonzero digit.  If there's enough room
369 		 * in pd->ds, store any intervening zeros we've found so far
370 		 * and then store this digit.  Otherwise, stop storing
371 		 * digits in pd->ds and set pd->more.
372 		 */
373 		if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) {
374 			for (i = 0; i < nzbp; i++)
375 				pd->ds[ids++] = '0';
376 			pd->ds[ids++] = (char)current;
377 		} else {
378 			pd->exponent += (nzbp + 1) << expshift;
379 			pd->more = 1;
380 			if (ids < DECIMAL_STRING_LENGTH) {
381 				pd->ds[ids] = '\0';
382 				pd->ndigits = ids;
383 				/* don't store any more digits */
384 				ids = DECIMAL_STRING_LENGTH;
385 			}
386 		}
387 		pd->fpclass = fp_normal;
388 		sigfound = 1;
389 		nzbp = 0;
390 		NEXT;
391 
392 		/*
393 		 * Use an optimized loop to grab a consecutive sequence
394 		 * of nonzero digits quickly.
395 		 */
396 		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
397 		for (nfast = 0, pfast = &(pd->ds[ids]);
398 		    nfast < nfastlimit && NZDIGIT(current);
399 		    nfast++) {
400 			*pfast++ = (char)current;
401 			NEXT;
402 		}
403 		ids += nfast;
404 		if (current == '0')
405 			goto nextnumberzero;	/* common case */
406 		/* advance good to the last accepted digit */
407 		good = (current == EOF)? cp : cp - 1;
408 		goto nextnumber;
409 	} else {
410 		switch (current) {
411 		case ' ':
412 			if (fortran_conventions < 2)
413 				goto done;
414 			if (fortran_conventions == 2) {
415 				while (current == ' ') {
416 					NEXT;
417 				}
418 				good = (current == EOF)? cp : cp - 1;
419 				goto nextnumber;
420 			}
421 			/*
422 			 * When fortran_conventions > 2, treat internal
423 			 * blanks the same as zeroes.
424 			 */
425 			/*FALLTHRU*/
426 
427 		case '0':
428 nextnumberzero:
429 			/*
430 			 * Count zeros before the radix point.  Later we
431 			 * will either put these zeros into pd->ds or add
432 			 * nzbp to pd->exponent to account for them.
433 			 */
434 			while (current == '0' || (current == ' ' &&
435 			    fortran_conventions > 2)) {
436 				nzbp++;
437 				NEXT;
438 			}
439 			good = (current == EOF)? cp : cp - 1;
440 			goto nextnumber;
441 
442 		case '+':
443 		case '-':
444 		case 'D':
445 		case 'd':
446 		case 'Q':
447 		case 'q':
448 			/*
449 			 * Only accept these as the start of the exponent
450 			 * field if fortran_conventions is positive.
451 			 */
452 			if (fortran_conventions <= 0)
453 				goto done;
454 			/*FALLTHRU*/
455 
456 		case 'E':
457 		case 'e':
458 			if ((int)form < 0)
459 				goto done;
460 			goto exponent;
461 
462 		case 'P':
463 		case 'p':
464 			if ((int)form > 0)
465 				goto done;
466 			goto exponent;
467 
468 		default:
469 			if (current == decpt) {
470 				/* accept the radix point */
471 				good = cp;
472 				if (form == fixed_int_form)
473 					form = fixed_intdot_form;
474 				NEXT;
475 				goto afterpoint;
476 			}
477 			goto done;
478 		}
479 	}
480 
481 afterpoint:
482 	/*
483 	 * Admissible characters after the radix point are a valid digit
484 	 * or an exponent delimiter.  (Note that it is possible to get
485 	 * here even though we haven't found any digits yet.)
486 	 */
487 	if (NZDIGIT(current)) {
488 		/* found a digit after the point; revise form */
489 		if (form == invalid_form || form == whitespace_form)
490 			form = fixed_dotfrac_form;
491 		else if (form == fixed_intdot_form)
492 			form = fixed_intdotfrac_form;
493 		good = cp;
494 		if (sigfound < 1) {
495 			/* no significant digits found until now */
496 			pd->fpclass = fp_normal;
497 			sigfound = 1;
498 			pd->ds[ids++] = (char)current;
499 			pd->exponent = (-(nzap + 1)) << expshift;
500 		} else {
501 			/* significant digits have been found */
502 			if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) {
503 				for (i = 0; i < nzbp + nzap; i++)
504 					pd->ds[ids++] = '0';
505 				pd->ds[ids++] = (char)current;
506 				pd->exponent -= (nzap + 1) << expshift;
507 			} else {
508 				pd->exponent += nzbp << expshift;
509 				pd->more = 1;
510 				if (ids < DECIMAL_STRING_LENGTH) {
511 					pd->ds[ids] = '\0';
512 					pd->ndigits = ids;
513 					/* don't store any more digits */
514 					ids = DECIMAL_STRING_LENGTH;
515 				}
516 			}
517 		}
518 		nzbp = 0;
519 		nzap = 0;
520 		NEXT;
521 
522 		/*
523 		 * Use an optimized loop to grab a consecutive sequence
524 		 * of nonzero digits quickly.
525 		 */
526 		nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
527 		for (nfast = 0, pfast = &(pd->ds[ids]);
528 		    nfast < nfastlimit && NZDIGIT(current);
529 		    nfast++) {
530 			*pfast++ = (char)current;
531 			NEXT;
532 		}
533 		ids += nfast;
534 		pd->exponent -= nfast << expshift;
535 		if (current == '0')
536 			goto zeroafterpoint;
537 		/* advance good to the last accepted digit */
538 		good = (current == EOF)? cp : cp - 1;
539 		goto afterpoint;
540 	} else {
541 		switch (current) {
542 		case ' ':
543 			if (fortran_conventions < 2)
544 				goto done;
545 			if (fortran_conventions == 2) {
546 				/*
547 				 * Treat a radix point followed by blanks
548 				 * but no digits as zero so we'll pass FCVS.
549 				 */
550 				if (sigfound == -1) {
551 					pd->fpclass = fp_zero;
552 					sigfound = 0;
553 				}
554 				while (current == ' ') {
555 					NEXT;
556 				}
557 				good = (current == EOF)? cp : cp - 1;
558 				goto afterpoint;
559 			}
560 			/*
561 			 * when fortran_conventions > 2, treat internal
562 			 * blanks the same as zeroes
563 			 */
564 			/*FALLTHRU*/
565 
566 		case '0':
567 			/* found a digit after the point; revise form */
568 			if (form == invalid_form || form == whitespace_form)
569 				form = fixed_dotfrac_form;
570 			else if (form == fixed_intdot_form)
571 				form = fixed_intdotfrac_form;
572 			if (sigfound == -1) {
573 				pd->fpclass = fp_zero;
574 				sigfound = 0;
575 			}
576 zeroafterpoint:
577 			/*
578 			 * Count zeros after the radix point.  If we find
579 			 * any more nonzero digits later, we will put these
580 			 * zeros into pd->ds and decrease pd->exponent by
581 			 * nzap.
582 			 */
583 			while (current == '0' || (current == ' ' &&
584 			    fortran_conventions > 2)) {
585 				nzap++;
586 				NEXT;
587 			}
588 			if (current == EOF) {
589 				good = cp;
590 				goto done;
591 			} else {
592 				good = cp - 1;
593 			}
594 			goto afterpoint;
595 
596 		case '+':
597 		case '-':
598 		case 'D':
599 		case 'd':
600 		case 'Q':
601 		case 'q':
602 			/*
603 			 * Only accept these as the start of the exponent
604 			 * field if fortran_conventions is positive.
605 			 */
606 			if (fortran_conventions <= 0)
607 				goto done;
608 			/*FALLTHRU*/
609 
610 		case 'E':
611 		case 'e':
612 			/* don't accept exponent without preceding digits */
613 			if (sigfound == -1 || (int)form < 0)
614 				goto done;
615 			break;
616 
617 		case 'P':
618 		case 'p':
619 			/* don't accept exponent without preceding digits */
620 			if (sigfound == -1 || (int)form > 0)
621 				goto done;
622 			break;
623 
624 		default:
625 			goto done;
626 		}
627 	}
628 
629 exponent:
630 	/*
631 	 * Set *pechar to point to the character that looks like the
632 	 * beginning of the exponent field, then attempt to parse it.
633 	 */
634 	*pechar = cp;
635 	if (current != '+' && current != '-') {
636 		/* skip the exponent character and following blanks */
637 		NEXT;
638 		if (fortran_conventions >= 2 && current == ' ') {
639 			while (current == ' ') {
640 				NEXT;
641 			}
642 			if (fortran_conventions > 2)
643 				good = (current == EOF)? cp : cp - 1;
644 		}
645 	}
646 
647 	e = 0;
648 	esign = 0;
649 
650 	/* look for optional exponent sign */
651 	if (current == '+') {
652 		NEXT;
653 	} else if (current == '-') {
654 		esign = 1;
655 		NEXT;
656 	}
657 
658 	/*
659 	 * Accumulate explicit exponent.  Note that if we don't find at
660 	 * least one digit, good won't be updated and e will remain 0.
661 	 * Also, we keep e from getting too large so we don't overflow
662 	 * the range of int (but notice that the threshold is large
663 	 * enough that any larger e would cause the result to underflow
664 	 * or overflow anyway).
665 	 */
666 	while (('0' <= current && current <= '9') || current == ' ') {
667 		if (current == ' ') {
668 			if (fortran_conventions < 2)
669 				break;
670 			if (fortran_conventions == 2) {
671 				NEXT;
672 				continue;
673 			}
674 			current = '0';
675 		}
676 		good = cp;
677 		if (e <= 1000000)
678 			e = 10 * e + current - '0';
679 		NEXT;
680 		if (fortran_conventions == 2 && current == ' ') {
681 			/* accept trailing blanks */
682 			while (current == ' ') {
683 				NEXT;
684 			}
685 			good = (current == EOF)? cp : cp - 1;
686 		}
687 	}
688 	if (esign == 1)
689 		pd->exponent -= e;
690 	else
691 		pd->exponent += e;
692 
693 	/*
694 	 * If we successfully parsed an exponent field, update form
695 	 * accordingly.  If we didn't, don't set *pechar.
696 	 */
697 	if (good >= *pechar) {
698 		switch (form) {
699 		case whitespace_form:
700 		case fixed_int_form:
701 			form = floating_int_form;
702 			break;
703 
704 		case fixed_intdot_form:
705 			form = floating_intdot_form;
706 			break;
707 
708 		case fixed_dotfrac_form:
709 			form = floating_dotfrac_form;
710 			break;
711 
712 		case fixed_intdotfrac_form:
713 			form = floating_intdotfrac_form;
714 			break;
715 		}
716 	} else {
717 		*pechar = NULL;
718 	}
719 
720 done:
721 	/*
722 	 * If we found any zeros before the radix point that were not
723 	 * accounted for earlier, adjust the exponent.  (This is only
724 	 * relevant when pd->fpclass == fp_normal, but it's harmless
725 	 * in all other cases.)
726 	 */
727 	pd->exponent += nzbp << expshift;
728 
729 	/* terminate pd->ds if we haven't already */
730 	if (ids < DECIMAL_STRING_LENGTH) {
731 		pd->ds[ids] = '\0';
732 		pd->ndigits = ids;
733 	}
734 
735 	/*
736 	 * If we accepted any characters, advance *ppc to point to the
737 	 * first character we didn't accept; otherwise, pass back a
738 	 * signaling nan.
739 	 */
740 	if (good >= *ppc) {
741 		*ppc = good + 1;
742 	} else {
743 		pd->fpclass = fp_signaling;
744 		pd->sign = 0;
745 		form = invalid_form;
746 	}
747 
748 	*pform = form;
749 }
750