xref: /illumos-gate/usr/src/lib/libxcurses/src/libc/stdio/vfscanf.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 1996, by Sun Microsystems, Inc.
24  * All rights reserved.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * System V.2 Emulation Stdio Library -- vfscanf
31  *
32  * Copyright 1985, 1992 by Mortice Kern Systems Inc.  All rights reserved.
33  *
34  */
35 
36 #ifdef M_RCSID
37 #ifndef lint
38 static char rcsID[] = "$Id: vfscanf.c 1.27 1995/09/20 19:07:52 ant Exp $";
39 #endif
40 #endif
41 
42 #include <mks.h>
43 #include <ctype.h>
44 #include <limits.h>
45 #include <stdarg.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #ifdef __FLOAT__
49 #include <math.h>
50 #endif
51 
52 #define	CONVTYPE	1
53 #define STAR		2
54 #define PERCENT		3
55 #define NUMBER		4
56 #define MODCONVL	5
57 #define NSCAN		6
58 #define	BRACKET		7
59 #define MODCONVH	8
60 
61 #define BASE16	16
62 #define BASE10	10
63 #define BASE8	8
64 #define NOBASE	0
65 #define SIGNED	1
66 #define UNSIGNED 0
67 
68 #define	CBUFSIZ	100	/* size of character buffer for input conversion */
69 
70 struct lexlist {
71 	char	name;
72 	char	type;
73 };
74 static struct lexlist *lexp;
75 static struct lexlist lexlist[] ={
76 	'*',	STAR,
77 	'%',	PERCENT,
78 	'l',	MODCONVL,
79 	'h',	MODCONVH,
80 	'n',	NSCAN,
81 	'[',	BRACKET,
82 	'd',	CONVTYPE,
83 	'S',	CONVTYPE,	/* dummy entry (for multibyte characters) */
84 	's',	CONVTYPE,
85 	'u',	CONVTYPE,
86 	'c',	CONVTYPE,
87 	'x',	CONVTYPE,
88 	'o',	CONVTYPE,
89 	'0',	NUMBER,
90 	'1',	NUMBER,
91 	'2',	NUMBER,
92 	'3',	NUMBER,
93 	'4',	NUMBER,
94 	'5',	NUMBER,
95 	'6',	NUMBER,
96 	'7',	NUMBER,
97 	'8',	NUMBER,
98 	'9',	NUMBER,
99 	'i',	CONVTYPE,
100 	'f',	CONVTYPE,
101 	'e',	CONVTYPE,
102 	'g',	CONVTYPE,
103 	0,	0
104 };
105 
106 static int	scan(int, const char *, const char *);
107 static	int	gettoken(void);
108 static	void	whitespace(void);
109 static	int	match(const char *, char *);
110 static	long	unsigned	getnum(int, int, int);
111 static	int	getin(void);
112 static	void	unget(int);
113 #ifdef	__FLOAT__
114 static	double	lstrtod(void);
115 #endif
116 
/*
 * Scanner state shared by mks_vfscanf() and its helpers.
 */

/* Input side. */
static	int	ungot;		/* one-character pushback used by getin/unget */
static	FILE	*fpin;		/* input file pointer */

/* Format side. */
static	int	pflag;		/* non-zero while inside a conversion description */
static	int	width;		/* field width value */
static	const	char	*fmtptr;	/* format string pointer */

/* Bookkeeping. */
static	int	charcnt;	/* number of characters scanned (for %n) */
static	int	from;		/* token type we've come from */
static	int	gfail;		/* getnum() fail flag, non-zero for fail */
125 
126 /*
127  * Convert formatted input from given input.
128  * This is the workhorse for scanf, sscanf, and fscanf.
129  * Returns the number of matched and assigned input items.
130  */
131 int
132 mks_vfscanf(FILE *pfin, const char *fmt, va_list ap)
133 {
134 	int	nitems;
135 	int	ltoken;
136 	int	c;
137 	int	modconv;	/* flag indicating conversion modifier */
138 	int	suppression;	/* flag to suppress conversion */
139 
140 	long unsigned number;	/* return value from getnumber */
141 
142 	ungot = EOF;
143 	fpin = pfin;
144 	fmtptr = fmt;
145 	from = 'X';
146 	nitems = 0;
147 	charcnt = 0;
148 
149 	for (;;) {
150 		if (from == 'X') {
151 			pflag = 0;
152 			modconv = 0;
153 			suppression = 0;
154 			width = 0;
155 		}
156 		ltoken = gettoken();
157 
158 		switch (ltoken) {
159 
160 		case 0:
161 			goto retitems;
162 
163 		case MODCONVL:
164 		case MODCONVH:
165 			switch (from) {
166 
167 			case 'A':
168 			case 'D':
169 			case 'P':
170 				from = 'E';
171 				modconv = ltoken;
172 				break;
173 			default:
174 				from = 'X';
175 				break;
176 			}
177 			break;
178 
179 		case CONVTYPE:
180 			switch (from) {
181 
182 			int	intassign;
183 
184 			case 'E':
185 			case 'P':
186 			case 'D':
187 			case 'A':
188 				from = 'X';
189 				intassign = 1;
190 				pflag = 0;
191 
192 				switch (lexp->name) {
193 
194 				case 'd':
195 					number = getnum(BASE10, width, SIGNED);
196 					if (gfail)
197 						goto retitems;
198 					break;
199 				case 'u':
200 					number = getnum(BASE10, width, UNSIGNED);
201 					if (gfail)
202 						goto retitems;
203 					break;
204 				case 'x':
205 					number = getnum(BASE16, width, SIGNED);
206 					if (gfail)
207 						goto retitems;
208 					break;
209 				case 'o':
210 					number = getnum(BASE8, width, SIGNED);
211 					if (gfail)
212 						goto retitems;
213 					break;
214 				case 'i':
215 					number = getnum(NOBASE, width, SIGNED);
216 					if (gfail)
217 						goto retitems;
218 					break;
219 				case 'c':
220 				/* 'S' dummy entry (for multibyte characters) */
221 				case 'S':
222 				case 's': {
223 					int gotitem = 0;
224 					char	*str;
225 
226 					if (!suppression)
227 						str = va_arg(ap, char *);
228 
229 					/* Input whitespace is not skipped
230 					 * for %c, which implies that %c
231 					 * can return whitespace.
232 					 */
233 					if (lexp->name != 'c')
234 						whitespace();
235 					for (;;) {
236 						c = getin();
237 
238 						/* Only %s and %S stop on
239 						 * whitespace.
240 						 */
241 						if (lexp->name != 'c' && isspace(c)) {
242 							unget(c);
243 							break;
244 						}
245 						if (c == EOF) {
246 							if(!gotitem)
247 								goto retitems;
248 							break;
249 						}
250 
251 						gotitem = 1;
252 						if (!suppression)
253 							*str++ = c;
254 
255 						if (width) {
256 							if (--width == 0)
257 								break;
258 						}
259 					}
260 
261 					/*
262 					 * ANSI C states that %c does not
263 					 * terminate with a null character.
264 					 */
265 					if (!suppression && lexp->name != 'c')
266 						*str = '\0';
267 					intassign = 0;
268 					break;
269 				}
270 #ifdef	__FLOAT__
271 				case 'f':
272 				case 'g':
273 				case 'e': {
274 					double	fresult;
275 
276 					fresult = lstrtod();
277 					if(gfail)
278 						goto retitems;
279 					if(suppression)
280 						break;
281 					if (modconv == MODCONVL)
282 						*(double *)va_arg(ap, double *) = fresult;
283 					else
284 						*(float *)va_arg(ap, float *) = (float)fresult;
285 					/*FALLTHROUGH*/
286 				}
287 #else	/* !__FLOAT__ */
288 				case 'f':
289 				case 'g':
290 				case 'e':
291 #endif	/* __FLOAT__ */
292 				default:
293 					intassign = 0;
294 					break;
295 				}
296 
297 				if (suppression)
298 					break;
299 				else
300 					nitems++;
301 
302 				if (intassign == 0)
303 					break;
304 
305 				switch (modconv) {
306 
307 				case MODCONVH:
308 					*(short *)va_arg(ap, short *) = (short)number;
309 					break;
310 				case MODCONVL:
311 					*(long *)va_arg(ap, long *) = (long)number;
312 					break;
313 				default:
314 					*(int *)va_arg(ap, int *) = (int)number;
315 					break;
316 				}
317 				break;
318 			default:
319 				from = 'X';
320 				break;
321 			}
322 			break;
323 
324 		case STAR:
325 			if (from == 'P') {
326 				from = 'A';
327 				suppression = 1;
328 			} else {
329 				from = 'X';
330 			}
331 			break;
332 
333 		case PERCENT:
334 			if (from == 'P') {
335 				from = 'X';
336 				pflag = 0;
337 				c = getin();
338 				if (c != '%')
339 					goto retitems;
340 			} else {
341 				from = 'X';
342 			}
343 			break;
344 
345 		case NUMBER:
346 			if (from == 'P' || from == 'A') {
347 				from = 'D';
348 			} else {
349 				from = 'X';
350 			}
351 			break;
352 
353 		case NSCAN:
354 			if (from == 'P') {
355 				pflag = 0;
356 				if (!suppression) {
357 					*(int *)va_arg(ap, int *) = charcnt;
358 				}
359 			}
360 			from = 'X';
361 			break;
362 
363 		case BRACKET:
364 			switch (from) {
365 
366 			case 'A':
367 			case 'D':
368 			case 'P': {
369 				char *ptr;
370 
371 				pflag = 0;
372 				if (width == 0)
373 					width = INT_MAX;
374 				ptr = suppression ? NULL : va_arg(ap, char *);
375 				if (match(fmtptr, ptr) && !feof(fpin)
376 				&& !suppression)
377 					nitems++;
378 				while (*fmtptr++ != ']')
379 					;
380 				break;
381 			}
382 			default:
383 				break;
384 			}
385 			from = 'X';
386 			break;
387 
388 		default:
389 			c = *(fmtptr-1);
390 			if (c == ' ' || c == '\t' || c == '\n' || c == '\f')
391 				whitespace();
392 			else {
393 				c = getin();
394 
395 				if (c != *(fmtptr-1))
396 					goto retitems;
397 			}
398 			from = 'X';
399 			break;
400 		}
401 	}
402 retitems:
403 	if (ungot != EOF) {
404 		ungetc(ungot, fpin);
405 		ungot = EOF;
406 	}
407 	return nitems==0 ? EOF : nitems;
408 }
409 
410 static int
411 gettoken()
412 {
413 	char	c;
414 
415 	if (*fmtptr == 0)
416 		return 0;	/* return 0 for end of string */
417 
418 	c = *fmtptr++;
419 
420 	if (pflag) {
421 		for(lexp=lexlist; lexp->name != 0; lexp++) {
422 			if (c == lexp->name) {
423 				if (lexp->type == NUMBER) {
424 					width = (int) strtol(fmtptr-1, (char **)0, BASE10);
425 					while (*fmtptr >= '0' && *fmtptr <= '9')
426 						fmtptr++;
427 				} else if (c == 'c') {
428 					/* No width specified for %c, default
429 					 * is one.
430 					 */
431 					width = 1;
432 				}
433 				return lexp->type;
434 			}
435 		}
436 		return -1;
437 	}
438 
439 	if (c == '%') {
440 		pflag = 1;
441 		from = 'P';
442 		return gettoken();
443 	}
444 	return -1;
445 }
446 
/*
 * Skip over whitespace in the input.  The first character that is not
 * whitespace (which may be EOF) is pushed back for the next reader.
 */
static void
whitespace(void)
{
	int	ch;

	for (;;) {
		ch = getin();
		if (!isspace(ch))
			break;
	}

	unget(ch);
}
458 
/*
 * Report whether the input character ch occurs in [str, estr).
 *
 * ch is a getc()-style value: an unsigned char converted to int, or EOF.
 * Each set member is therefore compared as an unsigned char, so that
 * bytes of 0x80 and above match even where plain char is signed.
 * EOF never matches.
 *
 * Returns 1 if ch is a member of the set, 0 otherwise.
 */
static int
scan(int ch, const char *str, const char *estr)
{
	for (; str < estr; ++str) {
		if ((int)(unsigned char)*str == ch)
			return 1;
	}

	return 0;
}
468 
469 static int
470 match(const char *str, char *outstr)
471 {
472 	int	complement;
473 	int	i;
474 	char	start, end;
475 	int	c;
476 	const	char	*bscan, *escan;
477 
478 	if (*str == '^') {
479 		complement = 1;
480 		str++;
481 	} else
482 		complement = 0;
483 
484 	start = *str++;
485 	end = 0;
486 	if (*str == '-') {
487 		if (str[2] == ']')
488 			end = str[1];
489 	}
490 	if (start > end) {
491 		bscan = str - 1;
492 		while (*str++ != ']')
493 			;
494 		escan = str - 1;
495 
496 		for (i=0; i<width; i++) {
497 			if ((c = getin()) == EOF)
498 				return 0;
499 			if (!scan(c, bscan, escan) ^ complement)
500 				break;
501 			if (outstr != NULL)
502 				*outstr++ = c;
503 		}
504 	} else {
505 		for (i=0; i<width; i++) {
506 			c = getin();
507 			if (complement) {
508 				if (c >= start && c <= end)
509 					break;
510 				else if (outstr != NULL)
511 					*outstr++ = c;
512 			} else {
513 				if (c < start || c > end)
514 					break;
515 				else if (outstr != NULL)
516 					*outstr++ = c;
517 			}
518 		}
519 	}
520 
521 	if (i < width)
522 		unget(c);
523 
524 	if (outstr != NULL)
525 		*outstr = '\0';
526 	return (i > 1);
527 }
528 
529 /*
530  * Get a number from the input stream.
531  * The base, if zero, will be determined by the nature of the number.
532  * A leading 0x means hexadecimal, a leading 0 for octal, otherwise decimal.
533  *
534  * if the width is 0 then the max input string length of number is used.
535  *
536  * The sign tell us that a signed number is expected (rather than the
537  *	'u' conversion type which is unsigned).
538  */
539 static long unsigned
540 getnum(int base, int width, int sign)
541 {
542 	char	*s;
543 	char	cbuf[CBUFSIZ];			/* char buffer for number */
544 	int	w;
545 	register int	c;
546 	int	neg;
547 	long	ret;
548 
549 	gfail = 0;
550 	whitespace();
551 
552 	if (width == 0)
553 		width = sizeof cbuf;
554 
555 	neg = 0;
556 	if (sign) {
557 		c = getin();
558 		if (c == '+' || c == '-')
559 			neg = c=='-' ? 1 : 0;
560 		else
561 			unget(c);
562 	}
563 
564 	if (base == 0) {
565 		base = 10;
566 		c = getin();
567 		if (c == '0') {
568 			base = 8;
569 			c = getin();
570 			if (c == 'X' || c == 'x')
571 				base = 16;
572 			else
573 				unget(c);
574 		} else
575 			unget(c);
576 	}
577 	if (base == 10) {
578 		w = 0;
579 		s = cbuf;
580 		while (w < width && w < sizeof cbuf) {
581 			c = getin();
582 			switch (c) {
583 
584 			case '0':
585 			case '1':
586 			case '2':
587 			case '3':
588 			case '4':
589 			case '5':
590 			case '6':
591 			case '7':
592 			case '8':
593 			case '9':
594 				*s++ = c;
595 				w++;
596 				continue;
597 			default:
598 				unget(c);
599 				w = width;	/* force end of loop */
600 				break;
601 			}
602 		}
603 		*s = '\0';
604 		ret = strtol(cbuf, (char **)0, 10);
605 		goto retn;
606 	}
607 	if (base == 8) {
608 		w = 0;
609 		s = cbuf;
610 		while (w < width && w < sizeof cbuf) {
611 			c = getin();
612 			switch (c) {
613 
614 			case '0':
615 			case '1':
616 			case '2':
617 			case '3':
618 			case '4':
619 			case '5':
620 			case '6':
621 			case '7':
622 				*s++ = c;
623 				w++;
624 				continue;
625 			default:
626 				unget(c);
627 				w = width;	/* force end of loop */
628 				break;
629 			}
630 		}
631 		*s = '\0';
632 		ret = strtol(cbuf, (char **)0, 8);
633 		goto retn;
634 	}
635 	if (base == 16) {
636 		w = 0;
637 		s = cbuf;
638 		while (w < width && w < sizeof cbuf) {
639 			c = getin();
640 			c = toupper(c);
641 			switch (c) {
642 
643 			case '0':
644 			case '1':
645 			case '2':
646 			case '3':
647 			case '4':
648 			case '5':
649 			case '6':
650 			case '7':
651 			case '8':
652 			case '9':
653 			case 'A':
654 			case 'B':
655 			case 'C':
656 			case 'D':
657 			case 'E':
658 			case 'F':
659 				*s++ = c;
660 				w++;
661 				continue;
662 			default:
663 				unget(c);
664 				w = width;	/* force end of loop */
665 				break;
666 			}
667 		}
668 		*s = '\0';
669 		ret = strtol(cbuf, (char **)0, 16);
670 		goto retn;
671 	}
672 
673 /*
674  * if we get this far then a bad base was passed.
675  */
676 	gfail = -1;
677 
678 retn:
679 	if (*cbuf == '\0')	/* No number at all?? */
680 		gfail = -1;
681 	if (neg)
682 		ret = -ret;
683 	return ret;
684 }
685 
686 #ifdef	__FLOAT__
687 static double
688 lstrtod()
689 {
690 	int	slen;
691 	int	neg, eneg;
692 	char	cbuf[CBUFSIZ];
693 	register int	c;
694 	register char	*sp, *s1, *s2, *s3;
695 	double	total, exp, tens;
696 
697 	neg = eneg = 1;
698 	gfail = 0;
699 
700 	whitespace();
701 
702 	c = getin();
703 
704 	if (c == '-' || c == '+')
705 		if (c == '-') {
706 			neg = -1;
707 			c = getin();
708 		}
709 
710 	sp = s1 = cbuf;
711 	while (c >= '0' && c <= '9') {
712 		*sp++ = c;
713 		c = getin();
714 	}
715 
716 	s2 = sp;
717 	if (c == '.') {
718 		c = getin();
719 		while (c >= '0' && c <= '9') {
720 			*sp++ = c;
721 			c = getin();
722 		}
723 	}
724 
725 	s3 = sp;
726 	if (c == 'e' || c == 'E') {
727 		c = getin();
728 		if (c == '-' || c == '+')
729 			if (c == '-') {
730 				eneg = -1;
731 				c = getin();
732 			}
733 		while (c >= '0' && c <= '9') {
734 			*sp++ = c;
735 			c = getin();
736 		}
737 	}
738 	*sp = '\0';
739 
740 	if (s1 == s2 && s2 == s3) {
741 		gfail = -1;
742 		return 0.0;
743 	}
744 	unget(c);
745 
746 	/*
747 	 * convert the three strings (integer, fraction, and exponent)
748 	 * into a floating point number.
749 	 */
750 
751 	total = 0.0;
752 	tens = 1.0;
753 	for (sp=s2-1; sp >= s1; sp--) {
754 		total += (*sp -'0') * tens;
755 		tens *= 10.0;
756 	}
757 
758 	tens = .1;
759 	for (sp=s2; sp < s3; sp++) {
760 		total += (*sp - '0') * tens;
761 		tens /= 10.0;
762 	}
763 	total *= (double)neg;
764 
765 	exp = 0.0;
766 	tens = 1.0;
767 	if ((slen = strlen(s3)) > 0) {
768 		sp = s3 + slen - 1;
769 		for ( ; sp >= s3; sp--) {
770 			exp += (*sp - '0') * tens;
771 			tens *= 10.0;
772 		}
773 	}
774 	*sp = '\0';
775 
776 	exp *= (double)eneg;
777 
778 	total *= pow(10.0, exp);
779 
780 	return total;
781 }
782 #endif	/* __FLOAT__ */
783 
784 static	int
785 getin()
786 {
787 	int	c;
788 
789 	if (ungot != EOF) {
790 		c = ungot;
791 		ungot = EOF;
792 	} else
793 		c = getc(fpin);
794 	charcnt++;
795 	return c;
796 }
797 
798 static void
799 unget(int c)
800 {
801 	/* Dont' use ungetc because it doesn't work with m_fsopen */
802 	ungot = c;
803 	charcnt--;
804 }
805 
806