xref: /illumos-gate/usr/src/lib/libc/port/stdio/doscan.c (revision 4a38094c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1988 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 #include "lint.h"
31 #include <sys/types.h>
32 #include "mtlib.h"
33 #include "file64.h"
34 #include <stdio.h>
35 #include <ctype.h>
36 #include <stdarg.h>
37 #include <values.h>
38 #include <errno.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <math.h>
42 #include <thread.h>
43 #include <synch.h>
44 #include <stdlib.h>
45 #include <fnmatch.h>
46 #include <limits.h>
47 #include <wchar.h>
48 #include <unistd.h>
49 #include "libc.h"
50 #include "stdiom.h"
51 #include "xpg6.h"
52 
/* Number of distinct byte values; sizes the scanset lookup table (tab[]). */
#define	NCHARS	(1 << BITSPERBYTE)

/* if the _IOWRT flag is set, this must be a call from sscanf */
/*
 * locgetc(cnt): add one to cnt, then yield the next input byte.  For the
 * sscanf case the byte comes straight from iop->_ptr and a '\0' byte is
 * reported as EOF without advancing; otherwise GETC(iop) is used.
 * Note that cnt is incremented even when EOF is returned.
 * Expects a variable named iop to be in scope at the point of use.
 */
#define	locgetc(cnt)	(cnt += 1, (iop->_flag & _IOWRT) ? \
				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
				GETC(iop))
/*
 * locungetc(cnt, x): subtract one from cnt (also when x is EOF), then,
 * unless x is EOF, step iop->_ptr back by one and yield the byte found
 * there.  x itself is never written into the buffer.  For the non-sscanf
 * case iop->_cnt is incremented as well.
 */
#define	locungetc(cnt, x) (cnt -= 1, (x == EOF) ? EOF : \
				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
				    (++iop->_cnt, *(--iop->_ptr))))

/*
 * wlocgetc()/wlocungetc(x): same as locgetc()/locungetc() but without a
 * character counter; the non-sscanf push back goes through UNGETC().
 */
#define	wlocgetc()	((iop->_flag & _IOWRT) ? \
				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
				GETC(iop))
#define	wlocungetc(x) ((x == EOF) ? EOF : \
				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
				    UNGETC(x, iop)))

#define	MAXARGS	30	/* max. number of args for fast positional parameters */
71 
/*
 * stva_list is used to subvert C's restriction that a variable with an
 * array type cannot appear on the left hand side of an assignment operator.
 * By putting the va_list inside a structure, the whole object can be copied
 * with a simple assignment even when va_list is an array type.
 */
typedef struct stva_list {
	va_list	ap;
} stva_list;
81 
/*
 * Forward declarations of the file-local helpers.  number(), string(),
 * wstring(), wbrstring() and brstring() each try to match one conversion
 * against the input; the remaining routines are character-level input
 * helpers and the positional-argument table builder.
 */
static int number(int *, int *, int, int, int, int, FILE *, va_list *);
static int readchar(FILE *, int *);
static int string(int *, int *, int, int, int, char *, FILE *, va_list *);
static int wstring(int *, int *, int, int, int, FILE *, va_list *);
static int	wbrstring(int *, int *, int, int, int, FILE *,
    unsigned char *, va_list *);
#ifdef	_WIDE
static int	brstring(int *, int *, int, int, int, FILE *,
    unsigned char *, va_list *);
#endif
static int _bi_getwc(FILE *);
static int _bi_ungetwc(wint_t, FILE *);

/* The wide build scans a wchar_t format, so _mkarglst() differs in type. */
#ifdef	_WIDE
static int _mkarglst(const wchar_t *, stva_list, stva_list[]);
static wint_t	_wd_getwc(int *, FILE *);
static wint_t	_wd_ungetwc(int *, wchar_t, FILE *);
static int	_watoi(wchar_t *);
#else  /* _WIDE */
static int _mkarglst(const char *, stva_list, stva_list[]);
#endif /* _WIDE */
103 
104 #ifndef	_WIDE
105 int
_doscan(FILE * iop,const char * fmt,va_list va_Alist)106 _doscan(FILE *iop, const char *fmt, va_list va_Alist)
107 {
108 	int ret;
109 	rmutex_t *lk;
110 
111 	if (iop->_flag & _IOWRT)
112 		ret = __doscan_u(iop, fmt, va_Alist, 0);
113 	else {
114 		FLOCKFILE(lk, iop);
115 		ret = __doscan_u(iop, fmt, va_Alist, 0);
116 		FUNLOCKFILE(lk);
117 	}
118 	return (ret);
119 }
120 #endif  /* _WIDE */
121 
122 /* ARGSUSED3 */
123 #ifdef	_WIDE
124 int
__wdoscan_u(FILE * iop,const wchar_t * fmt,va_list va_Alist,int scflag __unused)125 __wdoscan_u(FILE *iop, const wchar_t *fmt, va_list va_Alist,
126     int scflag __unused)
127 #else  /* _WIDE */
128 int
129 __doscan_u(FILE *iop, const char *sfmt, va_list va_Alist, int scflag __unused)
130 #endif /* _WIDE */
131 {
132 #ifdef	_WIDE
133 	wchar_t	ch;
134 	wchar_t	inchar, size;
135 	int	nmatch = 0, len, stow;
136 #else  /* _WIDE */
137 	int	ch;
138 	int		nmatch = 0, len, inchar, stow, size;
139 #endif /* _WIDE */
140 
141 	unsigned char	*bracket_str = NULL;
142 	int		chcount, flag_eof;
143 	char	tab[NCHARS];
144 
145 	/* variables for postional parameters */
146 #ifdef	_WIDE
147 	const wchar_t	*sformat = fmt;	/* save the beginning of the format */
148 #else  /* _WIDE */
149 	const unsigned char	*fmt = (const unsigned char *)sfmt;
150 	const char	*sformat = sfmt; /* save the beginning of the format */
151 #endif /* _WIDE */
152 	int		fpos = 1;	/* 1 if first postional parameter */
153 	stva_list	args;	/* used to step through the argument list */
154 	stva_list	sargs;	/* used to save start of the argument list */
155 	stva_list	arglst[MAXARGS];
156 					/*
157 					 * array giving the appropriate values
158 					 * for va_arg() to retrieve the
159 					 * corresponding argument:
160 					 * arglst[0] is the first argument
161 					 * arglst[1] is the second argument,etc.
162 					 */
163 	/* Check if readable stream */
164 	if (!(iop->_flag & (_IOREAD | _IORW))) {
165 		errno = EBADF;
166 		return (EOF);
167 	}
168 
169 	/*
170 	 * Initialize args and sargs to the start of the argument list.
171 	 * We don't know any portable way to copy an arbitrary C object
172 	 * so we use a system-specific routine(probably a macro) from
173 	 * stdarg.h.  (Remember that if va_list is an array, in_args will
174 	 * be a pointer and &in_args won't be what we would want for
175 	 * memcpy.)
176 	 */
177 	va_copy(args.ap, va_Alist);
178 
179 	sargs = args;
180 
181 	chcount = 0; flag_eof = 0;
182 
183 	/*
184 	 * ****************************************************
185 	 * Main loop: reads format to determine a pattern,
186 	 *		and then goes to read input stream
187 	 *		in attempt to match the pattern.
188 	 * ****************************************************
189 	 */
190 	for (; ; ) {
191 		if ((ch = *fmt++) == '\0') {
192 			return (nmatch); /* end of format */
193 		}
194 #ifdef	_WIDE
195 		if (iswspace(ch)) {
196 			if (!flag_eof) {
197 				while (iswspace(inchar =
198 				    _wd_getwc(&chcount, iop)))
199 					;
200 				if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
201 					flag_eof = 1;
202 			}
203 			continue;
204 		}
205 		if (ch != '%' || (ch = *fmt++) == '%') {
206 			if (ch == '%') {
207 				if (!flag_eof) {
208 					while (iswspace(inchar =
209 					    _wd_getwc(&chcount, iop)))
210 						;
211 					if (_wd_ungetwc(&chcount, inchar, iop)
212 					    == WEOF)
213 						flag_eof = 1;
214 				}
215 			}
216 			if ((inchar = _wd_getwc(&chcount, iop)) == ch)
217 				continue;
218 			if (_wd_ungetwc(&chcount, inchar, iop) != WEOF) {
219 				return (nmatch); /* failed to match input */
220 			}
221 			break;
222 		}
223 #else  /* _WIDE */
224 		if (isspace(ch)) {
225 			if (!flag_eof) {
226 				while (isspace(inchar = locgetc(chcount)))
227 					;
228 				if (locungetc(chcount, inchar) == EOF)
229 					flag_eof = 1;
230 
231 			}
232 			continue;
233 		}
234 		if (ch != '%' || (ch = *fmt++) == '%') {
235 			if (ch == '%') {
236 				if (!flag_eof) {
237 					while (isspace(inchar =
238 					    locgetc(chcount)))
239 						;
240 					if (locungetc(chcount, inchar) == EOF)
241 						flag_eof = 1;
242 				}
243 			}
244 			if ((inchar = locgetc(chcount)) == ch)
245 				continue;
246 			if (locungetc(chcount, inchar) != EOF) {
247 				return (nmatch); /* failed to match input */
248 			}
249 			break;
250 		}
251 #endif /* _WIDE */
252 
253 charswitch:	/* target of a goto 8-( */
254 
255 		if (ch == '*') {
256 			stow = 0;
257 			ch = *fmt++;
258 		} else
259 			stow = 1;
260 
261 #ifdef	_WIDE
262 		for (len = 0; ((ch >= 0) && (ch < 256) && isdigit(ch));
263 		    ch = *fmt++)
264 			len = len * 10 + ch - '0';
265 #else  /* _WIDE */
266 		for (len = 0; isdigit(ch); ch = *fmt++)
267 			len = len * 10 + ch - '0';
268 #endif /* _WIDE */
269 
270 		if (ch == '$') {
271 			/*
272 			 * positional parameter handling - the number
273 			 * specified in len gives the argument to which
274 			 * the next conversion should be applied.
275 			 * WARNING: This implementation of positional
276 			 * parameters assumes that the sizes of all pointer
277 			 * types are the same. (Code similar to that
278 			 * in the portable doprnt.c should be used if this
279 			 * assumption does not hold for a particular
280 			 * port.)
281 			 */
282 			if (fpos) {
283 				if (_mkarglst(sformat, sargs, arglst) != 0) {
284 					return (EOF);
285 				} else {
286 					fpos = 0;
287 				}
288 			}
289 			if (len <= MAXARGS) {
290 				args = arglst[len - 1];
291 			} else {
292 				args = arglst[MAXARGS - 1];
293 				for (len -= MAXARGS; len > 0; len--)
294 					(void) va_arg(args.ap, void *);
295 			}
296 			len = 0;
297 			ch = *fmt++;
298 			goto charswitch;
299 		}
300 
301 		if (len == 0)
302 			len = MAXINT;
303 #ifdef	_WIDE
304 		if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
305 		    (size == 'j') || (size == 't') || (size == 'z'))
306 			ch = *fmt++;
307 #else  /* _WIDE */
308 		if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
309 		    (size == 'w') || (size == 'j') || (size == 't') ||
310 		    (size == 'z'))
311 			ch = *fmt++;
312 #endif /* _WIDE */
313 		if (size == 'l' && ch == 'l') {
314 			size = 'm';		/* size = 'm' if long long */
315 			ch = *fmt++;
316 		} else if (size == 'h' && ch == 'h') {
317 			size = 'b';		/* use size = 'b' if char */
318 			ch = *fmt++;
319 		} else if ((size == 't') || (size == 'z')) {
320 			size = 'l';
321 		} else if (size == 'j') {
322 #ifndef _LP64
323 			/* check scflag for size of u/intmax_t (32-bit libc) */
324 			if (!(scflag & _F_INTMAX32)) {
325 #endif
326 				size = 'm';
327 #ifndef _LP64
328 			}
329 #endif
330 		}
331 		if (ch == '\0') {
332 			return (EOF);		/* unexpected end of format */
333 		}
334 #ifdef	_WIDE
335 		if (ch == '[') {
336 			wchar_t	c;
337 			size_t	len;
338 			int	negflg = 0;
339 			wchar_t	*p;
340 			wchar_t	*wbracket_str;
341 			size_t	wlen, clen;
342 
343 			/* p points to the address of '[' */
344 			p = (wchar_t *)fmt - 1;
345 			len = 0;
346 			if (*fmt == '^') {
347 				len++;
348 				fmt++;
349 				negflg = 1;
350 			}
351 			if (((c = *fmt) == ']') || (c == '-')) {
352 				len++;
353 				fmt++;
354 			}
355 			while ((c = *fmt) != ']') {
356 				if (c == '\0') {
357 					return (EOF); /* unexpected EOF */
358 				} else {
359 					len++;
360 					fmt++;
361 				}
362 			}
363 			fmt++;
364 			len += 2;
365 			wbracket_str = (wchar_t *)
366 			    malloc(sizeof (wchar_t) * (len + 1));
367 			if (wbracket_str == NULL) {
368 				errno = ENOMEM;
369 				return (EOF);
370 			} else {
371 				(void) wmemcpy(wbracket_str,
372 				    (const wchar_t *)p, len);
373 				*(wbracket_str + len) = L'\0';
374 				if (negflg && *(wbracket_str + 1) == '^') {
375 					*(wbracket_str + 1) = L'!';
376 				}
377 			}
378 			wlen = wcslen(wbracket_str);
379 			clen = wcstombs((char *)NULL, wbracket_str, 0);
380 			if (clen == (size_t)-1) {
381 				free(wbracket_str);
382 				return (EOF);
383 			}
384 			bracket_str = (unsigned char *)
385 			    malloc(sizeof (unsigned char) * (clen + 1));
386 			if (bracket_str == NULL) {
387 				free(wbracket_str);
388 				errno = ENOMEM;
389 				return (EOF);
390 			}
391 			clen = wcstombs((char *)bracket_str, wbracket_str,
392 			    wlen + 1);
393 			free(wbracket_str);
394 			if (clen == (size_t)-1) {
395 				free(bracket_str);
396 				return (EOF);
397 			}
398 		}
399 #else  /* _WIDE */
400 		if (ch == '[') {
401 			if (size == 'l') {
402 				int	c, len, i;
403 				int	negflg = 0;
404 				unsigned char	*p;
405 
406 				p = (unsigned char *)(fmt - 1);
407 				len = 0;
408 				if (*fmt == '^') {
409 					len++;
410 					fmt++;
411 					negflg = 1;
412 				}
413 				if (((c = *fmt) == ']') || (c == '-')) {
414 					len++;
415 					fmt++;
416 				}
417 				while ((c = *fmt) != ']') {
418 					if (c == '\0') {
419 						return (EOF);
420 					} else if (isascii(c)) {
421 						len++;
422 						fmt++;
423 					} else {
424 						i = mblen((const char *)fmt,
425 						    MB_CUR_MAX);
426 						if (i <= 0) {
427 							return (EOF);
428 						} else {
429 							len += i;
430 							fmt += i;
431 						}
432 					}
433 				}
434 				fmt++;
435 				len += 2;
436 				bracket_str = (unsigned char *)
437 				    malloc(sizeof (unsigned char) * (len + 1));
438 				if (bracket_str == NULL) {
439 					errno = ENOMEM;
440 					return (EOF);
441 				} else {
442 					(void) strncpy((char *)bracket_str,
443 					    (const char *)p, len);
444 					*(bracket_str + len) = '\0';
445 					if (negflg &&
446 					    *(bracket_str + 1) == '^') {
447 						*(bracket_str + 1) = '!';
448 					}
449 				}
450 			} else {
451 				int	t = 0;
452 				int	b, c, d;
453 
454 				if (*fmt == '^') {
455 					t++;
456 					fmt++;
457 				}
458 				(void) memset(tab, !t, NCHARS);
459 				if ((c = *fmt) == ']' || c == '-') {
460 					tab[c] = t;
461 					fmt++;
462 				}
463 
464 				while ((c = *fmt) != ']') {
465 					if (c == '\0') {
466 						return (EOF);
467 					}
468 					b = *(fmt - 1);
469 					d = *(fmt + 1);
470 					if ((c == '-') && (d != ']') &&
471 					    (b < d)) {
472 						(void) memset(&tab[b], t,
473 						    d - b + 1);
474 						fmt += 2;
475 					} else {
476 						tab[c] = t;
477 						fmt++;
478 					}
479 				}
480 				fmt++;
481 			}
482 		}
483 #endif /* _WIDE */
484 
485 #ifdef	_WIDE
486 		if ((ch >= 0) && (ch < 256) &&
487 		    isupper((int)ch)) { /* no longer documented */
488 			if (_lib_version == c_issue_4) {
489 				if (size != 'm' && size != 'L')
490 					size = 'l';
491 			}
492 			ch = _tolower((int)ch);
493 		}
494 		if (ch != 'n' && !flag_eof) {
495 			if (ch != 'c' && ch != 'C' && ch != '[') {
496 				while (iswspace(inchar =
497 				    _wd_getwc(&chcount, iop)))
498 					;
499 				if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
500 					break;
501 
502 			}
503 		}
504 #else  /* _WIDE */
505 		if (isupper(ch)) { /* no longer documented */
506 			if (_lib_version == c_issue_4) {
507 				if (size != 'm' && size != 'L')
508 					size = 'l';
509 			}
510 			ch = _tolower(ch);
511 		}
512 		if (ch != 'n' && !flag_eof) {
513 			if (ch != 'c' && ch != 'C' && ch != '[') {
514 				while (isspace(inchar = locgetc(chcount)))
515 					;
516 				if (locungetc(chcount, inchar) == EOF)
517 					break;
518 			}
519 		}
520 #endif /* _WIDE */
521 
522 		switch (ch) {
523 		case 'C':
524 		case 'S':
525 		case 'c':
526 		case 's':
527 #ifdef	_WIDE
528 			if ((size == 'l') || (size == 'C') || (size == 'S'))
529 #else  /* _WIDE */
530 			if ((size == 'w') || (size == 'l') || (size == 'C') ||
531 			    (size == 'S'))
532 #endif /* _WIDE */
533 			{
534 				size = wstring(&chcount, &flag_eof, stow,
535 				    (int)ch, len, iop, &args.ap);
536 			} else {
537 				size = string(&chcount, &flag_eof, stow,
538 				    (int)ch, len, tab, iop, &args.ap);
539 			}
540 			break;
541 		case '[':
542 			if (size == 'l') {
543 				size = wbrstring(&chcount, &flag_eof, stow,
544 				    (int)ch, len, iop, bracket_str, &args.ap);
545 				free(bracket_str);
546 				bracket_str = NULL;
547 			} else {
548 #ifdef	_WIDE
549 				size = brstring(&chcount, &flag_eof, stow,
550 				    (int)ch, len, iop, bracket_str, &args.ap);
551 				free(bracket_str);
552 				bracket_str = NULL;
553 #else  /* _WIDE */
554 				size = string(&chcount, &flag_eof, stow,
555 				    ch, len, tab, iop, &args.ap);
556 #endif /* _WIDE */
557 			}
558 			break;
559 
560 		case 'n':
561 			if (stow == 0)
562 				continue;
563 			if (size == 'b')	/* char */
564 				*va_arg(args.ap, char *) = (char)chcount;
565 			else if (size == 'h')
566 				*va_arg(args.ap, short *) = (short)chcount;
567 			else if (size == 'l')
568 				*va_arg(args.ap, long *) = (long)chcount;
569 			else if (size == 'm') /* long long */
570 				*va_arg(args.ap, long long *) =
571 				    (long long) chcount;
572 			else
573 				*va_arg(args.ap, int *) = (int)chcount;
574 			continue;
575 
576 		case 'i':
577 		default:
578 			size = number(&chcount, &flag_eof, stow, (int)ch,
579 			    len, (int)size, iop, &args.ap);
580 			break;
581 		}
582 		if (size)
583 			nmatch += stow;
584 		else {
585 			return ((flag_eof && !nmatch) ? EOF : nmatch);
586 		}
587 		continue;
588 	}
589 	if (bracket_str)
590 		free(bracket_str);
591 	return (nmatch != 0 ? nmatch : EOF); /* end of input */
592 }
593 
594 /* ****************************************************************** */
595 /* Functions to read the input stream in an attempt to match incoming */
596 /* data to the current pattern from the main loop of _doscan(). */
597 /* ****************************************************************** */
598 static int
number(int * chcount,int * flag_eof,int stow,int type,int len,int size,FILE * iop,va_list * listp)599 number(int *chcount, int *flag_eof, int stow, int type, int len, int size,
600     FILE *iop, va_list *listp)
601 {
602 	char	numbuf[64];
603 	char	*np = numbuf;
604 	int	c, base, inchar, lookahead;
605 	int	digitseen = 0, floater = 0, negflg = 0;
606 	int	lc;
607 	long long	lcval = 0LL;
608 
609 	switch (type) {
610 	case 'e':
611 	case 'f':
612 	case 'g':
613 		/*
614 		 * lc = 0 corresponds to c90 mode: do not recognize
615 		 *	hexadecimal fp strings; attempt to push back
616 		 *	all unused characters read
617 		 *
618 		 * lc = -1 corresponds to c99 mode: recognize hexa-
619 		 *	decimal fp strings; push back at most one
620 		 *	unused character
621 		 */
622 		lc = (__xpg6 & _C99SUSv3_recognize_hexfp)? -1 : 0;
623 		floater = 1;
624 		break;
625 
626 	case 'a':
627 		lc = -1;
628 		floater = 1;
629 		break;
630 
631 	case 'd':
632 	case 'u':
633 	case 'i':
634 		base = 10;
635 		break;
636 	case 'o':
637 		base = 8;
638 		break;
639 	case 'p':
640 #ifdef	_LP64
641 		size = 'l'; /* pointers are long in LP64 */
642 #endif	/*	_LP64	*/
643 		/* FALLTHROUGH */
644 	case 'x':
645 		base = 16;
646 		break;
647 	default:
648 		return (0); /* unrecognized conversion character */
649 	}
650 
651 	if (floater != 0) {
652 		/*
653 		 * Handle floating point with
654 		 * file_to_decimal.
655 		 */
656 		decimal_mode		dm;
657 		decimal_record		dr;
658 		fp_exception_field_type	efs;
659 		enum decimal_string_form form;
660 		char			*echar;
661 		int			nread;
662 		char			buffer[1024+1];
663 		char			*nb = buffer;
664 
665 		if (len > 1024)
666 			len = 1024;
667 		file_to_decimal(&nb, len, lc, &dr, &form, &echar, iop, &nread);
668 		if (lc == -1) {
669 			/*
670 			 * In C99 mode, the entire string read has to be
671 			 * accepted in order to qualify as a match
672 			 */
673 			if (nb != buffer + nread)
674 				form = invalid_form;
675 		}
676 		if (stow && (form != invalid_form)) {
677 #if defined(__sparc)
678 			dm.rd = _QgetRD();
679 			if (size == 'L') {		/* long double */
680 				if ((int)form < 0)
681 					__hex_to_quadruple(&dr, dm.rd,
682 					    va_arg(*listp, quadruple *), &efs);
683 				else
684 					decimal_to_quadruple(
685 					    va_arg(*listp, quadruple *),
686 					    &dm, &dr, &efs);
687 			}
688 #elif defined(__i386) || defined(__amd64)
689 			dm.rd = __xgetRD();
690 			if (size == 'L') {		/* long double */
691 				if ((int)form < 0)
692 					__hex_to_extended(&dr, dm.rd,
693 					    va_arg(*listp, extended *), &efs);
694 				else
695 					decimal_to_extended(
696 					    va_arg(*listp, extended *),
697 					    &dm, &dr, &efs);
698 			}
699 #else
700 #error Unknown architecture
701 #endif
702 			else if (size == 'l') {		/* double */
703 				if ((int)form < 0)
704 					__hex_to_double(&dr, dm.rd,
705 					    va_arg(*listp, double *), &efs);
706 				else
707 					decimal_to_double(
708 					    va_arg(*listp, double *),
709 					    &dm, &dr, &efs);
710 			} else {			/* float */
711 				if ((int)form < 0)
712 					__hex_to_single(&dr, dm.rd,
713 					    va_arg(*listp, single *), &efs);
714 				else
715 					decimal_to_single((single *)
716 					    va_arg(*listp, single *),
717 					    &dm, &dr, &efs);
718 			}
719 			if ((efs & (1 << fp_overflow)) != 0) {
720 				errno = ERANGE;
721 			}
722 			if ((efs & (1 << fp_underflow)) != 0) {
723 				errno = ERANGE;
724 			}
725 		}
726 		(*chcount) += nread;	/* Count characters read. */
727 		c = locgetc((*chcount));
728 		if (locungetc((*chcount), c) == EOF)
729 			*flag_eof = 1;
730 		return ((form == invalid_form) ? 0 : 1);
731 				/* successful match if non-zero */
732 	}
733 
734 	switch (c = locgetc((*chcount))) {
735 	case '-':
736 		negflg++;
737 		/* FALLTHROUGH */
738 	case '+':
739 		if (--len <= 0)
740 			break;
741 		if ((c = locgetc((*chcount))) != '0')
742 			break;
743 		/* FALLTHROUGH */
744 	case '0':
745 		/*
746 		 * If %i or %x, the characters 0x or 0X may optionally precede
747 		 * the sequence of letters and digits (base 16).
748 		 */
749 		if ((type != 'i' && type != 'x') || (len <= 1))
750 			break;
751 		if (((inchar = locgetc((*chcount))) == 'x') ||
752 		    (inchar == 'X')) {
753 			lookahead = readchar(iop, chcount);
754 			if (isxdigit(lookahead)) {
755 				base = 16;
756 
757 				if (len <= 2) {
758 					(void) locungetc((*chcount), lookahead);
759 					/* Take into account the 'x' */
760 					len -= 1;
761 				} else {
762 					c = lookahead;
763 					/* Take into account '0x' */
764 					len -= 2;
765 				}
766 			} else {
767 				(void) locungetc((*chcount), lookahead);
768 				(void) locungetc((*chcount), inchar);
769 			}
770 		} else {
771 			/* inchar wans't 'x'. */
772 			(void) locungetc((*chcount), inchar); /* Put it back. */
773 			if (type == 'i') /* Only %i accepts an octal. */
774 				base = 8;
775 		}
776 	}
777 	for (; --len  >= 0; *np++ = (char)c, c = locgetc((*chcount))) {
778 		if (np > numbuf + 62) {
779 			errno = ERANGE;
780 			return (0);
781 		}
782 		if (isdigit(c) || base == 16 && isxdigit(c)) {
783 			int digit = c - (isdigit(c) ? '0' :
784 			    isupper(c) ? 'A' - 10 : 'a' - 10);
785 			if (digit >= base)
786 				break;
787 			if (stow)
788 				lcval = base * lcval + digit;
789 			digitseen++;
790 			continue;
791 		}
792 		break;
793 	}
794 
795 	if (stow && digitseen) {
796 		/* suppress possible overflow on 2's-comp negation */
797 		if (negflg && lcval != (1ULL << 63))
798 			lcval = -lcval;
799 		switch (size) {
800 			case 'm':
801 				*va_arg(*listp, long long *) = lcval;
802 				break;
803 			case 'l':
804 				*va_arg(*listp, long *) = (long)lcval;
805 				break;
806 			case 'h':
807 				*va_arg(*listp, short *) = (short)lcval;
808 				break;
809 			case 'b':
810 				*va_arg(*listp, char *) = (char)lcval;
811 				break;
812 			default:
813 				*va_arg(*listp, int *) = (int)lcval;
814 				break;
815 		}
816 	}
817 	if (locungetc((*chcount), c) == EOF)
818 		*flag_eof = 1;
819 	return (digitseen); /* successful match if non-zero */
820 }
821 
822 /* Get a character. If not using sscanf and at the buffer's end */
823 /* then do a direct read(). Characters read via readchar() */
824 /* can be  pushed back on the input stream by locungetc((*chcount),) */
825 /* since there is padding allocated at the end of the stream buffer. */
826 static int
readchar(FILE * iop,int * chcount)827 readchar(FILE *iop, int *chcount)
828 {
829 	int	inchar;
830 	char	buf[1];
831 
832 	if ((iop->_flag & _IOWRT) || (iop->_cnt != 0)) {
833 		inchar = locgetc((*chcount));
834 	} else {
835 		if (_xread(iop, buf, 1) != 1)
836 			return (EOF);
837 		inchar = (int)buf[0];
838 		(*chcount) += 1;
839 	}
840 	return (inchar);
841 }
842 
843 static int
string(int * chcount,int * flag_eof,int stow,int type,int len,char * tab __unused,FILE * iop,va_list * listp)844 string(int *chcount, int *flag_eof, int stow, int type, int len,
845     char *tab __unused, FILE *iop, va_list *listp)
846 {
847 	int	ch;
848 	char	*ptr;
849 	char	*start;
850 
851 	start = ptr = stow ? va_arg(*listp, char *) : NULL;
852 	if (((type == 'c') || (type == 'C')) && len == MAXINT)
853 		len = 1;
854 #ifdef	_WIDE
855 	while ((ch = locgetc((*chcount))) != EOF &&
856 	    !(((type == 's') || (type == 'S')) && isspace(ch))) {
857 #else  /* _WIDE */
858 	while ((ch = locgetc((*chcount))) != EOF &&
859 	    !(((type == 's') || (type == 'S')) &&
860 	    isspace(ch) || type == '[' && tab[ch])) {
861 #endif /* _WIDE */
862 		if (stow)
863 			*ptr = (char)ch;
864 		ptr++;
865 		if (--len <= 0)
866 			break;
867 	}
868 	if (ch == EOF) {
869 		(*flag_eof) = 1;
870 		(*chcount) -= 1;
871 	} else if (len > 0 && locungetc((*chcount), ch) == EOF)
872 		(*flag_eof) = 1;
873 	if (ptr == start)
874 		return (0);	/* no match */
875 	if (stow && ((type != 'c') && (type != 'C')))
876 		*ptr = '\0';
877 	return (1);	/* successful match */
878 }
879 
880 /* This function initializes arglst, to contain the appropriate */
881 /* va_list values for the first MAXARGS arguments. */
882 /* WARNING: this code assumes that the sizes of all pointer types */
883 /* are the same. (Code similar to that in the portable doprnt.c */
884 /* should be used if this assumption is not true for a */
885 /* particular port.) */
886 
887 #ifdef	_WIDE
888 static int
889 _mkarglst(const wchar_t *fmt, stva_list args, stva_list arglst[])
890 #else  /* _WIDE */
891 static int
892 _mkarglst(const char *fmt, stva_list args, stva_list arglst[])
893 #endif /* _WIDE */
894 {
895 #ifdef	_WIDE
896 #define	STRCHR	wcschr
897 #define	STRSPN	wcsspn
898 #define	ATOI(x)	_watoi((wchar_t *)x)
899 #define	SPNSTR1	L"01234567890"
900 #define	SPNSTR2	L"# +-.0123456789hL$"
901 #else  /* _WIDE */
902 #define	STRCHR	strchr
903 #define	STRSPN	strspn
904 #define	ATOI(x)	atoi(x)
905 #define	SPNSTR1	"01234567890"
906 #define	SPNSTR2	"# +-.0123456789hL$"
907 #endif /* _WIDE */
908 
909 	int maxnum, curargno;
910 	size_t n;
911 
912 	maxnum = -1;
913 	curargno = 0;
914 
915 	while ((fmt = STRCHR(fmt, '%')) != NULL) {
916 		fmt++;	/* skip % */
917 		if (*fmt == '*' || *fmt == '%')
918 			continue;
919 		if (fmt[n = STRSPN(fmt, SPNSTR1)] == L'$') {
920 			/* convert to zero base */
921 			curargno = ATOI(fmt) - 1;
922 			fmt += n + 1;
923 		}
924 
925 		if (maxnum < curargno)
926 			maxnum = curargno;
927 		curargno++;	/* default to next in list */
928 
929 		fmt += STRSPN(fmt, SPNSTR2);
930 		if (*fmt == '[') {
931 			fmt++; /* has to be at least on item in scan list */
932 			if (*fmt == ']') {
933 				fmt++;
934 			}
935 			while (*fmt != ']') {
936 				if (*fmt == L'\0') {
937 					return (-1); /* bad format */
938 #ifdef	_WIDE
939 				} else {
940 					fmt++;
941 				}
942 #else  /* _WIDE */
943 				} else if (isascii(*fmt)) {
944 					fmt++;
945 				} else {
946 					int	i;
947 
948 					i = mblen((const char *)
949 					    fmt, MB_CUR_MAX);
950 					if (i <= 0) {
951 						return (-1);
952 					} else {
953 						fmt += i;
954 					}
955 				}
956 #endif /* _WIDE */
957 			}
958 		}
959 	}
960 	if (maxnum > MAXARGS)
961 		maxnum = MAXARGS;
962 	for (n = 0; n <= maxnum; n++) {
963 		arglst[n] = args;
964 		(void) va_arg(args.ap, void *);
965 	}
966 	return (0);
967 }
968 
969 
970 /*
971  * For wide character handling
972  */
973 
974 #ifdef	_WIDE
975 static int
wstring(int * chcount,int * flag_eof,int stow,int type,int len,FILE * iop,va_list * listp)976 wstring(int *chcount, int *flag_eof, int stow, int type,
977     int len, FILE *iop, va_list *listp)
978 {
979 	wint_t	wch;
980 	wchar_t	*ptr;
981 	wchar_t	*wstart;
982 
983 	wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
984 
985 	if ((type == 'c') && len == MAXINT)
986 		len = 1;
987 	while (((wch = _wd_getwc(chcount, iop)) != WEOF) &&
988 	    !(type == 's' && iswspace(wch))) {
989 		if (stow)
990 			*ptr = wch;
991 		ptr++;
992 		if (--len <= 0)
993 			break;
994 	}
995 	if (wch == WEOF) {
996 		*flag_eof = 1;
997 		(*chcount) -= 1;
998 	} else {
999 		if (len > 0 && _wd_ungetwc(chcount, wch, iop) == WEOF)
1000 			*flag_eof = 1;
1001 	}
1002 	if (ptr == wstart)
1003 		return (0); /* no match */
1004 	if (stow && (type != 'c'))
1005 		*ptr = '\0';
1006 	return (1); /* successful match */
1007 }
1008 
1009 #else  /* _WIDE */
1010 static int
wstring(int * chcount,int * flag_eof,int stow,int type,int len,FILE * iop,va_list * listp)1011 wstring(int *chcount, int *flag_eof, int stow, int type, int len, FILE *iop,
1012     va_list *listp)
1013 {
1014 	int	wch;
1015 	wchar_t	*ptr;
1016 	wchar_t	*wstart;
1017 
1018 	wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
1019 
1020 	if ((type == 'c') && len == MAXINT)
1021 		len = 1;
1022 	while (((wch = _bi_getwc(iop)) != EOF) &&
1023 	    !(type == 's' && (isascii(wch) ? isspace(wch) : 0))) {
1024 		(*chcount) += _scrwidth((wchar_t)wch);
1025 		if (stow)
1026 			*ptr = wch;
1027 		ptr++;
1028 		if (--len <= 0)
1029 			break;
1030 	}
1031 	if (wch == EOF) {
1032 		(*flag_eof) = 1;
1033 		(*chcount) -= 1;
1034 	} else {
1035 		if (len > 0 && _bi_ungetwc(wch, iop) == EOF)
1036 			(*flag_eof) = 1;
1037 	}
1038 	if (ptr == wstart)
1039 		return (0); /* no match */
1040 	if (stow && (type != 'c'))
1041 		*ptr = '\0';
1042 	return (1); /* successful match */
1043 }
1044 #endif /* _WIDE */
1045 
1046 #ifdef	_WIDE
1047 static wint_t
_wd_getwc(int * chcount,FILE * iop)1048 _wd_getwc(int *chcount, FILE *iop)
1049 {
1050 	wint_t	wc;
1051 	int	len;
1052 
1053 	if (!(iop->_flag & _IOWRT)) {
1054 		/* call from fwscanf, wscanf */
1055 		wc = __fgetwc_xpg5(iop);
1056 		(*chcount)++;
1057 		return (wc);
1058 	} else {
1059 		/* call from swscanf */
1060 		if (*iop->_ptr == '\0')
1061 			return (WEOF);
1062 		len = mbtowc((wchar_t *)&wc, (const char *)iop->_ptr,
1063 		    MB_CUR_MAX);
1064 		if (len == -1)
1065 			return (WEOF);
1066 		iop->_ptr += len;
1067 		(*chcount)++;
1068 		return (wc);
1069 	}
1070 }
1071 
1072 static wint_t
_wd_ungetwc(int * chcount,wchar_t wc,FILE * iop)1073 _wd_ungetwc(int *chcount, wchar_t wc, FILE *iop)
1074 {
1075 	wint_t	ret;
1076 	int	len;
1077 	char	mbs[MB_LEN_MAX];
1078 
1079 	if (wc == WEOF)
1080 		return (WEOF);
1081 
1082 	if (!(iop->_flag & _IOWRT)) {
1083 		/* call from fwscanf, wscanf */
1084 		ret = __ungetwc_xpg5((wint_t)wc, iop);
1085 		if (ret != (wint_t)wc)
1086 			return (WEOF);
1087 		(*chcount)--;
1088 		return (ret);
1089 	} else {
1090 		/* call from swscanf */
1091 		len = wctomb(mbs, wc);
1092 		if (len == -1)
1093 			return (WEOF);
1094 		iop->_ptr -= len;
1095 		(*chcount)--;
1096 		return ((wint_t)wc);
1097 	}
1098 }
1099 
/*
 * _watoi(): convert the run of decimal digits at the start of the wide
 * string fmt to an int.  Only characters in the range 0..255 for which
 * isdigit() is true are taken; if the string does not start with such a
 * digit the result is 0.
 */
static int
_watoi(wchar_t *fmt)
{
	const wchar_t	*cur;
	int	value = 0;

	for (cur = fmt;
	    (*cur >= 0) && (*cur < 256) && isdigit((int)*cur); cur++) {
		value *= 10;
		value += *cur - '0';
	}
	return (value);
}
1117 #endif /* _WIDE */
1118 
1119 static int
wbrstring(int * chcount,int * flag_eof,int stow,int type __unused,int len,FILE * iop,unsigned char * brstr,va_list * listp)1120 wbrstring(int *chcount, int *flag_eof, int stow, int type __unused,
1121     int len, FILE *iop, unsigned char *brstr, va_list *listp)
1122 {
1123 	wint_t	wch;
1124 	int	i;
1125 	char	str[MB_LEN_MAX + 1]; /* include null termination */
1126 	wchar_t	*ptr, *start;
1127 #ifdef	_WIDE
1128 	int	dummy;
1129 #endif /* _WIDE */
1130 
1131 	start = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
1132 
1133 #ifdef	_WIDE
1134 	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
1135 #else  /* _WIDE */
1136 	while ((wch = _bi_getwc(iop)) != WEOF) {
1137 #endif /* _WIDE */
1138 		i = wctomb(str, (wchar_t)wch);
1139 		if (i == -1) {
1140 			return (0);
1141 		}
1142 		str[i] = '\0';
1143 		if (fnmatch((const char *)brstr, (const char *)str,
1144 		    FNM_NOESCAPE)) {
1145 			break;
1146 		} else {
1147 			if (len > 0) {
1148 #ifdef	_WIDE
1149 				(*chcount)++;
1150 #else  /* _WIDE */
1151 				(*chcount) += _scrwidth(wch);
1152 #endif /* _WIDE */
1153 				len--;
1154 				if (stow) {
1155 					*ptr = wch;
1156 				}
1157 				ptr++;
1158 				if (len <= 0)
1159 					break;
1160 			} else {
1161 				break;
1162 			}
1163 		}
1164 	}
1165 	if (wch == WEOF) {
1166 		*flag_eof = 1;
1167 	} else {
1168 #ifdef	_WIDE
1169 		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
1170 #else  /* _WIDE */
1171 		if (len > 0 && _bi_ungetwc(wch, iop) == WEOF)
1172 #endif /* _WIDE */
1173 			*flag_eof = 1;
1174 	}
1175 	if (ptr == start)
1176 		return (0);				/* no match */
1177 	if (stow)
1178 		*ptr = L'\0';
1179 	return (1);					/* successful match */
1180 }
1181 
1182 #ifdef	_WIDE
1183 static int
1184 brstring(int *chcount, int *flag_eof, int stow, int type __unused,
1185     int len, FILE *iop, unsigned char *brstr, va_list *listp)
1186 {
1187 	wint_t	wch;
1188 	int	i;
1189 	char	str[MB_LEN_MAX + 1]; /* include null termination */
1190 	char	*ptr, *start, *p;
1191 	int	dummy;
1192 
1193 	start = ptr = stow ? va_arg(*listp, char *) : NULL;
1194 
1195 	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
1196 		p = str;
1197 		i = wctomb(str, (wchar_t)wch);
1198 		if (i == -1) {
1199 			return (0);
1200 		}
1201 		str[i] = '\0';
1202 		if (fnmatch((const char *)brstr, (const char *)str,
1203 		    FNM_NOESCAPE)) {
1204 			break;
1205 		} else {
1206 			if (len >= i) {
1207 				(*chcount)++;
1208 				len -= i;
1209 				if (stow) {
1210 					while (i-- > 0) {
1211 						*ptr++ = *p++;
1212 					}
1213 				} else {
1214 					while (i-- > 0) {
1215 						ptr++;
1216 					}
1217 				}
1218 				if (len <= 0)
1219 					break;
1220 			} else {
1221 				break;
1222 			}
1223 		}
1224 	}
1225 	if (wch == WEOF) {
1226 		*flag_eof = 1;
1227 	} else {
1228 		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
1229 			*flag_eof = 1;
1230 	}
1231 	if (ptr == start)
1232 		return (0);				/* no match */
1233 	if (stow)
1234 		*ptr = '\0';
1235 	return (1);					/* successful match */
1236 }
1237 #endif /* _WIDE */
1238 
1239 /*
1240  * Locally define getwc and ungetwc
1241  */
/*
 * Read one character from 'iop' and return it as a wide character code.
 *
 * An ASCII byte is returned directly.  Otherwise up to MB_CUR_MAX bytes
 * are gathered (stopping early at a newline, which is pushed back, or at
 * EOF) and converted with mbtowc(); bytes beyond those the conversion
 * consumed are pushed back.
 *
 * Returns WEOF at end of input.  Also returns WEOF, with errno set to
 * EILSEQ, when the bytes do not form a valid character; in that case
 * every gathered byte except the first is pushed back.
 *
 * NOTE(review): the (signed char) cast on pushed-back bytes turns 0xFF
 * into -1.  If wlocungetc() treats that value as EOF, such a byte is not
 * pushed back -- confirm against the macro definition.
 */
static int
_bi_getwc(FILE *iop)
{
	char	mb[MB_LEN_MAX];
	wchar_t	wc;
	int	byte;
	int	got;		/* number of bytes held in mb[] */
	int	used;		/* number of bytes mbtowc() consumed */
	int	limit;

	byte = wlocgetc();
	if (byte == EOF)
		return (WEOF);

	if (isascii(byte))	/* ASCII code */
		return ((wint_t)byte);

	mb[0] = (char)byte;

	/*
	 * MB_CUR_MAX doesn't exceed the value of MB_LEN_MAX, so it is
	 * used instead of MB_LEN_MAX to improve performance.
	 */
	limit = (int)MB_CUR_MAX;
	got = 1;
	while (got < limit) {
		byte = wlocgetc();
		if (byte == '\n') {
			(void) wlocungetc(byte);
			break;
		}
		if (byte == EOF) {
			/* this still may be a valid multibyte character */
			break;
		}
		mb[got++] = (char)byte;
	}

	used = mbtowc(&wc, mb, got);
	if (used == -1) {
		/*
		 * If mbtowc fails, the input was not a legal character.
		 *	ungetc all but one character.
		 *
		 * Note:  the number of pushback characters that
		 *	ungetc() can handle must be >= (MB_LEN_MAX - 1).
		 *	In Solaris 2.x, the number of pushback
		 *	characters is 4.
		 */
		for (got--; got >= 1; got--)
			(void) wlocungetc((signed char)mb[got]);
		errno = EILSEQ;
		return (WEOF); /* Illegal EUC sequence. */
	}

	/*
	 * Return the bytes that were read ahead but are not part of this
	 * character.  The same pushback limit as above applies.
	 */
	for (got--; got >= used; got--)
		(void) wlocungetc((signed char)mb[got]);
	return ((int)wc);
}
1303 
1304 static int
1305 _bi_ungetwc(wint_t wc, FILE *iop)
1306 {
1307 	char mbs[MB_LEN_MAX];
1308 	unsigned char *p;
1309 	int n;
1310 
1311 	if ((wc == WEOF) || ((iop->_flag & _IOREAD) == 0))
1312 		return (WEOF);
1313 
1314 	n = wctomb(mbs, (wchar_t)wc);
1315 	if (n <= 0)
1316 		return (WEOF);
1317 
1318 	if (iop->_ptr <= iop->_base) {
1319 		if (iop->_base == NULL) {
1320 			return (WEOF);
1321 		}
1322 		if ((iop->_ptr == iop->_base) && (iop->_cnt == 0)) {
1323 			++iop->_ptr;
1324 		} else if ((iop->_ptr - n) < (iop->_base - PUSHBACK)) {
1325 			return (WEOF);
1326 		}
1327 	}
1328 
1329 	p = (unsigned char *)(mbs+n-1); /* p points the last byte */
1330 	/* if _IOWRT is set to iop->_flag, it means this is */
1331 	/* an invocation from sscanf(), and in that time we */
1332 	/* don't touch iop->_cnt.  Otherwise, which means an */
1333 	/* invocation from fscanf() or scanf(), we touch iop->_cnt */
1334 	if ((iop->_flag & _IOWRT) == 0) {
1335 		/* scanf() and fscanf() */
1336 		iop->_cnt += n;
1337 		while (n--) {
1338 			*--iop->_ptr = *(p--);
1339 		}
1340 	} else {
1341 		/* sscanf() */
1342 		iop->_ptr -= n;
1343 	}
1344 	return (wc);
1345 }
1346