xref: /illumos-gate/usr/src/cmd/awk_xpg4/awk2.c (revision 2a8bcb4e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 1986, 1994 by Mortice Kern Systems Inc.  All rights reserved.
28  */
29 
30 /*
31  * awk -- process input files, field extraction, output
32  *
33  * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
34  */
35 
36 #include "awk.h"
37 #include "y.tab.h"
38 
39 static FILE	*awkinfp;		/* Input file pointer */
40 static int	reclen;			/* Length of last record */
41 static int	exstat;			/* Exit status */
42 
43 static FILE	*openfile(NODE *np, int flag, int fatal);
44 static FILE	*newfile(void);
45 static NODE	*nextarg(NODE **npp);
46 static void	adjust_buf(wchar_t **, int *, wchar_t **, char *, size_t);
47 static void	awk_putwc(wchar_t, FILE *);
48 
49 /*
50  * mainline for awk execution
51  */
52 void
awk()53 awk()
54 {
55 	running = 1;
56 	dobegin();
57 	while (nextrecord(linebuf, awkinfp) > 0)
58 		execute(yytree);
59 	doend(exstat);
60 }
61 
62 /*
63  * "cp" is the buffer to fill.  There is a special case if this buffer is
64  * "linebuf" ($0)
65  * Return 1 if OK, zero on EOF, -1 on error.
66  */
67 int
nextrecord(wchar_t * cp,FILE * fp)68 nextrecord(wchar_t *cp, FILE *fp)
69 {
70 	wchar_t *ep = cp;
71 
72 nextfile:
73 	if (fp == FNULL && (fp = newfile()) == FNULL)
74 		return (0);
75 	if ((*awkrecord)(ep, NLINE, fp) == NULL) {
76 		if (fp == awkinfp) {
77 			if (fp != stdin)
78 				(void) fclose(awkinfp);
79 			awkinfp = fp = FNULL;
80 			goto nextfile;
81 		}
82 		if (ferror(fp))
83 			return (-1);
84 		return (0);
85 	}
86 	if (fp == awkinfp) {
87 		if (varNR->n_flags & FINT)
88 			++varNR->n_int;
89 		else
90 			(void) exprreduce(incNR);
91 		if (varFNR->n_flags & FINT)
92 			++varFNR->n_int;
93 		else
94 			(void) exprreduce(incFNR);
95 	}
96 	if (cp == linebuf) {
97 		lbuflen = reclen;
98 		splitdone = 0;
99 		if (needsplit)
100 			fieldsplit();
101 	}
102 	/* if record length is too long then bail out */
103 	if (reclen > NLINE - 2) {
104 		awkerr(gettext("Record too long (LIMIT: %d bytes)"),
105 		    NLINE - 1);
106 		/* Not Reached */
107 	}
108 	return (1);
109 }
110 
/*
 * isclvar()
 *
 * Returns 1 if the input string, arg, is a variable assignment,
 * otherwise returns 0.  A null arg is not an assignment.
 *
 * An argument to awk can be either a pathname of a file, or a variable
 * assignment.  An operand that begins with an underscore or alphabetic
 * character, followed by a sequence of underscores, digits, and
 * alphabetics, followed by the '=' character, specifies a variable
 * assignment rather than a pathname.  Characters are classified with
 * iswalpha()/iswalnum().
 */
int
isclvar(wchar_t *arg)
{
	wchar_t *p;

	if (arg == NULL)
		return (0);

	/* The first character must be able to start an identifier. */
	if (!iswalpha(*arg) && *arg != '_')
		return (0);

	/* Step over the remainder of the identifier. */
	p = arg + 1;
	while (*p != '\0' && (iswalnum(*p) || *p == '_'))
		p++;

	/* It is an assignment only if '=' comes straight after the name. */
	return (*p == '=');
}
149 
150 /*
151  * Return the next file from the command line.
152  * Return FNULL when no more files.
153  * Sets awkinfp variable to the new current input file.
154  */
155 static FILE *
newfile()156 newfile()
157 {
158 	static int argindex = 1;
159 	static int filedone;
160 	wchar_t *ap;
161 	int argc;
162 	wchar_t *arg;
163 	extern void strescape(wchar_t *);
164 
165 	argc = (int)exprint(varARGC);
166 	for (;;) {
167 		if (argindex >= argc) {
168 			if (filedone)
169 				return (FNULL);
170 			++filedone;
171 			awkinfp = stdin;
172 			arg = M_MB_L("-");
173 			break;
174 		}
175 		constant->n_int = argindex++;
176 		arg = (wchar_t *)exprstring(ARGVsubi);
177 		/*
178 		 * If the argument contains a '=', determine if the
179 		 * argument needs to be treated as a variable assignment
180 		 * or as the pathname of a file.
181 		 */
182 		if (((ap = wcschr(arg, '=')) != NULL) && isclvar(arg)) {
183 			*ap = '\0';
184 			strescape(ap+1);
185 			strassign(vlook(arg), linebuf, FALLOC|FSENSE,
186 			    wcslen(linebuf));
187 			*ap = '=';
188 			continue;
189 		}
190 		if (arg[0] == '\0')
191 			continue;
192 		++filedone;
193 		if (arg[0] == '-' && arg[1] == '\0') {
194 			awkinfp = stdin;
195 			break;
196 		}
197 		if ((awkinfp = fopen(mbunconvert(arg), r)) == FNULL) {
198 			(void) fprintf(stderr, gettext("input file \"%s\""),
199 			    mbunconvert(arg));
200 			exstat = 1;
201 			continue;
202 		}
203 		break;
204 	}
205 	strassign(varFILENAME, arg, FALLOC, wcslen(arg));
206 	if (varFNR->n_flags & FINT)
207 		varFNR->n_int = 0;
208 	else
209 		(void) exprreduce(clrFNR);
210 	return (awkinfp);
211 }
212 
213 /*
214  * Default record reading code
215  * Uses fgets for potential speedups found in some (e.g. MKS)
216  * stdio packages.
217  */
218 wchar_t *
defrecord(wchar_t * bp,int lim,FILE * fp)219 defrecord(wchar_t *bp, int lim, FILE *fp)
220 {
221 	wchar_t *endp;
222 
223 	if (fgetws(bp, lim, fp) == NULL) {
224 		*bp = '\0';
225 		return (NULL);
226 	}
227 /*
228  * XXXX
229  *	switch (fgetws(bp, lim, fp)) {
230  *	case M_FGETS_EOF:
231  *		*bp = '\0';
232  *		return (NULL);
233  *	case M_FGETS_BINARY:
234  *		awkerr(gettext("file is binary"));
235  *	case M_FGETS_LONG:
236  *		awkerr(gettext("line too long: limit %d"),
237  *			lim);
238  *	case M_FGETS_ERROR:
239  *		awkperr(gettext("error reading file"));
240  *	}
241  */
242 
243 	if (*(endp = (bp + (reclen = wcslen(bp))-1)) == '\n') {
244 		*endp = '\0';
245 		reclen--;
246 	}
247 	return (bp);
248 }
249 
250 /*
251  * Read a record separated by one character in the RS.
252  * Compatible calling sequence with fgets, but don't include
253  * record separator character in string.
254  */
255 wchar_t *
charrecord(wchar_t * abp,int alim,FILE * fp)256 charrecord(wchar_t *abp, int alim, FILE *fp)
257 {
258 	wchar_t *bp;
259 	wint_t c;
260 	int limit = alim;
261 	wint_t endc;
262 
263 	bp = abp;
264 	endc = *(wchar_t *)varRS->n_string;
265 	while (--limit > 0 && (c = getwc(fp)) != endc && c != WEOF)
266 		*bp++ = c;
267 	*bp = '\0';
268 	reclen = bp-abp;
269 	return (c == WEOF && bp == abp ? NULL : abp);
270 }
271 
272 /*
273  * Special routine for multiple line records.
274  */
275 wchar_t *
multirecord(wchar_t * abp,int limit,FILE * fp)276 multirecord(wchar_t *abp, int limit, FILE *fp)
277 {
278 	wchar_t *bp;
279 	int c;
280 
281 	while ((c = getwc(fp)) == '\n')
282 		;
283 	bp = abp;
284 	if (c != WEOF) do {
285 		if (--limit == 0)
286 			break;
287 		if (c == '\n' && bp[-1] == '\n')
288 			break;
289 
290 		*bp++ = c;
291 	} while ((c = getwc(fp)) != WEOF);
292 	*bp = '\0';
293 	if (bp > abp)
294 		*--bp = '\0';
295 	reclen = bp-abp;
296 	return (c == WEOF && bp == abp ? NULL : abp);
297 }
298 
/*
 * Field extraction for the default separator: fields are runs of
 * characters delimited by spaces, tabs or newlines.
 *
 * On entry *endp is where scanning starts.  Leading separators are
 * skipped.  If a field is found its first character is returned and
 * *endp is moved to the first character after the field, which is
 * where the search for the next field begins.  If only separators
 * remain, NULL is returned and *endp is left alone.
 */
wchar_t *
whitefield(wchar_t **endp)
{
	wchar_t *field;

	field = *endp + wcsspn(*endp, L" \t\n");
	if (*field == '\0')
		return (NULL);
	*endp = field + wcscspn(field, L" \t\n");
	return (field);
}
323 
324 /*
325  * Look for fields separated by non-whitespace characters.
326  * Same calling sequence as whitefield().
327  */
328 wchar_t *
blackfield(wchar_t ** endp)329 blackfield(wchar_t **endp)
330 {
331 	wchar_t *cp;
332 	int endc;
333 
334 	endc = *(wchar_t *)varFS->n_string;
335 	cp = *endp;
336 	if (*cp == '\0')
337 		return (NULL);
338 	if (*cp == endc && fcount != 0)
339 		cp++;
340 	if ((*endp = wcschr(cp, endc)) == NULL)
341 		*endp = wcschr(cp, '\0');
342 	return (cp);
343 }
344 
345 /*
346  * This field separation routine uses the same logic as
347  * blackfield but uses a regular expression to separate
348  * the fields.
349  */
350 wchar_t *
refield(wchar_t ** endpp)351 refield(wchar_t **endpp)
352 {
353 	wchar_t *cp, *start;
354 	int flags;
355 	static	REGWMATCH_T match[10];
356 	int result;
357 
358 	cp = *endpp;
359 	if (*cp == '\0') {
360 		match[0].rm_ep = NULL;
361 		return (NULL);
362 	}
363 	if (match[0].rm_ep != NULL) {
364 		flags = REG_NOTBOL;
365 		cp = (wchar_t *)match[0].rm_ep;
366 	} else
367 		flags = 0;
368 	start = cp;
369 again:
370 	switch ((result = REGWEXEC(resep, cp, 10, match, flags))) {
371 	case REG_OK:
372 		/*
373 		 * Check to see if a null string was matched. If this is the
374 		 * case, then move the current pointer beyond this position.
375 		 */
376 		if (match[0].rm_sp == match[0].rm_ep) {
377 			cp = (wchar_t *)match[0].rm_sp;
378 			if (*cp++ != '\0') {
379 				goto again;
380 			}
381 		}
382 		*endpp = (wchar_t *)match[0].rm_sp;
383 		break;
384 	case REG_NOMATCH:
385 		match[0].rm_ep = NULL;
386 		*endpp = wcschr(cp, '\0');
387 		break;
388 	default:
389 		(void) REGWERROR(result, resep, (char *)linebuf,
390 		    sizeof (linebuf));
391 		awkerr(gettext("error splitting record: %s"),
392 		    (char *)linebuf);
393 	}
394 	return (start);
395 }
396 
397 /*
398  * do begin processing
399  */
400 void
dobegin()401 dobegin()
402 {
403 	/*
404 	 * Free all keyword nodes to save space.
405 	 */
406 	{
407 		NODE *np;
408 		int nbuck;
409 		NODE *knp;
410 
411 		np = NNULL;
412 		nbuck = 0;
413 		while ((knp = symwalk(&nbuck, &np)) != NNULL)
414 			if (knp->n_type == KEYWORD)
415 				delsymtab(knp, 1);
416 	}
417 	/*
418 	 * Copy ENVIRON array only if needed.
419 	 * Note the convoluted work to assign to an array
420 	 * and that the temporary nodes will be freed by
421 	 * freetemps() because we are "running".
422 	 */
423 	if (needenviron) {
424 		char **app;
425 		wchar_t *name, *value;
426 		NODE *namep = stringnode(_null, FSTATIC, 0);
427 		NODE *valuep = stringnode(_null, FSTATIC, 0);
428 		NODE *ENVsubname = node(INDEX, varENVIRON, namep);
429 		extern char **environ;
430 
431 		/* (void) m_setenv(); XXX what's this do? */
432 		for (app = environ; *app != NULL; /* empty */) {
433 			name = mbstowcsdup(*app++);
434 
435 			if ((value = wcschr(name, '=')) != NULL) {
436 				*value++ = '\0';
437 				valuep->n_strlen = wcslen(value);
438 				valuep->n_string = value;
439 			} else {
440 				valuep->n_strlen = 0;
441 				valuep->n_string = _null;
442 			}
443 			namep->n_strlen = wcslen(namep->n_string = name);
444 			(void) assign(ENVsubname, valuep);
445 			if (value != NULL)
446 				value[-1] = '=';
447 		}
448 	}
449 	phase = BEGIN;
450 	execute(yytree);
451 	phase = 0;
452 	if (npattern == 0)
453 		doend(0);
454 	/*
455 	 * Delete all pattern/action rules that are BEGIN at this
456 	 * point to save space.
457 	 * NOTE: this is not yet implemented.
458 	 */
459 }
460 
461 /*
462  * Do end processing.
463  * Exit with a status
464  */
465 void
doend(int s)466 doend(int s)
467 {
468 	OFILE *op;
469 
470 	if (phase != END) {
471 		phase = END;
472 		awkinfp = stdin;
473 		execute(yytree);
474 	}
475 	for (op = &ofiles[0]; op < &ofiles[NIOSTREAM]; op++)
476 		if (op->f_fp != FNULL)
477 			awkclose(op);
478 	if (awkinfp == stdin)
479 		(void) fflush(awkinfp);
480 	exit(s);
481 }
482 
483 /*
484  * Print statement.
485  */
486 void
s_print(NODE * np)487 s_print(NODE *np)
488 {
489 	FILE *fp;
490 	NODE *listp;
491 	char *ofs;
492 	int notfirst = 0;
493 
494 	fp = openfile(np->n_right, 1, 1);
495 	if (np->n_left == NNULL)
496 		(void) fputs(mbunconvert(linebuf), fp);
497 	else {
498 		ofs = wcstombsdup((isstring(varOFS->n_flags)) ?
499 		    (wchar_t *)varOFS->n_string :
500 		    (wchar_t *)exprstring(varOFS));
501 		listp = np->n_left;
502 		while ((np = getlist(&listp)) != NNULL) {
503 			if (notfirst++)
504 				(void) fputs(ofs, fp);
505 			np = exprreduce(np);
506 			if (np->n_flags & FINT)
507 				(void) fprintf(fp, "%lld", (INT)np->n_int);
508 			else if (isstring(np->n_flags))
509 				(void) fprintf(fp, "%S", np->n_string);
510 			else
511 				(void) fprintf(fp,
512 				    mbunconvert((wchar_t *)exprstring(varOFMT)),
513 				    (double)np->n_real);
514 		}
515 		free(ofs);
516 	}
517 	(void) fputs(mbunconvert(isstring(varORS->n_flags) ?
518 	    (wchar_t *)varORS->n_string : (wchar_t *)exprstring(varORS)),
519 	    fp);
520 	if (ferror(fp))
521 		awkperr("error on print");
522 }
523 
524 /*
525  * printf statement.
526  */
527 void
s_prf(NODE * np)528 s_prf(NODE *np)
529 {
530 	FILE *fp;
531 
532 	fp = openfile(np->n_right, 1, 1);
533 	(void) xprintf(np->n_left, fp, (wchar_t **)NULL);
534 	if (ferror(fp))
535 		awkperr("error on printf");
536 }
537 
538 /*
539  * Get next input line.
540  * Read into variable on left of node (or $0 if NULL).
541  * Read from pipe or file on right of node (or from regular
542  * input if NULL).
543  * This is an oddball inasmuch as it is a function
544  * but parses more like the keywords print, etc.
545  */
546 NODE *
f_getline(NODE * np)547 f_getline(NODE *np)
548 {
549 	wchar_t *cp;
550 	INT ret;
551 	FILE *fp;
552 	size_t len;
553 
554 	if (np->n_right == NULL && phase == END) {
555 		/* Pretend we've reached end of (the non-existant) file. */
556 		return (intnode(0));
557 	}
558 
559 	if ((fp = openfile(np->n_right, 0, 0)) != FNULL) {
560 		if (np->n_left == NNULL) {
561 			ret = nextrecord(linebuf, fp);
562 		} else {
563 			cp = emalloc(NLINE * sizeof (wchar_t));
564 			ret = nextrecord(cp, fp);
565 			np = np->n_left;
566 			len = wcslen(cp);
567 			cp = erealloc(cp, (len+1)*sizeof (wchar_t));
568 			if (isleaf(np->n_flags)) {
569 				if (np->n_type == PARM)
570 					np = np->n_next;
571 				strassign(np, cp, FNOALLOC, len);
572 			} else
573 				(void) assign(np, stringnode(cp,
574 				    FNOALLOC, len));
575 		}
576 	} else
577 		ret = -1;
578 	return (intnode(ret));
579 }
580 
581 /*
582  * Open a file.  Flag is non-zero for output.
583  */
584 static FILE *
openfile(NODE * np,int flag,int fatal)585 openfile(NODE *np, int flag, int fatal)
586 {
587 	OFILE *op;
588 	char *cp;
589 	FILE *fp;
590 	int type;
591 	OFILE *fop;
592 
593 	if (np == NNULL) {
594 		if (flag)
595 			return (stdout);
596 		if (awkinfp == FNULL)
597 			awkinfp = newfile();
598 		return (awkinfp);
599 	}
600 	if ((type = np->n_type) == APPEND)
601 		type = WRITE;
602 	cp = mbunconvert(exprstring(np->n_left));
603 	fop = (OFILE *)NULL;
604 	for (op = &ofiles[0]; op < &ofiles[NIOSTREAM]; op++) {
605 		if (op->f_fp == FNULL) {
606 			if (fop == (OFILE *)NULL)
607 				fop = op;
608 			continue;
609 		}
610 		if (op->f_mode == type && strcmp(op->f_name, cp) == 0)
611 			return (op->f_fp);
612 	}
613 	if (fop == (OFILE *)NULL)
614 		awkerr(gettext("too many open streams to %s onto \"%s\""),
615 		    flag ? "print/printf" : "getline", cp);
616 	(void) fflush(stdout);
617 	op = fop;
618 	if (cp[0] == '-' && cp[1] == '\0') {
619 		fp = flag ? stdout : stdin;
620 	} else {
621 		switch (np->n_type) {
622 		case WRITE:
623 			if ((fp = fopen(cp, w)) != FNULL) {
624 				if (isatty(fileno(fp)))
625 					(void) setvbuf(fp, 0, _IONBF, 0);
626 			}
627 			break;
628 
629 		case APPEND:
630 			fp = fopen(cp, "a");
631 			break;
632 
633 		case PIPE:
634 			fp = popen(cp, w);
635 			(void) setvbuf(fp, (char *)0, _IOLBF, 0);
636 			break;
637 
638 		case PIPESYM:
639 			fp = popen(cp, r);
640 			break;
641 
642 		case LT:
643 			fp = fopen(cp, r);
644 			break;
645 
646 		default:
647 			awkerr(interr, "openfile");
648 		}
649 	}
650 	if (fp != FNULL) {
651 		op->f_name = strdup(cp);
652 		op->f_fp = fp;
653 		op->f_mode = type;
654 	} else if (fatal) {
655 		awkperr(flag ? gettext("output file \"%s\"") :
656 		    gettext("input file \"%s\""), cp);
657 	}
658 	return (fp);
659 }
660 
661 /*
662  * Close a stream.
663  */
664 void
awkclose(OFILE * op)665 awkclose(OFILE *op)
666 {
667 	if (op->f_mode == PIPE || op->f_mode == PIPESYM)
668 		(void) pclose(op->f_fp);
669 	else if (fclose(op->f_fp) == EOF)
670 		awkperr("error on stream \"%s\"", op->f_name);
671 	op->f_fp = FNULL;
672 	free(op->f_name);
673 	op->f_name = NULL;
674 }
675 
676 /*
677  * Internal routine common to printf, sprintf.
678  * The node is that describing the arguments.
679  * Returns the number of characters written to file
680  * pointer `fp' or the length of the string return
681  * in cp. If cp is NULL then the file pointer is used. If
682  * cp points to a string pointer, a pointer to an allocated
683  * buffer will be returned in it.
684  */
685 size_t
xprintf(NODE * np,FILE * fp,wchar_t ** cp)686 xprintf(NODE *np, FILE *fp, wchar_t **cp)
687 {
688 	wchar_t *fmt;
689 	int c;
690 	wchar_t *bptr = (wchar_t *)NULL;
691 	char fmtbuf[40];
692 	size_t length = 0;
693 	char *ofmtp;
694 	NODE *fnp;
695 	wchar_t *fmtsave;
696 	int slen;
697 	int cplen;
698 
699 	fnp = getlist(&np);
700 	if (isleaf(fnp->n_flags) && fnp->n_type == PARM)
701 		fnp = fnp->n_next;
702 	if (isstring(fnp->n_flags)) {
703 		fmt = fnp->n_string;
704 		fmtsave = NULL;
705 	} else
706 		fmtsave = fmt = (wchar_t *)strsave(exprstring(fnp));
707 
708 	/*
709 	 * if a char * pointer has been passed in then allocate an initial
710 	 * buffer for the string. Make it LINE_MAX plus the length of
711 	 * the format string but do reallocs only based LINE_MAX.
712 	 */
713 	if (cp != (wchar_t **)NULL) {
714 		cplen = LINE_MAX;
715 		bptr = *cp = emalloc(sizeof (wchar_t) * (cplen + wcslen(fmt)));
716 	}
717 
718 	while ((c = *fmt++) != '\0') {
719 		if (c != '%') {
720 			if (bptr == (wchar_t *)NULL)
721 				awk_putwc(c, fp);
722 			else
723 				*bptr++ = c;
724 			++length;
725 			continue;
726 		}
727 		ofmtp = fmtbuf;
728 		*ofmtp++ = (char)c;
729 	nextc:
730 		switch (c = *fmt++) {
731 		case '%':
732 			if (bptr == (wchar_t *)NULL)
733 				awk_putwc(c, fp);
734 			else
735 				*bptr++ = c;
736 			++length;
737 			continue;
738 
739 		case 'c':
740 			*ofmtp++ = 'w';
741 			*ofmtp++ = 'c';
742 			*ofmtp = '\0';
743 			fnp = exprreduce(nextarg(&np));
744 			if (isnumber(fnp->n_flags))
745 				c = exprint(fnp);
746 			else
747 				c = *(wchar_t *)exprstring(fnp);
748 			if (bptr == (wchar_t *)NULL)
749 				length += fprintf(fp, fmtbuf, c);
750 			else {
751 				/*
752 				 * Make sure that the buffer is long
753 				 * enough to hold the formatted string.
754 				 */
755 				adjust_buf(cp, &cplen, &bptr, fmtbuf, 0);
756 				/*
757 				 * Since the call to adjust_buf() has already
758 				 * guaranteed that the buffer will be long
759 				 * enough, just pass in INT_MAX as
760 				 * the length.
761 				 */
762 				(void) wsprintf(bptr, (const char *) fmtbuf, c);
763 				bptr += (slen = wcslen(bptr));
764 				length += slen;
765 			}
766 			continue;
767 /* XXXX Is this bogus? Figure out what s & S mean - look at original code */
768 		case 's':
769 		case 'S':
770 			*ofmtp++ = 'w';
771 			*ofmtp++ = 's';
772 			*ofmtp = '\0';
773 			if (bptr == (wchar_t *)NULL)
774 				length += fprintf(fp, fmtbuf,
775 				    (wchar_t *)exprstring(nextarg(&np)));
776 			else {
777 				wchar_t *ts = exprstring(nextarg(&np));
778 
779 				adjust_buf(cp, &cplen, &bptr, fmtbuf,
780 				    wcslen(ts));
781 				(void) wsprintf(bptr, (const char *) fmtbuf,
782 				    ts);
783 				bptr += (slen = wcslen(bptr));
784 				length += slen;
785 			}
786 			continue;
787 
788 		case 'o':
789 		case 'O':
790 		case 'X':
791 		case 'x':
792 		case 'd':
793 		case 'i':
794 		case 'D':
795 		case 'U':
796 		case 'u':
797 			*ofmtp++ = 'l';
798 			*ofmtp++ = 'l'; /* now dealing with long longs */
799 			*ofmtp++ = c;
800 			*ofmtp = '\0';
801 			if (bptr == (wchar_t *)NULL)
802 				length += fprintf(fp, fmtbuf,
803 				    exprint(nextarg(&np)));
804 			else {
805 				adjust_buf(cp, &cplen, &bptr, fmtbuf, 0);
806 				(void) wsprintf(bptr, (const char *) fmtbuf,
807 				    exprint(nextarg(&np)));
808 				bptr += (slen = wcslen(bptr));
809 				length += slen;
810 			}
811 			continue;
812 
813 		case 'e':
814 		case 'E':
815 		case 'f':
816 		case 'F':
817 		case 'g':
818 		case 'G':
819 			*ofmtp++ = c;
820 			*ofmtp = '\0';
821 			if (bptr == (wchar_t *)NULL)
822 				length += fprintf(fp, fmtbuf,
823 				    exprreal(nextarg(&np)));
824 			else {
825 				adjust_buf(cp, &cplen, &bptr, fmtbuf, 0);
826 				(void) wsprintf(bptr, (const char *) fmtbuf,
827 				    exprreal(nextarg(&np)));
828 				bptr += (slen = wcslen(bptr));
829 				length += slen;
830 			}
831 			continue;
832 
833 		case 'l':
834 		case 'L':
835 			break;
836 
837 		case '*':
838 #ifdef M_BSD_SPRINTF
839 			sprintf(ofmtp, "%lld", (INT)exprint(nextarg(&np)));
840 			ofmtp += strlen(ofmtp);
841 #else
842 			ofmtp += sprintf(ofmtp, "%lld",
843 			    (INT)exprint(nextarg(&np)));
844 #endif
845 			break;
846 
847 		default:
848 			if (c == '\0') {
849 				*ofmtp = (wchar_t)NULL;
850 				(void) fprintf(fp, "%s", fmtbuf);
851 				continue;
852 			} else {
853 				*ofmtp++ = (wchar_t)c;
854 				break;
855 			}
856 		}
857 		goto nextc;
858 	}
859 	if (fmtsave != NULL)
860 		free(fmtsave);
861 	/*
862 	 * If printing to a character buffer then make sure it is
863 	 * null-terminated and only uses as much space as required.
864 	 */
865 	if (bptr != (wchar_t *)NULL) {
866 		*bptr = '\0';
867 		*cp = erealloc(*cp, (length+1) * sizeof (wchar_t));
868 	}
869 	return (length);
870 }
871 
872 /*
873  * Return the next argument from the list.
874  */
875 static NODE *
nextarg(NODE ** npp)876 nextarg(NODE **npp)
877 {
878 	NODE *np;
879 
880 	if ((np = getlist(npp)) == NNULL)
881 		awkerr(gettext("insufficient arguments to printf or sprintf"));
882 	if (isleaf(np->n_flags) && np->n_type == PARM)
883 		return (np->n_next);
884 	return (np);
885 }
886 
887 
888 /*
889  * Check and adjust the length of the buffer that has been passed in
890  * to make sure that it has space to accomodate the sequence string
891  * described in fmtstr. This routine is used by xprintf() to allow
892  * for arbitrarily long sprintf() strings.
893  *
894  * bp		= start of current buffer
895  * len		= length of current buffer
896  * offset	= offset in current buffer
897  * fmtstr	= format string to check
898  * slen		= size of string for %s formats
899  */
900 static void
adjust_buf(wchar_t ** bp,int * len,wchar_t ** offset,char * fmtstr,size_t slen)901 adjust_buf(wchar_t **bp, int *len, wchar_t **offset, char *fmtstr, size_t slen)
902 {
903 	int ioff;
904 	int width = 0;
905 	int prec = 0;
906 
907 	do {
908 		fmtstr++;
909 	} while (strchr("-+ 0", *fmtstr) != (char *)0 || *fmtstr == ('#'));
910 	if (*fmtstr != '*') {
911 		if (isdigit(*fmtstr)) {
912 			width = *fmtstr-'0';
913 			while (isdigit(*++fmtstr))
914 				width = width * 10 + *fmtstr - '0';
915 		}
916 	} else
917 		fmtstr++;
918 	if (*fmtstr == '.') {
919 		if (*++fmtstr != '*') {
920 			prec = *fmtstr-'0';
921 			while (isdigit(*++fmtstr))
922 				prec = prec * 10 + *fmtstr - '0';
923 		} else
924 			fmtstr++;
925 	}
926 	if (strchr("Llh", *fmtstr) != (char *)0)
927 		fmtstr++;
928 	if (*fmtstr == 'S') {
929 		if (width && slen < width)
930 			slen = width;
931 		if (prec && slen > prec)
932 			slen = prec;
933 		width = slen+1;
934 	} else
935 		if (width == 0)
936 			width = NUMSIZE;
937 
938 	if (*offset+ width > *bp+ *len) {
939 		ioff = *offset-*bp;
940 		*len += width+1;
941 		*bp = erealloc(*bp, *len * sizeof (wchar_t));
942 		*offset = *bp+ioff;
943 	}
944 }
945 
/*
 * Write one wide character to fp as a multibyte sequence.
 * A character that wctomb() cannot convert is reported through
 * awkerr().
 */
static void
awk_putwc(wchar_t c, FILE *fp)
{
	/* One more than MB_LEN_MAX so that the terminator always fits. */
	char mb[MB_LEN_MAX + 1];
	/*
	 * wctomb() returns an int and uses -1 for failure; kept in an
	 * unsigned type that -1 would look like a huge length and be
	 * used to index mb[].
	 */
	int mbl;

	mbl = wctomb(mb, c);
	if (mbl > 0) {
		mb[mbl] = '\0';
		(void) fputs(mb, fp);
	} else
		awkerr(gettext("invalid wide character %x"), (unsigned int)c);
}
958