xref: /illumos-gate/usr/src/cmd/awk_xpg4/awk3.c (revision b2be350e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * awk -- executor
23  *
24  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  *
27  * Copyright 1985, 1994 by Mortice Kern Systems Inc.  All rights reserved.
28  *
29  * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
30  */
31 
32 #include "awk.h"
33 #include "y.tab.h"
34 
35 static int	dohash(wchar_t *name);
36 static NODE	*arithmetic(NODE *np);
37 static NODE	*comparison(NODE *np);
38 static int	type_of(NODE *np);
39 static NODE	*lfield(INT fieldno, NODE *value);
40 static NODE	*rfield(INT fieldno);
41 static NODE	*userfunc(NODE *np);
42 static wchar_t	*lltoa(long long l);
43 static NODE	*exprconcat(NODE *np, int len);
44 static int	s_if(NODE *np);
45 static int	s_while(NODE *np);
46 static int	s_for(NODE *np);
47 static int	s_forin(NODE *np);
48 static void	setrefield(NODE *value);
49 static void	freetemps(void);
50 static int	action(NODE *np);
51 static wchar_t	*makeindex(NODE *np, wchar_t *array, int tag);
52 static int	exprtest(NODE *np);
53 
54 #define	regmatch(rp, s) REGWEXEC(rp, s, 0, (REGWMATCH_T*)NULL, 0)
55 
56 /*
57  * This code allows for integers to be stored in longs (type INT) and
58  * only promoted to double precision floating point numbers (type REAL)
59  * when overflow occurs during +, -, or * operations.  This is very
60  * non-portable if you desire such a speed optimisation.  You may wish
61  * to put something here for your system.  This "something" would likely
62  * include either an assembler "jump on overflow" instruction or a
63  * method to get traps on overflows from the hardware.
64  *
65  * This portable method works for ones and twos complement integer
66  * representations (which is, realistically) almost all machines.
67  */
68 #if	__TURBOC__
69 #define	addoverflow()	asm	jo	overflow
70 #define	suboverflow()	asm	jo	overflow
71 #else
72 /*
73  * These are portable to two's complement integer machines
74  */
75 #define	addoverflow()	if ((i1^i2) >= 0 && (iresult^i1) < 0) goto overflow
76 #define	suboverflow()	if ((i1^i2) < 0 && (iresult^i2) >= 0) goto overflow
77 #endif
78 #define	muloverflow()	if (((short)i1 != i1 || (short)i2 != i2) &&	\
79 			    ((i2 != 0 && iresult/i2 != i1) ||		\
80 			    (i1 == LONG_MIN && i2 == -1)))	  goto overflow
81 
82 static char	notarray[] = "scalar \"%s\" cannot be used as array";
83 static char	badarray[] = "array \"%s\" cannot be used as a scalar";
84 static char	varnotfunc[] = "variable \"%s\" cannot be used as a function";
85 static char	tmfld[] = "Too many fields (LIMIT: %d)";
86 static char	toolong[] = "Record too long (LIMIT: %d bytes)";
87 static char	divzero[] =  "division (/ or %%) by zero";
88 static char	toodeep[] = "too deeply nested for in loop (LIMIT: %d)";
89 
90 static wchar_t	numbuf[NUMSIZE];	/* Used to convert INTs to strings */
91 static wchar_t	*fields[NFIELD];	/* Cache of pointers into fieldbuf */
92 static wchar_t	*fieldbuf;		/* '\0' separated copy of linebuf */
93 static NODE	nodes[NSNODE];		/* Cache of quick access nodes */
94 static NODE	*fnodep = &nodes[0];
95 #define	NINDEXBUF	50
96 static wchar_t	indexbuf[NINDEXBUF];	/* Used for simple array indices */
97 static int	concflag;		/* In CONCAT operation (no frees) */
98 static NODE	*retval;		/* Last return value of a function */
99 
100 /*
101  * The following stack is used to store the next pointers for all nested
102  * for-in loops. This needs to be global so that delete can check to see
103  * if it is deleting the next node to be used by a loop.
104  */
105 #define	NFORINLOOP	10
106 static NODE*	forindex[NFORINLOOP];
107 static NODE**	next_forin = forindex;
108 
109 /*
110  * Assign a string directly to a NODE without creating an intermediate
111  * NODE.  This can handle either FALLOC, FSTATIC, FNOALLOC or FSENSE for
112  * "flags" argument.  Also the NODE "np" must be reduced to an lvalue
113  * (PARM nodes are not acceptable).
114  */
115 void
strassign(NODE * np,STRING string,int flags,size_t length)116 strassign(NODE *np, STRING string, int flags, size_t length)
117 {
118 	if (np->n_type == FUNC)
119 		awkerr(gettext("attempt to redefine builtin function"));
120 	else if (np->n_type == GETLINE || np->n_type == KEYWORD)
121 		awkerr(gettext("inadmissible use of reserved keyword"));
122 	if (np->n_flags & FSPECIAL) {
123 		(void) nassign(np, stringnode(string, flags, length));
124 		return;
125 	}
126 	if (isastring(np->n_flags))
127 		free((wchar_t *)np->n_string);
128 	np->n_strlen = length++;
129 	if (flags & FALLOC) {
130 		length *= sizeof (wchar_t);
131 		np->n_string = (STRING) emalloc(length);
132 		(void) memcpy((void *)np->n_string, string, length);
133 	} else {
134 		np->n_string = string;
135 		if (flags & FNOALLOC) {
136 			flags &= ~FNOALLOC;
137 			flags |= FALLOC;
138 		}
139 	}
140 	np->n_flags &= FSAVE;
141 	if (flags & FSENSE) {
142 		flags &= ~FSENSE;
143 		flags |= type_of(np);
144 	} else
145 		flags |= FSTRING;
146 	np->n_flags |= flags;
147 }
148 
149 /*
150  * Assign to a variable node.
151  * LHS must be a VAR type and RHS must be reduced by now.
152  * To speed certain operations up, check for
153  * certain things here and do special assignments.
154  */
155 NODE *
nassign(NODE * np,NODE * value)156 nassign(NODE *np, NODE *value)
157 {
158 	register wchar_t *cp;
159 	register int len;
160 
161 	/* short circuit assignment of a node to itself */
162 	if (np == value)
163 		return (np);
164 	if (np->n_flags & FSPECIAL) {
165 		if (np == varRS || np == varFS) {
166 			if (isastring(np->n_flags))
167 				free((void *)np->n_string);
168 			len = sizeof (wchar_t) * ((np->n_strlen =
169 				wcslen(cp = exprstring(value)))+1);
170 			np->n_string = emalloc(len);
171 			(void) memcpy((wchar_t *)np->n_string, cp, len);
172 			np->n_flags = FALLOC|FSTRING|FSPECIAL;
173 			if (np == varRS) {
174 				if (np->n_string[0] == '\n')
175 					awkrecord = defrecord;
176 				else if (np->n_string[0] == '\0')
177 					awkrecord = multirecord;
178 				else
179 					awkrecord = charrecord;
180 			} else if (np == varFS) {
181 				if (resep != (REGEXP)NULL) {
182 					REGWFREE(resep);
183 					resep = (REGEXP)NULL;
184 				}
185 				if (wcslen((wchar_t *)np->n_string) > 1)
186 					setrefield(np);
187 				else if (np->n_string[0] == ' ')
188 					awkfield = whitefield;
189 				else
190 					awkfield = blackfield;
191 			}
192 			return (np);
193 		}
194 	}
195 	if (isastring(np->n_flags))
196 		free((wchar_t *)np->n_string);
197 	if (isstring(value->n_flags)) {
198 		np->n_strlen = value->n_strlen;
199 		if (value->n_flags&FALLOC || value->n_string != _null) {
200 			len = (np->n_strlen+1) * sizeof (wchar_t);
201 			np->n_string = emalloc(len);
202 			(void) memcpy(np->n_string, value->n_string, len);
203 			np->n_flags &= FSAVE;
204 			np->n_flags |= value->n_flags & ~FSAVE;
205 			np->n_flags |= FALLOC;
206 			return (np);
207 		} else
208 			np->n_string = value->n_string;
209 	} else if (value->n_flags & FINT)
210 		np->n_int = value->n_int;
211 	else
212 		np->n_real = value->n_real;
213 	np->n_flags &= FSAVE;
214 	np->n_flags |= value->n_flags & ~FSAVE;
215 	return (np);
216 }
217 
218 /*
219  * Set regular expression FS value.
220  */
221 static void
setrefield(NODE * np)222 setrefield(NODE *np)
223 {
224 	static REGEXP re;
225 	int n;
226 
227 	if ((n = REGWCOMP(&re, np->n_string)) != REG_OK) {
228 		REGWERROR(n, &re, (char *)linebuf, sizeof (linebuf));
229 		awkerr(gettext("syntax error \"%s\" in /%s/\n"),
230 			(char *)linebuf, np->n_string);
231 	}
232 	resep = re;
233 	awkfield = refield;
234 }
235 
236 /*
237  * Assign to an l-value node.
238  */
239 NODE *
assign(NODE * left,NODE * right)240 assign(NODE *left, NODE *right)
241 {
242 	if (isleaf(right->n_flags)) {
243 		if (right->n_type == PARM)
244 			right = right->n_next;
245 	} else
246 		right = exprreduce(right);
247 top:
248 	switch (left->n_type) {
249 	case INDEX:
250 		left = exprreduce(left);
251 		/* FALLTHROUGH */
252 	case VAR:
253 		return (nassign(left, right));
254 
255 	case PARM:
256 		/*
257 		 * If it's a parameter then link to the actual value node and
258 		 * do the checks again.
259 		 */
260 		left = left->n_next;
261 		goto top;
262 
263 	case FIELD:
264 		return (lfield(exprint(left->n_left), right));
265 
266 	case CALLUFUNC:
267 	case UFUNC:
268 		awkerr(gettext("cannot assign to function \"%s\""),
269 		    left->n_name);
270 
271 	default:
272 		awkerr(gettext("lvalue required in assignment"));
273 	}
274 	/* NOTREACHED */
275 	return (0);
276 }
277 
278 /*
279  * Compiled tree non-terminal node.
280  */
281 NODE *
node(int type,NODE * left,NODE * right)282 node(int type, NODE *left, NODE *right)
283 {
284 	register NODE *np;
285 
286 	np = emptynode(type, 0);
287 	np->n_left = left;
288 	np->n_right = right;
289 	np->n_lineno = lineno;
290 	return (np);
291 }
292 
293 /*
294  * Create an integer node.
295  */
296 NODE *
intnode(INT i)297 intnode(INT i)
298 {
299 	register NODE *np;
300 
301 	np = emptynode(CONSTANT, 0);
302 	np->n_flags = FINT|FVINT;
303 	np->n_int = i;
304 	return (np);
305 }
306 
307 /*
308  * Create a real number node.
309  */
310 NODE *
realnode(REAL real)311 realnode(REAL real)
312 {
313 	register NODE *np;
314 
315 	np = emptynode(CONSTANT, 0);
316 	np->n_flags = FREAL|FVREAL;
317 	np->n_real = real;
318 	return (np);
319 }
320 
321 /*
322  * Make a node for a string.
323  */
324 NODE *
stringnode(STRING s,int how,size_t length)325 stringnode(STRING s, int how, size_t length)
326 {
327 	register NODE *np;
328 
329 	np = emptynode(CONSTANT, 0);
330 	np->n_strlen = length;
331 	if (how & FALLOC) {
332 		np->n_string = emalloc(length = (length+1) * sizeof (wchar_t));
333 		(void) memcpy(np->n_string, s, length);
334 	} else {
335 		np->n_string = s;
336 		if (how & FNOALLOC) {
337 			how &= ~FNOALLOC;
338 			how |= FALLOC;
339 		}
340 	}
341 	if (how & FSENSE) {
342 		np->n_flags = type_of(np);
343 		how &= ~FSENSE;
344 	} else
345 		np->n_flags = FSTRING;
346 	np->n_flags |= how;
347 	return (np);
348 }
349 
350 /*
351  * Save a copy of a string.
352  */
353 STRING
strsave(wchar_t * old)354 strsave(wchar_t *old)
355 {
356 	STRING new;
357 	register size_t len;
358 
359 	new = (STRING)emalloc(len = (wcslen(old)+1) * sizeof (wchar_t));
360 	(void) memcpy(new, old, len);
361 	return (new);
362 }
363 
364 /*
365  * Allocate an empty node of given type.
366  * String space for the node is given by `length'.
367  */
368 NODE *
emptynode(int type,size_t length)369 emptynode(int type, size_t length)
370 {
371 	register NODE *np;
372 
373 	if (length == 0 && running && fnodep < &nodes[NSNODE]) {
374 		np = fnodep++;
375 	} else {
376 		np = (NODE *)emalloc(sizeof (NODE) +
377 		    (length * sizeof (wchar_t)));
378 		if (running && type != VAR && type != ARRAY) {
379 			np->n_next = freelist;
380 			freelist = np;
381 		}
382 	}
383 	np->n_flags = FNONTOK;
384 	np->n_type = type;
385 	np->n_alink = NNULL;
386 
387 	return (np);
388 }
389 
390 /*
391  * Free a node.
392  */
393 void
freenode(NODE * np)394 freenode(NODE *np)
395 {
396 	if (isastring(np->n_flags))
397 		free((wchar_t *)np->n_string);
398 	else if (np->n_type == RE) {
399 		REGWFREE(np->n_regexp);
400 	}
401 	free((wchar_t *)np);
402 }
403 
404 /*
405  * Install a keyword of given `type'.
406  */
407 void
kinstall(LOCCHARP name,int type)408 kinstall(LOCCHARP name, int type)
409 {
410 	register NODE *np;
411 	register size_t l;
412 
413 	l = wcslen(name);
414 	np = emptynode(KEYWORD, l);
415 	np->n_keywtype = type;
416 	(void) memcpy(np->n_name, name, (l+1) * sizeof (wchar_t));
417 	addsymtab(np);
418 }
419 
420 /*
421  * Install built-in function.
422  */
423 NODE *
finstall(LOCCHARP name,FUNCTION func,int type)424 finstall(LOCCHARP name, FUNCTION func, int type)
425 {
426 	register NODE *np;
427 	register size_t l;
428 
429 	l = wcslen(name);
430 	np = emptynode(type, l);
431 	np->n_function = func;
432 	(void) memcpy(np->n_name, name, (l+1) * sizeof (wchar_t));
433 	addsymtab(np);
434 	return (np);
435 }
436 
437 /*
438  * Lookup an identifier.
439  * nocreate contains the following flag values:
440  *	1 if no creation of a new NODE,
441  *	0 if ok to create new NODE
442  */
443 NODE *
vlookup(wchar_t * name,int nocreate)444 vlookup(wchar_t *name, int nocreate)
445 {
446 	register ushort_t hash;
447 	register NODE *np;
448 
449 	np = symtab[hashbuck(hash = dohash((wchar_t *)name))];
450 	while (np != NNULL) {
451 		if (np->n_hash == hash && wcscmp(name, np->n_name) == 0)
452 			return (np);
453 		np = np->n_next;
454 	}
455 	if (nocreate) {
456 		np = NNULL;
457 	} else {
458 		np = emptynode(VAR, hash = wcslen(name));
459 		np->n_flags = FSTRING|FVINT;
460 		np->n_strlen = 0;
461 		np->n_string = _null;
462 		(void) memcpy(np->n_name, name,
463 			(hash+1) * sizeof (wchar_t));
464 		addsymtab(np);
465 	}
466 	return (np);
467 }
468 
469 /*
470  * Add a symbol to the table.
471  */
472 void
addsymtab(NODE * np)473 addsymtab(NODE *np)
474 {
475 	register NODE **spp;
476 
477 	np->n_hash = dohash((wchar_t *)np->n_name);
478 	spp = &symtab[hashbuck(np->n_hash)];
479 	np->n_next = *spp;
480 	*spp = np;
481 }
482 
483 /*
484  * Delete the given node from the symbol table.
485  * If fflag is non-zero, also free the node space.
486  * This routine must also check the stack of forin loop pointers. If
487  * we are deleting the next item to be used, then the pointer must be
488  * advanced.
489  */
490 void
delsymtab(NODE * np,int fflag)491 delsymtab(NODE *np, int fflag)
492 {
493 	register NODE *rnp;
494 	register NODE *prevp;
495 	register NODE **sptr;
496 	register ushort_t h;
497 
498 
499 
500 
501 
502 	h = hashbuck(np->n_hash);
503 	prevp = NNULL;
504 	for (rnp = symtab[h]; rnp != NNULL; rnp = rnp->n_next) {
505 		if (rnp == np) {
506 			/*
507 			 * check all of the for-in loop pointers
508 			 * to see if any need to be advanced because
509 			 * this element is being deleted.
510 			 */
511 			if (next_forin != forindex) {
512 				sptr = next_forin;
513 				do {
514 					if (*--sptr == rnp) {
515 						*sptr = rnp->n_next;
516 						break;
517 					}
518 				} while (sptr != forindex);
519 			}
520 			if (prevp == NNULL)
521 				symtab[h] = rnp->n_next; else
522 				prevp->n_next = rnp->n_next;
523 			if (fflag)
524 				freenode(rnp);
525 			break;
526 		}
527 		prevp = rnp;
528 	}
529 }
530 
531 /*
532  * Hashing function.
533  */
534 static int
dohash(wchar_t * name)535 dohash(wchar_t *name)
536 {
537 	register int hash = 0;
538 
539 	while (*name != '\0')
540 		hash += *name++;
541 	return (hash);
542 }
543 
544 /*
545  * Top level executor for an awk programme.
546  * This will be passed: pattern, action or a list of these.
547  * The former function to evaluate a pattern has been
548  * subsumed into this function for speed.
549  * Patterns are:
550  *	BEGIN,
551  *	END,
552  *	other expressions (including regular expressions)
553  */
554 void
execute(NODE * wp)555 execute(NODE *wp)
556 {
557 	register NODE *np;
558 	register int type;
559 	register NODE *tnp;
560 
561 	curnode = wp;
562 	if (phase != 0) {
563 		linebuf[0] = '\0';
564 		lbuflen = 0;
565 	}
566 	while (wp != NNULL) {
567 		if (wp->n_type == COMMA) {
568 			np = wp->n_left;
569 			wp = wp->n_right;
570 		} else {
571 			np = wp;
572 			wp = NNULL;
573 		}
574 		if (np->n_type != PACT)
575 			awkerr(interr, "PACT");
576 		/*
577 		 * Save the parent node and evaluate the pattern.
578 		 * If it evaluates to false (0) just continue
579 		 * to the next pattern/action (PACT) pair.
580 		 */
581 		tnp = np;
582 		np = np->n_left;
583 		if (np == NNULL) {
584 			if (phase != 0)
585 				continue;
586 		} else if (phase != 0) {
587 			if (np->n_type != phase)
588 				continue;
589 		} else if ((type = np->n_type) == BEGIN || type == END) {
590 			continue;
591 		} else if (type == COMMA) {
592 			/*
593 			 * The grammar only allows expressions
594 			 * to be separated by the ',' operator
595 			 * for range patterns.
596 			 */
597 			if (np->n_flags & FMATCH) {
598 				if (exprint(np->n_right) != 0)
599 					np->n_flags &= ~FMATCH;
600 			} else if (exprint(np->n_left) != 0) {
601 				if (exprint(np->n_right) == 0)
602 					np->n_flags |= FMATCH;
603 			} else
604 				continue;
605 		} else if (exprint(np) == 0)
606 			continue;
607 		np = tnp;
608 		if (action(np->n_right)) {
609 			loopexit = 0;
610 			break;
611 		}
612 	}
613 	if (freelist != NNULL)
614 		freetemps();
615 }
616 
617 /*
618  * Free all temporary nodes.
619  */
620 static void
freetemps()621 freetemps()
622 {
623 	register NODE *np, *nnp;
624 
625 	if (concflag)
626 		return;
627 	for (np = &nodes[0]; np < fnodep; np++) {
628 		if (isastring(np->n_flags)) {
629 			free((wchar_t *)np->n_string);
630 		} else if (np->n_type == RE) {
631 			REGWFREE(np->n_regexp);
632 		}
633 	}
634 	fnodep = &nodes[0];
635 	for (np = freelist; np != NNULL; np = nnp) {
636 		nnp = np->n_next;
637 		freenode(np);
638 	}
639 	freelist = NNULL;
640 }
641 
642 /*
643  * Do the given action.
644  * Actions are statements or expressions.
645  */
646 static int
action(NODE * wp)647 action(NODE *wp)
648 {
649 	register NODE *np;
650 	register int act = 0;
651 	register NODE *l;
652 
653 	while (wp != NNULL) {
654 		if (wp->n_type == COMMA) {
655 			np = wp->n_left;
656 			wp = wp->n_right;
657 		} else {
658 			np = wp;
659 			wp = NNULL;
660 		}
661 		if (freelist != NNULL)
662 			freetemps();
663 		curnode = np;
664 		/*
665 		 * Don't change order of these cases without
666 		 * changing order in awk.y declarations.
667 		 * The order is optimised.
668 		 */
669 		switch (np->n_type) {
670 		case ASG:
671 			(void) assign(np->n_left, np->n_right);
672 			continue;
673 
674 		case PRINT:
675 			s_print(np);
676 			continue;
677 
678 		case PRINTF:
679 			s_prf(np);
680 			continue;
681 
682 		case EXIT:
683 			if (np->n_left != NNULL)
684 				act = (int)exprint(np->n_left); else
685 				act = 0;
686 			doend(act);
687 			/* NOTREACHED */
688 
689 		case RETURN:
690 			if (slevel == 0)
691 				awkerr(gettext("return outside of a function"));
692 			np = np->n_left != NNULL
693 			    ? exprreduce(np->n_left)
694 			    : const0;
695 			retval = emptynode(CONSTANT, 0);
696 			retval->n_flags = FINT;
697 			(void) nassign(retval, np);
698 			return (RETURN);
699 
700 		case NEXT:
701 			loopexit = NEXT;
702 		/* FALLTHROUGH */
703 		case BREAK:
704 		case CONTINUE:
705 			return (np->n_type);
706 
707 		case DELETE:
708 			if ((l = np->n_left)->n_type == PARM) {
709 				l = l->n_next;
710 				if (!(l->n_flags & FLARRAY))
711 					l = l->n_alink;
712 			}
713 			switch (l->n_type) {
714 			case ARRAY:
715 				delarray(l);
716 				break;
717 
718 			case INDEX:
719 				if ((np = l->n_left)->n_type == PARM) {
720 					np = np->n_next;
721 					if (!(np->n_flags & FLARRAY))
722 						np = np->n_alink;
723 				}
724 				/*
725 				 * get pointer to the node for this array
726 				 * element using the hash key.
727 				 */
728 				l = exprreduce(l);
729 				/*
730 				 * now search linearly from the beginning of
731 				 * the list to find the element before the
732 				 * one being deleted. This must be done
733 				 * because arrays are singley-linked.
734 				 */
735 				while (np != NNULL) {
736 					if (np->n_alink == l) {
737 						np->n_alink = l->n_alink;
738 						break;
739 					}
740 					np = np->n_alink;
741 				}
742 				delsymtab(l, 1);
743 				break;
744 
745 			case VAR:
746 				if (isstring(l->n_flags) &&
747 				    l->n_string == _null)
748 					break;
749 				/* FALLTHROUGH */
750 			default:
751 				awkerr(gettext(
752 				    "may delete only array element or array"));
753 				break;
754 			}
755 			continue;
756 
757 		case WHILE:
758 		case DO:
759 			if ((act = s_while(np)) != 0)
760 				break;
761 			continue;
762 
763 		case FOR:
764 			if ((act = s_for(np)) != 0)
765 				break;
766 			continue;
767 
768 		case FORIN:
769 			if ((act = s_forin(np)) != 0)
770 				break;
771 			continue;
772 
773 		case IF:
774 			if ((act = s_if(np)) != 0)
775 				break;
776 			continue;
777 
778 		default:
779 			(void) exprreduce(np);
780 			if (loopexit != 0) {
781 				act = loopexit;
782 				break;
783 			}
784 			continue;
785 		}
786 		return (act);
787 	}
788 	return (0);
789 }
790 
791 /*
792  * Delete an entire array
793  */
794 void
delarray(NODE * np)795 delarray(NODE *np)
796 {
797 	register NODE *nnp;
798 
799 	nnp = np->n_alink;
800 	np->n_alink = NNULL;
801 	while (nnp != NNULL) {
802 		np = nnp->n_alink;
803 		delsymtab(nnp, 1);
804 		nnp = np;
805 	}
806 }
807 
808 /*
809  * Return the INT value of an expression.
810  */
811 INT
exprint(NODE * np)812 exprint(NODE *np)
813 {
814 	if (isleaf(np->n_flags)) {
815 		if (np->n_type == PARM)
816 			np = np->n_next;
817 		goto leaf;
818 	}
819 	np = exprreduce(np);
820 	switch (np->n_type) {
821 	case CONSTANT:
822 	case VAR:
823 	leaf:
824 		if (np->n_flags & FINT)
825 			return (np->n_int);
826 		if (np->n_flags & FREAL)
827 			return ((INT)np->n_real);
828 		return ((INT)wcstoll(np->n_string, NULL, 10));
829 
830 	default:
831 		awkerr(interr, "exprint");
832 	}
833 	/* NOTREACHED */
834 	return (0);
835 }
836 
837 /*
838  * Return a real number from an expression tree.
839  */
840 REAL
exprreal(NODE * np)841 exprreal(NODE *np)
842 {
843 	if (loopexit)
844 		return ((REAL)loopexit);
845 	if (isleaf(np->n_flags)) {
846 		if (np->n_type == PARM)
847 			np = np->n_next;
848 		goto leaf;
849 	}
850 	np = exprreduce(np);
851 	switch (np->n_type) {
852 	case CONSTANT:
853 	case VAR:
854 	leaf:
855 		if (np->n_flags & FREAL)
856 			return (np->n_real);
857 		if (np->n_flags & FINT)
858 			return ((REAL)np->n_int);
859 		return ((REAL)wcstod((wchar_t *)np->n_string, (wchar_t **)0));
860 
861 	default:
862 		awkerr(interr, "exprreal");
863 	}
864 	/* NOTREACHED */
865 	return ((REAL)0);
866 }
867 
868 /*
869  * Return a string from an expression tree.
870  */
871 STRING
exprstring(NODE * np)872 exprstring(NODE *np)
873 {
874 	if (isleaf(np->n_flags)) {
875 		if (np->n_type == PARM)
876 			np = np->n_next;
877 		goto leaf;
878 	}
879 	np = exprreduce(np);
880 	switch (np->n_type) {
881 	case CONSTANT:
882 	case VAR:
883 	leaf:
884 		if (isstring(np->n_flags))
885 			return (np->n_string);
886 		if (np->n_flags & FINT)
887 			return (STRING)lltoa((long long)np->n_int);
888 		{
889 			char *tmp;
890 			(void) wsprintf(numbuf,
891 		(const char *) (tmp = wcstombsdup(exprstring(varCONVFMT))),
892 				(double)np->n_real);
893 			if (tmp != NULL)
894 				free(tmp);
895 		}
896 		return ((STRING)numbuf);
897 
898 	default:
899 		awkerr(interr, "exprstring");
900 	}
901 	/* NOTREACHED */
902 	return (0);
903 }
904 
905 /*
906  * Convert number to string.
907  */
908 static wchar_t *
lltoa(long long l)909 lltoa(long long l)
910 {
911 	register wchar_t *p = &numbuf[NUMSIZE];
912 	register int s;
913 	register int neg;
914 	static wchar_t zero[] = M_MB_L("0");
915 
916 	if (l == 0)
917 		return (zero);
918 	*--p = '\0';
919 	if (l < 0)
920 		neg = 1, l = -l; else
921 		neg = 0;
922 	if ((s = (short)l) == l) {
923 		while (s != 0) {
924 			*--p = s%10 + '0';
925 			s /= 10;
926 		}
927 	} else {
928 		while (l != 0) {
929 			*--p = l%10 + '0';
930 			l /= 10;
931 		}
932 	}
933 	if (neg)
934 		*--p = '-';
935 	return (wcscpy(numbuf, p));
936 }
937 
938 /*
939  * Return pointer to node with concatenation of operands of CONCAT node.
940  * In the interest of speed, a left recursive tree of CONCAT nodes
941  * is handled with a single malloc.  The accumulated lengths of the
942  * right operands are passed down recursive invocations of this
943  * routine, which allocates a large enough string when the left
944  * operand is not a CONCAT node.
945  */
946 static NODE *
exprconcat(NODE * np,int len)947 exprconcat(NODE *np, int len)
948 {
949 	/* we KNOW (np->n_type==CONCAT) */
950 	register NODE *lnp = np->n_left;
951 	register NODE *rnp = np->n_right;
952 	register STRING	rsp;
953 	int rlen;
954 	size_t llen;
955 	wchar_t *cp;
956 	wchar_t rnumbuf[NUMSIZE];
957 
958 	if (isleaf(rnp->n_flags) && rnp->n_type == PARM)
959 		rnp = rnp->n_next;
960 	if (isstring(rnp->n_flags)) {
961 		rsp = rnp->n_string;
962 		rlen = rnp->n_strlen;
963 	} else
964 		rlen = wcslen((wchar_t *)(rsp = exprstring(rnp)));
965 	if (rsp == numbuf) {	/* static, so save a copy */
966 		(void) memcpy(rnumbuf, (wchar_t *)rsp,
967 			(rlen+1) * sizeof (wchar_t));
968 		rsp = rnumbuf;
969 	}
970 	len += rlen;
971 	if (lnp->n_type == CONCAT) {
972 		lnp = exprconcat(lnp, len);
973 		cp = lnp->n_string;
974 		llen = lnp->n_strlen;
975 	} else {
976 		register STRING	lsp;
977 
978 		if (isleaf(lnp->n_flags) && lnp->n_type == PARM)
979 			lnp = lnp->n_next;
980 		if (isstring(lnp->n_flags)) {
981 			lsp = lnp->n_string;
982 			llen = lnp->n_strlen;
983 		} else
984 			llen = wcslen((wchar_t *)(lsp = exprstring(lnp)));
985 		cp = emalloc((llen+len+1) * sizeof (wchar_t));
986 		(void) memcpy(cp, (wchar_t *)lsp, llen * sizeof (wchar_t));
987 		lnp = stringnode(cp, FNOALLOC, llen);
988 	}
989 	(void) memcpy(cp+llen, (wchar_t *)rsp, (rlen+1) * sizeof (wchar_t));
990 	lnp->n_strlen += rlen;
991 	return (lnp);
992 }
993 
994 /*
995  * Reduce an expression to a terminal node.
996  */
997 NODE *
exprreduce(NODE * np)998 exprreduce(NODE *np)
999 {
1000 	register wchar_t *cp;
1001 	NODE *tnp;
1002 	register int temp;
1003 	register int t;
1004 	register int  tag;
1005 	register wchar_t *fname;
1006 	register wchar_t *aname;
1007 
1008 	/*
1009 	 * a var or constant is a leaf-node (no further reduction required)
1010 	 * so return immediately.
1011 	 */
1012 	if ((t = np->n_type) == VAR || t == CONSTANT)
1013 		return (np);
1014 	/*
1015 	 * If it's a parameter then it is probably a leaf node but it
1016 	 * might be an array so we check.. If it is an array, then signal
1017 	 * an error as an array by itself cannot be used in this context.
1018 	 */
1019 	if (t == PARM)
1020 		if ((np = np->n_next)->n_type == ARRAY)
1021 			awkerr(badarray, np->n_name);
1022 		else
1023 			return (np);
1024 	/*
1025 	 * All the rest are non-leaf nodes.
1026 	 */
1027 	curnode = np;
1028 	switch (t) {
1029 	case CALLUFUNC:
1030 		return (userfunc(np));
1031 
1032 	case FIELD:
1033 		return (rfield(exprint(np->n_left)));
1034 
1035 	case IN:
1036 	case INDEX:
1037 		tag = 0;
1038 		temp = np->n_type;
1039 		tnp = np->n_left;
1040 		np = np->n_right;
1041 		/* initially formal var name and array key name are the same */
1042 		fname = aname = tnp->n_name;
1043 		if (tnp->n_type == PARM) {
1044 			tnp = tnp->n_next;
1045 			tag = tnp->n_scope;
1046 			if (!(tnp->n_flags & FLARRAY)) {
1047 				tnp = tnp->n_alink;
1048 			}
1049 			aname = tnp->n_name;
1050 		}
1051 		if (tnp->n_type != ARRAY) {
1052 			if (!isstring(tnp->n_flags) || tnp->n_string != _null)
1053 				awkerr(notarray, fname);
1054 			else {
1055 				/* promotion to array */
1056 				promote(tnp);
1057 				if (tnp->n_alink != NNULL) {
1058 					tag = tnp->n_scope;
1059 					if (!(tnp->n_flags & FLARRAY))
1060 						tnp = tnp->n_alink;
1061 					aname = tnp->n_name;
1062 				} else {
1063 					tag = 0;
1064 					if (tnp->n_flags & FLARRAY)
1065 						tag = tnp->n_scope;
1066 				}
1067 			}
1068 		}
1069 		if (tnp == varSYMTAB) {
1070 			if (np == NNULL || np->n_type == COMMA)
1071 				awkerr(gettext(
1072 				    "SYMTAB must have exactly one index"));
1073 			np = vlook(exprstring(np));
1074 			return (np);
1075 		}
1076 		cp = makeindex(np, aname, tag);
1077 		if (temp == INDEX) {
1078 			np = vlook(cp);
1079 			if (!(np->n_flags & FINARRAY)) {
1080 				np->n_alink = tnp->n_alink;
1081 				tnp->n_alink = np;
1082 				np->n_flags |= FINARRAY;
1083 			}
1084 		} else
1085 			np = vlookup(cp, 1) == NNULL ? const0 : const1;
1086 		if (cp != indexbuf)
1087 			free(cp);
1088 		return (np);
1089 
1090 	case CONCAT:
1091 		++concflag;
1092 		np = exprconcat(np, 0);
1093 		--concflag;
1094 		return (np);
1095 
1096 	case NOT:
1097 		return (intnode(exprtest(np->n_left) == 0 ? (INT)1 : (INT)0));
1098 
1099 	case AND:
1100 		return ((exprtest(np->n_left) != 0 &&
1101 		    exprtest(np->n_right) != 0) ? const1 : const0);
1102 
1103 	case OR:
1104 		return ((exprtest(np->n_left) != 0 ||
1105 		    exprtest(np->n_right) != 0) ? const1 : const0);
1106 
1107 	case EXP:
1108 		{
1109 			double f1, f2;
1110 
1111 			/*
1112 			 * evaluate expressions in proper order before
1113 			 * calling pow().
1114 			 * Can't guarantee that compiler will do this
1115 			 * correctly for us if we put them inline.
1116 			 */
1117 			f1 = (double)exprreal(np->n_left);
1118 			f2 = (double)exprreal(np->n_right);
1119 			return (realnode((REAL)pow(f1, f2)));
1120 		}
1121 
1122 	case QUEST:
1123 		if (np->n_right->n_type != COLON)
1124 			awkerr(interr, "?:");
1125 		if (exprtest(np->n_left))
1126 			np = np->n_right->n_left; else
1127 			np = np->n_right->n_right;
1128 		return (exprreduce(np));
1129 
1130 	case EQ:
1131 	case NE:
1132 	case GE:
1133 	case LE:
1134 	case GT:
1135 	case LT:
1136 		return (comparison(np));
1137 
1138 	case ADD:
1139 	case SUB:
1140 	case MUL:
1141 	case DIV:
1142 	case REM:
1143 		return (arithmetic(np));
1144 
1145 	case DEC:
1146 		inc_oper->n_type = SUB;
1147 		goto do_inc_op;
1148 	case INC:
1149 		inc_oper->n_type = ADD;
1150 do_inc_op:
1151 		if ((np = np->n_left)->n_type == INDEX)
1152 			np = exprreduce(np);
1153 		if (np->n_flags & FREAL)
1154 			tnp = realnode(np->n_real);
1155 		else
1156 			tnp = intnode(exprint(np));
1157 		inc_oper->n_left = np;
1158 		(void) assign(np, inc_oper);
1159 		return (tnp);
1160 
1161 	case PRE_DEC:
1162 		inc_oper->n_type = SUB;
1163 		goto do_pinc_op;
1164 	case PRE_INC:
1165 		inc_oper->n_type = ADD;
1166 do_pinc_op:
1167 		if ((np = np->n_left)->n_type == INDEX)
1168 			np = exprreduce(np);
1169 		inc_oper->n_left = np;
1170 		return (assign(np, inc_oper));
1171 
1172 	case AADD:
1173 		asn_oper->n_type = ADD;
1174 		goto do_asn_op;
1175 	case ASUB:
1176 		asn_oper->n_type = SUB;
1177 		goto do_asn_op;
1178 	case AMUL:
1179 		asn_oper->n_type = MUL;
1180 		goto do_asn_op;
1181 	case ADIV:
1182 		asn_oper->n_type = DIV;
1183 		goto do_asn_op;
1184 	case AREM:
1185 		asn_oper->n_type = REM;
1186 		goto do_asn_op;
1187 	case AEXP:
1188 		asn_oper->n_type = EXP;
1189 do_asn_op:
1190 		asn_oper->n_right = np->n_right;
1191 		if ((np = np->n_left)->n_type == INDEX)
1192 			np = exprreduce(np);
1193 		asn_oper->n_left = np;
1194 		return (assign(np, asn_oper));
1195 
1196 
1197 	case GETLINE:
1198 		return (f_getline(np));
1199 
1200 	case CALLFUNC:
1201 		return ((*np->n_left->n_function)(np->n_right));
1202 
1203 	case RE:
1204 		if (regmatch(np->n_regexp, linebuf) == REG_OK)
1205 			return (const1);
1206 		return (const0);
1207 
1208 	case TILDE:
1209 		cp = exprstring(np->n_left);
1210 		if (regmatch(getregexp(np->n_right), cp) == REG_OK)
1211 			return (const1);
1212 		return (const0);
1213 
1214 	case NRE:
1215 		cp = exprstring(np->n_left);
1216 		if (regmatch(getregexp(np->n_right), cp) != REG_OK)
1217 			return (const1);
1218 		return (const0);
1219 
1220 	case ASG:
1221 		return (assign(np->n_left, np->n_right));
1222 
1223 	case ARRAY:
1224 		awkerr(badarray, np->n_name);
1225 
1226 	case UFUNC:
1227 		awkerr(varnotfunc, np->n_name);
1228 
1229 	default:
1230 		awkerr(gettext("panic: exprreduce(%d)"), t);
1231 		/* NOTREACHED */
1232 	}
1233 	return (0);
1234 }
1235 
1236 /*
1237  * Do arithmetic operators.
1238  */
1239 static NODE *
arithmetic(NODE * np)1240 arithmetic(NODE *np)
1241 {
1242 	register NODE *left, *right;
1243 	int type;
1244 	register INT i1, i2;
1245 	register INT iresult;
1246 	register REAL r1, r2;
1247 
1248 	left = exprreduce(np->n_left);
1249 	if (isreal(left->n_flags) ||
1250 	    (isstring(left->n_flags) && (type_of(left)&FVREAL))) {
1251 		type = FREAL;
1252 		r1 = exprreal(left);
1253 		r2 = exprreal(np->n_right);
1254 	} else {
1255 		i1 = exprint(left);
1256 		right = exprreduce(np->n_right);
1257 		if (isreal(right->n_flags) ||
1258 		    (isstring(right->n_flags) && (type_of(right)&FVREAL))) {
1259 
1260 			type = FREAL;
1261 			r1 = i1;
1262 			r2 = exprreal(right);
1263 		} else {
1264 			type = FINT;
1265 			i2 = exprint(right);
1266 		}
1267 	}
1268 reswitch:
1269 	switch (np->n_type) {
1270 	case ADD:
1271 		if (type == FINT) {
1272 			iresult = i1 + i2;
1273 			addoverflow();
1274 		} else
1275 			r1 += r2;
1276 		break;
1277 
1278 	/*
1279 	 * Strategically placed between ADD and SUB
1280 	 * so "jo" branches will reach on 80*86
1281 	 */
1282 	overflow:
1283 		r1 = i1;
1284 		r2 = i2;
1285 		type = FREAL;
1286 		goto reswitch;
1287 
1288 	case SUB:
1289 		if (type == FINT) {
1290 			iresult = i1 - i2;
1291 			suboverflow();
1292 		} else
1293 			r1 -= r2;
1294 		break;
1295 
1296 	case MUL:
1297 		if (type == FINT) {
1298 			iresult = i1 * i2;
1299 			muloverflow();
1300 		} else
1301 			r1 *= r2;
1302 		break;
1303 
1304 	case DIV:
1305 		if (type == FINT) {
1306 			r1 = i1;
1307 			r2 = i2;
1308 			type = FREAL;
1309 		}
1310 		if (r2 == 0.0)
1311 			awkerr(divzero);
1312 		r1 /= r2;
1313 		break;
1314 
1315 	case REM:
1316 		if (type == FINT) {
1317 			if (i2 == 0)
1318 				awkerr(divzero);
1319 			iresult = i1 % i2;
1320 		} else {
1321 			double fmod(double x, double y);
1322 
1323 			errno = 0;
1324 			r1 = fmod(r1, r2);
1325 			if (errno == EDOM)
1326 				awkerr(divzero);
1327 		}
1328 		break;
1329 	}
1330 	return (type == FINT ? intnode(iresult) : realnode(r1));
1331 }
1332 
1333 /*
1334  * Do comparison operators.
1335  */
1336 static NODE *
comparison(NODE * np)1337 comparison(NODE *np)
1338 {
1339 	register NODE *left, *right;
1340 	register int cmp;
1341 	int tl, tr;
1342 	register REAL r1, r2;
1343 	register INT i1, i2;
1344 
1345 	left = np->n_left;
1346 	if (isleaf(left->n_flags)) {
1347 		if (left->n_type == PARM)
1348 			left = left->n_next;
1349 	} else
1350 		left = exprreduce(left);
1351 	tl = left->n_flags;
1352 	right = np->n_right;
1353 	if (isleaf(right->n_flags)) {
1354 		if (right->n_type == PARM)
1355 			right = right->n_next;
1356 	} else {
1357 		++concflag;
1358 		right = exprreduce(right);
1359 		--concflag;
1360 	}
1361 	tr = right->n_flags;
1362 	/*
1363 	 * Posix mandates semantics for the comparison operators that
1364 	 * are incompatible with traditional AWK behaviour. If the following
1365 	 * define is true then awk will use the traditional behaviour.
1366 	 * if it's false, then AWK will use the POSIX-mandated behaviour.
1367 	 */
1368 #define	TRADITIONAL 0
1369 #if TRADITIONAL
1370 	if (!isnumber(tl) || !isnumber(tr)) {
1371 		cmp = wcscoll((wchar_t *)exprstring(left),
1372 		    (wchar_t *)exprstring(right));
1373 	} else if (isreal(tl) || isreal(tr)) {
1374 		r1 = exprreal(left);
1375 		r2 = exprreal(right);
1376 		if (r1 < r2)
1377 			cmp = -1;
1378 		else if (r1 > r2)
1379 			cmp = 1;
1380 		else
1381 			cmp = 0;
1382 	} else {
1383 		i1 = exprint(left);
1384 		i2 = exprint(right);
1385 		if (i1 < i2)
1386 			cmp = -1;
1387 		else if (i1 > i2)
1388 			cmp = 1;
1389 		else
1390 			cmp = 0;
1391 	}
1392 #else
1393 	if (!isnumber(tl) && !isnumber(tr)) {
1394 do_strcmp:
1395 		cmp = wcscoll((wchar_t *)exprstring(left),
1396 		    (wchar_t *)exprstring(right));
1397 	} else {
1398 		if (isstring(tl))
1399 			tl = type_of(left);
1400 		if (isstring(tr))
1401 			tr = type_of(right);
1402 		if (!isnumber(tl) || !isnumber(tr))
1403 			goto do_strcmp;
1404 		if (isreal(tl) || isreal(tr)) {
1405 			r1 = exprreal(left);
1406 			r2 = exprreal(right);
1407 			if (r1 < r2)
1408 				cmp = -1;
1409 			else if (r1 > r2)
1410 				cmp = 1;
1411 			else
1412 				cmp = 0;
1413 		} else {
1414 			i1 = exprint(left);
1415 			i2 = exprint(right);
1416 			if (i1 < i2)
1417 				cmp = -1;
1418 			else if (i1 > i2)
1419 				cmp = 1;
1420 			else
1421 				cmp = 0;
1422 		}
1423 	}
1424 #endif
1425 	switch (np->n_type) {
1426 	case EQ:
1427 		return (cmp == 0 ? const1 : const0);
1428 
1429 	case  NE:
1430 		return (cmp != 0 ? const1 : const0);
1431 
1432 	case GE:
1433 		return (cmp >= 0 ? const1 : const0);
1434 
1435 	case LE:
1436 		return (cmp <= 0 ? const1 : const0);
1437 
1438 	case GT:
1439 		return (cmp > 0 ? const1 : const0);
1440 
1441 	case LT:
1442 		return (cmp < 0 ? const1 : const0);
1443 
1444 	default:
1445 		awkerr(interr, "comparison");
1446 	}
1447 	/* NOTREACHED */
1448 	return (0);
1449 }
1450 
1451 /*
1452  * Return the type of a constant that is a string.
1453  * The node must be a FSTRING type and the return value
1454  * will possibly have FVINT or FVREAL or'ed in.
1455  */
1456 static int
type_of(NODE * np)1457 type_of(NODE *np)
1458 {
1459 	wchar_t *cp;
1460 	int somedigits = 0;
1461 	int seene = 0;
1462 	int seenradix = 0;
1463 	int seensign = 0;
1464 	int digitsaftere = 0;
1465 
1466 	cp = (wchar_t *)np->n_string;
1467 	if (*cp == '\0')
1468 		return (FSTRING|FVINT);
1469 	while (iswspace(*cp))
1470 		cp++;
1471 	if (*cp == '-' || *cp == '+')
1472 		cp++;
1473 	while (*cp != '\0') {
1474 		switch (*cp) {
1475 		case '0':
1476 		case '1':
1477 		case '2':
1478 		case '3':
1479 		case '4':
1480 		case '5':
1481 		case '6':
1482 		case '7':
1483 		case '8':
1484 		case '9':
1485 			if (seene)
1486 				digitsaftere = 1;
1487 			somedigits++;
1488 			break;
1489 
1490 		case 'E':
1491 		case 'e':
1492 			if (seene || !somedigits)
1493 				return (FSTRING);
1494 			seene = 1;
1495 			break;
1496 
1497 		case '+':
1498 		case '-':
1499 			if (seensign || !seene || digitsaftere)
1500 				return (FSTRING);
1501 			seensign = 1;
1502 			break;
1503 
1504 		default:
1505 			if (*cp == radixpoint) {
1506 				if (seenradix || seene || (!somedigits &&
1507 				    !iswdigit(*++cp)))
1508 					return (FSTRING);
1509 			} else
1510 				return (FSTRING);
1511 			seenradix = 1;
1512 		}
1513 		cp++;
1514 	}
1515 	if (somedigits == 0)
1516 		return (FSTRING);
1517 	if (somedigits >= MAXDIGINT || seenradix || seene) {
1518 		if (seensign && !digitsaftere)
1519 			return (FSTRING);
1520 		else
1521 			return (FSTRING|FVREAL);
1522 	} else
1523 		return (FSTRING|FVINT);
1524 }
1525 
1526 /*
1527  * Return a field rvalue.
1528  */
1529 static NODE *
rfield(INT fieldno)1530 rfield(INT fieldno)
1531 {
1532 	register wchar_t *cp;
1533 
1534 	if (fieldno == 0)
1535 		return (stringnode(linebuf, FSTATIC|FSENSE, lbuflen));
1536 	if (!splitdone)
1537 		fieldsplit();
1538 	if (fieldno > nfield || fieldno < 0)
1539 		return (stringnode(_null, FSTATIC, 0));
1540 	cp = fields[fieldno-1];
1541 	return (stringnode(cp, FSTATIC|FSENSE, wcslen(cp)));
1542 }
1543 
1544 /*
1545  * Split linebuf into fields.  Done only once
1546  * per input record (maximum).
1547  */
1548 void
fieldsplit()1549 fieldsplit()
1550 {
1551 	register wchar_t *ip, *op;
1552 	register int n;
1553 	wchar_t *ep;
1554 
1555 	if (fieldbuf == NULL)
1556 		fieldbuf = emalloc(NLINE * sizeof (wchar_t));
1557 	fcount = 0;
1558 	ep = linebuf;
1559 	op = fieldbuf;
1560 	while ((ip = (*awkfield)(&ep)) != NULL) {
1561 		fields[fcount++] = op;
1562 		if (fcount > NFIELD)
1563 			awkerr(tmfld, NFIELD);
1564 		n = ep-ip;
1565 		(void) memcpy(op, ip, n * sizeof (wchar_t));
1566 		op += n;
1567 		*op++ = '\0';
1568 	}
1569 	if (varNF->n_flags & FINT)
1570 		varNF->n_int = fcount;
1571 	else {
1572 		constant->n_int = fcount;
1573 		(void) nassign(varNF, constant);
1574 	}
1575 	nfield = fcount;
1576 	splitdone++;
1577 }
1578 
1579 /*
1580  * Assign to a field as an lvalue.
1581  * Return the unevaluated node as one doesn't always need it
1582  * evaluated in an assignment.
1583  */
1584 static NODE *
lfield(INT fieldno,NODE * np)1585 lfield(INT fieldno, NODE *np)
1586 {
1587 	register wchar_t *cp;
1588 	register wchar_t *op;
1589 	register wchar_t *sep;
1590 	register int i;
1591 	register wchar_t *newval;
1592 	register int seplen;
1593 	register int newlen;
1594 
1595 	newlen = wcslen(newval = (wchar_t *)exprstring(np));
1596 	if (fieldno == 0) {
1597 		splitdone = 0;
1598 		(void) memcpy(linebuf, newval, (newlen+1) * sizeof (wchar_t));
1599 		lbuflen = newlen;
1600 		fieldsplit();
1601 	} else {
1602 		seplen = wcslen(sep = (wchar_t *)exprstring(varOFS));
1603 		if (!splitdone)
1604 			fieldsplit();
1605 		if (--fieldno < nfield &&
1606 		    (newlen <= wcslen(fields[fieldno]))) {
1607 			(void) memcpy(fields[fieldno], newval,
1608 				(newlen+1) * sizeof (wchar_t));
1609 		} else {
1610 			register wchar_t *buf;
1611 
1612 			buf = fieldbuf;
1613 			fieldbuf = emalloc(NLINE * sizeof (wchar_t));
1614 			if (fieldno >= nfield) {
1615 				if (fieldno >= NFIELD)
1616 					awkerr(tmfld, NFIELD);
1617 				while (nfield < fieldno)
1618 					fields[nfield++] = _null;
1619 				++nfield;
1620 			}
1621 			fields[fieldno] = newval;
1622 			op = fieldbuf;
1623 			for (i = 0; i < nfield; i++) {
1624 				newlen = wcslen(cp = fields[i])+1;
1625 				fields[i] = op;
1626 				if (op+newlen >= fieldbuf+NLINE)
1627 					awkerr(toolong, NLINE);
1628 				(void) memcpy(op, cp,
1629 				    newlen * sizeof (wchar_t));
1630 				op += newlen;
1631 			}
1632 			free(buf);
1633 		}
1634 		/*
1635 		 * Reconstruct $0
1636 		 */
1637 		op = linebuf;
1638 		i = 0;
1639 		while (i < nfield) {
1640 			newlen = wcslen(cp = fields[i++]);
1641 			(void) memcpy(op, cp, newlen * sizeof (wchar_t));
1642 			op += newlen;
1643 			if (i < nfield) {
1644 				(void) memcpy(op, sep,
1645 					seplen * sizeof (wchar_t));
1646 				op += seplen;
1647 			}
1648 			if (op >= &linebuf[NLINE])
1649 				awkerr(toolong, NLINE);
1650 		}
1651 		*op = '\0';
1652 		lbuflen = op-linebuf;
1653 		if (varNF->n_flags & FINT)
1654 			varNF->n_int = nfield;
1655 		else {
1656 			constant->n_int = nfield;
1657 			(void) nassign(varNF, constant);
1658 		}
1659 	}
1660 	return (np);
1661 }
1662 
1663 /*
1664  * Do a user function.
1665  * Each formal parameter must:
1666  *	have the actual parameter assigned to it (call by value),
1667  *	have a pointer to an array put into it (call by reference),
1668  *	and be made undefined (extra formal parameters)
1669  */
1670 static NODE *
userfunc(NODE * np)1671 userfunc(NODE *np)
1672 {
1673 	register NODE *temp;
1674 	NODE *fnp;
1675 
1676 	if ((fnp = np->n_left) == NNULL)
1677 		awkerr(gettext("impossible function call"));
1678 	if (fnp->n_type != UFUNC)
1679 		awkerr(varnotfunc, fnp->n_name);
1680 
1681 #ifndef M_STKCHK
1682 	if (slevel >= NRECUR)
1683 		awkerr(gettext("function \"%S\" nesting level > %u"),
1684 		    fnp->n_name, NRECUR);
1685 #else
1686 	if (!M_STKCHK)
1687 		awkerr(gettext("function \"%s\" nesting level too deep"),
1688 		    fnp->n_name);
1689 #endif
1690 
1691 	fnp = fnp->n_ufunc;
1692 	{
1693 		register NODE *formal;
1694 		register NODE *actual;
1695 		NODE *formlist, *actlist, *templist, *temptail;
1696 
1697 		templist = temptail = NNULL;
1698 		actlist = np->n_right;
1699 		formlist = fnp->n_left;
1700 		/*
1701 		 * pass through formal list, setting up a list
1702 		 * (on templist) containing temps for the values
1703 		 * of the actuals.
1704 		 * If the actual list runs out before the formal
1705 		 * list, assign 'constundef' as the value
1706 		 */
1707 		while ((formal = getlist(&formlist)) != NNULL) {
1708 			register NODE *array;
1709 			register int t;
1710 			register size_t len;
1711 			register int scope_tag;
1712 
1713 			actual = getlist(&actlist);
1714 			if (actual == NNULL) {
1715 				actual = constundef;
1716 				scope_tag = slevel+1;
1717 			} else
1718 				scope_tag = 0;
1719 			array = actual;
1720 			switch (actual->n_type) {
1721 			case ARRAY:
1722 				t = ARRAY;
1723 				scope_tag = 0;
1724 				break;
1725 
1726 			case PARM:
1727 				array = actual = actual->n_next;
1728 				t = actual->n_type;
1729 				scope_tag = actual->n_scope;
1730 				if (!(actual->n_flags & FLARRAY))
1731 					array = actual->n_alink;
1732 				break;
1733 
1734 			default:
1735 				t = VAR;
1736 				break;
1737 			}
1738 			temp = emptynode(t, len = wcslen(formal->n_name));
1739 			(void) memcpy(temp->n_name, formal->n_name,
1740 			    (len+1) * sizeof (wchar_t));
1741 			temp->n_flags = FSTRING|FVINT;
1742 			temp->n_string = _null;
1743 			temp->n_strlen = 0;
1744 			if (t == VAR)
1745 				(void) assign(temp, actual);
1746 			if (t != ARRAY)
1747 				temp->n_flags |= FLARRAY;
1748 			temp->n_scope = scope_tag;
1749 			/*
1750 			 * link to actual parameter in case of promotion to
1751 			 * array
1752 			 */
1753 			if (actual != constundef)
1754 				temp->n_alink = actual;
1755 			/*
1756 			 * Build the templist
1757 			 */
1758 			if (templist != NNULL) {
1759 				temptail->n_next = temp;
1760 				temptail = temp;
1761 			} else
1762 				templist = temptail = temp;
1763 			temp->n_next = NNULL;
1764 			if (actual->n_type == CONSTANT)
1765 				temp->n_alink = temp;
1766 			else
1767 				temp->n_alink = array;
1768 		}
1769 		/*
1770 		 * Bind results of the evaluation of actuals to formals.
1771 		 */
1772 		formlist = fnp->n_left;
1773 		while (templist != NNULL) {
1774 			temp = templist;
1775 			templist = temp->n_next;
1776 			formal = getlist(&formlist);
1777 			temp->n_next = formal->n_next;
1778 			formal->n_next = temp;
1779 
1780 
1781 
1782 
1783 
1784 
1785 
1786 
1787 		}
1788 	}
1789 	{
1790 		register NODE *savenode = curnode;
1791 
1792 		++slevel;
1793 		if (action(fnp->n_right) == RETURN)
1794 			np = retval; else
1795 			np = const0;
1796 		curnode = savenode;
1797 	}
1798 	{
1799 		register NODE *formal;
1800 		NODE *formlist;
1801 
1802 		formlist = fnp->n_left;
1803 		while ((formal = getlist(&formlist)) != NNULL) {
1804 			temp = formal->n_next;
1805 			formal->n_next = temp->n_next;
1806 			/* if node is a local array, free the elements */
1807 			if (temp->n_type == ARRAY && (temp->n_scope == slevel))
1808 				delarray(temp);
1809 			freenode(temp);
1810 		}
1811 	}
1812 	--slevel;
1813 	return (np);
1814 }
1815 
1816 /*
1817  * Get the regular expression from an expression tree.
1818  */
1819 REGEXP
getregexp(NODE * np)1820 getregexp(NODE *np)
1821 {
1822 	if (np->n_type == RE)
1823 		return (np->n_regexp);
1824 	np = renode((wchar_t *)exprstring(np));
1825 	return (np->n_regexp);
1826 }
1827 
1828 /*
1829  * Get the next element from a list.
1830  */
1831 NODE *
getlist(NODE ** npp)1832 getlist(NODE **npp)
1833 {
1834 	register NODE *np;
1835 
1836 	if ((np = *npp) == NNULL)
1837 		return (np);
1838 	if (np->n_type == COMMA) {
1839 		*npp = np->n_right;
1840 		return (np->n_left);
1841 	} else {
1842 		*npp = NNULL;
1843 		return (np);
1844 	}
1845 }
1846 
1847 /*
1848  * if statement.
1849  */
1850 static int
s_if(NODE * np)1851 s_if(NODE *np)
1852 {
1853 	register NODE *xp;
1854 	register int test;
1855 
1856 	test = exprtest(np->n_left);
1857 	xp = np->n_right;
1858 	if (xp->n_type != ELSE)
1859 		awkerr(interr, "if/else");
1860 	if (test)
1861 		xp = xp->n_left;
1862 	else
1863 		xp = xp->n_right;
1864 	return (action(xp));
1865 }
1866 
1867 /*
1868  * while and do{}while statements.
1869  */
1870 static int
s_while(NODE * np)1871 s_while(NODE *np)
1872 {
1873 	register int act = 0;
1874 
1875 	if (np->n_type == DO)
1876 		goto dowhile;
1877 	for (;;) {
1878 		if (exprtest(np->n_left) == 0)
1879 			break;
1880 	dowhile:
1881 		if ((act = action(np->n_right)) != 0) {
1882 			switch (act) {
1883 			case BREAK:
1884 				act = 0;
1885 				break;
1886 
1887 			case CONTINUE:
1888 				act = 0;
1889 				continue;
1890 			}
1891 			break;
1892 		}
1893 	}
1894 	return (act);
1895 }
1896 
1897 /*
1898  * for statement.
1899  */
1900 static int
s_for(NODE * np)1901 s_for(NODE *np)
1902 {
1903 	register NODE *testnp, *incnp, *initnp;
1904 	register int act = 0;
1905 	NODE *listp;
1906 
1907 	listp = np->n_left;
1908 	initnp = getlist(&listp);
1909 	testnp = getlist(&listp);
1910 	incnp = getlist(&listp);
1911 	if (initnp != NNULL)
1912 		(void) exprreduce(initnp);
1913 	for (;;) {
1914 		if (exprtest(testnp) == 0)
1915 			break;
1916 		if ((act = action(np->n_right)) != 0) {
1917 			switch (act) {
1918 			case BREAK:
1919 				act = 0;
1920 				break;
1921 
1922 			case CONTINUE:
1923 				act = 0;
1924 				goto clabel;
1925 			}
1926 			break;
1927 		}
1928 	clabel:
1929 		if (incnp != NNULL)
1930 			(void) exprreduce(incnp);
1931 	}
1932 	return (act);
1933 }
1934 
1935 /*
1936  * for variable in array statement.
1937  */
1938 static int
s_forin(NODE * np)1939 s_forin(NODE *np)
1940 {
1941 	register NODE *left;
1942 	register int act = 0;
1943 	register NODE *var;
1944 	register NODE **nnp;
1945 	register NODE *statement;
1946 	register int issymtab = 0;
1947 	wchar_t *index;
1948 	register int alen;
1949 	int nbuck;
1950 
1951 	left = np->n_left;
1952 	statement = np->n_right;
1953 	if (left->n_type != IN)
1954 		awkerr(interr, "for (var in array)");
1955 	if ((var = left->n_left)->n_type == PARM)
1956 		var = var->n_next;
1957 	np = left->n_right;
1958 	if (np->n_type == PARM) {
1959 		np = np->n_next;
1960 		if (!(np->n_flags & FLARRAY))
1961 			np = np->n_alink;
1962 	}
1963 	if (np == varSYMTAB) {
1964 		issymtab++;
1965 		np = NNULL;
1966 		nbuck = 0;
1967 	} else {
1968 		/*
1969 		 * At this point if the node is not actually an array
1970 		 * check to see if it has already been established as
1971 		 * a scalar. If it is a scalar then flag an error. If
1972 		 * not then promote the object to an array type.
1973 		 */
1974 		if (np->n_type != ARRAY) {
1975 			if (!isstring(np->n_flags) || np->n_string != _null)
1976 				awkerr(notarray, np->n_name);
1977 			else {
1978 				/* promotion to array */
1979 				promote(np);
1980 				if (np->n_alink != NNULL)
1981 					if (!(np->n_flags & FLARRAY))
1982 						np = np->n_alink;
1983 			}
1984 		}
1985 		/*
1986 		 * Set up a pointer to the first node in the array list.
1987 		 * Save this pointer on the delete stack. This information
1988 		 * is used by the delete function to advance any pointers
1989 		 * that might be pointing at a node which has been deleted.
1990 		 * See the delsymtab() function for more information. Note
1991 		 * that if the a_link field is nil, then just return 0 since
1992 		 * this array has no elements yet.
1993 		 */
1994 		if ((*(nnp = next_forin) = np->n_alink) == 0)
1995 			return (0);
1996 		if (++next_forin > &forindex[NFORINLOOP])
1997 			awkerr(toodeep, NFORINLOOP);
1998 		/*
1999 		 * array elements have names of the form
2000 		 *	<name>]<index> (global arrays)
2001 		 * or
2002 		 *	<name>[<scope>]<index> (local arrays)
2003 		 * We need to know the offset of the index portion of the
2004 		 * name string in order to place it in the index variable so
2005 		 * we look for the ']'. This is calculated here and then
2006 		 * used below.
2007 		 */
2008 		for (alen = 0; (*nnp)->n_name[alen++] != ']'; )
2009 			if ((*nnp)->n_name[alen] == '\0')
2010 				awkerr(interr, "for: invalid array");
2011 	}
2012 	for (;;) {
2013 		if (issymtab) {
2014 			if ((left = symwalk(&nbuck, &np)) == NNULL)
2015 				break;
2016 			index = left->n_name;
2017 		} else {
2018 			if ((np = *nnp) == NNULL)
2019 				break;
2020 			index = np->n_name+alen;
2021 			*nnp = np->n_alink;
2022 		}
2023 		strassign(var, index, FSTATIC, wcslen(index));
2024 		if ((act = action(statement)) != 0) {
2025 			switch (act) {
2026 			case BREAK:
2027 				act = 0;
2028 				break;
2029 
2030 			case CONTINUE:
2031 				act = 0;
2032 				continue;
2033 			}
2034 			break;
2035 		}
2036 	}
2037 	next_forin--;
2038 	return (act);
2039 }
2040 
2041 /*
2042  * Walk the symbol table using the same algorithm as arraynode.
2043  */
2044 NODE *
symwalk(int * buckp,NODE ** npp)2045 symwalk(int *buckp, NODE **npp)
2046 {
2047 	register NODE *np;
2048 
2049 	np = *npp;
2050 	for (;;) {
2051 		while (np == NNULL) {
2052 			if (*buckp >= NBUCKET)
2053 				return (*npp = NNULL);
2054 			np = symtab[(*buckp)++];
2055 		}
2056 		if (np->n_type == VAR &&
2057 		    (!isstring(np->n_flags) || np->n_string != _null)) {
2058 			*npp = np->n_next;
2059 			return (np);
2060 		}
2061 		np = np->n_next;
2062 	}
2063 	/* NOTREACHED */
2064 }
2065 
2066 /*
2067  * Test the result of an expression.
2068  */
2069 static int
exprtest(NODE * np)2070 exprtest(NODE *np)
2071 {
2072 	register int t;
2073 
2074 	if (np == NNULL)
2075 		return (1);
2076 	if (freelist != NNULL)
2077 		freetemps();
2078 	np = exprreduce(np);
2079 	if (isint(t = np->n_flags)) {
2080 		if (isstring(t))
2081 			return (exprint(np) != 0);
2082 		return (np->n_int != 0);
2083 	}
2084 	if (isreal(t)) {
2085 		REAL rval;
2086 
2087 		rval = isstring(t) ? exprreal(np) : np->n_real;
2088 		return (rval != 0.0);
2089 	}
2090 	return (*(wchar_t *)exprstring(np) != '\0');
2091 }
2092 
2093 /*
2094  * Return malloc'ed space that holds the given name "[" scope "]" index ...
2095  * concatenated string.
2096  * The node (np) is the list of indices and 'array' is the array name.
2097  */
2098 static wchar_t *
makeindex(NODE * np,wchar_t * array,int tag)2099 makeindex(NODE *np, wchar_t *array, int tag)
2100 {
2101 	static wchar_t tags[sizeof (int)];
2102 	static wchar_t tag_chars[] = M_MB_L("0123456789ABCDEF");
2103 	register wchar_t *cp;
2104 	register NODE *index;
2105 	register uint_t n;
2106 	register int len;
2107 	register wchar_t *indstr;
2108 	register wchar_t *sep;
2109 	register int seplen;
2110 	register int taglen;
2111 
2112 
2113 	/*
2114 	 * calculate and create the tag string
2115 	 */
2116 	for (taglen = 0; tag; tag >>= 4)
2117 		tags[taglen++] = tag_chars[tag & 0xf];
2118 	/*
2119 	 * Special (normal) case: only one index.
2120 	 */
2121 	if (np->n_type != COMMA) {
2122 		wchar_t *ocp;
2123 		size_t i;
2124 
2125 		if (isleaf(np->n_flags) && np->n_type == PARM)
2126 			np = np->n_next;
2127 		if (isstring(np->n_flags)) {
2128 			indstr = np->n_string;
2129 			len = np->n_strlen;
2130 		} else {
2131 			indstr = exprstring(np);
2132 			len = wcslen(indstr);
2133 		}
2134 		i = (n = wcslen(array)) + len + 3 + taglen;
2135 		if (i < NINDEXBUF)
2136 			ocp = indexbuf;
2137 		else
2138 			ocp = emalloc(i * sizeof (wchar_t));
2139 		(void) memcpy(ocp, array, n * sizeof (wchar_t));
2140 		cp = ocp+n;
2141 		if (taglen) {
2142 			*cp++ = '[';
2143 			while (taglen)
2144 				*cp++ = tags[--taglen];
2145 		}
2146 		*cp++ = ']';
2147 		(void) memcpy(cp, indstr, (len+1) * sizeof (wchar_t));
2148 
2149 		return (ocp);
2150 	}
2151 	n = 0;
2152 	seplen = wcslen(sep = (wchar_t *)exprstring(varSUBSEP));
2153 	while ((index = getlist(&np)) != NNULL) {
2154 		indstr = exprstring(index);
2155 		len = wcslen(indstr);
2156 		if (n == 0) {
2157 			cp = emalloc(sizeof (wchar_t) * ((n = wcslen(array)) +
2158 				len + 3 + taglen));
2159 			(void) memcpy(cp, array, n * sizeof (wchar_t));
2160 			if (taglen) {
2161 				cp[n++] = '[';
2162 				while (taglen)
2163 					cp[n++] = tags[--taglen];
2164 			}
2165 			cp[n++] = ']';
2166 		} else {
2167 			cp = erealloc(cp, (n+len+seplen+1) * sizeof (wchar_t));
2168 			(void) memcpy(cp+n, sep, seplen * sizeof (wchar_t));
2169 			n += seplen;
2170 		}
2171 		(void) memcpy(cp+n, indstr, (len+1) * sizeof (wchar_t));
2172 		n += len;
2173 	}
2174 	return (cp);
2175 }
2176 
2177 
2178 /*
2179  * Promote a node to an array. In the simplest case, just set the
2180  * node type field to ARRAY. The more complicated case involves walking
2181  * a list of variables that haven't been determined yet as scalar or array.
2182  * This routine plays with the pointers to avoid recursion.
2183  */
2184 void
promote(NODE * n)2185 promote(NODE *n)
2186 {
2187 	register NODE *prev = NNULL;
2188 	register NODE *next;
2189 
2190 	/*
2191 	 * walk down the variable chain, reversing the pointers and
2192 	 * setting each node to type array.
2193 	 */
2194 	while ((n->n_flags & FLARRAY) && (n->n_alink != n)) {
2195 		n->n_type = ARRAY;
2196 		next = n->n_alink;
2197 		n->n_alink = prev;
2198 		prev = n;
2199 		n = next;
2200 	}
2201 
2202 	/*
2203 	 * If the final entity on the chain is a local variable, then
2204 	 * reset it's alink field to NNULL - normally it points back
2205 	 * to itself - this is used in other parts of the code to
2206 	 * reduce the number of conditionals when handling locals.
2207 	 */
2208 	n->n_type = ARRAY;
2209 	if (n->n_flags & FLARRAY)
2210 		n->n_alink = NNULL;
2211 
2212 	/*
2213 	 * Now walk back up the list setting the alink to point to
2214 	 * the last entry in the chain and clear the 'local array'
2215 	 * flag.
2216 	 */
2217 	while (prev != NNULL) {
2218 		prev->n_flags &= ~FLARRAY;
2219 		next = prev->n_alink;
2220 		prev->n_alink = n;
2221 		prev = next;
2222 	}
2223 }
2224