xref: /illumos-gate/usr/src/cmd/awk/lex.c (revision 3ee4fc2a)
/*
 * Copyright (C) Lucent Technologies 1997
 * All Rights Reserved
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that the above copyright notice appear in all
 * copies and that both that the copyright notice and this
 * permission notice and warranty disclaimer appear in supporting
 * documentation, and that the name Lucent Technologies or any of
 * its entities not be used in advertising or publicity pertaining
 * to distribution of the software without specific, written prior
 * permission.
 *
 * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
 * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 * THIS SOFTWARE.
 */
24*3ee4fc2aSCody Peter Mello 
25*3ee4fc2aSCody Peter Mello #include <stdio.h>
26*3ee4fc2aSCody Peter Mello #include <stdlib.h>
27*3ee4fc2aSCody Peter Mello #include <string.h>
28*3ee4fc2aSCody Peter Mello #include <ctype.h>
29*3ee4fc2aSCody Peter Mello #include "awk.h"
30*3ee4fc2aSCody Peter Mello #include "y.tab.h"
31*3ee4fc2aSCody Peter Mello 
32*3ee4fc2aSCody Peter Mello extern YYSTYPE	yylval;
33*3ee4fc2aSCody Peter Mello extern int	infunc;
34*3ee4fc2aSCody Peter Mello 
35*3ee4fc2aSCody Peter Mello off_t	lineno	= 1;
36*3ee4fc2aSCody Peter Mello int	bracecnt = 0;
37*3ee4fc2aSCody Peter Mello int	brackcnt  = 0;
38*3ee4fc2aSCody Peter Mello int	parencnt = 0;
39*3ee4fc2aSCody Peter Mello 
/*
 * One reserved word of the language.  word() looks a name up in the
 * keywords[] table, stores `sub' in yylval.i and returns `type'.
 */
struct Keyword {
	const char	*word;	/* spelling, compared with strcmp() */
	int		sub;	/* value placed in yylval.i on a match */
	int		type;	/* token type returned to the parser */
};
typedef struct Keyword Keyword;
45*3ee4fc2aSCody Peter Mello 
46*3ee4fc2aSCody Peter Mello Keyword keywords[] = {	/* keep sorted: binary searched */
47*3ee4fc2aSCody Peter Mello 	{ "BEGIN",	XBEGIN,		XBEGIN },
48*3ee4fc2aSCody Peter Mello 	{ "END",	XEND,		XEND },
49*3ee4fc2aSCody Peter Mello 	{ "NF",		VARNF,		VARNF },
50*3ee4fc2aSCody Peter Mello 	{ "atan2",	FATAN,		BLTIN },
51*3ee4fc2aSCody Peter Mello 	{ "break",	BREAK,		BREAK },
52*3ee4fc2aSCody Peter Mello 	{ "close",	CLOSE,		CLOSE },
53*3ee4fc2aSCody Peter Mello 	{ "continue",	CONTINUE,	CONTINUE },
54*3ee4fc2aSCody Peter Mello 	{ "cos",	FCOS,		BLTIN },
55*3ee4fc2aSCody Peter Mello 	{ "delete",	DELETE,		DELETE },
56*3ee4fc2aSCody Peter Mello 	{ "do",		DO,		DO },
57*3ee4fc2aSCody Peter Mello 	{ "else",	ELSE,		ELSE },
58*3ee4fc2aSCody Peter Mello 	{ "exit",	EXIT,		EXIT },
59*3ee4fc2aSCody Peter Mello 	{ "exp",	FEXP,		BLTIN },
60*3ee4fc2aSCody Peter Mello 	{ "fflush",	FFLUSH,		BLTIN },
61*3ee4fc2aSCody Peter Mello 	{ "for",	FOR,		FOR },
62*3ee4fc2aSCody Peter Mello 	{ "func",	FUNC,		FUNC },
63*3ee4fc2aSCody Peter Mello 	{ "function",	FUNC,		FUNC },
64*3ee4fc2aSCody Peter Mello 	{ "getline",	GETLINE,	GETLINE },
65*3ee4fc2aSCody Peter Mello 	{ "gsub",	GSUB,		GSUB },
66*3ee4fc2aSCody Peter Mello 	{ "if",		IF,		IF },
67*3ee4fc2aSCody Peter Mello 	{ "in",		IN,		IN },
68*3ee4fc2aSCody Peter Mello 	{ "index",	INDEX,		INDEX },
69*3ee4fc2aSCody Peter Mello 	{ "int",	FINT,		BLTIN },
70*3ee4fc2aSCody Peter Mello 	{ "length",	FLENGTH,	BLTIN },
71*3ee4fc2aSCody Peter Mello 	{ "log",	FLOG,		BLTIN },
72*3ee4fc2aSCody Peter Mello 	{ "match",	MATCHFCN,	MATCHFCN },
73*3ee4fc2aSCody Peter Mello 	{ "next",	NEXT,		NEXT },
74*3ee4fc2aSCody Peter Mello 	{ "nextfile",	NEXTFILE,	NEXTFILE },
75*3ee4fc2aSCody Peter Mello 	{ "print",	PRINT,		PRINT },
76*3ee4fc2aSCody Peter Mello 	{ "printf",	PRINTF,		PRINTF },
77*3ee4fc2aSCody Peter Mello 	{ "rand",	FRAND,		BLTIN },
78*3ee4fc2aSCody Peter Mello 	{ "return",	RETURN,		RETURN },
79*3ee4fc2aSCody Peter Mello 	{ "sin",	FSIN,		BLTIN },
80*3ee4fc2aSCody Peter Mello 	{ "split",	SPLIT,		SPLIT },
81*3ee4fc2aSCody Peter Mello 	{ "sprintf",	SPRINTF,	SPRINTF },
82*3ee4fc2aSCody Peter Mello 	{ "sqrt",	FSQRT,		BLTIN },
83*3ee4fc2aSCody Peter Mello 	{ "srand",	FSRAND,		BLTIN },
84*3ee4fc2aSCody Peter Mello 	{ "sub",	SUB,		SUB },
85*3ee4fc2aSCody Peter Mello 	{ "substr",	SUBSTR,		SUBSTR },
86*3ee4fc2aSCody Peter Mello 	{ "system",	FSYSTEM,	BLTIN },
87*3ee4fc2aSCody Peter Mello 	{ "tolower",	FTOLOWER,	BLTIN },
88*3ee4fc2aSCody Peter Mello 	{ "toupper",	FTOUPPER,	BLTIN },
89*3ee4fc2aSCody Peter Mello 	{ "while",	WHILE,		WHILE },
90*3ee4fc2aSCody Peter Mello };
91*3ee4fc2aSCody Peter Mello 
/*
 * Return token x from the enclosing function, first printing its name
 * (via tokname()) when dbg is set.
 *
 * The body is wrapped in do { } while (0) so that "RET(x);" is exactly
 * one statement; with a bare { } block the trailing semicolon would
 * break an unbraced "if (...) RET(a); else ...".  x is expanded twice,
 * so it must not have side effects.
 */
#define	RET(x)	do { \
	if (dbg) \
		(void) printf("lex %s\n", tokname(x)); \
	return (x); \
} while (0)
93*3ee4fc2aSCody Peter Mello 
/*
 * Report the next input character without consuming it: the character
 * is read with input() and pushed straight back with unput().
 */
int
peek(void)
{
	int next;

	next = input();
	unput(next);
	return (next);
}
101*3ee4fc2aSCody Peter Mello 
/*
 * Read the next raw token into the growable buffer *pbuf (capacity *psz,
 * which must be at least 2).
 *
 * Returns:
 *	0	at end of input;
 *	'a'	for a name ([A-Za-z_][A-Za-z0-9_]*), text left in *pbuf;
 *	'0'	for a number, text left in *pbuf;
 *	c	otherwise, the single character itself (also stored in
 *		*pbuf as a one-character string).
 *
 * *pbuf and *psz are updated when the buffer had to be grown.
 */
int
gettok(char **pbuf, size_t *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	size_t sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return (0);
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return (c);

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for (; (c = input()) != 0; ) {
			/*
			 * Keep room for this character AND the terminating
			 * NUL.  Checking only "bp-buf >= sz" let the final
			 * "*bp = 0" land one byte past the buffer when the
			 * input ended right after the name filled it.
			 */
			if ((size_t)(bp - buf) + 2 > sz &&
			    !adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
				FATAL("out of space for name %.10s...", buf);
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for (; (c = input()) != 0; ) {
			/* same headroom rule as for names above */
			if ((size_t)(bp - buf) + 2 > sz &&
			    !adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
				FATAL("out of space for number %.10s...", buf);
			if (isdigit(c) || c == 'e' || c == 'E' ||
			    c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		(void) strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * Push the tail back BEFORE truncating: rem+1 is
			 * &buf[1], so truncating first would make the tail
			 * empty and silently drop the characters read.
			 */
			unputstr(rem+1); /* put rest back for later */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return (retc);
}
165*3ee4fc2aSCody Peter Mello 
/* Helpers defined further down that yylex() needs to see first. */
int	word(char *);
int	string(void);
int	regexpr(void);

/* One-shot flags consulted (and cleared) at the top of yylex(). */
int	sc = 0;		/* 1 => return a } right now */
int	reg = 0;	/* 1 => return a REGEXPR now */
171*3ee4fc2aSCody Peter Mello 
172*3ee4fc2aSCody Peter Mello int
yylex(void)173*3ee4fc2aSCody Peter Mello yylex(void)
174*3ee4fc2aSCody Peter Mello {
175*3ee4fc2aSCody Peter Mello 	int c;
176*3ee4fc2aSCody Peter Mello 	static char *buf = NULL;
177*3ee4fc2aSCody Peter Mello 	/* BUG: setting this small causes core dump! */
178*3ee4fc2aSCody Peter Mello 	static size_t bufsize = 5;
179*3ee4fc2aSCody Peter Mello 
180*3ee4fc2aSCody Peter Mello 	if (buf == NULL && (buf = (char *)malloc(bufsize)) == NULL)
181*3ee4fc2aSCody Peter Mello 		FATAL("out of space in yylex");
182*3ee4fc2aSCody Peter Mello 	if (sc) {
183*3ee4fc2aSCody Peter Mello 		sc = 0;
184*3ee4fc2aSCody Peter Mello 		RET('}');
185*3ee4fc2aSCody Peter Mello 	}
186*3ee4fc2aSCody Peter Mello 	if (reg) {
187*3ee4fc2aSCody Peter Mello 		reg = 0;
188*3ee4fc2aSCody Peter Mello 		return (regexpr());
189*3ee4fc2aSCody Peter Mello 	}
190*3ee4fc2aSCody Peter Mello 	for (;;) {
191*3ee4fc2aSCody Peter Mello 		c = gettok(&buf, &bufsize);
192*3ee4fc2aSCody Peter Mello 		if (c == 0)
193*3ee4fc2aSCody Peter Mello 			return (0);
194*3ee4fc2aSCody Peter Mello 		if (isalpha(c) || c == '_')
195*3ee4fc2aSCody Peter Mello 			return (word(buf));
196*3ee4fc2aSCody Peter Mello 		if (isdigit(c)) {
197*3ee4fc2aSCody Peter Mello 			yylval.cp = setsymtab(
198*3ee4fc2aSCody Peter Mello 			    buf, tostring(buf), atof(buf), CON|NUM, symtab);
199*3ee4fc2aSCody Peter Mello 			/* should this also have STR set? */
200*3ee4fc2aSCody Peter Mello 			RET(NUMBER);
201*3ee4fc2aSCody Peter Mello 		}
202*3ee4fc2aSCody Peter Mello 
203*3ee4fc2aSCody Peter Mello 		yylval.i = c;
204*3ee4fc2aSCody Peter Mello 		switch (c) {
205*3ee4fc2aSCody Peter Mello 		case '\n':	/* {EOL} */
206*3ee4fc2aSCody Peter Mello 			lineno++;
207*3ee4fc2aSCody Peter Mello 			RET(NL);
208*3ee4fc2aSCody Peter Mello 		case '\r':	/* assume \n is coming */
209*3ee4fc2aSCody Peter Mello 		case ' ':	/* {WS}+ */
210*3ee4fc2aSCody Peter Mello 		case '\t':
211*3ee4fc2aSCody Peter Mello 			break;
212*3ee4fc2aSCody Peter Mello 		case '#':	/* #.* strip comments */
213*3ee4fc2aSCody Peter Mello 			while ((c = input()) != '\n' && c != 0)
214*3ee4fc2aSCody Peter Mello 				;
215*3ee4fc2aSCody Peter Mello 			unput(c);
216*3ee4fc2aSCody Peter Mello 			break;
217*3ee4fc2aSCody Peter Mello 		case ';':
218*3ee4fc2aSCody Peter Mello 			RET(';');
219*3ee4fc2aSCody Peter Mello 		case '\\':
220*3ee4fc2aSCody Peter Mello 			if (peek() == '\n') {
221*3ee4fc2aSCody Peter Mello 				(void) input();
222*3ee4fc2aSCody Peter Mello 				lineno++;
223*3ee4fc2aSCody Peter Mello 			} else if (peek() == '\r') {
224*3ee4fc2aSCody Peter Mello 				(void) input();
225*3ee4fc2aSCody Peter Mello 				(void) input();	/* BUG: check for \n */
226*3ee4fc2aSCody Peter Mello 				lineno++;
227*3ee4fc2aSCody Peter Mello 			} else {
228*3ee4fc2aSCody Peter Mello 				RET(c);
229*3ee4fc2aSCody Peter Mello 			}
230*3ee4fc2aSCody Peter Mello 			break;
231*3ee4fc2aSCody Peter Mello 		case '&':
232*3ee4fc2aSCody Peter Mello 			if (peek() == '&') {
233*3ee4fc2aSCody Peter Mello 				(void) input();
234*3ee4fc2aSCody Peter Mello 				RET(AND);
235*3ee4fc2aSCody Peter Mello 			} else
236*3ee4fc2aSCody Peter Mello 				RET('&');
237*3ee4fc2aSCody Peter Mello 		case '|':
238*3ee4fc2aSCody Peter Mello 			if (peek() == '|') {
239*3ee4fc2aSCody Peter Mello 				(void) input();
240*3ee4fc2aSCody Peter Mello 				RET(BOR);
241*3ee4fc2aSCody Peter Mello 			} else
242*3ee4fc2aSCody Peter Mello 				RET('|');
243*3ee4fc2aSCody Peter Mello 		case '!':
244*3ee4fc2aSCody Peter Mello 			if (peek() == '=') {
245*3ee4fc2aSCody Peter Mello 				(void) input();
246*3ee4fc2aSCody Peter Mello 				yylval.i = NE;
247*3ee4fc2aSCody Peter Mello 				RET(NE);
248*3ee4fc2aSCody Peter Mello 			} else if (peek() == '~') {
249*3ee4fc2aSCody Peter Mello 				(void) input();
250*3ee4fc2aSCody Peter Mello 				yylval.i = NOTMATCH;
251*3ee4fc2aSCody Peter Mello 				RET(MATCHOP);
252*3ee4fc2aSCody Peter Mello 			} else
253*3ee4fc2aSCody Peter Mello 				RET(NOT);
254*3ee4fc2aSCody Peter Mello 		case '~':
255*3ee4fc2aSCody Peter Mello 			yylval.i = MATCH;
256*3ee4fc2aSCody Peter Mello 			RET(MATCHOP);
257*3ee4fc2aSCody Peter Mello 		case '<':
258*3ee4fc2aSCody Peter Mello 			if (peek() == '=') {
259*3ee4fc2aSCody Peter Mello 				(void) input();
260*3ee4fc2aSCody Peter Mello 				yylval.i = LE;
261*3ee4fc2aSCody Peter Mello 				RET(LE);
262*3ee4fc2aSCody Peter Mello 			} else {
263*3ee4fc2aSCody Peter Mello 				yylval.i = LT;
264*3ee4fc2aSCody Peter Mello 				RET(LT);
265*3ee4fc2aSCody Peter Mello 			}
266*3ee4fc2aSCody Peter Mello 		case '=':
267*3ee4fc2aSCody Peter Mello 			if (peek() == '=') {
268*3ee4fc2aSCody Peter Mello 				(void) input();
269*3ee4fc2aSCody Peter Mello 				yylval.i = EQ;
270*3ee4fc2aSCody Peter Mello 				RET(EQ);
271*3ee4fc2aSCody Peter Mello 			} else {
272*3ee4fc2aSCody Peter Mello 				yylval.i = ASSIGN;
273*3ee4fc2aSCody Peter Mello 				RET(ASGNOP);
274*3ee4fc2aSCody Peter Mello 			}
275*3ee4fc2aSCody Peter Mello 		case '>':
276*3ee4fc2aSCody Peter Mello 			if (peek() == '=') {
277*3ee4fc2aSCody Peter Mello 				(void) input();
278*3ee4fc2aSCody Peter Mello 				yylval.i = GE;
279*3ee4fc2aSCody Peter Mello 				RET(GE);
280*3ee4fc2aSCody Peter Mello 			} else if (peek() == '>') {
281*3ee4fc2aSCody Peter Mello 				(void) input();
282*3ee4fc2aSCody Peter Mello 				yylval.i = APPEND;
283*3ee4fc2aSCody Peter Mello 				RET(APPEND);
284*3ee4fc2aSCody Peter Mello 			} else {
285*3ee4fc2aSCody Peter Mello 				yylval.i = GT;
286*3ee4fc2aSCody Peter Mello 				RET(GT);
287*3ee4fc2aSCody Peter Mello 			}
288*3ee4fc2aSCody Peter Mello 		case '+':
289*3ee4fc2aSCody Peter Mello 			if (peek() == '+') {
290*3ee4fc2aSCody Peter Mello 				(void) input();
291*3ee4fc2aSCody Peter Mello 				yylval.i = INCR;
292*3ee4fc2aSCody Peter Mello 				RET(INCR);
293*3ee4fc2aSCody Peter Mello 			} else if (peek() == '=') {
294*3ee4fc2aSCody Peter Mello 				(void) input();
295*3ee4fc2aSCody Peter Mello 				yylval.i = ADDEQ;
296*3ee4fc2aSCody Peter Mello 				RET(ASGNOP);
297*3ee4fc2aSCody Peter Mello 			} else
298*3ee4fc2aSCody Peter Mello 				RET('+');
299*3ee4fc2aSCody Peter Mello 		case '-':
300*3ee4fc2aSCody Peter Mello 			if (peek() == '-') {
301*3ee4fc2aSCody Peter Mello 				(void) input();
302*3ee4fc2aSCody Peter Mello 				yylval.i = DECR;
303*3ee4fc2aSCody Peter Mello 				RET(DECR);
304*3ee4fc2aSCody Peter Mello 			} else if (peek() == '=') {
305*3ee4fc2aSCody Peter Mello 				(void) input();
306*3ee4fc2aSCody Peter Mello 				yylval.i = SUBEQ;
307*3ee4fc2aSCody Peter Mello 				RET(ASGNOP);
308*3ee4fc2aSCody Peter Mello 			} else
309*3ee4fc2aSCody Peter Mello 				RET('-');
310*3ee4fc2aSCody Peter Mello 		case '*':
311*3ee4fc2aSCody Peter Mello 			if (peek() == '=') {	/* *= */
312*3ee4fc2aSCody Peter Mello 				(void) input();
313*3ee4fc2aSCody Peter Mello 				yylval.i = MULTEQ;
314*3ee4fc2aSCody Peter Mello 				RET(ASGNOP);
315*3ee4fc2aSCody Peter Mello 			} else if (peek() == '*') {	/* ** or **= */
316*3ee4fc2aSCody Peter Mello 				(void) input();	/* eat 2nd * */
317*3ee4fc2aSCody Peter Mello 				if (peek() == '=') {
318*3ee4fc2aSCody Peter Mello 					(void) input();
319*3ee4fc2aSCody Peter Mello 					yylval.i = POWEQ;
320*3ee4fc2aSCody Peter Mello 					RET(ASGNOP);
321*3ee4fc2aSCody Peter Mello 				} else {
322*3ee4fc2aSCody Peter Mello 					RET(POWER);
323*3ee4fc2aSCody Peter Mello 				}
324*3ee4fc2aSCody Peter Mello 			} else
325*3ee4fc2aSCody Peter Mello 				RET('*');
326*3ee4fc2aSCody Peter Mello 		case '/':
327*3ee4fc2aSCody Peter Mello 			RET('/');
328*3ee4fc2aSCody Peter Mello 		case '%':
329*3ee4fc2aSCody Peter Mello 			if (peek() == '=') {
330*3ee4fc2aSCody Peter Mello 				(void) input();
331*3ee4fc2aSCody Peter Mello 				yylval.i = MODEQ;
332*3ee4fc2aSCody Peter Mello 				RET(ASGNOP);
333*3ee4fc2aSCody Peter Mello 			} else
334*3ee4fc2aSCody Peter Mello 				RET('%');
335*3ee4fc2aSCody Peter Mello 		case '^':
336*3ee4fc2aSCody Peter Mello 			if (peek() == '=') {
337*3ee4fc2aSCody Peter Mello 				(void) input();
338*3ee4fc2aSCody Peter Mello 				yylval.i = POWEQ;
339*3ee4fc2aSCody Peter Mello 				RET(ASGNOP);
340*3ee4fc2aSCody Peter Mello 			} else
341*3ee4fc2aSCody Peter Mello 				RET(POWER);
342*3ee4fc2aSCody Peter Mello 
343*3ee4fc2aSCody Peter Mello 		case '$':
344*3ee4fc2aSCody Peter Mello 			/* BUG: awkward, if not wrong */
345*3ee4fc2aSCody Peter Mello 			c = gettok(&buf, &bufsize);
346*3ee4fc2aSCody Peter Mello 			if (isalpha(c)) {
347*3ee4fc2aSCody Peter Mello 				if (strcmp(buf, "NF") == 0) {
348*3ee4fc2aSCody Peter Mello 					/* very special */
349*3ee4fc2aSCody Peter Mello 					unputstr("(NF)");
350*3ee4fc2aSCody Peter Mello 					RET(INDIRECT);
351*3ee4fc2aSCody Peter Mello 				}
352*3ee4fc2aSCody Peter Mello 				c = peek();
353*3ee4fc2aSCody Peter Mello 				if (c == '(' || c == '[' ||
354*3ee4fc2aSCody Peter Mello 				    (infunc && isarg(buf) >= 0)) {
355*3ee4fc2aSCody Peter Mello 					unputstr(buf);
356*3ee4fc2aSCody Peter Mello 					RET(INDIRECT);
357*3ee4fc2aSCody Peter Mello 				}
358*3ee4fc2aSCody Peter Mello 				yylval.cp = setsymtab(
359*3ee4fc2aSCody Peter Mello 				    buf, "", 0.0, STR|NUM, symtab);
360*3ee4fc2aSCody Peter Mello 				RET(IVAR);
361*3ee4fc2aSCody Peter Mello 			} else if (c == 0) {	/*  */
362*3ee4fc2aSCody Peter Mello 				SYNTAX("unexpected end of input after $");
363*3ee4fc2aSCody Peter Mello 				RET(';');
364*3ee4fc2aSCody Peter Mello 			} else {
365*3ee4fc2aSCody Peter Mello 				unputstr(buf);
366*3ee4fc2aSCody Peter Mello 				RET(INDIRECT);
367*3ee4fc2aSCody Peter Mello 			}
368*3ee4fc2aSCody Peter Mello 
369*3ee4fc2aSCody Peter Mello 		case '}':
370*3ee4fc2aSCody Peter Mello 			if (--bracecnt < 0)
371*3ee4fc2aSCody Peter Mello 				SYNTAX("extra }");
372*3ee4fc2aSCody Peter Mello 			sc = 1;
373*3ee4fc2aSCody Peter Mello 			RET(';');
374*3ee4fc2aSCody Peter Mello 		case ']':
375*3ee4fc2aSCody Peter Mello 			if (--brackcnt < 0)
376*3ee4fc2aSCody Peter Mello 				SYNTAX("extra ]");
377*3ee4fc2aSCody Peter Mello 			RET(']');
378*3ee4fc2aSCody Peter Mello 		case ')':
379*3ee4fc2aSCody Peter Mello 			if (--parencnt < 0)
380*3ee4fc2aSCody Peter Mello 				SYNTAX("extra )");
381*3ee4fc2aSCody Peter Mello 			RET(')');
382*3ee4fc2aSCody Peter Mello 		case '{':
383*3ee4fc2aSCody Peter Mello 			bracecnt++;
384*3ee4fc2aSCody Peter Mello 			RET('{');
385*3ee4fc2aSCody Peter Mello 		case '[':
386*3ee4fc2aSCody Peter Mello 			brackcnt++;
387*3ee4fc2aSCody Peter Mello 			RET('[');
388*3ee4fc2aSCody Peter Mello 		case '(':
389*3ee4fc2aSCody Peter Mello 			parencnt++;
390*3ee4fc2aSCody Peter Mello 			RET('(');
391*3ee4fc2aSCody Peter Mello 
392*3ee4fc2aSCody Peter Mello 		case '"':
393*3ee4fc2aSCody Peter Mello 			/* BUG: should be like tran.c ? */
394*3ee4fc2aSCody Peter Mello 			return (string());
395*3ee4fc2aSCody Peter Mello 
396*3ee4fc2aSCody Peter Mello 		default:
397*3ee4fc2aSCody Peter Mello 			RET(c);
398*3ee4fc2aSCody Peter Mello 		}
399*3ee4fc2aSCody Peter Mello 	}
400*3ee4fc2aSCody Peter Mello }
401*3ee4fc2aSCody Peter Mello 
402*3ee4fc2aSCody Peter Mello int
string(void)403*3ee4fc2aSCody Peter Mello string(void)
404*3ee4fc2aSCody Peter Mello {
405*3ee4fc2aSCody Peter Mello 	int c, n;
406*3ee4fc2aSCody Peter Mello 	char *s, *bp;
407*3ee4fc2aSCody Peter Mello 	static char *buf = NULL;
408*3ee4fc2aSCody Peter Mello 	static size_t bufsz = 500;
409*3ee4fc2aSCody Peter Mello 
410*3ee4fc2aSCody Peter Mello 	if (buf == NULL && (buf = (char *)malloc(bufsz)) == NULL)
411*3ee4fc2aSCody Peter Mello 		FATAL("out of space for strings");
412*3ee4fc2aSCody Peter Mello 	for (bp = buf; (c = input()) != '"'; ) {
413*3ee4fc2aSCody Peter Mello 		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
414*3ee4fc2aSCody Peter Mello 			FATAL("out of space for string %.10s...", buf);
415*3ee4fc2aSCody Peter Mello 		switch (c) {
416*3ee4fc2aSCody Peter Mello 		case '\n':
417*3ee4fc2aSCody Peter Mello 		case '\r':
418*3ee4fc2aSCody Peter Mello 		case 0:
419*3ee4fc2aSCody Peter Mello 			*bp = '\0';
420*3ee4fc2aSCody Peter Mello 			SYNTAX("non-terminated string %.10s...", buf);
421*3ee4fc2aSCody Peter Mello 			if (c == 0)	/* hopeless */
422*3ee4fc2aSCody Peter Mello 				FATAL("giving up");
423*3ee4fc2aSCody Peter Mello 			lineno++;
424*3ee4fc2aSCody Peter Mello 			break;
425*3ee4fc2aSCody Peter Mello 		case '\\':
426*3ee4fc2aSCody Peter Mello 			c = input();
427*3ee4fc2aSCody Peter Mello 			switch (c) {
428*3ee4fc2aSCody Peter Mello 			case '"': *bp++ = '"'; break;
429*3ee4fc2aSCody Peter Mello 			case 'n': *bp++ = '\n'; break;
430*3ee4fc2aSCody Peter Mello 			case 't': *bp++ = '\t'; break;
431*3ee4fc2aSCody Peter Mello 			case 'f': *bp++ = '\f'; break;
432*3ee4fc2aSCody Peter Mello 			case 'r': *bp++ = '\r'; break;
433*3ee4fc2aSCody Peter Mello 			case 'b': *bp++ = '\b'; break;
434*3ee4fc2aSCody Peter Mello 			case 'v': *bp++ = '\v'; break;
435*3ee4fc2aSCody Peter Mello 			case 'a': *bp++ = '\007'; break;
436*3ee4fc2aSCody Peter Mello 			case '\\': *bp++ = '\\'; break;
437*3ee4fc2aSCody Peter Mello 
438*3ee4fc2aSCody Peter Mello 			case '0': case '1': case '2': /* octal: \d \dd \ddd */
439*3ee4fc2aSCody Peter Mello 			case '3': case '4': case '5': case '6': case '7':
440*3ee4fc2aSCody Peter Mello 				n = c - '0';
441*3ee4fc2aSCody Peter Mello 				if ((c = peek()) >= '0' && c < '8') {
442*3ee4fc2aSCody Peter Mello 					n = 8 * n + input() - '0';
443*3ee4fc2aSCody Peter Mello 					if ((c = peek()) >= '0' && c < '8')
444*3ee4fc2aSCody Peter Mello 						n = 8 * n + input() - '0';
445*3ee4fc2aSCody Peter Mello 				}
446*3ee4fc2aSCody Peter Mello 				*bp++ = n;
447*3ee4fc2aSCody Peter Mello 				break;
448*3ee4fc2aSCody Peter Mello 
449*3ee4fc2aSCody Peter Mello 			case 'x': {	/* hex  \x0-9a-fA-F + */
450*3ee4fc2aSCody Peter Mello 				char xbuf[100], *px;
451*3ee4fc2aSCody Peter Mello 				px = xbuf;
452*3ee4fc2aSCody Peter Mello 				while ((c = input()) != 0 && px-xbuf < 100-2) {
453*3ee4fc2aSCody Peter Mello 					if (isdigit(c) ||
454*3ee4fc2aSCody Peter Mello 					    (c >= 'a' && c <= 'f') ||
455*3ee4fc2aSCody Peter Mello 					    (c >= 'A' && c <= 'F'))
456*3ee4fc2aSCody Peter Mello 						*px++ = c;
457*3ee4fc2aSCody Peter Mello 					else
458*3ee4fc2aSCody Peter Mello 						break;
459*3ee4fc2aSCody Peter Mello 				}
460*3ee4fc2aSCody Peter Mello 				*px = 0;
461*3ee4fc2aSCody Peter Mello 				unput(c);
462*3ee4fc2aSCody Peter Mello 				(void) sscanf(xbuf, "%x", (unsigned int *)&n);
463*3ee4fc2aSCody Peter Mello 				*bp++ = n;
464*3ee4fc2aSCody Peter Mello 				break;
465*3ee4fc2aSCody Peter Mello 			}
466*3ee4fc2aSCody Peter Mello 
467*3ee4fc2aSCody Peter Mello 			default:
468*3ee4fc2aSCody Peter Mello 				*bp++ = c;
469*3ee4fc2aSCody Peter Mello 				break;
470*3ee4fc2aSCody Peter Mello 			}
471*3ee4fc2aSCody Peter Mello 			break;
472*3ee4fc2aSCody Peter Mello 		default:
473*3ee4fc2aSCody Peter Mello 			*bp++ = c;
474*3ee4fc2aSCody Peter Mello 			break;
475*3ee4fc2aSCody Peter Mello 		}
476*3ee4fc2aSCody Peter Mello 	}
477*3ee4fc2aSCody Peter Mello 	*bp = 0;
478*3ee4fc2aSCody Peter Mello 	s = tostring(buf);
479*3ee4fc2aSCody Peter Mello 	*bp++ = ' '; *bp++ = 0;
480*3ee4fc2aSCody Peter Mello 	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
481*3ee4fc2aSCody Peter Mello 	RET(STRING);
482*3ee4fc2aSCody Peter Mello }
483*3ee4fc2aSCody Peter Mello 
484*3ee4fc2aSCody Peter Mello 
485*3ee4fc2aSCody Peter Mello int
binsearch(char * w,Keyword * kp,int n)486*3ee4fc2aSCody Peter Mello binsearch(char *w, Keyword *kp, int n)
487*3ee4fc2aSCody Peter Mello {
488*3ee4fc2aSCody Peter Mello 	int cond, low, mid, high;
489*3ee4fc2aSCody Peter Mello 
490*3ee4fc2aSCody Peter Mello 	low = 0;
491*3ee4fc2aSCody Peter Mello 	high = n - 1;
492*3ee4fc2aSCody Peter Mello 	while (low <= high) {
493*3ee4fc2aSCody Peter Mello 		mid = (low + high) / 2;
494*3ee4fc2aSCody Peter Mello 		if ((cond = strcmp(w, kp[mid].word)) < 0)
495*3ee4fc2aSCody Peter Mello 			high = mid - 1;
496*3ee4fc2aSCody Peter Mello 		else if (cond > 0)
497*3ee4fc2aSCody Peter Mello 			low = mid + 1;
498*3ee4fc2aSCody Peter Mello 		else
499*3ee4fc2aSCody Peter Mello 			return (mid);
500*3ee4fc2aSCody Peter Mello 	}
501*3ee4fc2aSCody Peter Mello 	return (-1);
502*3ee4fc2aSCody Peter Mello }
503*3ee4fc2aSCody Peter Mello 
504*3ee4fc2aSCody Peter Mello int
word(char * w)505*3ee4fc2aSCody Peter Mello word(char *w)
506*3ee4fc2aSCody Peter Mello {
507*3ee4fc2aSCody Peter Mello 	Keyword *kp;
508*3ee4fc2aSCody Peter Mello 	int c, n;
509*3ee4fc2aSCody Peter Mello 
510*3ee4fc2aSCody Peter Mello 	n = binsearch(w, keywords, sizeof (keywords) / sizeof (keywords[0]));
511*3ee4fc2aSCody Peter Mello 	if (n != -1) {	/* found in table */
512*3ee4fc2aSCody Peter Mello 		kp = keywords + n;
513*3ee4fc2aSCody Peter Mello 		yylval.i = kp->sub;
514*3ee4fc2aSCody Peter Mello 		switch (kp->type) {	/* special handling */
515*3ee4fc2aSCody Peter Mello 		case BLTIN:
516*3ee4fc2aSCody Peter Mello 			if (kp->sub == FSYSTEM && safe)
517*3ee4fc2aSCody Peter Mello 				SYNTAX("system is unsafe");
518*3ee4fc2aSCody Peter Mello 			RET(kp->type);
519*3ee4fc2aSCody Peter Mello 		case FUNC:
520*3ee4fc2aSCody Peter Mello 			if (infunc)
521*3ee4fc2aSCody Peter Mello 				SYNTAX("illegal nested function");
522*3ee4fc2aSCody Peter Mello 			RET(kp->type);
523*3ee4fc2aSCody Peter Mello 		case RETURN:
524*3ee4fc2aSCody Peter Mello 			if (!infunc)
525*3ee4fc2aSCody Peter Mello 				SYNTAX("return not in function");
526*3ee4fc2aSCody Peter Mello 			RET(kp->type);
527*3ee4fc2aSCody Peter Mello 		case VARNF:
528*3ee4fc2aSCody Peter Mello 			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
529*3ee4fc2aSCody Peter Mello 			RET(VARNF);
530*3ee4fc2aSCody Peter Mello 		default:
531*3ee4fc2aSCody Peter Mello 			RET(kp->type);
532*3ee4fc2aSCody Peter Mello 		}
533*3ee4fc2aSCody Peter Mello 	}
534*3ee4fc2aSCody Peter Mello 	c = peek();	/* look for '(' */
535*3ee4fc2aSCody Peter Mello 	if (c != '(' && infunc && (n = isarg(w)) >= 0) {
536*3ee4fc2aSCody Peter Mello 		yylval.i = n;
537*3ee4fc2aSCody Peter Mello 		RET(ARG);
538*3ee4fc2aSCody Peter Mello 	} else {
539*3ee4fc2aSCody Peter Mello 		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
540*3ee4fc2aSCody Peter Mello 		if (c == '(') {
541*3ee4fc2aSCody Peter Mello 			RET(CALL);
542*3ee4fc2aSCody Peter Mello 		} else {
543*3ee4fc2aSCody Peter Mello 			RET(VAR);
544*3ee4fc2aSCody Peter Mello 		}
545*3ee4fc2aSCody Peter Mello 	}
546*3ee4fc2aSCody Peter Mello }
547*3ee4fc2aSCody Peter Mello 
548*3ee4fc2aSCody Peter Mello void
startreg(void)549*3ee4fc2aSCody Peter Mello startreg(void)	/* next call to yylex will return a regular expression */
550*3ee4fc2aSCody Peter Mello {
551*3ee4fc2aSCody Peter Mello 	reg = 1;
552*3ee4fc2aSCody Peter Mello }
553*3ee4fc2aSCody Peter Mello 
554*3ee4fc2aSCody Peter Mello int
regexpr(void)555*3ee4fc2aSCody Peter Mello regexpr(void)
556*3ee4fc2aSCody Peter Mello {
557*3ee4fc2aSCody Peter Mello 	int c;
558*3ee4fc2aSCody Peter Mello 	static char *buf = NULL;
559*3ee4fc2aSCody Peter Mello 	static size_t bufsz = 500;
560*3ee4fc2aSCody Peter Mello 	char *bp;
561*3ee4fc2aSCody Peter Mello 
562*3ee4fc2aSCody Peter Mello 	if (buf == NULL && (buf = (char *)malloc(bufsz)) == NULL)
563*3ee4fc2aSCody Peter Mello 		FATAL("out of space for rex expr");
564*3ee4fc2aSCody Peter Mello 	bp = buf;
565*3ee4fc2aSCody Peter Mello 	for (; (c = input()) != '/' && c != 0; ) {
566*3ee4fc2aSCody Peter Mello 		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
567*3ee4fc2aSCody Peter Mello 			FATAL("out of space for reg expr %.10s...", buf);
568*3ee4fc2aSCody Peter Mello 		if (c == '\n') {
569*3ee4fc2aSCody Peter Mello 			*bp = '\0';
570*3ee4fc2aSCody Peter Mello 			SYNTAX("newline in regular expression %.10s...", buf);
571*3ee4fc2aSCody Peter Mello 			unput('\n');
572*3ee4fc2aSCody Peter Mello 			break;
573*3ee4fc2aSCody Peter Mello 		} else if (c == '\\') {
574*3ee4fc2aSCody Peter Mello 			*bp++ = '\\';
575*3ee4fc2aSCody Peter Mello 			*bp++ = input();
576*3ee4fc2aSCody Peter Mello 		} else {
577*3ee4fc2aSCody Peter Mello 			*bp++ = c;
578*3ee4fc2aSCody Peter Mello 		}
579*3ee4fc2aSCody Peter Mello 	}
580*3ee4fc2aSCody Peter Mello 	*bp = 0;
581*3ee4fc2aSCody Peter Mello 	if (c == 0)
582*3ee4fc2aSCody Peter Mello 		SYNTAX("non-terminated regular expression %.10s...", buf);
583*3ee4fc2aSCody Peter Mello 	yylval.s = tostring(buf);
584*3ee4fc2aSCody Peter Mello 	unput('/');
585*3ee4fc2aSCody Peter Mello 	RET(REGEXPR);
586*3ee4fc2aSCody Peter Mello }
587*3ee4fc2aSCody Peter Mello 
/* low-level lexical stuff, sort of inherited from lex */
589*3ee4fc2aSCody Peter Mello 
/* Record of recently read characters; input() wraps ep at the end. */
char	ebuf[300];
char	*ep = ebuf;		/* next slot to fill in ebuf */

/* Pushback stack: unput() pushes, input() pops before reading more. */
char	yysbuf[100];
char	*yysptr = yysbuf;	/* one past the top of the stack */

FILE	*yyin = NULL;
595*3ee4fc2aSCody Peter Mello 
596*3ee4fc2aSCody Peter Mello int
input(void)597*3ee4fc2aSCody Peter Mello input(void)	/* get next lexical input character */
598*3ee4fc2aSCody Peter Mello {
599*3ee4fc2aSCody Peter Mello 	int c;
600*3ee4fc2aSCody Peter Mello 	extern char *lexprog;
601*3ee4fc2aSCody Peter Mello 
602*3ee4fc2aSCody Peter Mello 	if (yysptr > yysbuf)
603*3ee4fc2aSCody Peter Mello 		c = (uschar)*--yysptr;
604*3ee4fc2aSCody Peter Mello 	else if (lexprog != NULL) {	/* awk '...' */
605*3ee4fc2aSCody Peter Mello 		if ((c = (uschar)*lexprog) != 0)
606*3ee4fc2aSCody Peter Mello 			lexprog++;
607*3ee4fc2aSCody Peter Mello 	} else				/* awk -f ... */
608*3ee4fc2aSCody Peter Mello 		c = pgetc();
609*3ee4fc2aSCody Peter Mello 	if (c == EOF)
610*3ee4fc2aSCody Peter Mello 		c = 0;
611*3ee4fc2aSCody Peter Mello 	if (ep >= ebuf + sizeof (ebuf))
612*3ee4fc2aSCody Peter Mello 		ep = ebuf;
613*3ee4fc2aSCody Peter Mello 	*ep = c;
614*3ee4fc2aSCody Peter Mello 	if (c != 0) {
615*3ee4fc2aSCody Peter Mello 		ep++;
616*3ee4fc2aSCody Peter Mello 	}
617*3ee4fc2aSCody Peter Mello 	return (c);
618*3ee4fc2aSCody Peter Mello }
619*3ee4fc2aSCody Peter Mello 
620*3ee4fc2aSCody Peter Mello void
unput(int c)621*3ee4fc2aSCody Peter Mello unput(int c)	/* put lexical character back on input */
622*3ee4fc2aSCody Peter Mello {
623*3ee4fc2aSCody Peter Mello 	if (yysptr >= yysbuf + sizeof (yysbuf))
624*3ee4fc2aSCody Peter Mello 		FATAL("pushed back too much: %.20s...", yysbuf);
625*3ee4fc2aSCody Peter Mello 	*yysptr++ = c;
626*3ee4fc2aSCody Peter Mello 	if (--ep < ebuf)
627*3ee4fc2aSCody Peter Mello 		ep = ebuf + sizeof (ebuf) - 1;
628*3ee4fc2aSCody Peter Mello }
629*3ee4fc2aSCody Peter Mello 
/*
 * Push the whole string s back onto the input.  Characters are pushed
 * last-to-first so that input() hands them out again in their original
 * order.
 */
void
unputstr(const char *s)	/* put a string back on input */
{
	size_t left = strlen(s);

	while (left > 0)
		unput(s[--left]);
}
638