1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <ctype.h>
29#include "awk.h"
30#include "ytab.h"
31
32extern YYSTYPE	yylval;
33extern int	infunc;
34
/* Position and nesting bookkeeping maintained by the lexer. */
int	lineno = 1;	/* current line number; incremented as newlines are consumed */
int	bracecnt = 0;	/* incremented on '{', decremented on '}' */
int	brackcnt = 0;	/* incremented on '[', decremented on ']' */
int	parencnt = 0;	/* incremented on '(', decremented on ')' */
39
/*
 * Keyword: one entry of the reserved-word table.
 * word() copies `sub` into yylval.i and returns `type` as the token.
 */
typedef struct Keyword Keyword;

struct Keyword {
	const char *word;	/* spelling as it appears in the awk program */
	int	sub;		/* value stored in yylval.i when the word is seen */
	int	type;		/* token code returned to the parser */
};
45
46Keyword keywords[] ={	/* keep sorted: binary searched */
47	{ "BEGIN",	XBEGIN,		XBEGIN },
48	{ "END",	XEND,		XEND },
49	{ "NF",		VARNF,		VARNF },
50	{ "and",	FAND,		BLTIN },
51	{ "atan2",	FATAN,		BLTIN },
52	{ "break",	BREAK,		BREAK },
53	{ "close",	CLOSE,		CLOSE },
54	{ "compl",	FCOMPL,		BLTIN },
55	{ "continue",	CONTINUE,	CONTINUE },
56	{ "cos",	FCOS,		BLTIN },
57	{ "delete",	DELETE,		DELETE },
58	{ "do",		DO,		DO },
59	{ "else",	ELSE,		ELSE },
60	{ "exit",	EXIT,		EXIT },
61	{ "exp",	FEXP,		BLTIN },
62	{ "fflush",	FFLUSH,		BLTIN },
63	{ "for",	FOR,		FOR },
64	{ "func",	FUNC,		FUNC },
65	{ "function",	FUNC,		FUNC },
66	{ "getline",	GETLINE,	GETLINE },
67	{ "gsub",	GSUB,		GSUB },
68	{ "if",		IF,		IF },
69	{ "in",		IN,		IN },
70	{ "index",	INDEX,		INDEX },
71	{ "int",	FINT,		BLTIN },
72	{ "length",	FLENGTH,	BLTIN },
73	{ "log",	FLOG,		BLTIN },
74	{ "lshift",	FLSHIFT,	BLTIN },
75	{ "match",	MATCHFCN,	MATCHFCN },
76	{ "next",	NEXT,		NEXT },
77	{ "nextfile",	NEXTFILE,	NEXTFILE },
78	{ "or",		FFOR,		BLTIN },
79	{ "print",	PRINT,		PRINT },
80	{ "printf",	PRINTF,		PRINTF },
81	{ "rand",	FRAND,		BLTIN },
82	{ "return",	RETURN,		RETURN },
83	{ "rshift",	FRSHIFT,	BLTIN },
84	{ "sin",	FSIN,		BLTIN },
85	{ "split",	SPLIT,		SPLIT },
86	{ "sprintf",	SPRINTF,	SPRINTF },
87	{ "sqrt",	FSQRT,		BLTIN },
88	{ "srand",	FSRAND,		BLTIN },
89	{ "sub",	SUB,		SUB },
90	{ "substr",	SUBSTR,		SUBSTR },
91	{ "system",	FSYSTEM,	BLTIN },
92	{ "tolower",	FTOLOWER,	BLTIN },
93	{ "toupper",	FTOUPPER,	BLTIN },
94	{ "while",	WHILE,		WHILE },
95	{ "xor",	FXOR,		BLTIN },
96};
97
/*
 * RET(x): return token x from the enclosing lexer function, first printing
 * its name via tokname() when dbg is set.  The do { } while (0) wrapper
 * makes "RET(x);" a single statement, so it is safe as the unbraced body
 * of an if/else.  x is expanded twice: pass side-effect-free expressions.
 */
#define	RET(x)	do { if(dbg)printf("lex %s\n", tokname(x)); return(x); } while (0)
99
/* peek: return the next input character without consuming it */
int peek(void)
{
	int next;

	next = input();
	unput(next);	/* hand it straight back so the following input() sees it again */
	return next;
}
106
/*
 * gettok: read the next raw token from the input into the buffer *pbuf.
 *
 * pbuf/psz  address of the caller's buffer pointer and its size; the buffer
 *           is grown with adjbuf() when needed and both are updated on the
 *           name/number paths.  The buffer must hold at least two bytes.
 *
 * Returns   0    at end of input
 *           'a'  for a name ([A-Za-z_][A-Za-z0-9_]*), text left in *pbuf
 *           '0'  for a number, text left in *pbuf
 *           otherwise the single character read, which is also left in
 *           *pbuf as a one-character string.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Keep room for this character AND the terminating NUL:
			 * if input ends right after a character that filled the
			 * buffer, the "*bp = 0" below must still be in bounds.
			 */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same headroom rule as above: character plus NUL */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		/* the loop over-collects (e.g. "1+2"); let strtod find the real end */
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
			unputstr(rem+1); /* put rest back for later */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
169
/* Forward declarations for the helpers yylex() dispatches to. */
int	word(char *w);
int	string(void);
int	regexpr(void);

/* One-shot flags consulted at the top of yylex(). */
int	sc = 0;		/* 1 => return a } right now */
int	reg = 0;	/* 1 => return a REGEXPR now */
175
176int yylex(void)
177{
178	int c;
179	static char *buf = NULL;
180	static int bufsize = 5; /* BUG: setting this small causes core dump! */
181
182	if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
183		FATAL( "out of space in yylex" );
184	if (sc) {
185		sc = 0;
186		RET('}');
187	}
188	if (reg) {
189		reg = 0;
190		return regexpr();
191	}
192	for (;;) {
193		c = gettok(&buf, &bufsize);
194		if (c == 0)
195			return 0;
196		if (isalpha(c) || c == '_')
197			return word(buf);
198		if (isdigit(c)) {
199			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
200			/* should this also have STR set? */
201			RET(NUMBER);
202		}
203
204		yylval.i = c;
205		switch (c) {
206		case '\n':	/* {EOL} */
207			lineno++;
208			RET(NL);
209		case '\r':	/* assume \n is coming */
210		case ' ':	/* {WS}+ */
211		case '\t':
212			break;
213		case '#':	/* #.* strip comments */
214			while ((c = input()) != '\n' && c != 0)
215				;
216			unput(c);
217			break;
218		case ';':
219			RET(';');
220		case '\\':
221			if (peek() == '\n') {
222				input();
223				lineno++;
224			} else if (peek() == '\r') {
225				input(); input();	/* \n */
226				lineno++;
227			} else {
228				RET(c);
229			}
230			break;
231		case '&':
232			if (peek() == '&') {
233				input(); RET(AND);
234			} else
235				RET('&');
236		case '|':
237			if (peek() == '|') {
238				input(); RET(BOR);
239			} else
240				RET('|');
241		case '!':
242			if (peek() == '=') {
243				input(); yylval.i = NE; RET(NE);
244			} else if (peek() == '~') {
245				input(); yylval.i = NOTMATCH; RET(MATCHOP);
246			} else
247				RET(NOT);
248		case '~':
249			yylval.i = MATCH;
250			RET(MATCHOP);
251		case '<':
252			if (peek() == '=') {
253				input(); yylval.i = LE; RET(LE);
254			} else {
255				yylval.i = LT; RET(LT);
256			}
257		case '=':
258			if (peek() == '=') {
259				input(); yylval.i = EQ; RET(EQ);
260			} else {
261				yylval.i = ASSIGN; RET(ASGNOP);
262			}
263		case '>':
264			if (peek() == '=') {
265				input(); yylval.i = GE; RET(GE);
266			} else if (peek() == '>') {
267				input(); yylval.i = APPEND; RET(APPEND);
268			} else {
269				yylval.i = GT; RET(GT);
270			}
271		case '+':
272			if (peek() == '+') {
273				input(); yylval.i = INCR; RET(INCR);
274			} else if (peek() == '=') {
275				input(); yylval.i = ADDEQ; RET(ASGNOP);
276			} else
277				RET('+');
278		case '-':
279			if (peek() == '-') {
280				input(); yylval.i = DECR; RET(DECR);
281			} else if (peek() == '=') {
282				input(); yylval.i = SUBEQ; RET(ASGNOP);
283			} else
284				RET('-');
285		case '*':
286			if (peek() == '=') {	/* *= */
287				input(); yylval.i = MULTEQ; RET(ASGNOP);
288			} else if (peek() == '*') {	/* ** or **= */
289				input();	/* eat 2nd * */
290				if (peek() == '=') {
291					input(); yylval.i = POWEQ; RET(ASGNOP);
292				} else {
293					RET(POWER);
294				}
295			} else
296				RET('*');
297		case '/':
298			RET('/');
299		case '%':
300			if (peek() == '=') {
301				input(); yylval.i = MODEQ; RET(ASGNOP);
302			} else
303				RET('%');
304		case '^':
305			if (peek() == '=') {
306				input(); yylval.i = POWEQ; RET(ASGNOP);
307			} else
308				RET(POWER);
309
310		case '$':
311			/* BUG: awkward, if not wrong */
312			c = gettok(&buf, &bufsize);
313			if (isalpha(c)) {
314				if (strcmp(buf, "NF") == 0) {	/* very special */
315					unputstr("(NF)");
316					RET(INDIRECT);
317				}
318				c = peek();
319				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
320					unputstr(buf);
321					RET(INDIRECT);
322				}
323				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
324				RET(IVAR);
325			} else if (c == 0) {	/*  */
326				SYNTAX( "unexpected end of input after $" );
327				RET(';');
328			} else {
329				unputstr(buf);
330				RET(INDIRECT);
331			}
332
333		case '}':
334			if (--bracecnt < 0)
335				SYNTAX( "extra }" );
336			sc = 1;
337			RET(';');
338		case ']':
339			if (--brackcnt < 0)
340				SYNTAX( "extra ]" );
341			RET(']');
342		case ')':
343			if (--parencnt < 0)
344				SYNTAX( "extra )" );
345			RET(')');
346		case '{':
347			bracecnt++;
348			RET('{');
349		case '[':
350			brackcnt++;
351			RET('[');
352		case '(':
353			parencnt++;
354			RET('(');
355
356		case '"':
357			return string();	/* BUG: should be like tran.c ? */
358
359		default:
360			RET(c);
361		}
362	}
363}
364
365int string(void)
366{
367	int c, n;
368	char *s, *bp;
369	static char *buf = NULL;
370	static int bufsz = 500;
371
372	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
373		FATAL("out of space for strings");
374	for (bp = buf; (c = input()) != '"'; ) {
375		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
376			FATAL("out of space for string %.10s...", buf);
377		switch (c) {
378		case '\n':
379		case '\r':
380		case 0:
381			*bp = '\0';
382			SYNTAX( "non-terminated string %.10s...", buf );
383			if (c == 0)	/* hopeless */
384				FATAL( "giving up" );
385			lineno++;
386			break;
387		case '\\':
388			c = input();
389			switch (c) {
390			case '"': *bp++ = '"'; break;
391			case 'n': *bp++ = '\n'; break;
392			case 't': *bp++ = '\t'; break;
393			case 'f': *bp++ = '\f'; break;
394			case 'r': *bp++ = '\r'; break;
395			case 'b': *bp++ = '\b'; break;
396			case 'v': *bp++ = '\v'; break;
397			case 'a': *bp++ = '\007'; break;
398			case '\\': *bp++ = '\\'; break;
399
400			case '0': case '1': case '2': /* octal: \d \dd \ddd */
401			case '3': case '4': case '5': case '6': case '7':
402				n = c - '0';
403				if ((c = peek()) >= '0' && c < '8') {
404					n = 8 * n + input() - '0';
405					if ((c = peek()) >= '0' && c < '8')
406						n = 8 * n + input() - '0';
407				}
408				*bp++ = n;
409				break;
410
411			case 'x':	/* hex  \x0-9a-fA-F + */
412			    {	char xbuf[100], *px;
413				for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
414					if (isdigit(c)
415					 || (c >= 'a' && c <= 'f')
416					 || (c >= 'A' && c <= 'F'))
417						*px++ = c;
418					else
419						break;
420				}
421				*px = 0;
422				unput(c);
423	  			sscanf(xbuf, "%x", (unsigned int *) &n);
424				*bp++ = n;
425				break;
426			    }
427
428			default:
429				*bp++ = c;
430				break;
431			}
432			break;
433		default:
434			*bp++ = c;
435			break;
436		}
437	}
438	*bp = 0;
439	s = tostring(buf);
440	*bp++ = ' '; *bp++ = 0;
441	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
442	RET(STRING);
443}
444
445
446int binsearch(char *w, Keyword *kp, int n)
447{
448	int cond, low, mid, high;
449
450	low = 0;
451	high = n - 1;
452	while (low <= high) {
453		mid = (low + high) / 2;
454		if ((cond = strcmp(w, kp[mid].word)) < 0)
455			high = mid - 1;
456		else if (cond > 0)
457			low = mid + 1;
458		else
459			return mid;
460	}
461	return -1;
462}
463
464int word(char *w)
465{
466	Keyword *kp;
467	int c, n;
468
469	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
470	if (n != -1) {	/* found in table */
471		kp = keywords + n;
472		yylval.i = kp->sub;
473		switch (kp->type) {	/* special handling */
474		case BLTIN:
475			if (kp->sub == FSYSTEM && safe)
476				SYNTAX( "system is unsafe" );
477			RET(kp->type);
478		case FUNC:
479			if (infunc)
480				SYNTAX( "illegal nested function" );
481			RET(kp->type);
482		case RETURN:
483			if (!infunc)
484				SYNTAX( "return not in function" );
485			RET(kp->type);
486		case VARNF:
487			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
488			RET(VARNF);
489		default:
490			RET(kp->type);
491		}
492	}
493	c = peek();	/* look for '(' */
494	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
495		yylval.i = n;
496		RET(ARG);
497	} else {
498		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
499		if (c == '(') {
500			RET(CALL);
501		} else {
502			RET(VAR);
503		}
504	}
505}
506
507void startreg(void)	/* next call to yylex will return a regular expression */
508{
509	reg = 1;
510}
511
512int regexpr(void)
513{
514	int c;
515	static char *buf = NULL;
516	static int bufsz = 500;
517	char *bp;
518
519	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
520		FATAL("out of space for rex expr");
521	bp = buf;
522	for ( ; (c = input()) != '/' && c != 0; ) {
523		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
524			FATAL("out of space for reg expr %.10s...", buf);
525		if (c == '\n') {
526			*bp = '\0';
527			SYNTAX( "newline in regular expression %.10s...", buf );
528			unput('\n');
529			break;
530		} else if (c == '\\') {
531			*bp++ = '\\';
532			*bp++ = input();
533		} else {
534			*bp++ = c;
535		}
536	}
537	*bp = 0;
538	if (c == 0)
539		SYNTAX("non-terminated regular expression %.10s...", buf);
540	yylval.s = tostring(buf);
541	unput('/');
542	RET(REGEXPR);
543}
544
545/* low-level lexical stuff, sort of inherited from lex */
546
/* Ring of characters recently returned by input(); ep is the next slot. */
char	ebuf[300];
char	*ep = ebuf;

/* Pushback stack filled by unput() and drained by input(); yysptr is its top. */
char	yysbuf[100];	/* pushback buffer */
char	*yysptr = yysbuf;

FILE	*yyin = NULL;
552
553int input(void)	/* get next lexical input character */
554{
555	int c;
556	extern char *lexprog;
557
558	if (yysptr > yysbuf)
559		c = (uschar)*--yysptr;
560	else if (lexprog != NULL) {	/* awk '...' */
561		if ((c = (uschar)*lexprog) != 0)
562			lexprog++;
563	} else				/* awk -f ... */
564		c = pgetc();
565	if (c == EOF)
566		c = 0;
567	if (ep >= ebuf + sizeof ebuf)
568		ep = ebuf;
569	*ep = c;
570	if (c != 0) {
571		ep++;
572	}
573	return (c);
574}
575
576void unput(int c)	/* put lexical character back on input */
577{
578	if (yysptr >= yysbuf + sizeof(yysbuf))
579		FATAL("pushed back too much: %.20s...", yysbuf);
580	*yysptr++ = c;
581	if (--ep < ebuf)
582		ep = ebuf + sizeof(ebuf) - 1;
583}
584
/*
 * unputstr: push the whole string s back on the input.  Characters are
 * pushed last-to-first so that input() hands them out in their original
 * order.
 */
void unputstr(const char *s)
{
	const char *p;

	for (p = s + strlen(s); p > s; )
		unput(*--p);
}
592