1/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <ctype.h>
29#include "awk.h"
30#include "ytab.h"
31
32extern YYSTYPE	yylval;
33extern int	infunc;
34
35int	lineno	= 1;
36int	bracecnt = 0;
37int	brackcnt  = 0;
38int	parencnt = 0;
39
/* One reserved word or built-in name recognised by word(). */
typedef struct Keyword Keyword;

struct Keyword {
	const char *word;	/* spelling, compared with strcmp() */
	int	sub;		/* value stored in yylval.i when matched */
	int	type;		/* token type handed back to the parser */
};
45
46Keyword keywords[] ={	/* keep sorted: binary searched */
47	{ "BEGIN",	XBEGIN,		XBEGIN },
48	{ "END",	XEND,		XEND },
49	{ "NF",		VARNF,		VARNF },
50	{ "and",	FAND,		BLTIN },
51	{ "atan2",	FATAN,		BLTIN },
52	{ "break",	BREAK,		BREAK },
53	{ "close",	CLOSE,		CLOSE },
54	{ "compl",	FCOMPL,		BLTIN },
55	{ "continue",	CONTINUE,	CONTINUE },
56	{ "cos",	FCOS,		BLTIN },
57	{ "delete",	DELETE,		DELETE },
58	{ "do",		DO,		DO },
59	{ "else",	ELSE,		ELSE },
60	{ "exit",	EXIT,		EXIT },
61	{ "exp",	FEXP,		BLTIN },
62	{ "fflush",	FFLUSH,		BLTIN },
63	{ "for",	FOR,		FOR },
64	{ "func",	FUNC,		FUNC },
65	{ "function",	FUNC,		FUNC },
66	{ "getline",	GETLINE,	GETLINE },
67	{ "gsub",	GSUB,		GSUB },
68	{ "if",		IF,		IF },
69	{ "in",		IN,		IN },
70	{ "index",	INDEX,		INDEX },
71	{ "int",	FINT,		BLTIN },
72	{ "length",	FLENGTH,	BLTIN },
73	{ "log",	FLOG,		BLTIN },
74	{ "lshift",	FLSHIFT,	BLTIN },
75	{ "match",	MATCHFCN,	MATCHFCN },
76	{ "next",	NEXT,		NEXT },
77	{ "nextfile",	NEXTFILE,	NEXTFILE },
78	{ "or",		FFOR,		BLTIN },
79	{ "print",	PRINT,		PRINT },
80	{ "printf",	PRINTF,		PRINTF },
81	{ "rand",	FRAND,		BLTIN },
82	{ "return",	RETURN,		RETURN },
83	{ "rshift",	FRSHIFT,	BLTIN },
84	{ "sin",	FSIN,		BLTIN },
85	{ "split",	SPLIT,		SPLIT },
86	{ "sprintf",	SPRINTF,	SPRINTF },
87	{ "sqrt",	FSQRT,		BLTIN },
88	{ "srand",	FSRAND,		BLTIN },
89	{ "sub",	SUB,		SUB },
90	{ "substr",	SUBSTR,		SUBSTR },
91	{ "system",	FSYSTEM,	BLTIN },
92	{ "tolower",	FTOLOWER,	BLTIN },
93	{ "toupper",	FTOUPPER,	BLTIN },
94	{ "while",	WHILE,		WHILE },
95	{ "xor",	FXOR,		BLTIN },
96};
97
/*
 * RET(x): return token x from the enclosing lexer function, printing its
 * name first when dbg is set.
 *
 * The body is wrapped in do { } while (0) so that `RET(x);` is exactly one
 * statement and can be used safely as an unbraced if/else arm.  x is
 * evaluated only once.
 */
#define	RET(x)	do { \
		int ret_tok_ = (x); \
		if (dbg) \
			printf("lex %s\n", tokname(ret_tok_)); \
		return ret_tok_; \
	} while (0)
99
/*
 * peek: report the next input character without consuming it.
 * Implemented as a read via input() followed by an immediate unput().
 */
int peek(void)
{
	int next = input();

	unput(next);
	return next;
}
106
/*
 * gettok: read the next raw token from the lexical input.
 *
 * pbuf/psz: address of the caller's token buffer and of its size.  The
 *	buffer is grown through adjbuf() as needed; *pbuf and *psz are written
 *	back after a name or number has been scanned.  The buffer must hold at
 *	least two bytes on entry, because buf[0] and buf[1] are stored before
 *	any size check.
 *
 * Returns
 *	0	at end of input (buffer untouched);
 *	'a'	for a name ([A-Za-z_][A-Za-z0-9_]*), text in the buffer;
 *	'0'	for a number, text in the buffer;
 *	c	otherwise: the single character itself, also stored in the
 *		buffer as a one-character string.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Keep room for this character AND the terminating
			 * NUL: the loop may end on c == 0 right after a
			 * store, and the final *bp = 0 must stay in bounds.
			 */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same invariant as above: character plus NUL */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * Push the tail back BEFORE truncating: rem+1 points
			 * at buf[1], so cutting the buffer first would leave
			 * nothing to push back and lose those characters.
			 */
			unputstr(rem+1); /* put rest back for later */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
169
/* Helpers defined further down in this file. */
int	word(char *w);
int	string(void);
int	regexpr(void);

/* One-shot requests examined (and cleared) at the top of yylex(). */
int	sc = 0;		/* 1 => return a } right now */
int	reg = 0;	/* 1 => return a REGEXPR now */
175
176int yylex(void)
177{
178	int c;
179	static char *buf = NULL;
180	static int bufsize = 5; /* BUG: setting this small causes core dump! */
181
182	if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
183		FATAL( "out of space in yylex" );
184	if (sc) {
185		sc = 0;
186		RET('}');
187	}
188	if (reg) {
189		reg = 0;
190		return regexpr();
191	}
192	for (;;) {
193		c = gettok(&buf, &bufsize);
194		if (c == 0)
195			return 0;
196		if (isalpha(c) || c == '_')
197			return word(buf);
198		if (isdigit(c)) {
199			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
200			/* should this also have STR set? */
201			RET(NUMBER);
202		}
203
204		yylval.i = c;
205		switch (c) {
206		case '\n':	/* {EOL} */
207			lineno++;
208			RET(NL);
209		case '\r':	/* assume \n is coming */
210		case ' ':	/* {WS}+ */
211		case '\t':
212			break;
213		case '#':	/* #.* strip comments */
214			while ((c = input()) != '\n' && c != 0)
215				;
216			unput(c);
217			break;
218		case ';':
219			RET(';');
220		case '\\':
221			if (peek() == '\n') {
222				input();
223				lineno++;
224			} else if (peek() == '\r') {
225				input(); input();	/* \n */
226				lineno++;
227			} else {
228				RET(c);
229			}
230			break;
231		case '&':
232			if (peek() == '&') {
233				input(); RET(AND);
234			} else
235				RET('&');
236		case '|':
237			if (peek() == '|') {
238				input(); RET(BOR);
239			} else
240				RET('|');
241		case '!':
242			if (peek() == '=') {
243				input(); yylval.i = NE; RET(NE);
244			} else if (peek() == '~') {
245				input(); yylval.i = NOTMATCH; RET(MATCHOP);
246			} else
247				RET(NOT);
248		case '~':
249			yylval.i = MATCH;
250			RET(MATCHOP);
251		case '<':
252			if (peek() == '=') {
253				input(); yylval.i = LE; RET(LE);
254			} else {
255				yylval.i = LT; RET(LT);
256			}
257		case '=':
258			if (peek() == '=') {
259				input(); yylval.i = EQ; RET(EQ);
260			} else {
261				yylval.i = ASSIGN; RET(ASGNOP);
262			}
263		case '>':
264			if (peek() == '=') {
265				input(); yylval.i = GE; RET(GE);
266			} else if (peek() == '>') {
267				input(); yylval.i = APPEND; RET(APPEND);
268			} else {
269				yylval.i = GT; RET(GT);
270			}
271		case '+':
272			if (peek() == '+') {
273				input(); yylval.i = INCR; RET(INCR);
274			} else if (peek() == '=') {
275				input(); yylval.i = ADDEQ; RET(ASGNOP);
276			} else
277				RET('+');
278		case '-':
279			if (peek() == '-') {
280				input(); yylval.i = DECR; RET(DECR);
281			} else if (peek() == '=') {
282				input(); yylval.i = SUBEQ; RET(ASGNOP);
283			} else
284				RET('-');
285		case '*':
286			if (peek() == '=') {	/* *= */
287				input(); yylval.i = MULTEQ; RET(ASGNOP);
288			} else if (peek() == '*') {	/* ** or **= */
289				input();	/* eat 2nd * */
290				if (peek() == '=') {
291					input(); yylval.i = POWEQ; RET(ASGNOP);
292				} else {
293					RET(POWER);
294				}
295			} else
296				RET('*');
297		case '/':
298			RET('/');
299		case '%':
300			if (peek() == '=') {
301				input(); yylval.i = MODEQ; RET(ASGNOP);
302			} else
303				RET('%');
304		case '^':
305			if (peek() == '=') {
306				input(); yylval.i = POWEQ; RET(ASGNOP);
307			} else
308				RET(POWER);
309
310		case '$':
311			/* BUG: awkward, if not wrong */
312			c = gettok(&buf, &bufsize);
313			if (isalpha(c)) {
314				if (strcmp(buf, "NF") == 0) {	/* very special */
315					unputstr("(NF)");
316					RET(INDIRECT);
317				}
318				c = peek();
319				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
320					unputstr(buf);
321					RET(INDIRECT);
322				}
323				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
324				RET(IVAR);
325			} else if (c == 0) {	/*  */
326				SYNTAX( "unexpected end of input after $" );
327				RET(';');
328			} else {
329				unputstr(buf);
330				RET(INDIRECT);
331			}
332
333		case '}':
334			if (--bracecnt < 0)
335				SYNTAX( "extra }" );
336			sc = 1;
337			RET(';');
338		case ']':
339			if (--brackcnt < 0)
340				SYNTAX( "extra ]" );
341			RET(']');
342		case ')':
343			if (--parencnt < 0)
344				SYNTAX( "extra )" );
345			RET(')');
346		case '{':
347			bracecnt++;
348			RET('{');
349		case '[':
350			brackcnt++;
351			RET('[');
352		case '(':
353			parencnt++;
354			RET('(');
355
356		case '"':
357			return string();	/* BUG: should be like tran.c ? */
358
359		default:
360			RET(c);
361		}
362	}
363}
364
365int string(void)
366{
367	int c, n;
368	char *s, *bp;
369	static char *buf = NULL;
370	static int bufsz = 500;
371
372	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
373		FATAL("out of space for strings");
374	for (bp = buf; (c = input()) != '"'; ) {
375		if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
376			FATAL("out of space for string %.10s...", buf);
377		switch (c) {
378		case '\n':
379		case '\r':
380		case 0:
381			*bp = '\0';
382			SYNTAX( "non-terminated string %.10s...", buf );
383			if (c == 0)	/* hopeless */
384				FATAL( "giving up" );
385			lineno++;
386			break;
387		case '\\':
388			c = input();
389			switch (c) {
390			case '"': *bp++ = '"'; break;
391			case 'n': *bp++ = '\n'; break;
392			case 't': *bp++ = '\t'; break;
393			case 'f': *bp++ = '\f'; break;
394			case 'r': *bp++ = '\r'; break;
395			case 'b': *bp++ = '\b'; break;
396			case 'v': *bp++ = '\v'; break;
397			case 'a': *bp++ = '\007'; break;
398			case '\\': *bp++ = '\\'; break;
399
400			case '0': case '1': case '2': /* octal: \d \dd \ddd */
401			case '3': case '4': case '5': case '6': case '7':
402				n = c - '0';
403				if ((c = peek()) >= '0' && c < '8') {
404					n = 8 * n + input() - '0';
405					if ((c = peek()) >= '0' && c < '8')
406						n = 8 * n + input() - '0';
407				}
408				*bp++ = n;
409				break;
410
411			case 'x':	/* hex  \x0-9a-fA-F + */
412			    {	char xbuf[100], *px;
413				for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
414					if (isdigit(c)
415					 || (c >= 'a' && c <= 'f')
416					 || (c >= 'A' && c <= 'F'))
417						*px++ = c;
418					else
419						break;
420				}
421				*px = 0;
422				unput(c);
423	  			sscanf(xbuf, "%x", (unsigned int *) &n);
424				*bp++ = n;
425				break;
426			    }
427
428			default:
429				*bp++ = c;
430				break;
431			}
432			break;
433		default:
434			*bp++ = c;
435			break;
436		}
437	}
438	*bp = 0;
439	s = tostring(buf);
440	*bp++ = ' '; *bp++ = 0;
441	yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
442	RET(STRING);
443}
444
445
446int binsearch(char *w, Keyword *kp, int n)
447{
448	int cond, low, mid, high;
449
450	low = 0;
451	high = n - 1;
452	while (low <= high) {
453		mid = (low + high) / 2;
454		if ((cond = strcmp(w, kp[mid].word)) < 0)
455			high = mid - 1;
456		else if (cond > 0)
457			low = mid + 1;
458		else
459			return mid;
460	}
461	return -1;
462}
463
464int word(char *w)
465{
466	Keyword *kp;
467	int c, n;
468
469	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
470/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
471	kp = keywords + n;
472	if (n != -1) {	/* found in table */
473		yylval.i = kp->sub;
474		switch (kp->type) {	/* special handling */
475		case BLTIN:
476			if (kp->sub == FSYSTEM && safe)
477				SYNTAX( "system is unsafe" );
478			RET(kp->type);
479		case FUNC:
480			if (infunc)
481				SYNTAX( "illegal nested function" );
482			RET(kp->type);
483		case RETURN:
484			if (!infunc)
485				SYNTAX( "return not in function" );
486			RET(kp->type);
487		case VARNF:
488			yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
489			RET(VARNF);
490		default:
491			RET(kp->type);
492		}
493	}
494	c = peek();	/* look for '(' */
495	if (c != '(' && infunc && (n=isarg(w)) >= 0) {
496		yylval.i = n;
497		RET(ARG);
498	} else {
499		yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
500		if (c == '(') {
501			RET(CALL);
502		} else {
503			RET(VAR);
504		}
505	}
506}
507
508void startreg(void)	/* next call to yylex will return a regular expression */
509{
510	reg = 1;
511}
512
513int regexpr(void)
514{
515	int c;
516	static char *buf = NULL;
517	static int bufsz = 500;
518	char *bp;
519
520	if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
521		FATAL("out of space for rex expr");
522	bp = buf;
523	for ( ; (c = input()) != '/' && c != 0; ) {
524		if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
525			FATAL("out of space for reg expr %.10s...", buf);
526		if (c == '\n') {
527			*bp = '\0';
528			SYNTAX( "newline in regular expression %.10s...", buf );
529			unput('\n');
530			break;
531		} else if (c == '\\') {
532			*bp++ = '\\';
533			*bp++ = input();
534		} else {
535			*bp++ = c;
536		}
537	}
538	*bp = 0;
539	if (c == 0)
540		SYNTAX("non-terminated regular expression %.10s...", buf);
541	yylval.s = tostring(buf);
542	unput('/');
543	RET(REGEXPR);
544}
545
546/* low-level lexical stuff, sort of inherited from lex */
547
/* Ring of recently read characters: input() stores into it, unput() steps back. */
char	ebuf[300];
char	*ep = ebuf;		/* next slot input() will write */

/* Pushback stack: unput() pushes, input() pops before reading anything new. */
char	yysbuf[100];
char	*yysptr = yysbuf;	/* one past the most recently pushed character */

FILE	*yyin = NULL;
553
554int input(void)	/* get next lexical input character */
555{
556	int c;
557	extern char *lexprog;
558
559	if (yysptr > yysbuf)
560		c = (uschar)*--yysptr;
561	else if (lexprog != NULL) {	/* awk '...' */
562		if ((c = (uschar)*lexprog) != 0)
563			lexprog++;
564	} else				/* awk -f ... */
565		c = pgetc();
566	if (c == EOF)
567		c = 0;
568	if (ep >= ebuf + sizeof ebuf)
569		ep = ebuf;
570	*ep = c;
571	if (c != 0) {
572		ep++;
573	}
574	return (c);
575}
576
577void unput(int c)	/* put lexical character back on input */
578{
579	if (yysptr >= yysbuf + sizeof(yysbuf))
580		FATAL("pushed back too much: %.20s...", yysbuf);
581	*yysptr++ = c;
582	if (--ep < ebuf)
583		ep = ebuf + sizeof(ebuf) - 1;
584}
585
/*
 * unputstr: push the whole string s back onto the input.
 *
 * Characters are pushed last-to-first so that subsequent input() calls
 * return them in their original order.  An empty string pushes nothing.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	size_t i;

	/*
	 * Count down in size_t: storing strlen(s)-1 in an int relies on
	 * unsigned wraparound plus a narrowing conversion for "", and
	 * truncates very long strings.
	 */
	for (i = strlen(s); i > 0; i--)
		unput(s[i-1]);
}
593