/* * Copyright (C) Lucent Technologies 1997 * All Rights Reserved * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that the above copyright notice appear in all * copies and that both that the copyright notice and this * permission notice and warranty disclaimer appear in supporting * documentation, and that the name Lucent Technologies or any of * its entities not be used in advertising or publicity pertaining * to distribution of the software without specific, written prior * permission. * * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF * THIS SOFTWARE. */ #include #include #include #include #include "awk.h" #include "y.tab.h" extern YYSTYPE yylval; extern int infunc; off_t lineno = 1; int bracecnt = 0; int brackcnt = 0; int parencnt = 0; typedef struct Keyword { const char *word; int sub; int type; } Keyword; Keyword keywords[] = { /* keep sorted: binary searched */ { "BEGIN", XBEGIN, XBEGIN }, { "END", XEND, XEND }, { "NF", VARNF, VARNF }, { "atan2", FATAN, BLTIN }, { "break", BREAK, BREAK }, { "close", CLOSE, CLOSE }, { "continue", CONTINUE, CONTINUE }, { "cos", FCOS, BLTIN }, { "delete", DELETE, DELETE }, { "do", DO, DO }, { "else", ELSE, ELSE }, { "exit", EXIT, EXIT }, { "exp", FEXP, BLTIN }, { "fflush", FFLUSH, BLTIN }, { "for", FOR, FOR }, { "func", FUNC, FUNC }, { "function", FUNC, FUNC }, { "getline", GETLINE, GETLINE }, { "gsub", GSUB, GSUB }, { "if", IF, IF }, { "in", IN, IN }, { "index", INDEX, INDEX }, { "int", FINT, BLTIN }, { "length", FLENGTH, BLTIN }, { "log", FLOG, BLTIN }, { "match", MATCHFCN, MATCHFCN }, { "next", NEXT, NEXT }, { "nextfile", NEXTFILE, NEXTFILE }, { "print", PRINT, PRINT }, { "printf", PRINTF, PRINTF }, { "rand", FRAND, BLTIN }, { "return", RETURN, RETURN }, { "sin", FSIN, BLTIN }, { "split", SPLIT, SPLIT }, { "sprintf", SPRINTF, SPRINTF }, { "sqrt", FSQRT, BLTIN }, { "srand", FSRAND, BLTIN }, { "sub", SUB, SUB }, { "substr", SUBSTR, SUBSTR }, { "system", FSYSTEM, BLTIN }, { "tolower", FTOLOWER, BLTIN }, { "toupper", FTOUPPER, BLTIN }, { "while", WHILE, WHILE }, }; #define RET(x) { if (dbg) (void) printf("lex %s\n", tokname(x)); return (x); } int peek(void) { int c = input(); unput(c); return (c); } int gettok(char **pbuf, size_t *psz) /* get next input token */ { int c, retc; char *buf = *pbuf; size_t sz = *psz; char *bp = buf; c = input(); if (c == 0) return (0); buf[0] = c; buf[1] = 0; if (!isalnum(c) && c != '.' && c != '_') return (c); *bp++ = c; if (isalpha(c) || c == '_') { /* it's a varname */ for (; (c = input()) != 0; ) { if (bp-buf >= sz && !adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) FATAL("out of space for name %.10s...", buf); if (isalnum(c) || c == '_') *bp++ = c; else { *bp = 0; unput(c); break; } } *bp = 0; retc = 'a'; /* alphanumeric */ } else { /* maybe it's a number, but could be . */ char *rem; /* read input until can't be a number */ for (; (c = input()) != 0; ) { if (bp-buf >= sz && !adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) FATAL("out of space for number %.10s...", buf); if (isdigit(c) || c == 'e' || c == 'E' || c == '.' || c == '+' || c == '-') *bp++ = c; else { unput(c); break; } } *bp = 0; (void) strtod(buf, &rem); /* parse the number */ if (rem == buf) { /* it wasn't a valid number at all */ buf[1] = 0; /* return one character as token */ retc = buf[0]; /* character is its own type */ unputstr(rem+1); /* put rest back for later */ } else { /* some prefix was a number */ unputstr(rem); /* put rest back for later */ rem[0] = 0; /* truncate buf after number part */ retc = '0'; /* type is number */ } } *pbuf = buf; *psz = sz; return (retc); } int word(char *); int string(void); int regexpr(void); int sc = 0; /* 1 => return a } right now */ int reg = 0; /* 1 => return a REGEXPR now */ int yylex(void) { int c; static char *buf = NULL; /* BUG: setting this small causes core dump! */ static size_t bufsize = 5; if (buf == NULL && (buf = (char *)malloc(bufsize)) == NULL) FATAL("out of space in yylex"); if (sc) { sc = 0; RET('}'); } if (reg) { reg = 0; return (regexpr()); } for (;;) { c = gettok(&buf, &bufsize); if (c == 0) return (0); if (isalpha(c) || c == '_') return (word(buf)); if (isdigit(c)) { yylval.cp = setsymtab( buf, tostring(buf), atof(buf), CON|NUM, symtab); /* should this also have STR set? */ RET(NUMBER); } yylval.i = c; switch (c) { case '\n': /* {EOL} */ lineno++; RET(NL); case '\r': /* assume \n is coming */ case ' ': /* {WS}+ */ case '\t': break; case '#': /* #.* strip comments */ while ((c = input()) != '\n' && c != 0) ; unput(c); break; case ';': RET(';'); case '\\': if (peek() == '\n') { (void) input(); lineno++; } else if (peek() == '\r') { (void) input(); (void) input(); /* BUG: check for \n */ lineno++; } else { RET(c); } break; case '&': if (peek() == '&') { (void) input(); RET(AND); } else RET('&'); case '|': if (peek() == '|') { (void) input(); RET(BOR); } else RET('|'); case '!': if (peek() == '=') { (void) input(); yylval.i = NE; RET(NE); } else if (peek() == '~') { (void) input(); yylval.i = NOTMATCH; RET(MATCHOP); } else RET(NOT); case '~': yylval.i = MATCH; RET(MATCHOP); case '<': if (peek() == '=') { (void) input(); yylval.i = LE; RET(LE); } else { yylval.i = LT; RET(LT); } case '=': if (peek() == '=') { (void) input(); yylval.i = EQ; RET(EQ); } else { yylval.i = ASSIGN; RET(ASGNOP); } case '>': if (peek() == '=') { (void) input(); yylval.i = GE; RET(GE); } else if (peek() == '>') { (void) input(); yylval.i = APPEND; RET(APPEND); } else { yylval.i = GT; RET(GT); } case '+': if (peek() == '+') { (void) input(); yylval.i = INCR; RET(INCR); } else if (peek() == '=') { (void) input(); yylval.i = ADDEQ; RET(ASGNOP); } else RET('+'); case '-': if (peek() == '-') { (void) input(); yylval.i = DECR; RET(DECR); } else if (peek() == '=') { (void) input(); yylval.i = SUBEQ; RET(ASGNOP); } else RET('-'); case '*': if (peek() == '=') { /* *= */ (void) input(); yylval.i = MULTEQ; RET(ASGNOP); } else if (peek() == '*') { /* ** or **= */ (void) input(); /* eat 2nd * */ if (peek() == '=') { (void) input(); yylval.i = POWEQ; RET(ASGNOP); } else { RET(POWER); } } else RET('*'); case '/': RET('/'); case '%': if (peek() == '=') { (void) input(); yylval.i = MODEQ; RET(ASGNOP); } else RET('%'); case '^': if (peek() == '=') { (void) input(); yylval.i = POWEQ; RET(ASGNOP); } else RET(POWER); case '$': /* BUG: awkward, if not wrong */ c = gettok(&buf, &bufsize); if (isalpha(c)) { if (strcmp(buf, "NF") == 0) { /* very special */ unputstr("(NF)"); RET(INDIRECT); } c = peek(); if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) { unputstr(buf); RET(INDIRECT); } yylval.cp = setsymtab( buf, "", 0.0, STR|NUM, symtab); RET(IVAR); } else if (c == 0) { /* */ SYNTAX("unexpected end of input after $"); RET(';'); } else { unputstr(buf); RET(INDIRECT); } case '}': if (--bracecnt < 0) SYNTAX("extra }"); sc = 1; RET(';'); case ']': if (--brackcnt < 0) SYNTAX("extra ]"); RET(']'); case ')': if (--parencnt < 0) SYNTAX("extra )"); RET(')'); case '{': bracecnt++; RET('{'); case '[': brackcnt++; RET('['); case '(': parencnt++; RET('('); case '"': /* BUG: should be like tran.c ? */ return (string()); default: RET(c); } } } int string(void) { int c, n; char *s, *bp; static char *buf = NULL; static size_t bufsz = 500; if (buf == NULL && (buf = (char *)malloc(bufsz)) == NULL) FATAL("out of space for strings"); for (bp = buf; (c = input()) != '"'; ) { if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string")) FATAL("out of space for string %.10s...", buf); switch (c) { case '\n': case '\r': case 0: *bp = '\0'; SYNTAX("non-terminated string %.10s...", buf); if (c == 0) /* hopeless */ FATAL("giving up"); lineno++; break; case '\\': c = input(); switch (c) { case '"': *bp++ = '"'; break; case 'n': *bp++ = '\n'; break; case 't': *bp++ = '\t'; break; case 'f': *bp++ = '\f'; break; case 'r': *bp++ = '\r'; break; case 'b': *bp++ = '\b'; break; case 'v': *bp++ = '\v'; break; case 'a': *bp++ = '\007'; break; case '\\': *bp++ = '\\'; break; case '0': case '1': case '2': /* octal: \d \dd \ddd */ case '3': case '4': case '5': case '6': case '7': n = c - '0'; if ((c = peek()) >= '0' && c < '8') { n = 8 * n + input() - '0'; if ((c = peek()) >= '0' && c < '8') n = 8 * n + input() - '0'; } *bp++ = n; break; case 'x': { /* hex \x0-9a-fA-F + */ char xbuf[100], *px; px = xbuf; while ((c = input()) != 0 && px-xbuf < 100-2) { if (isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) *px++ = c; else break; } *px = 0; unput(c); (void) sscanf(xbuf, "%x", (unsigned int *)&n); *bp++ = n; break; } default: *bp++ = c; break; } break; default: *bp++ = c; break; } } *bp = 0; s = tostring(buf); *bp++ = ' '; *bp++ = 0; yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab); RET(STRING); } int binsearch(char *w, Keyword *kp, int n) { int cond, low, mid, high; low = 0; high = n - 1; while (low <= high) { mid = (low + high) / 2; if ((cond = strcmp(w, kp[mid].word)) < 0) high = mid - 1; else if (cond > 0) low = mid + 1; else return (mid); } return (-1); } int word(char *w) { Keyword *kp; int c, n; n = binsearch(w, keywords, sizeof (keywords) / sizeof (keywords[0])); if (n != -1) { /* found in table */ kp = keywords + n; yylval.i = kp->sub; switch (kp->type) { /* special handling */ case BLTIN: if (kp->sub == FSYSTEM && safe) SYNTAX("system is unsafe"); RET(kp->type); case FUNC: if (infunc) SYNTAX("illegal nested function"); RET(kp->type); case RETURN: if (!infunc) SYNTAX("return not in function"); RET(kp->type); case VARNF: yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab); RET(VARNF); default: RET(kp->type); } } c = peek(); /* look for '(' */ if (c != '(' && infunc && (n = isarg(w)) >= 0) { yylval.i = n; RET(ARG); } else { yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab); if (c == '(') { RET(CALL); } else { RET(VAR); } } } void startreg(void) /* next call to yylex will return a regular expression */ { reg = 1; } int regexpr(void) { int c; static char *buf = NULL; static size_t bufsz = 500; char *bp; if (buf == NULL && (buf = (char *)malloc(bufsz)) == NULL) FATAL("out of space for rex expr"); bp = buf; for (; (c = input()) != '/' && c != 0; ) { if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr")) FATAL("out of space for reg expr %.10s...", buf); if (c == '\n') { *bp = '\0'; SYNTAX("newline in regular expression %.10s...", buf); unput('\n'); break; } else if (c == '\\') { *bp++ = '\\'; *bp++ = input(); } else { *bp++ = c; } } *bp = 0; if (c == 0) SYNTAX("non-terminated regular expression %.10s...", buf); yylval.s = tostring(buf); unput('/'); RET(REGEXPR); } /* low-level lexical stuff, sort of inherited from lex */ char ebuf[300]; char *ep = ebuf; char yysbuf[100]; /* pushback buffer */ char *yysptr = yysbuf; FILE *yyin = NULL; int input(void) /* get next lexical input character */ { int c; extern char *lexprog; if (yysptr > yysbuf) c = (uschar)*--yysptr; else if (lexprog != NULL) { /* awk '...' */ if ((c = (uschar)*lexprog) != 0) lexprog++; } else /* awk -f ... */ c = pgetc(); if (c == EOF) c = 0; if (ep >= ebuf + sizeof (ebuf)) ep = ebuf; *ep = c; if (c != 0) { ep++; } return (c); } void unput(int c) /* put lexical character back on input */ { if (yysptr >= yysbuf + sizeof (yysbuf)) FATAL("pushed back too much: %.20s...", yysbuf); *yysptr++ = c; if (--ep < ebuf) ep = ebuf + sizeof (ebuf) - 1; } void unputstr(const char *s) /* put a string back on input */ { int i; for (i = strlen(s)-1; i >= 0; i--) unput(s[i]); }