/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright 2020 Tintri by DDN, Inc. All rights reserved. */ #include #include #include "ndrgen.h" #include "y.tab.h" /* * C-like lexical analysis. * * 1. Define a "struct node" * 2. Define a "struct symbol" that encapsulates a struct node. * 3. Define a "struct integer" that encapsulates a struct node. * 4. Set the YACC stack type in the grammar: * %{ * #define YYSTYPE struct node * * %} * 5. Define %token's in the grammer for IDENTIFIER, STRING and INTEGER. * Using "_KW" as a suffix for keyword tokens, i.e. "struct" is * "%token STRUCT_KW": * // atomic values * %token INTEGER STRING IDENTIFIER * // keywords * %token STRUCT_KW CASE_KW * // operators * %token PLUS MINUS ASSIGN ARROW * // overloaded tokens (++ --, < > <= >=, == !=, += -= *= ...) * %token INCOP RELOP EQUOP ASSOP * 6. It's easiest to use the yacc(1) generated token numbers for node * labels. For node labels that are not actually part of the grammer, * use a %token with an L_ prefix: * // node labels (can't be generated by lex) * %token L_LT L_LTE L_GT L_GTE L_EQU L_NEQ * 7. Call set_lex_input() before parsing. */ #define SQ '\'' #define DQ '"' #define isquote(c) ((c) == SQ || (c) == DQ) #define iswhite(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\f') #define is_between(c, l, u) ((l) <= (c) && (c) <= (u)) #define is_white(c) ((c) == ' ' || c == '\r' || c == '\t' || c == '\f') #define is_lower(c) is_between((c), 'a', 'z') #define is_upper(c) is_between((c), 'A', 'Z') #define is_alpha(c) (is_lower(c) || is_upper(c)) #define is_digit(c) is_between((c), '0', '9') #define is_sstart(c) (is_alpha(c) || (c) == '_') #define is_sfollow(c) (is_sstart(c) || is_digit(c)) #define is_xdigit(c) \ (is_digit(c) || is_between((c), 'A', 'F') || is_between((c), 'a', 'f')) ndr_symbol_t *symbol_list; static ndr_integer_t *integer_list; static FILE *lex_infp; static ndr_symbol_t *file_name; int line_number; int n_compile_error; static int lex_at_bol; /* In yacc(1) generated parser */ extern struct node *yylval; /* * The keywtab[] and optable[] could be external to this lex * and it would all still work. */ static ndr_keyword_t keywtable[] = { { "struct", STRUCT_KW, 0 }, { "union", UNION_KW, 0 }, { "typedef", TYPEDEF_KW, 0 }, { "interface", INTERFACE_KW, 0 }, { "uuid", UUID_KW, 0 }, { "_no_reorder", _NO_REORDER_KW, 0 }, { "extern", EXTERN_KW, 0 }, { "reference", REFERENCE_KW, 0 }, { "align", ALIGN_KW, 0 }, { "operation", OPERATION_KW, 0 }, { "in", IN_KW, 0 }, { "out", OUT_KW, 0 }, { "string", STRING_KW, 0 }, { "size_is", SIZE_IS_KW, 0 }, { "length_is", LENGTH_IS_KW, 0 }, { "switch_is", SWITCH_IS_KW, 0 }, { "case", CASE_KW, 0 }, { "default", DEFAULT_KW, 0 }, { "transmit_as", TRANSMIT_AS_KW, 0 }, { "arg_is", ARG_IS_KW, 0 }, { "fake", FAKE_KW, 0 }, { "char", BASIC_TYPE, 1 }, { "uchar", BASIC_TYPE, 1 }, { "wchar", BASIC_TYPE, 2 }, { "short", BASIC_TYPE, 2 }, { "ushort", BASIC_TYPE, 2 }, { "long", BASIC_TYPE, 4 }, { "ulong", BASIC_TYPE, 4 }, {0} }; static ndr_keyword_t optable[] = { { "{", LC, 0 }, { "}", RC, 0 }, { "(", LP, 0 }, { ")", RP, 0 }, { "[", LB, 0 }, { "]", RB, 0 }, { "*", STAR, 0 }, { "/", DIV, 0 }, { "%", MOD, 0 }, { "-", MINUS, 0 }, { "+", PLUS, 0 }, { "&", AND, 0 }, { "|", OR, 0 }, { "^", XOR, 0 }, { ";", SEMI, 0 }, {0} }; static int getch(FILE *fp); static ndr_integer_t *int_enter(long); static ndr_symbol_t *sym_enter(char *); static ndr_symbol_t *sym_find(char *); static int str_to_sv(char *, char *sv[]); /* * Enter the symbols for keyword. */ static void keyw_tab_init(ndr_keyword_t kwtable[]) { int i; ndr_keyword_t *kw; ndr_symbol_t *sym; for (i = 0; kwtable[i].name; i++) { kw = &kwtable[i]; sym = sym_enter(kw->name); sym->kw = kw; } } void set_lex_input(FILE *fp, char *name) { keyw_tab_init(keywtable); keyw_tab_init(optable); lex_infp = fp; file_name = sym_enter(name); line_number = 1; lex_at_bol = 1; } static int getch(FILE *fp) { return (getc(fp)); } int yylex(void) { char lexeme[512]; char *p = lexeme; FILE *fp = lex_infp; int c, xc; ndr_symbol_t *sym; ndr_integer_t *intg; top: p = lexeme; c = getch(fp); if (c == EOF) return (EOF); if (c == '\n') { line_number++; lex_at_bol = 1; goto top; } /* * Handle preprocessor lines. This just notes * which file we're processing. */ if (c == '#' && lex_at_bol) { char *sv[10]; int sc; while ((c = getch(fp)) != EOF && c != '\n') *p++ = c; *p = 0; /* note: no ungetc() of newline, we don't want to count it */ if (*lexeme != ' ') { /* not a line we know */ goto top; } sc = str_to_sv(lexeme, sv); if (sc < 2) goto top; file_name = sym_enter(sv[1]); line_number = atoi(sv[0]); /* for next input line */ lex_at_bol = 1; goto top; } lex_at_bol = 0; /* * Skip white space */ if (is_white(c)) goto top; /* * Symbol? Might be a keyword or just an identifier */ if (is_sstart(c)) { /* we got a symbol */ do { *p++ = c; c = getch(fp); } while (is_sfollow(c)); (void) ungetc(c, fp); *p = 0; sym = sym_enter(lexeme); yylval = &sym->s_node; if (sym->kw) { return (sym->kw->token); } else { return (IDENTIFIER); } } /* * Integer constant? */ if (is_digit(c)) { /* we got a number */ *p++ = c; if (c == '0') { c = getch(fp); if (c == 'x' || c == 'X') { /* handle hex specially */ do { *p++ = c; c = getch(fp); } while (is_xdigit(c)); goto convert_icon; } else if (c == 'b' || c == 'B' || c == 'd' || c == 'D' || c == 'o' || c == 'O') { do { *p++ = c; c = getch(fp); } while (is_digit(c)); goto convert_icon; } (void) ungetc(c, fp); } /* could be anything */ c = getch(fp); while (is_digit(c)) { *p++ = c; c = getch(fp); } convert_icon: *p = 0; (void) ungetc(c, fp); intg = int_enter(strtol(lexeme, 0, 0)); yylval = &intg->s_node; return (INTEGER); } /* Could handle strings. We don't seem to need them yet */ yylval = 0; /* operator tokens have no value */ xc = getch(fp); /* get look-ahead for two-char lexemes */ lexeme[0] = c; lexeme[1] = xc; lexeme[2] = 0; /* * Look for to-end-of-line comment */ if (c == '/' && xc == '/') { /* eat the comment */ while ((c = getch(fp)) != EOF && c != '\n') ; (void) ungetc(c, fp); /* put back newline */ goto top; } /* * Look for multi-line comment */ if (c == '/' && xc == '*') { /* eat the comment */ xc = -1; while ((c = getch(fp)) != EOF) { if (xc == '*' && c == '/') { /* that's it */ break; } xc = c; if (c == '\n') line_number++; } goto top; } /* * Use symbol table lookup for two-character and * one character operator tokens. */ sym = sym_find(lexeme); if (sym) { /* there better be a keyword attached */ yylval = &sym->s_node; return (sym->kw->token); } /* Try a one-character form */ (void) ungetc(xc, fp); lexeme[1] = 0; sym = sym_find(lexeme); if (sym) { /* there better be a keyword attached */ yylval = &sym->s_node; return (sym->kw->token); } if (is_between(c, ' ', '~')) compile_error("unrecognized character: 0x%02x (%c)", c, c); else compile_error("unrecognized character: 0x%02x", c); goto top; } static ndr_symbol_t * sym_find(char *name) { ndr_symbol_t **pp; ndr_symbol_t *p; for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) { if (strcmp(p->name, name) == 0) return (p); } return (0); } static ndr_symbol_t * sym_enter(char *name) { ndr_symbol_t **pp; ndr_symbol_t *p; for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) { if (strcmp(p->name, name) == 0) return (p); } p = ndr_alloc(1, sizeof (ndr_symbol_t)); if ((p->name = strdup(name)) == NULL) fatal_error("%s", strerror(ENOMEM)); p->s_node.label = IDENTIFIER; p->s_node.n_sym = p; *pp = p; return (p); } static ndr_integer_t * int_enter(long value) { ndr_integer_t **pp; ndr_integer_t *p; for (pp = &integer_list; (p = *pp) != 0; pp = &p->next) { if (p->value == value) return (p); } p = ndr_alloc(1, sizeof (ndr_integer_t)); p->value = value; p->s_node.label = INTEGER; p->s_node.n_int = value; *pp = p; return (p); } void * ndr_alloc(size_t nelem, size_t elsize) { void *p; if ((p = calloc(nelem, elsize)) == NULL) { fatal_error("%s", strerror(ENOMEM)); /* NOTREACHED */ } return (p); } /* * The input context (filename, line number) is maintained by the * lexical analysis, and we generally want such info reported for * errors in a consistent manner. */ void compile_error(const char *fmt, ...) { char buf[NDLBUFSZ]; va_list ap; va_start(ap, fmt); (void) vsnprintf(buf, NDLBUFSZ, fmt, ap); va_end(ap); (void) fprintf(stderr, "ndrgen: compile error: %s:%d: %s\n", file_name->name, line_number, buf); n_compile_error++; } void fatal_error(const char *fmt, ...) { char buf[NDLBUFSZ]; va_list ap; va_start(ap, fmt); (void) vsnprintf(buf, NDLBUFSZ, fmt, ap); va_end(ap); (void) fprintf(stderr, "ndrgen: fatal error: %s\n", buf); exit(1); } /* * Setup nodes for the lexical analyzer. */ struct node * n_cons(int label, ...) { ndr_node_t *np; va_list ap; np = ndr_alloc(1, sizeof (ndr_node_t)); va_start(ap, label); np->label = label; np->n_arg[0] = va_arg(ap, void *); np->n_arg[1] = va_arg(ap, void *); np->n_arg[2] = va_arg(ap, void *); va_end(ap); np->line_number = line_number; np->file_name = file_name; return (np); } /* * list: item * | list item ={ n_splice($1, $2); } * ; */ void n_splice(struct node *np1, struct node *np2) { while (np1->n_next) np1 = np1->n_next; np1->n_next = np2; } /* * Convert a string of words to a vector of strings. * Returns the number of words. */ static int str_to_sv(char *buf, char *sv[]) { char **pp = sv; char *p = buf; char *q = buf; int in_word = 0; int c; for (;;) { c = *p++; if (c == 0) break; if (!in_word) { if (iswhite(c)) continue; *pp++ = q; in_word = 1; } if (isquote(c)) { int qc = c; while (((c = *p++) != 0) && (c != qc)) *q++ = c; if (c == 0) break; } else if (iswhite(c)) { /* end of word */ *q++ = 0; in_word = 0; } else { /* still inside word */ *q++ = c; } } if (in_word) *q++ = 0; *pp = (char *)0; return (pp - sv); }