1*1f5207b7SJohn Levon #ifndef TOKEN_H 2*1f5207b7SJohn Levon #define TOKEN_H 3*1f5207b7SJohn Levon /* 4*1f5207b7SJohn Levon * Basic tokenization structures. NOTE! Those tokens had better 5*1f5207b7SJohn Levon * be pretty small, since we're going to keep them all in memory 6*1f5207b7SJohn Levon * indefinitely. 7*1f5207b7SJohn Levon * 8*1f5207b7SJohn Levon * Copyright (C) 2003 Transmeta Corp. 9*1f5207b7SJohn Levon * 2003 Linus Torvalds 10*1f5207b7SJohn Levon * 11*1f5207b7SJohn Levon * Permission is hereby granted, free of charge, to any person obtaining a copy 12*1f5207b7SJohn Levon * of this software and associated documentation files (the "Software"), to deal 13*1f5207b7SJohn Levon * in the Software without restriction, including without limitation the rights 14*1f5207b7SJohn Levon * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15*1f5207b7SJohn Levon * copies of the Software, and to permit persons to whom the Software is 16*1f5207b7SJohn Levon * furnished to do so, subject to the following conditions: 17*1f5207b7SJohn Levon * 18*1f5207b7SJohn Levon * The above copyright notice and this permission notice shall be included in 19*1f5207b7SJohn Levon * all copies or substantial portions of the Software. 20*1f5207b7SJohn Levon * 21*1f5207b7SJohn Levon * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22*1f5207b7SJohn Levon * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23*1f5207b7SJohn Levon * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24*1f5207b7SJohn Levon * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25*1f5207b7SJohn Levon * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26*1f5207b7SJohn Levon * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 27*1f5207b7SJohn Levon * THE SOFTWARE. 28*1f5207b7SJohn Levon */ 29*1f5207b7SJohn Levon 30*1f5207b7SJohn Levon #include <sys/types.h> 31*1f5207b7SJohn Levon #include "lib.h" 32*1f5207b7SJohn Levon 33*1f5207b7SJohn Levon /* 34*1f5207b7SJohn Levon * This describes the pure lexical elements (tokens), with 35*1f5207b7SJohn Levon * no semantic meaning. In other words, an identifier doesn't 36*1f5207b7SJohn Levon * have a type or meaning, it is only a specific string in 37*1f5207b7SJohn Levon * the input stream. 38*1f5207b7SJohn Levon * 39*1f5207b7SJohn Levon * Semantic meaning is handled elsewhere. 40*1f5207b7SJohn Levon */ 41*1f5207b7SJohn Levon 42*1f5207b7SJohn Levon enum constantfile { 43*1f5207b7SJohn Levon CONSTANT_FILE_MAYBE, // To be determined, not inside any #ifs in this file 44*1f5207b7SJohn Levon CONSTANT_FILE_IFNDEF, // To be determined, currently inside #ifndef 45*1f5207b7SJohn Levon CONSTANT_FILE_NOPE, // No 46*1f5207b7SJohn Levon CONSTANT_FILE_YES // Yes 47*1f5207b7SJohn Levon }; 48*1f5207b7SJohn Levon 49*1f5207b7SJohn Levon extern const char *includepath[]; 50*1f5207b7SJohn Levon 51*1f5207b7SJohn Levon struct stream { 52*1f5207b7SJohn Levon int fd; 53*1f5207b7SJohn Levon const char *name; 54*1f5207b7SJohn Levon const char *path; // input-file path - see set_stream_include_path() 55*1f5207b7SJohn Levon const char **next_path; 56*1f5207b7SJohn Levon 57*1f5207b7SJohn Levon /* Use these to check for "already parsed" */ 58*1f5207b7SJohn Levon enum constantfile constant; 59*1f5207b7SJohn Levon int dirty, next_stream, once; 60*1f5207b7SJohn Levon struct ident *protect; 61*1f5207b7SJohn Levon struct token *ifndef; 62*1f5207b7SJohn Levon struct token *top_if; 63*1f5207b7SJohn Levon }; 64*1f5207b7SJohn Levon 65*1f5207b7SJohn Levon extern int input_stream_nr; 66*1f5207b7SJohn Levon extern struct stream *input_streams; 67*1f5207b7SJohn Levon extern unsigned int tabstop; 68*1f5207b7SJohn Levon extern int no_lineno; 69*1f5207b7SJohn Levon extern int *hash_stream(const char *name); 70*1f5207b7SJohn Levon 71*1f5207b7SJohn Levon struct ident { 72*1f5207b7SJohn Levon struct ident *next; /* Hash chain of identifiers */ 73*1f5207b7SJohn Levon struct symbol *symbols; /* Pointer to semantic meaning list */ 74*1f5207b7SJohn Levon unsigned char len; /* Length of identifier name */ 75*1f5207b7SJohn Levon unsigned char tainted:1, 76*1f5207b7SJohn Levon reserved:1, 77*1f5207b7SJohn Levon keyword:1; 78*1f5207b7SJohn Levon char name[]; /* Actual identifier */ 79*1f5207b7SJohn Levon }; 80*1f5207b7SJohn Levon 81*1f5207b7SJohn Levon enum token_type { 82*1f5207b7SJohn Levon TOKEN_EOF, 83*1f5207b7SJohn Levon TOKEN_ERROR, 84*1f5207b7SJohn Levon TOKEN_IDENT, 85*1f5207b7SJohn Levon TOKEN_ZERO_IDENT, 86*1f5207b7SJohn Levon TOKEN_NUMBER, 87*1f5207b7SJohn Levon TOKEN_CHAR, 88*1f5207b7SJohn Levon TOKEN_CHAR_EMBEDDED_0, 89*1f5207b7SJohn Levon TOKEN_CHAR_EMBEDDED_1, 90*1f5207b7SJohn Levon TOKEN_CHAR_EMBEDDED_2, 91*1f5207b7SJohn Levon TOKEN_CHAR_EMBEDDED_3, 92*1f5207b7SJohn Levon TOKEN_WIDE_CHAR, 93*1f5207b7SJohn Levon TOKEN_WIDE_CHAR_EMBEDDED_0, 94*1f5207b7SJohn Levon TOKEN_WIDE_CHAR_EMBEDDED_1, 95*1f5207b7SJohn Levon TOKEN_WIDE_CHAR_EMBEDDED_2, 96*1f5207b7SJohn Levon TOKEN_WIDE_CHAR_EMBEDDED_3, 97*1f5207b7SJohn Levon TOKEN_STRING, 98*1f5207b7SJohn Levon TOKEN_WIDE_STRING, 99*1f5207b7SJohn Levon TOKEN_SPECIAL, 100*1f5207b7SJohn Levon TOKEN_STREAMBEGIN, 101*1f5207b7SJohn Levon TOKEN_STREAMEND, 102*1f5207b7SJohn Levon TOKEN_MACRO_ARGUMENT, 103*1f5207b7SJohn Levon TOKEN_STR_ARGUMENT, 104*1f5207b7SJohn Levon TOKEN_QUOTED_ARGUMENT, 105*1f5207b7SJohn Levon TOKEN_CONCAT, 106*1f5207b7SJohn Levon TOKEN_GNU_KLUDGE, 107*1f5207b7SJohn Levon TOKEN_UNTAINT, 108*1f5207b7SJohn Levon TOKEN_ARG_COUNT, 109*1f5207b7SJohn Levon TOKEN_IF, 110*1f5207b7SJohn Levon TOKEN_SKIP_GROUPS, 111*1f5207b7SJohn Levon TOKEN_ELSE, 112*1f5207b7SJohn Levon }; 113*1f5207b7SJohn Levon 114*1f5207b7SJohn Levon /* Combination tokens */ 115*1f5207b7SJohn Levon #define COMBINATION_STRINGS { \ 116*1f5207b7SJohn Levon "+=", "++", \ 117*1f5207b7SJohn Levon "-=", "--", "->", \ 118*1f5207b7SJohn Levon "*=", \ 119*1f5207b7SJohn Levon "/=", \ 120*1f5207b7SJohn Levon "%=", \ 121*1f5207b7SJohn Levon "<=", ">=", \ 122*1f5207b7SJohn Levon "==", "!=", \ 123*1f5207b7SJohn Levon "&&", "&=", \ 124*1f5207b7SJohn Levon "||", "|=", \ 125*1f5207b7SJohn Levon "^=", "##", \ 126*1f5207b7SJohn Levon "<<", ">>", "..", \ 127*1f5207b7SJohn Levon "<<=", ">>=", "...", \ 128*1f5207b7SJohn Levon "", \ 129*1f5207b7SJohn Levon "<", ">", "<=", ">=" \ 130*1f5207b7SJohn Levon } 131*1f5207b7SJohn Levon 132*1f5207b7SJohn Levon extern unsigned char combinations[][4]; 133*1f5207b7SJohn Levon 134*1f5207b7SJohn Levon enum special_token { 135*1f5207b7SJohn Levon SPECIAL_BASE = 256, 136*1f5207b7SJohn Levon SPECIAL_ADD_ASSIGN = SPECIAL_BASE, 137*1f5207b7SJohn Levon SPECIAL_INCREMENT, 138*1f5207b7SJohn Levon SPECIAL_SUB_ASSIGN, 139*1f5207b7SJohn Levon SPECIAL_DECREMENT, 140*1f5207b7SJohn Levon SPECIAL_DEREFERENCE, 141*1f5207b7SJohn Levon SPECIAL_MUL_ASSIGN, 142*1f5207b7SJohn Levon SPECIAL_DIV_ASSIGN, 143*1f5207b7SJohn Levon SPECIAL_MOD_ASSIGN, 144*1f5207b7SJohn Levon SPECIAL_LTE, 145*1f5207b7SJohn Levon SPECIAL_GTE, 146*1f5207b7SJohn Levon SPECIAL_EQUAL, 147*1f5207b7SJohn Levon SPECIAL_NOTEQUAL, 148*1f5207b7SJohn Levon SPECIAL_LOGICAL_AND, 149*1f5207b7SJohn Levon SPECIAL_AND_ASSIGN, 150*1f5207b7SJohn Levon SPECIAL_LOGICAL_OR, 151*1f5207b7SJohn Levon SPECIAL_OR_ASSIGN, 152*1f5207b7SJohn Levon SPECIAL_XOR_ASSIGN, 153*1f5207b7SJohn Levon SPECIAL_HASHHASH, 154*1f5207b7SJohn Levon SPECIAL_LEFTSHIFT, 155*1f5207b7SJohn Levon SPECIAL_RIGHTSHIFT, 156*1f5207b7SJohn Levon SPECIAL_DOTDOT, 157*1f5207b7SJohn Levon SPECIAL_SHL_ASSIGN, 158*1f5207b7SJohn Levon SPECIAL_SHR_ASSIGN, 159*1f5207b7SJohn Levon SPECIAL_ELLIPSIS, 160*1f5207b7SJohn Levon SPECIAL_ARG_SEPARATOR, 161*1f5207b7SJohn Levon SPECIAL_UNSIGNED_LT, 162*1f5207b7SJohn Levon SPECIAL_UNSIGNED_GT, 163*1f5207b7SJohn Levon SPECIAL_UNSIGNED_LTE, 164*1f5207b7SJohn Levon SPECIAL_UNSIGNED_GTE, 165*1f5207b7SJohn Levon }; 166*1f5207b7SJohn Levon 167*1f5207b7SJohn Levon struct string { 168*1f5207b7SJohn Levon unsigned int length:31; 169*1f5207b7SJohn Levon unsigned int immutable:1; 170*1f5207b7SJohn Levon char data[]; 171*1f5207b7SJohn Levon }; 172*1f5207b7SJohn Levon 173*1f5207b7SJohn Levon /* will fit into 32 bits */ 174*1f5207b7SJohn Levon struct argcount { 175*1f5207b7SJohn Levon unsigned normal:10; 176*1f5207b7SJohn Levon unsigned quoted:10; 177*1f5207b7SJohn Levon unsigned str:10; 178*1f5207b7SJohn Levon unsigned vararg:1; 179*1f5207b7SJohn Levon }; 180*1f5207b7SJohn Levon 181*1f5207b7SJohn Levon /* 182*1f5207b7SJohn Levon * This is a very common data structure, it should be kept 183*1f5207b7SJohn Levon * as small as humanly possible. Big (rare) types go as 184*1f5207b7SJohn Levon * pointers. 185*1f5207b7SJohn Levon */ 186*1f5207b7SJohn Levon struct token { 187*1f5207b7SJohn Levon struct position pos; 188*1f5207b7SJohn Levon struct token *next; 189*1f5207b7SJohn Levon union { 190*1f5207b7SJohn Levon const char *number; 191*1f5207b7SJohn Levon struct ident *ident; 192*1f5207b7SJohn Levon unsigned int special; 193*1f5207b7SJohn Levon struct string *string; 194*1f5207b7SJohn Levon int argnum; 195*1f5207b7SJohn Levon struct argcount count; 196*1f5207b7SJohn Levon char embedded[4]; 197*1f5207b7SJohn Levon }; 198*1f5207b7SJohn Levon }; 199*1f5207b7SJohn Levon 200*1f5207b7SJohn Levon #define MAX_STRING 8191 201*1f5207b7SJohn Levon 202*1f5207b7SJohn Levon static inline struct token *containing_token(struct token **p) 203*1f5207b7SJohn Levon { 204*1f5207b7SJohn Levon void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0); 205*1f5207b7SJohn Levon return addr; 206*1f5207b7SJohn Levon } 207*1f5207b7SJohn Levon 208*1f5207b7SJohn Levon #define token_type(x) ((x)->pos.type) 209*1f5207b7SJohn Levon 210*1f5207b7SJohn Levon /* 211*1f5207b7SJohn Levon * Last token in the stream - points to itself. 212*1f5207b7SJohn Levon * This allows us to not test for NULL pointers 213*1f5207b7SJohn Levon * when following the token->next chain.. 214*1f5207b7SJohn Levon */ 215*1f5207b7SJohn Levon extern struct token eof_token_entry; 216*1f5207b7SJohn Levon #define eof_token(x) ((x) == &eof_token_entry) 217*1f5207b7SJohn Levon 218*1f5207b7SJohn Levon extern int init_stream(const char *, int fd, const char **next_path); 219*1f5207b7SJohn Levon extern const char *stream_name(int stream); 220*1f5207b7SJohn Levon extern struct ident *hash_ident(struct ident *); 221*1f5207b7SJohn Levon extern struct ident *built_in_ident(const char *); 222*1f5207b7SJohn Levon extern struct token *built_in_token(int, struct ident *); 223*1f5207b7SJohn Levon extern const char *show_special(int); 224*1f5207b7SJohn Levon extern const char *show_ident(const struct ident *); 225*1f5207b7SJohn Levon extern const char *show_string(const struct string *string); 226*1f5207b7SJohn Levon extern const char *show_token(const struct token *); 227*1f5207b7SJohn Levon extern const char *quote_token(const struct token *); 228*1f5207b7SJohn Levon extern struct token * tokenize(const char *, int, struct token *, const char **next_path); 229*1f5207b7SJohn Levon extern struct token * tokenize_buffer(void *, unsigned long, struct token **); 230*1f5207b7SJohn Levon 231*1f5207b7SJohn Levon extern void show_identifier_stats(void); 232*1f5207b7SJohn Levon extern void init_include_path(void); 233*1f5207b7SJohn Levon extern struct token *preprocess(struct token *); 234*1f5207b7SJohn Levon 235*1f5207b7SJohn Levon extern void store_all_tokens(struct token *token); 236*1f5207b7SJohn Levon extern struct token *pos_get_token(struct position pos); 237*1f5207b7SJohn Levon extern char *pos_ident(struct position pos); 238*1f5207b7SJohn Levon 239*1f5207b7SJohn Levon extern void store_macro_pos(struct token *); 240*1f5207b7SJohn Levon extern char *get_macro_name(struct position pos); 241*1f5207b7SJohn Levon 242*1f5207b7SJohn Levon static inline int match_op(struct token *token, unsigned int op) 243*1f5207b7SJohn Levon { 244*1f5207b7SJohn Levon return token->pos.type == TOKEN_SPECIAL && token->special == op; 245*1f5207b7SJohn Levon } 246*1f5207b7SJohn Levon 247*1f5207b7SJohn Levon static inline int match_ident(struct token *token, struct ident *id) 248*1f5207b7SJohn Levon { 249*1f5207b7SJohn Levon return token->pos.type == TOKEN_IDENT && token->ident == id; 250*1f5207b7SJohn Levon } 251*1f5207b7SJohn Levon 252*1f5207b7SJohn Levon #endif 253