11f5207b7SJohn Levon #ifndef TOKEN_H 21f5207b7SJohn Levon #define TOKEN_H 31f5207b7SJohn Levon /* 41f5207b7SJohn Levon * Basic tokenization structures. NOTE! Those tokens had better 51f5207b7SJohn Levon * be pretty small, since we're going to keep them all in memory 61f5207b7SJohn Levon * indefinitely. 71f5207b7SJohn Levon * 81f5207b7SJohn Levon * Copyright (C) 2003 Transmeta Corp. 91f5207b7SJohn Levon * 2003 Linus Torvalds 101f5207b7SJohn Levon * 111f5207b7SJohn Levon * Permission is hereby granted, free of charge, to any person obtaining a copy 121f5207b7SJohn Levon * of this software and associated documentation files (the "Software"), to deal 131f5207b7SJohn Levon * in the Software without restriction, including without limitation the rights 141f5207b7SJohn Levon * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 151f5207b7SJohn Levon * copies of the Software, and to permit persons to whom the Software is 161f5207b7SJohn Levon * furnished to do so, subject to the following conditions: 171f5207b7SJohn Levon * 181f5207b7SJohn Levon * The above copyright notice and this permission notice shall be included in 191f5207b7SJohn Levon * all copies or substantial portions of the Software. 201f5207b7SJohn Levon * 211f5207b7SJohn Levon * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 221f5207b7SJohn Levon * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 231f5207b7SJohn Levon * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 241f5207b7SJohn Levon * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 251f5207b7SJohn Levon * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 261f5207b7SJohn Levon * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 271f5207b7SJohn Levon * THE SOFTWARE. 281f5207b7SJohn Levon */ 291f5207b7SJohn Levon 301f5207b7SJohn Levon #include <sys/types.h> 311f5207b7SJohn Levon #include "lib.h" 321f5207b7SJohn Levon 331f5207b7SJohn Levon /* 341f5207b7SJohn Levon * This describes the pure lexical elements (tokens), with 351f5207b7SJohn Levon * no semantic meaning. In other words, an identifier doesn't 361f5207b7SJohn Levon * have a type or meaning, it is only a specific string in 371f5207b7SJohn Levon * the input stream. 381f5207b7SJohn Levon * 391f5207b7SJohn Levon * Semantic meaning is handled elsewhere. 401f5207b7SJohn Levon */ 411f5207b7SJohn Levon 421f5207b7SJohn Levon enum constantfile { 431f5207b7SJohn Levon CONSTANT_FILE_MAYBE, // To be determined, not inside any #ifs in this file 441f5207b7SJohn Levon CONSTANT_FILE_IFNDEF, // To be determined, currently inside #ifndef 451f5207b7SJohn Levon CONSTANT_FILE_NOPE, // No 461f5207b7SJohn Levon CONSTANT_FILE_YES // Yes 471f5207b7SJohn Levon }; 481f5207b7SJohn Levon 491f5207b7SJohn Levon extern const char *includepath[]; 501f5207b7SJohn Levon 511f5207b7SJohn Levon struct stream { 521f5207b7SJohn Levon int fd; 531f5207b7SJohn Levon const char *name; 541f5207b7SJohn Levon const char *path; // input-file path - see set_stream_include_path() 551f5207b7SJohn Levon const char **next_path; 561f5207b7SJohn Levon 571f5207b7SJohn Levon /* Use these to check for "already parsed" */ 581f5207b7SJohn Levon enum constantfile constant; 591f5207b7SJohn Levon int dirty, next_stream, once; 601f5207b7SJohn Levon struct ident *protect; 611f5207b7SJohn Levon struct token *ifndef; 621f5207b7SJohn Levon struct token *top_if; 631f5207b7SJohn Levon }; 641f5207b7SJohn Levon 651f5207b7SJohn Levon extern int input_stream_nr; 661f5207b7SJohn Levon extern struct stream *input_streams; 671f5207b7SJohn Levon extern unsigned int tabstop; 681f5207b7SJohn Levon extern int no_lineno; 691f5207b7SJohn Levon extern int *hash_stream(const char *name); 701f5207b7SJohn Levon 711f5207b7SJohn Levon struct ident { 721f5207b7SJohn Levon struct ident *next; /* Hash chain of identifiers */ 731f5207b7SJohn Levon struct symbol *symbols; /* Pointer to semantic meaning list */ 741f5207b7SJohn Levon unsigned char len; /* Length of identifier name */ 751f5207b7SJohn Levon unsigned char tainted:1, 761f5207b7SJohn Levon reserved:1, 771f5207b7SJohn Levon keyword:1; 781f5207b7SJohn Levon char name[]; /* Actual identifier */ 791f5207b7SJohn Levon }; 801f5207b7SJohn Levon 811f5207b7SJohn Levon enum token_type { 821f5207b7SJohn Levon TOKEN_EOF, 83c85f09ccSJohn Levon TOKEN_BAD, 841f5207b7SJohn Levon TOKEN_ERROR, 851f5207b7SJohn Levon TOKEN_IDENT, 861f5207b7SJohn Levon TOKEN_ZERO_IDENT, 871f5207b7SJohn Levon TOKEN_NUMBER, 881f5207b7SJohn Levon TOKEN_CHAR, 891f5207b7SJohn Levon TOKEN_CHAR_EMBEDDED_0, 901f5207b7SJohn Levon TOKEN_CHAR_EMBEDDED_1, 911f5207b7SJohn Levon TOKEN_CHAR_EMBEDDED_2, 921f5207b7SJohn Levon TOKEN_CHAR_EMBEDDED_3, 931f5207b7SJohn Levon TOKEN_WIDE_CHAR, 941f5207b7SJohn Levon TOKEN_WIDE_CHAR_EMBEDDED_0, 951f5207b7SJohn Levon TOKEN_WIDE_CHAR_EMBEDDED_1, 961f5207b7SJohn Levon TOKEN_WIDE_CHAR_EMBEDDED_2, 971f5207b7SJohn Levon TOKEN_WIDE_CHAR_EMBEDDED_3, 981f5207b7SJohn Levon TOKEN_STRING, 991f5207b7SJohn Levon TOKEN_WIDE_STRING, 1001f5207b7SJohn Levon TOKEN_SPECIAL, 1011f5207b7SJohn Levon TOKEN_STREAMBEGIN, 1021f5207b7SJohn Levon TOKEN_STREAMEND, 1031f5207b7SJohn Levon TOKEN_MACRO_ARGUMENT, 1041f5207b7SJohn Levon TOKEN_STR_ARGUMENT, 1051f5207b7SJohn Levon TOKEN_QUOTED_ARGUMENT, 1061f5207b7SJohn Levon TOKEN_CONCAT, 1071f5207b7SJohn Levon TOKEN_GNU_KLUDGE, 1081f5207b7SJohn Levon TOKEN_UNTAINT, 1091f5207b7SJohn Levon TOKEN_ARG_COUNT, 1101f5207b7SJohn Levon TOKEN_IF, 1111f5207b7SJohn Levon TOKEN_SKIP_GROUPS, 1121f5207b7SJohn Levon TOKEN_ELSE, 1131f5207b7SJohn Levon }; 1141f5207b7SJohn Levon 1151f5207b7SJohn Levon /* Combination tokens */ 1161f5207b7SJohn Levon #define COMBINATION_STRINGS { \ 1171f5207b7SJohn Levon "+=", "++", \ 1181f5207b7SJohn Levon "-=", "--", "->", \ 1191f5207b7SJohn Levon "*=", \ 1201f5207b7SJohn Levon "/=", \ 1211f5207b7SJohn Levon "%=", \ 1221f5207b7SJohn Levon "<=", ">=", \ 1231f5207b7SJohn Levon "==", "!=", \ 1241f5207b7SJohn Levon "&&", "&=", \ 1251f5207b7SJohn Levon "||", "|=", \ 1261f5207b7SJohn Levon "^=", "##", \ 1271f5207b7SJohn Levon "<<", ">>", "..", \ 1281f5207b7SJohn Levon "<<=", ">>=", "...", \ 1291f5207b7SJohn Levon "", \ 1301f5207b7SJohn Levon "<", ">", "<=", ">=" \ 1311f5207b7SJohn Levon } 1321f5207b7SJohn Levon 1331f5207b7SJohn Levon extern unsigned char combinations[][4]; 1341f5207b7SJohn Levon 1351f5207b7SJohn Levon enum special_token { 1361f5207b7SJohn Levon SPECIAL_BASE = 256, 1371f5207b7SJohn Levon SPECIAL_ADD_ASSIGN = SPECIAL_BASE, 1381f5207b7SJohn Levon SPECIAL_INCREMENT, 1391f5207b7SJohn Levon SPECIAL_SUB_ASSIGN, 1401f5207b7SJohn Levon SPECIAL_DECREMENT, 1411f5207b7SJohn Levon SPECIAL_DEREFERENCE, 1421f5207b7SJohn Levon SPECIAL_MUL_ASSIGN, 1431f5207b7SJohn Levon SPECIAL_DIV_ASSIGN, 1441f5207b7SJohn Levon SPECIAL_MOD_ASSIGN, 1451f5207b7SJohn Levon SPECIAL_LTE, 1461f5207b7SJohn Levon SPECIAL_GTE, 1471f5207b7SJohn Levon SPECIAL_EQUAL, 1481f5207b7SJohn Levon SPECIAL_NOTEQUAL, 1491f5207b7SJohn Levon SPECIAL_LOGICAL_AND, 1501f5207b7SJohn Levon SPECIAL_AND_ASSIGN, 1511f5207b7SJohn Levon SPECIAL_LOGICAL_OR, 1521f5207b7SJohn Levon SPECIAL_OR_ASSIGN, 1531f5207b7SJohn Levon SPECIAL_XOR_ASSIGN, 1541f5207b7SJohn Levon SPECIAL_HASHHASH, 1551f5207b7SJohn Levon SPECIAL_LEFTSHIFT, 1561f5207b7SJohn Levon SPECIAL_RIGHTSHIFT, 1571f5207b7SJohn Levon SPECIAL_DOTDOT, 1581f5207b7SJohn Levon SPECIAL_SHL_ASSIGN, 1591f5207b7SJohn Levon SPECIAL_SHR_ASSIGN, 1601f5207b7SJohn Levon SPECIAL_ELLIPSIS, 1611f5207b7SJohn Levon SPECIAL_ARG_SEPARATOR, 1621f5207b7SJohn Levon SPECIAL_UNSIGNED_LT, 1631f5207b7SJohn Levon SPECIAL_UNSIGNED_GT, 1641f5207b7SJohn Levon SPECIAL_UNSIGNED_LTE, 1651f5207b7SJohn Levon SPECIAL_UNSIGNED_GTE, 1661f5207b7SJohn Levon }; 1671f5207b7SJohn Levon 1681f5207b7SJohn Levon struct string { 1691f5207b7SJohn Levon unsigned int length:31; 1701f5207b7SJohn Levon unsigned int immutable:1; 1711f5207b7SJohn Levon char data[]; 1721f5207b7SJohn Levon }; 1731f5207b7SJohn Levon 1741f5207b7SJohn Levon /* will fit into 32 bits */ 1751f5207b7SJohn Levon struct argcount { 1761f5207b7SJohn Levon unsigned normal:10; 1771f5207b7SJohn Levon unsigned quoted:10; 1781f5207b7SJohn Levon unsigned str:10; 1791f5207b7SJohn Levon unsigned vararg:1; 1801f5207b7SJohn Levon }; 1811f5207b7SJohn Levon 1821f5207b7SJohn Levon /* 1831f5207b7SJohn Levon * This is a very common data structure, it should be kept 1841f5207b7SJohn Levon * as small as humanly possible. Big (rare) types go as 1851f5207b7SJohn Levon * pointers. 1861f5207b7SJohn Levon */ 1871f5207b7SJohn Levon struct token { 1881f5207b7SJohn Levon struct position pos; 1891f5207b7SJohn Levon struct token *next; 1901f5207b7SJohn Levon union { 1911f5207b7SJohn Levon const char *number; 1921f5207b7SJohn Levon struct ident *ident; 1931f5207b7SJohn Levon unsigned int special; 1941f5207b7SJohn Levon struct string *string; 1951f5207b7SJohn Levon int argnum; 1961f5207b7SJohn Levon struct argcount count; 1971f5207b7SJohn Levon char embedded[4]; 1981f5207b7SJohn Levon }; 1991f5207b7SJohn Levon }; 2001f5207b7SJohn Levon 2011f5207b7SJohn Levon #define MAX_STRING 8191 2021f5207b7SJohn Levon 2031f5207b7SJohn Levon static inline struct token *containing_token(struct token **p) 2041f5207b7SJohn Levon { 2051f5207b7SJohn Levon void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0); 2061f5207b7SJohn Levon return addr; 2071f5207b7SJohn Levon } 2081f5207b7SJohn Levon 2091f5207b7SJohn Levon #define token_type(x) ((x)->pos.type) 2101f5207b7SJohn Levon 2111f5207b7SJohn Levon /* 2121f5207b7SJohn Levon * Last token in the stream - points to itself. 2131f5207b7SJohn Levon * This allows us to not test for NULL pointers 2141f5207b7SJohn Levon * when following the token->next chain.. 2151f5207b7SJohn Levon */ 2161f5207b7SJohn Levon extern struct token eof_token_entry; 2171f5207b7SJohn Levon #define eof_token(x) ((x) == &eof_token_entry) 2181f5207b7SJohn Levon 2191f5207b7SJohn Levon extern int init_stream(const char *, int fd, const char **next_path); 2201f5207b7SJohn Levon extern const char *stream_name(int stream); 221*6523a3aaSJohn Levon struct ident *alloc_ident(const char *name, int len); 2221f5207b7SJohn Levon extern struct ident *hash_ident(struct ident *); 2231f5207b7SJohn Levon extern struct ident *built_in_ident(const char *); 2241f5207b7SJohn Levon extern struct token *built_in_token(int, struct ident *); 2251f5207b7SJohn Levon extern const char *show_special(int); 2261f5207b7SJohn Levon extern const char *show_ident(const struct ident *); 2271f5207b7SJohn Levon extern const char *show_string(const struct string *string); 2281f5207b7SJohn Levon extern const char *show_token(const struct token *); 2291f5207b7SJohn Levon extern const char *quote_token(const struct token *); 2301f5207b7SJohn Levon extern struct token * tokenize(const char *, int, struct token *, const char **next_path); 2311f5207b7SJohn Levon extern struct token * tokenize_buffer(void *, unsigned long, struct token **); 2321f5207b7SJohn Levon 2331f5207b7SJohn Levon extern void show_identifier_stats(void); 2341f5207b7SJohn Levon extern void init_include_path(void); 2351f5207b7SJohn Levon extern struct token *preprocess(struct token *); 2361f5207b7SJohn Levon 2371f5207b7SJohn Levon extern void store_all_tokens(struct token *token); 2381f5207b7SJohn Levon extern struct token *pos_get_token(struct position pos); 2391f5207b7SJohn Levon extern char *pos_ident(struct position pos); 2401f5207b7SJohn Levon 2411f5207b7SJohn Levon extern void store_macro_pos(struct token *); 2421f5207b7SJohn Levon extern char *get_macro_name(struct position pos); 243c85f09ccSJohn Levon extern char *get_inner_macro(struct position pos); 244c85f09ccSJohn Levon extern struct string_list *get_all_macros(struct position pos); 2451f5207b7SJohn Levon 2461f5207b7SJohn Levon static inline int match_op(struct token *token, unsigned int op) 2471f5207b7SJohn Levon { 2481f5207b7SJohn Levon return token->pos.type == TOKEN_SPECIAL && token->special == op; 2491f5207b7SJohn Levon } 2501f5207b7SJohn Levon 2511f5207b7SJohn Levon static inline int match_ident(struct token *token, struct ident *id) 2521f5207b7SJohn Levon { 2531f5207b7SJohn Levon return token->pos.type == TOKEN_IDENT && token->ident == id; 2541f5207b7SJohn Levon } 2551f5207b7SJohn Levon 2561f5207b7SJohn Levon #endif 257