11f5207b7SJohn Levon #ifndef TOKEN_H
21f5207b7SJohn Levon #define TOKEN_H
/*
 * Basic tokenization structures. NOTE! Those tokens had better
 * be pretty small, since we're going to keep them all in memory
 * indefinitely.
 *
 * Copyright (C) 2003 Transmeta Corp.
 *               2003 Linus Torvalds
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
291f5207b7SJohn Levon
#include <stddef.h>
#include <sys/types.h>
#include "lib.h"
321f5207b7SJohn Levon
/*
 * This describes the pure lexical elements (tokens), with
 * no semantic meaning. In other words, an identifier doesn't
 * have a type or meaning, it is only a specific string in
 * the input stream.
 *
 * Semantic meaning is handled elsewhere.
 */
411f5207b7SJohn Levon
/*
 * State recorded per stream in `struct stream::constant`, which that
 * struct groups under 'Use these to check for "already parsed"'.
 */
enum constantfile {
	CONSTANT_FILE_MAYBE,	// To be determined, not inside any #ifs in this file
	CONSTANT_FILE_IFNDEF,	// To be determined, currently inside #ifndef
	CONSTANT_FILE_NOPE,	// No
	CONSTANT_FILE_YES	// Yes
};
481f5207b7SJohn Levon
/* Array of include-path strings; defined in another translation unit. */
extern const char *includepath[];
501f5207b7SJohn Levon
511f5207b7SJohn Levon struct stream {
521f5207b7SJohn Levon int fd;
531f5207b7SJohn Levon const char *name;
541f5207b7SJohn Levon const char *path; // input-file path - see set_stream_include_path()
551f5207b7SJohn Levon const char **next_path;
561f5207b7SJohn Levon
571f5207b7SJohn Levon /* Use these to check for "already parsed" */
581f5207b7SJohn Levon enum constantfile constant;
591f5207b7SJohn Levon int dirty, next_stream, once;
601f5207b7SJohn Levon struct ident *protect;
611f5207b7SJohn Levon struct token *ifndef;
621f5207b7SJohn Levon struct token *top_if;
631f5207b7SJohn Levon };
641f5207b7SJohn Levon
/* Tokenizer globals and the stream-name hash lookup; defined elsewhere. */
extern int input_stream_nr;
extern struct stream *input_streams;
extern unsigned int tabstop;
extern int no_lineno;
extern int *hash_stream(const char *name);
701f5207b7SJohn Levon
/*
 * An identifier as a lexical object. The name is stored inline in the
 * trailing flexible array member; `len` is an unsigned char, so it can
 * represent lengths up to 255 only.
 */
struct ident {
	struct ident *next;	/* Hash chain of identifiers */
	struct symbol *symbols;	/* Pointer to semantic meaning list */
	unsigned char len;	/* Length of identifier name */
	unsigned char tainted:1,
	              reserved:1,
		      keyword:1;
	char name[];		/* Actual identifier */
};
801f5207b7SJohn Levon
/*
 * Kind of a token, as read back by the token_type() macro from
 * `pos.type`. The values are implicit (TOKEN_EOF is 0 and each
 * enumerator is one more than the previous), so the order matters:
 * the four *_EMBEDDED_n enumerators directly follow TOKEN_CHAR and
 * TOKEN_WIDE_CHAR respectively.
 */
enum token_type {
	TOKEN_EOF,
	TOKEN_BAD,
	TOKEN_ERROR,
	TOKEN_IDENT,
	TOKEN_ZERO_IDENT,
	TOKEN_NUMBER,
	TOKEN_CHAR,
	TOKEN_CHAR_EMBEDDED_0,
	TOKEN_CHAR_EMBEDDED_1,
	TOKEN_CHAR_EMBEDDED_2,
	TOKEN_CHAR_EMBEDDED_3,
	TOKEN_WIDE_CHAR,
	TOKEN_WIDE_CHAR_EMBEDDED_0,
	TOKEN_WIDE_CHAR_EMBEDDED_1,
	TOKEN_WIDE_CHAR_EMBEDDED_2,
	TOKEN_WIDE_CHAR_EMBEDDED_3,
	TOKEN_STRING,
	TOKEN_WIDE_STRING,
	TOKEN_SPECIAL,
	TOKEN_STREAMBEGIN,
	TOKEN_STREAMEND,
	TOKEN_MACRO_ARGUMENT,
	TOKEN_STR_ARGUMENT,
	TOKEN_QUOTED_ARGUMENT,
	TOKEN_CONCAT,
	TOKEN_GNU_KLUDGE,
	TOKEN_UNTAINT,
	TOKEN_ARG_COUNT,
	TOKEN_IF,
	TOKEN_SKIP_GROUPS,
	TOKEN_ELSE,
};
1141f5207b7SJohn Levon
/*
 * Combination tokens.
 *
 * Brace-enclosed initializer listing the spelling of every multi-character
 * operator, in the same order as the enumerators of `enum special_token`:
 * entry i spells the token whose value is SPECIAL_BASE + i. The empty
 * string lines up with SPECIAL_ARG_SEPARATOR, and the final four entries
 * line up with the SPECIAL_UNSIGNED_* comparisons, repeating the spellings
 * "<", ">", "<=", ">=".
 */
#define COMBINATION_STRINGS {	\
	"+=", "++",		\
	"-=", "--", "->",	\
	"*=",			\
	"/=",			\
	"%=",			\
	"<=", ">=",		\
	"==", "!=",		\
	"&&", "&=",		\
	"||", "|=",		\
	"^=", "##",		\
	"<<", ">>", "..",	\
	"<<=", ">>=", "...",	\
	"",			\
	"<", ">", "<=", ">="	\
}

/* Table of 4-byte rows; defined in another translation unit. */
extern unsigned char combinations[][4];
1341f5207b7SJohn Levon
/*
 * Values for multi-character operator tokens, compared against
 * `token->special` by match_op(). Numbering starts at SPECIAL_BASE (256)
 * and the enumerators follow the order of the strings in
 * COMBINATION_STRINGS, so SPECIAL_x - SPECIAL_BASE indexes that table.
 */
enum special_token {
	SPECIAL_BASE = 256,
	SPECIAL_ADD_ASSIGN = SPECIAL_BASE,
	SPECIAL_INCREMENT,
	SPECIAL_SUB_ASSIGN,
	SPECIAL_DECREMENT,
	SPECIAL_DEREFERENCE,
	SPECIAL_MUL_ASSIGN,
	SPECIAL_DIV_ASSIGN,
	SPECIAL_MOD_ASSIGN,
	SPECIAL_LTE,
	SPECIAL_GTE,
	SPECIAL_EQUAL,
	SPECIAL_NOTEQUAL,
	SPECIAL_LOGICAL_AND,
	SPECIAL_AND_ASSIGN,
	SPECIAL_LOGICAL_OR,
	SPECIAL_OR_ASSIGN,
	SPECIAL_XOR_ASSIGN,
	SPECIAL_HASHHASH,
	SPECIAL_LEFTSHIFT,
	SPECIAL_RIGHTSHIFT,
	SPECIAL_DOTDOT,
	SPECIAL_SHL_ASSIGN,
	SPECIAL_SHR_ASSIGN,
	SPECIAL_ELLIPSIS,
	SPECIAL_ARG_SEPARATOR,
	SPECIAL_UNSIGNED_LT,
	SPECIAL_UNSIGNED_GT,
	SPECIAL_UNSIGNED_LTE,
	SPECIAL_UNSIGNED_GTE,
};
1671f5207b7SJohn Levon
/*
 * String payload with its bytes stored inline in the trailing flexible
 * array member. `length` is a 31-bit field sharing one unsigned int with
 * the 1-bit `immutable` flag.
 */
struct string {
	unsigned int length:31;
	unsigned int immutable:1;
	char data[];
};
1731f5207b7SJohn Levon
/*
 * Packed counters stored in the `count` member of struct token.
 * Three 10-bit counters (0..1023 each) plus a 1-bit flag: 31 bits in
 * total, so the whole thing will fit into 32 bits.
 */
struct argcount {
	unsigned normal:10;
	unsigned quoted:10;
	unsigned str:10;
	unsigned vararg:1;
};
1811f5207b7SJohn Levon
1821f5207b7SJohn Levon /*
1831f5207b7SJohn Levon * This is a very common data structure, it should be kept
1841f5207b7SJohn Levon * as small as humanly possible. Big (rare) types go as
1851f5207b7SJohn Levon * pointers.
1861f5207b7SJohn Levon */
1871f5207b7SJohn Levon struct token {
1881f5207b7SJohn Levon struct position pos;
1891f5207b7SJohn Levon struct token *next;
1901f5207b7SJohn Levon union {
1911f5207b7SJohn Levon const char *number;
1921f5207b7SJohn Levon struct ident *ident;
1931f5207b7SJohn Levon unsigned int special;
1941f5207b7SJohn Levon struct string *string;
1951f5207b7SJohn Levon int argnum;
1961f5207b7SJohn Levon struct argcount count;
1971f5207b7SJohn Levon char embedded[4];
1981f5207b7SJohn Levon };
1991f5207b7SJohn Levon };
2001f5207b7SJohn Levon
/* String size limit used by the tokenizer (units not stated in this header). */
#define MAX_STRING 8191
2021f5207b7SJohn Levon
containing_token(struct token ** p)2031f5207b7SJohn Levon static inline struct token *containing_token(struct token **p)
2041f5207b7SJohn Levon {
2051f5207b7SJohn Levon void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0);
2061f5207b7SJohn Levon return addr;
2071f5207b7SJohn Levon }
2081f5207b7SJohn Levon
/* Kind of the token pointed to by x, i.e. its pos.type field. */
#define token_type(x) ((x)->pos.type)

/*
 * Last token in the stream - points to itself.
 * This allows us to not test for NULL pointers
 * when following the token->next chain..
 */
extern struct token eof_token_entry;
/* True when x is the address of the shared end-of-stream token. */
#define eof_token(x) ((x) == &eof_token_entry)
2181f5207b7SJohn Levon
/*
 * Tokenizer and preprocessor interface. All of these are implemented in
 * other translation units; only their prototypes are visible here.
 */
extern int init_stream(const char *, int fd, const char **next_path);
extern const char *stream_name(int stream);
extern struct ident *alloc_ident(const char *name, int len);
extern struct ident *hash_ident(struct ident *);
extern struct ident *built_in_ident(const char *);
extern struct token *built_in_token(int, struct ident *);
extern const char *show_special(int);
extern const char *show_ident(const struct ident *);
extern const char *show_string(const struct string *string);
extern const char *show_token(const struct token *);
extern const char *quote_token(const struct token *);
extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
extern struct token * tokenize_buffer(void *, unsigned long, struct token **);

extern void show_identifier_stats(void);
extern void init_include_path(void);
extern struct token *preprocess(struct token *);

extern void store_all_tokens(struct token *token);
extern struct token *pos_get_token(struct position pos);
extern char *pos_ident(struct position pos);

extern void store_macro_pos(struct token *);
extern char *get_macro_name(struct position pos);
extern char *get_inner_macro(struct position pos);
extern struct string_list *get_all_macros(struct position pos);
2451f5207b7SJohn Levon
match_op(struct token * token,unsigned int op)2461f5207b7SJohn Levon static inline int match_op(struct token *token, unsigned int op)
2471f5207b7SJohn Levon {
2481f5207b7SJohn Levon return token->pos.type == TOKEN_SPECIAL && token->special == op;
2491f5207b7SJohn Levon }
2501f5207b7SJohn Levon
match_ident(struct token * token,struct ident * id)2511f5207b7SJohn Levon static inline int match_ident(struct token *token, struct ident *id)
2521f5207b7SJohn Levon {
2531f5207b7SJohn Levon return token->pos.type == TOKEN_IDENT && token->ident == id;
2541f5207b7SJohn Levon }
2551f5207b7SJohn Levon
2561f5207b7SJohn Levon #endif
257