11f5207bJohn Levon#ifndef TOKEN_H
21f5207bJohn Levon#define TOKEN_H
31f5207bJohn Levon/*
41f5207bJohn Levon * Basic tokenization structures. NOTE! Those tokens had better
51f5207bJohn Levon * be pretty small, since we're going to keep them all in memory
61f5207bJohn Levon * indefinitely.
71f5207bJohn Levon *
81f5207bJohn Levon * Copyright (C) 2003 Transmeta Corp.
91f5207bJohn Levon *               2003 Linus Torvalds
101f5207bJohn Levon *
111f5207bJohn Levon * Permission is hereby granted, free of charge, to any person obtaining a copy
121f5207bJohn Levon * of this software and associated documentation files (the "Software"), to deal
131f5207bJohn Levon * in the Software without restriction, including without limitation the rights
141f5207bJohn Levon * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
151f5207bJohn Levon * copies of the Software, and to permit persons to whom the Software is
161f5207bJohn Levon * furnished to do so, subject to the following conditions:
171f5207bJohn Levon *
181f5207bJohn Levon * The above copyright notice and this permission notice shall be included in
191f5207bJohn Levon * all copies or substantial portions of the Software.
201f5207bJohn Levon *
211f5207bJohn Levon * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
221f5207bJohn Levon * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
231f5207bJohn Levon * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
241f5207bJohn Levon * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
251f5207bJohn Levon * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
261f5207bJohn Levon * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
271f5207bJohn Levon * THE SOFTWARE.
281f5207bJohn Levon */
291f5207bJohn Levon
#include <stddef.h>
#include <sys/types.h>
#include "lib.h"
321f5207bJohn Levon
331f5207bJohn Levon/*
341f5207bJohn Levon * This describes the pure lexical elements (tokens), with
351f5207bJohn Levon * no semantic meaning. In other words, an identifier doesn't
361f5207bJohn Levon * have a type or meaning, it is only a specific string in
371f5207bJohn Levon * the input stream.
381f5207bJohn Levon *
391f5207bJohn Levon * Semantic meaning is handled elsewhere.
401f5207bJohn Levon */
411f5207bJohn Levon
/*
 * Per-file state stored in struct stream (member "constant"), where it
 * sits with the other members used to check for "already parsed".
 */
enum constantfile {
	CONSTANT_FILE_MAYBE = 0,	/* To be determined, not inside any #ifs in this file */
	CONSTANT_FILE_IFNDEF = 1,	/* To be determined, currently inside #ifndef */
	CONSTANT_FILE_NOPE = 2,		/* No */
	CONSTANT_FILE_YES = 3,		/* Yes */
};
481f5207bJohn Levon
/*
 * Array of path strings, defined in another translation unit.
 * NOTE(review): presumably the #include search path, and presumably what
 * the next_path pointers in struct stream walk through - confirm.
 */
extern const char *includepath[];
501f5207bJohn Levon
511f5207bJohn Levonstruct stream {
521f5207bJohn Levon	int fd;
531f5207bJohn Levon	const char *name;
541f5207bJohn Levon	const char *path;    // input-file path - see set_stream_include_path()
551f5207bJohn Levon	const char **next_path;
561f5207bJohn Levon
571f5207bJohn Levon	/* Use these to check for "already parsed" */
581f5207bJohn Levon	enum constantfile constant;
591f5207bJohn Levon	int dirty, next_stream, once;
601f5207bJohn Levon	struct ident *protect;
611f5207bJohn Levon	struct token *ifndef;
621f5207bJohn Levon	struct token *top_if;
631f5207bJohn Levon};
641f5207bJohn Levon
/*
 * Tokenizer-wide state and helpers, defined in another translation unit.
 * NOTE(review): input_stream_nr presumably counts the entries reachable
 * through input_streams, and stream numbers (see stream_name()) presumably
 * index it - confirm against the implementation.
 */
extern int input_stream_nr;
extern struct stream *input_streams;
extern unsigned int tabstop;
extern int no_lineno;
extern int *hash_stream(const char *name);
701f5207bJohn Levon
/*
 * A single identifier as seen by the lexer. The name is stored inline in
 * the trailing flexible array; its length lives in an unsigned char, so
 * it cannot exceed 255.
 */
struct ident {
	struct ident *next;		/* Hash chain of identifiers */
	struct symbol *symbols;		/* Pointer to semantic meaning list */
	unsigned char len;		/* Length of identifier name */

	/* One-bit flags, packed together */
	unsigned char tainted:1;
	unsigned char reserved:1;
	unsigned char keyword:1;

	char name[];			/* Actual identifier */
};
801f5207bJohn Levon
/*
 * Kinds of token. A token's kind is kept in its position (pos.type) and is
 * what match_op() and match_ident() compare against.
 *
 * Enumerators are numbered consecutively from zero; do not reorder or
 * insert without checking every user of the numeric values.
 */
enum token_type {
	TOKEN_EOF = 0,
	TOKEN_BAD,
	TOKEN_ERROR,
	TOKEN_IDENT,
	TOKEN_ZERO_IDENT,
	TOKEN_NUMBER,

	/* Character constants: narrow, then wide */
	TOKEN_CHAR,
	TOKEN_CHAR_EMBEDDED_0,
	TOKEN_CHAR_EMBEDDED_1,
	TOKEN_CHAR_EMBEDDED_2,
	TOKEN_CHAR_EMBEDDED_3,
	TOKEN_WIDE_CHAR,
	TOKEN_WIDE_CHAR_EMBEDDED_0,
	TOKEN_WIDE_CHAR_EMBEDDED_1,
	TOKEN_WIDE_CHAR_EMBEDDED_2,
	TOKEN_WIDE_CHAR_EMBEDDED_3,

	/* String literals: narrow, then wide */
	TOKEN_STRING,
	TOKEN_WIDE_STRING,

	TOKEN_SPECIAL,
	TOKEN_STREAMBEGIN,
	TOKEN_STREAMEND,

	/* NOTE(review): the remaining kinds look preprocessor-internal - confirm */
	TOKEN_MACRO_ARGUMENT,
	TOKEN_STR_ARGUMENT,
	TOKEN_QUOTED_ARGUMENT,
	TOKEN_CONCAT,
	TOKEN_GNU_KLUDGE,
	TOKEN_UNTAINT,
	TOKEN_ARG_COUNT,
	TOKEN_IF,
	TOKEN_SKIP_GROUPS,
	TOKEN_ELSE,
};
1141f5207bJohn Levon
/*
 * Combination tokens: the spellings of the multi-character operators, as a
 * brace initializer. The entries are listed in the same order as the
 * enumerators of enum special_token (starting at SPECIAL_BASE); the empty
 * string occupies the SPECIAL_ARG_SEPARATOR slot. Keep both lists in step.
 */
#define COMBINATION_STRINGS {			\
	"+=",	/* SPECIAL_ADD_ASSIGN */	\
	"++",	/* SPECIAL_INCREMENT */		\
	"-=",	/* SPECIAL_SUB_ASSIGN */	\
	"--",	/* SPECIAL_DECREMENT */		\
	"->",	/* SPECIAL_DEREFERENCE */	\
	"*=",	/* SPECIAL_MUL_ASSIGN */	\
	"/=",	/* SPECIAL_DIV_ASSIGN */	\
	"%=",	/* SPECIAL_MOD_ASSIGN */	\
	"<=",	/* SPECIAL_LTE */		\
	">=",	/* SPECIAL_GTE */		\
	"==",	/* SPECIAL_EQUAL */		\
	"!=",	/* SPECIAL_NOTEQUAL */		\
	"&&",	/* SPECIAL_LOGICAL_AND */	\
	"&=",	/* SPECIAL_AND_ASSIGN */	\
	"||",	/* SPECIAL_LOGICAL_OR */	\
	"|=",	/* SPECIAL_OR_ASSIGN */		\
	"^=",	/* SPECIAL_XOR_ASSIGN */	\
	"##",	/* SPECIAL_HASHHASH */		\
	"<<",	/* SPECIAL_LEFTSHIFT */		\
	">>",	/* SPECIAL_RIGHTSHIFT */	\
	"..",	/* SPECIAL_DOTDOT */		\
	"<<=",	/* SPECIAL_SHL_ASSIGN */	\
	">>=",	/* SPECIAL_SHR_ASSIGN */	\
	"...",	/* SPECIAL_ELLIPSIS */		\
	"",	/* SPECIAL_ARG_SEPARATOR */	\
	"<",	/* SPECIAL_UNSIGNED_LT */	\
	">",	/* SPECIAL_UNSIGNED_GT */	\
	"<=",	/* SPECIAL_UNSIGNED_LTE */	\
	">="	/* SPECIAL_UNSIGNED_GTE */	\
}
1321f5207bJohn Levon
/*
 * Table of four-byte rows, defined in another translation unit. Every
 * entry of COMBINATION_STRINGS is at most three characters, so each fits
 * in one row together with its terminating NUL.
 * NOTE(review): presumably initialized from COMBINATION_STRINGS - confirm.
 */
extern unsigned char combinations[][4];
1341f5207bJohn Levon
/*
 * Values for multi-character operator tokens. They start at SPECIAL_BASE
 * (256) and are listed in the same order as COMBINATION_STRINGS, whose
 * spelling for each value is repeated in the trailing comment.
 */
enum special_token {
	SPECIAL_BASE = 256,
	SPECIAL_ADD_ASSIGN = SPECIAL_BASE,	/* "+=" */
	SPECIAL_INCREMENT,			/* "++" */
	SPECIAL_SUB_ASSIGN,			/* "-=" */
	SPECIAL_DECREMENT,			/* "--" */
	SPECIAL_DEREFERENCE,			/* "->" */
	SPECIAL_MUL_ASSIGN,			/* "*=" */
	SPECIAL_DIV_ASSIGN,			/* "/=" */
	SPECIAL_MOD_ASSIGN,			/* "%=" */
	SPECIAL_LTE,				/* "<=" */
	SPECIAL_GTE,				/* ">=" */
	SPECIAL_EQUAL,				/* "==" */
	SPECIAL_NOTEQUAL,			/* "!=" */
	SPECIAL_LOGICAL_AND,			/* "&&" */
	SPECIAL_AND_ASSIGN,			/* "&=" */
	SPECIAL_LOGICAL_OR,			/* "||" */
	SPECIAL_OR_ASSIGN,			/* "|=" */
	SPECIAL_XOR_ASSIGN,			/* "^=" */
	SPECIAL_HASHHASH,			/* "##" */
	SPECIAL_LEFTSHIFT,			/* "<<" */
	SPECIAL_RIGHTSHIFT,			/* ">>" */
	SPECIAL_DOTDOT,				/* ".." */
	SPECIAL_SHL_ASSIGN,			/* "<<=" */
	SPECIAL_SHR_ASSIGN,			/* ">>=" */
	SPECIAL_ELLIPSIS,			/* "..." */
	SPECIAL_ARG_SEPARATOR,			/* "" */
	SPECIAL_UNSIGNED_LT,			/* "<" */
	SPECIAL_UNSIGNED_GT,			/* ">" */
	SPECIAL_UNSIGNED_LTE,			/* "<=" */
	SPECIAL_UNSIGNED_GTE,			/* ">=" */
};
1671f5207bJohn Levon
/*
 * Counted string: a 31-bit length and a one-bit "immutable" flag share a
 * single unsigned int, and the bytes follow inline in the flexible array.
 */
struct string {
	unsigned int length:31,
		     immutable:1;
	char data[];
};
1731f5207bJohn Levon
/*
 * Packed counters: three 10-bit counts plus a one-bit vararg flag,
 * 31 bits in total, so the whole thing will fit into 32 bits.
 */
struct argcount {
	unsigned int normal:10;
	unsigned int quoted:10;
	unsigned int str:10;
	unsigned int vararg:1;
};
1811f5207bJohn Levon
/*
 * This is a very common data structure, it should be kept
 * as small as humanly possible. Big (rare) types go as
 * pointers.
 *
 * The anonymous union is the payload; which member is meaningful depends
 * on the token's type (pos.type). For example match_op() reads "special"
 * only for TOKEN_SPECIAL and match_ident() reads "ident" only for
 * TOKEN_IDENT.
 */
struct token {
	struct position pos;	/* carries the type tag read by token_type() */
	struct token *next;	/* next token in the chain; see eof_token_entry */
	union {
		const char *number;
		struct ident *ident;
		unsigned int special;
		struct string *string;
		int argnum;
		struct argcount count;
		char embedded[4];
	};
};
2001f5207bJohn Levon
/* NOTE(review): presumably the longest string literal the tokenizer accepts - confirm at the use sites. */
#define MAX_STRING 8191
2021f5207bJohn Levon
2031f5207bJohn Levonstatic inline struct token *containing_token(struct token **p)
2041f5207bJohn Levon{
2051f5207bJohn Levon	void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0);
2061f5207bJohn Levon	return addr;
2071f5207bJohn Levon}
2081f5207bJohn Levon
/* Type tag of token x, which is stored in its position: x->pos.type. */
#define token_type(x) ((x)->pos.type)
2101f5207bJohn Levon
/*
 * Last token in the stream - points to itself.
 * This allows us to not test for NULL pointers
 * when following the token->next chain.
 *
 * eof_token(x) is true exactly when x is the address of that
 * sentinel entry.
 */
extern struct token eof_token_entry;
#define eof_token(x) ((x) == &eof_token_entry)
2181f5207bJohn Levon
/*
 * Tokenizer entry points; all are defined in other translation units.
 * NOTE(review): the grouping below is inferred from the names and
 * signatures only - confirm details against the implementations.
 */

/* Streams: stream_name() takes a stream number. */
extern int init_stream(const char *, int fd, const char **next_path);
extern const char *stream_name(int stream);

/*
 * Identifiers. alloc_ident() is the only prototype here written without
 * "extern"; that makes no difference for a function declaration.
 */
struct ident *alloc_ident(const char *name, int len);
extern struct ident *hash_ident(struct ident *);
extern struct ident *built_in_ident(const char *);
extern struct token *built_in_token(int, struct ident *);

/* Each of these returns a string for the given object (presumably a printable form). */
extern const char *show_special(int);
extern const char *show_ident(const struct ident *);
extern const char *show_string(const struct string *string);
extern const char *show_token(const struct token *);
extern const char *quote_token(const struct token *);

/* Turn an input (a named descriptor, or a memory buffer) into tokens. */
extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
extern struct token * tokenize_buffer(void *, unsigned long, struct token **);
2321f5207bJohn Levon
/*
 * Statistics, include-path setup and the preprocessor entry point.
 * preprocess() takes a token pointer and returns one.
 * NOTE(review): presumably the head of the input list in, head of the
 * preprocessed list out - confirm.
 */
extern void show_identifier_stats(void);
extern void init_include_path(void);
extern struct token *preprocess(struct token *);
2361f5207bJohn Levon
/*
 * Position-keyed lookups: pos_get_token() and pos_ident() both take a
 * struct position by value.
 * NOTE(review): presumably they only find tokens previously handed to
 * store_all_tokens() - confirm.
 */
extern void store_all_tokens(struct token *token);
extern struct token *pos_get_token(struct position pos);
extern char *pos_ident(struct position pos);
2401f5207bJohn Levon
/*
 * Macro lookups keyed by source position.
 * NOTE(review): presumably the getters report macros recorded through
 * store_macro_pos(); ownership of the returned strings/list is not
 * visible from this header - confirm before freeing or caching them.
 */
extern void store_macro_pos(struct token *);
extern char *get_macro_name(struct position pos);
extern char *get_inner_macro(struct position pos);
extern struct string_list *get_all_macros(struct position pos);
2451f5207bJohn Levon
2461f5207bJohn Levonstatic inline int match_op(struct token *token, unsigned int op)
2471f5207bJohn Levon{
2481f5207bJohn Levon	return token->pos.type == TOKEN_SPECIAL && token->special == op;
2491f5207bJohn Levon}
2501f5207bJohn Levon
2511f5207bJohn Levonstatic inline int match_ident(struct token *token, struct ident *id)
2521f5207bJohn Levon{
2531f5207bJohn Levon	return token->pos.type == TOKEN_IDENT && token->ident == id;
2541f5207bJohn Levon}
2551f5207bJohn Levon
2561f5207bJohn Levon#endif
257