xref: /illumos-gate/usr/src/tools/smatch/src/token.h (revision 6523a3aa7f325d64841382707603be7a86e68147)
11f5207b7SJohn Levon #ifndef TOKEN_H
21f5207b7SJohn Levon #define TOKEN_H
31f5207b7SJohn Levon /*
41f5207b7SJohn Levon  * Basic tokenization structures. NOTE! Those tokens had better
51f5207b7SJohn Levon  * be pretty small, since we're going to keep them all in memory
61f5207b7SJohn Levon  * indefinitely.
71f5207b7SJohn Levon  *
81f5207b7SJohn Levon  * Copyright (C) 2003 Transmeta Corp.
91f5207b7SJohn Levon  *               2003 Linus Torvalds
101f5207b7SJohn Levon  *
111f5207b7SJohn Levon  * Permission is hereby granted, free of charge, to any person obtaining a copy
121f5207b7SJohn Levon  * of this software and associated documentation files (the "Software"), to deal
131f5207b7SJohn Levon  * in the Software without restriction, including without limitation the rights
141f5207b7SJohn Levon  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
151f5207b7SJohn Levon  * copies of the Software, and to permit persons to whom the Software is
161f5207b7SJohn Levon  * furnished to do so, subject to the following conditions:
171f5207b7SJohn Levon  *
181f5207b7SJohn Levon  * The above copyright notice and this permission notice shall be included in
191f5207b7SJohn Levon  * all copies or substantial portions of the Software.
201f5207b7SJohn Levon  *
211f5207b7SJohn Levon  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
221f5207b7SJohn Levon  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
231f5207b7SJohn Levon  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
241f5207b7SJohn Levon  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
251f5207b7SJohn Levon  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
261f5207b7SJohn Levon  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
271f5207b7SJohn Levon  * THE SOFTWARE.
281f5207b7SJohn Levon  */
291f5207b7SJohn Levon 
#include <stddef.h>
#include <sys/types.h>
#include "lib.h"
321f5207b7SJohn Levon 
331f5207b7SJohn Levon /*
341f5207b7SJohn Levon  * This describes the pure lexical elements (tokens), with
351f5207b7SJohn Levon  * no semantic meaning. In other words, an identifier doesn't
361f5207b7SJohn Levon  * have a type or meaning, it is only a specific string in
371f5207b7SJohn Levon  * the input stream.
381f5207b7SJohn Levon  *
391f5207b7SJohn Levon  * Semantic meaning is handled elsewhere.
401f5207b7SJohn Levon  */
411f5207b7SJohn Levon 
/*
 * "Constant file" state of a stream; stored in the 'constant' member
 * of struct stream.
 */
enum constantfile {
	CONSTANT_FILE_MAYBE = 0,	/* To be determined, not inside any #ifs in this file */
	CONSTANT_FILE_IFNDEF,		/* To be determined, currently inside #ifndef */
	CONSTANT_FILE_NOPE,		/* No */
	CONSTANT_FILE_YES		/* Yes */
};
481f5207b7SJohn Levon 
491f5207b7SJohn Levon extern const char *includepath[];
501f5207b7SJohn Levon 
511f5207b7SJohn Levon struct stream {
521f5207b7SJohn Levon 	int fd;
531f5207b7SJohn Levon 	const char *name;
541f5207b7SJohn Levon 	const char *path;    // input-file path - see set_stream_include_path()
551f5207b7SJohn Levon 	const char **next_path;
561f5207b7SJohn Levon 
571f5207b7SJohn Levon 	/* Use these to check for "already parsed" */
581f5207b7SJohn Levon 	enum constantfile constant;
591f5207b7SJohn Levon 	int dirty, next_stream, once;
601f5207b7SJohn Levon 	struct ident *protect;
611f5207b7SJohn Levon 	struct token *ifndef;
621f5207b7SJohn Levon 	struct token *top_if;
631f5207b7SJohn Levon };
641f5207b7SJohn Levon 
651f5207b7SJohn Levon extern int input_stream_nr;
661f5207b7SJohn Levon extern struct stream *input_streams;
671f5207b7SJohn Levon extern unsigned int tabstop;
681f5207b7SJohn Levon extern int no_lineno;
691f5207b7SJohn Levon extern int *hash_stream(const char *name);
701f5207b7SJohn Levon 
/*
 * A hashed identifier. The name is stored inline as a flexible
 * array member, so instances are allocated with room for it.
 */
struct ident {
	struct ident *next;		/* Hash chain of identifiers */
	struct symbol *symbols;		/* Pointer to semantic meaning list */
	unsigned char len;		/* Length of identifier name */
	unsigned char tainted:1;
	unsigned char reserved:1;
	unsigned char keyword:1;
	char name[];			/* Actual identifier */
};
801f5207b7SJohn Levon 
/*
 * Kinds of token. The value is kept in the token's position and is
 * read back through the token_type() macro.
 */
enum token_type {
	TOKEN_EOF = 0,
	TOKEN_BAD,
	TOKEN_ERROR,
	TOKEN_IDENT,
	TOKEN_ZERO_IDENT,
	TOKEN_NUMBER,

	/* Character constants */
	TOKEN_CHAR,
	TOKEN_CHAR_EMBEDDED_0,
	TOKEN_CHAR_EMBEDDED_1,
	TOKEN_CHAR_EMBEDDED_2,
	TOKEN_CHAR_EMBEDDED_3,
	TOKEN_WIDE_CHAR,
	TOKEN_WIDE_CHAR_EMBEDDED_0,
	TOKEN_WIDE_CHAR_EMBEDDED_1,
	TOKEN_WIDE_CHAR_EMBEDDED_2,
	TOKEN_WIDE_CHAR_EMBEDDED_3,

	/* String literals */
	TOKEN_STRING,
	TOKEN_WIDE_STRING,

	TOKEN_SPECIAL,
	TOKEN_STREAMBEGIN,
	TOKEN_STREAMEND,
	TOKEN_MACRO_ARGUMENT,
	TOKEN_STR_ARGUMENT,
	TOKEN_QUOTED_ARGUMENT,
	TOKEN_CONCAT,
	TOKEN_GNU_KLUDGE,
	TOKEN_UNTAINT,
	TOKEN_ARG_COUNT,
	TOKEN_IF,
	TOKEN_SKIP_GROUPS,
	TOKEN_ELSE,
};
1141f5207b7SJohn Levon 
/*
 * Combination tokens: initializer listing the spellings of the
 * multi-character operators, in the same order as the entries of
 * enum special_token. The empty string lines up with
 * SPECIAL_ARG_SEPARATOR, and the final four entries line up with the
 * SPECIAL_UNSIGNED_* comparison codes.
 */
#define COMBINATION_STRINGS {			\
	"+=", "++", "-=", "--", "->",		\
	"*=", "/=", "%=",			\
	"<=", ">=", "==", "!=",			\
	"&&", "&=", "||", "|=",			\
	"^=", "##",				\
	"<<", ">>", "..",			\
	"<<=", ">>=", "...",			\
	"",					\
	"<", ">", "<=", ">="			\
}
1321f5207b7SJohn Levon 
1331f5207b7SJohn Levon extern unsigned char combinations[][4];
1341f5207b7SJohn Levon 
/*
 * Codes for the combination tokens, numbered from SPECIAL_BASE (256)
 * upwards. The entries appear in the same order as the strings in
 * COMBINATION_STRINGS; the spelling at the matching position is noted
 * next to each code.
 */
enum special_token {
	SPECIAL_BASE = 256,
	SPECIAL_ADD_ASSIGN = SPECIAL_BASE,	/* "+=" */
	SPECIAL_INCREMENT,			/* "++" */
	SPECIAL_SUB_ASSIGN,			/* "-=" */
	SPECIAL_DECREMENT,			/* "--" */
	SPECIAL_DEREFERENCE,			/* "->" */
	SPECIAL_MUL_ASSIGN,			/* "*=" */
	SPECIAL_DIV_ASSIGN,			/* "/=" */
	SPECIAL_MOD_ASSIGN,			/* "%=" */
	SPECIAL_LTE,				/* "<=" */
	SPECIAL_GTE,				/* ">=" */
	SPECIAL_EQUAL,				/* "==" */
	SPECIAL_NOTEQUAL,			/* "!=" */
	SPECIAL_LOGICAL_AND,			/* "&&" */
	SPECIAL_AND_ASSIGN,			/* "&=" */
	SPECIAL_LOGICAL_OR,			/* "||" */
	SPECIAL_OR_ASSIGN,			/* "|=" */
	SPECIAL_XOR_ASSIGN,			/* "^=" */
	SPECIAL_HASHHASH,			/* "##" */
	SPECIAL_LEFTSHIFT,			/* "<<" */
	SPECIAL_RIGHTSHIFT,			/* ">>" */
	SPECIAL_DOTDOT,				/* ".." */
	SPECIAL_SHL_ASSIGN,			/* "<<=" */
	SPECIAL_SHR_ASSIGN,			/* ">>=" */
	SPECIAL_ELLIPSIS,			/* "..." */
	SPECIAL_ARG_SEPARATOR,			/* "" */
	SPECIAL_UNSIGNED_LT,			/* "<" */
	SPECIAL_UNSIGNED_GT,			/* ">" */
	SPECIAL_UNSIGNED_LTE,			/* "<=" */
	SPECIAL_UNSIGNED_GTE,			/* ">=" */
};
1671f5207b7SJohn Levon 
/*
 * String payload: a 31-bit length and a 1-bit immutable flag,
 * followed by the character data as a flexible array member.
 */
struct string {
	unsigned int length : 31;
	unsigned int immutable : 1;
	char data[];
};
1731f5207b7SJohn Levon 
/*
 * Three 10-bit counters plus a vararg flag: 31 bits in total,
 * so the whole thing will fit into 32 bits.
 */
struct argcount {
	unsigned int normal : 10;
	unsigned int quoted : 10;
	unsigned int str : 10;
	unsigned int vararg : 1;
};
1811f5207b7SJohn Levon 
1821f5207b7SJohn Levon /*
1831f5207b7SJohn Levon  * This is a very common data structure, it should be kept
1841f5207b7SJohn Levon  * as small as humanly possible. Big (rare) types go as
1851f5207b7SJohn Levon  * pointers.
1861f5207b7SJohn Levon  */
1871f5207b7SJohn Levon struct token {
1881f5207b7SJohn Levon 	struct position pos;
1891f5207b7SJohn Levon 	struct token *next;
1901f5207b7SJohn Levon 	union {
1911f5207b7SJohn Levon 		const char *number;
1921f5207b7SJohn Levon 		struct ident *ident;
1931f5207b7SJohn Levon 		unsigned int special;
1941f5207b7SJohn Levon 		struct string *string;
1951f5207b7SJohn Levon 		int argnum;
1961f5207b7SJohn Levon 		struct argcount count;
1971f5207b7SJohn Levon 		char embedded[4];
1981f5207b7SJohn Levon 	};
1991f5207b7SJohn Levon };
2001f5207b7SJohn Levon 
2011f5207b7SJohn Levon #define MAX_STRING 8191
2021f5207b7SJohn Levon 
2031f5207b7SJohn Levon static inline struct token *containing_token(struct token **p)
2041f5207b7SJohn Levon {
2051f5207b7SJohn Levon 	void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0);
2061f5207b7SJohn Levon 	return addr;
2071f5207b7SJohn Levon }
2081f5207b7SJohn Levon 
2091f5207b7SJohn Levon #define token_type(x) ((x)->pos.type)
2101f5207b7SJohn Levon 
/*
 * Last token in the stream - points to itself.
 * This allows us to not test for NULL pointers
 * when following the token->next chain.
 */
2161f5207b7SJohn Levon extern struct token eof_token_entry;
2171f5207b7SJohn Levon #define eof_token(x) ((x) == &eof_token_entry)
2181f5207b7SJohn Levon 
/*
 * Functions declared for users of this header; none of them is
 * defined here.
 */
extern int init_stream(const char *, int fd, const char **next_path);
extern const char *stream_name(int stream);
extern struct ident *alloc_ident(const char *name, int len);
extern struct ident *hash_ident(struct ident *);
extern struct ident *built_in_ident(const char *);
extern struct token *built_in_token(int, struct ident *);
extern const char *show_special(int);
extern const char *show_ident(const struct ident *);
extern const char *show_string(const struct string *string);
extern const char *show_token(const struct token *);
extern const char *quote_token(const struct token *);
extern struct token *tokenize(const char *, int, struct token *, const char **next_path);
extern struct token *tokenize_buffer(void *, unsigned long, struct token **);

extern void show_identifier_stats(void);
extern void init_include_path(void);
extern struct token *preprocess(struct token *);

extern void store_all_tokens(struct token *token);
extern struct token *pos_get_token(struct position pos);
extern char *pos_ident(struct position pos);

extern void store_macro_pos(struct token *);
extern char *get_macro_name(struct position pos);
extern char *get_inner_macro(struct position pos);
extern struct string_list *get_all_macros(struct position pos);
2451f5207b7SJohn Levon 
2461f5207b7SJohn Levon static inline int match_op(struct token *token, unsigned int op)
2471f5207b7SJohn Levon {
2481f5207b7SJohn Levon 	return token->pos.type == TOKEN_SPECIAL && token->special == op;
2491f5207b7SJohn Levon }
2501f5207b7SJohn Levon 
2511f5207b7SJohn Levon static inline int match_ident(struct token *token, struct ident *id)
2521f5207b7SJohn Levon {
2531f5207b7SJohn Levon 	return token->pos.type == TOKEN_IDENT && token->ident == id;
2541f5207b7SJohn Levon }
2551f5207b7SJohn Levon 
2561f5207b7SJohn Levon #endif
257