xref: /illumos-gate/usr/src/tools/smatch/src/token.h (revision 1f5207b7)
1*1f5207b7SJohn Levon #ifndef TOKEN_H
2*1f5207b7SJohn Levon #define TOKEN_H
3*1f5207b7SJohn Levon /*
4*1f5207b7SJohn Levon  * Basic tokenization structures. NOTE! Those tokens had better
5*1f5207b7SJohn Levon  * be pretty small, since we're going to keep them all in memory
6*1f5207b7SJohn Levon  * indefinitely.
7*1f5207b7SJohn Levon  *
8*1f5207b7SJohn Levon  * Copyright (C) 2003 Transmeta Corp.
9*1f5207b7SJohn Levon  *               2003 Linus Torvalds
10*1f5207b7SJohn Levon  *
11*1f5207b7SJohn Levon  * Permission is hereby granted, free of charge, to any person obtaining a copy
12*1f5207b7SJohn Levon  * of this software and associated documentation files (the "Software"), to deal
13*1f5207b7SJohn Levon  * in the Software without restriction, including without limitation the rights
14*1f5207b7SJohn Levon  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15*1f5207b7SJohn Levon  * copies of the Software, and to permit persons to whom the Software is
16*1f5207b7SJohn Levon  * furnished to do so, subject to the following conditions:
17*1f5207b7SJohn Levon  *
18*1f5207b7SJohn Levon  * The above copyright notice and this permission notice shall be included in
19*1f5207b7SJohn Levon  * all copies or substantial portions of the Software.
20*1f5207b7SJohn Levon  *
21*1f5207b7SJohn Levon  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22*1f5207b7SJohn Levon  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23*1f5207b7SJohn Levon  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24*1f5207b7SJohn Levon  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25*1f5207b7SJohn Levon  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26*1f5207b7SJohn Levon  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27*1f5207b7SJohn Levon  * THE SOFTWARE.
28*1f5207b7SJohn Levon  */
29*1f5207b7SJohn Levon 
#include <stddef.h>
#include <sys/types.h>
#include "lib.h"
32*1f5207b7SJohn Levon 
33*1f5207b7SJohn Levon /*
34*1f5207b7SJohn Levon  * This describes the pure lexical elements (tokens), with
35*1f5207b7SJohn Levon  * no semantic meaning. In other words, an identifier doesn't
36*1f5207b7SJohn Levon  * have a type or meaning, it is only a specific string in
37*1f5207b7SJohn Levon  * the input stream.
38*1f5207b7SJohn Levon  *
39*1f5207b7SJohn Levon  * Semantic meaning is handled elsewhere.
40*1f5207b7SJohn Levon  */
41*1f5207b7SJohn Levon 
/*
 * Progress of the "is this file constant?" determination for a stream.
 * The current state is kept in the 'constant' member of struct stream.
 */
enum constantfile {
	CONSTANT_FILE_MAYBE = 0,	/* undecided: not inside any #ifs in this file */
	CONSTANT_FILE_IFNDEF = 1,	/* undecided: currently inside #ifndef */
	CONSTANT_FILE_NOPE = 2,		/* decided: no */
	CONSTANT_FILE_YES = 3		/* decided: yes */
};
48*1f5207b7SJohn Levon 
/*
 * Array of path strings; the array itself is defined in another file.
 * NOTE(review): presumably the #include search path - confirm at the definition.
 */
extern const char *includepath[];
50*1f5207b7SJohn Levon 
51*1f5207b7SJohn Levon struct stream {
52*1f5207b7SJohn Levon 	int fd;
53*1f5207b7SJohn Levon 	const char *name;
54*1f5207b7SJohn Levon 	const char *path;    // input-file path - see set_stream_include_path()
55*1f5207b7SJohn Levon 	const char **next_path;
56*1f5207b7SJohn Levon 
57*1f5207b7SJohn Levon 	/* Use these to check for "already parsed" */
58*1f5207b7SJohn Levon 	enum constantfile constant;
59*1f5207b7SJohn Levon 	int dirty, next_stream, once;
60*1f5207b7SJohn Levon 	struct ident *protect;
61*1f5207b7SJohn Levon 	struct token *ifndef;
62*1f5207b7SJohn Levon 	struct token *top_if;
63*1f5207b7SJohn Levon };
64*1f5207b7SJohn Levon 
/*
 * Tokenizer-wide state and the stream hash lookup; all of these are
 * defined in another translation unit.
 * NOTE(review): input_streams is presumably a table holding
 * input_stream_nr entries - confirm at the definition.
 */
extern int input_stream_nr;
extern struct stream *input_streams;
extern unsigned int tabstop;
extern int no_lineno;
extern int *hash_stream(const char *name);
70*1f5207b7SJohn Levon 
/*
 * One identifier. The name is stored inline as a flexible array member
 * that trails the fixed-size header.
 */
struct ident {
	struct ident *next;		/* next identifier on the same hash chain */
	struct symbol *symbols;		/* list carrying the semantic meaning */
	unsigned char len;		/* length of the identifier name */
	unsigned char tainted:1;
	unsigned char reserved:1;
	unsigned char keyword:1;
	char name[];			/* the identifier itself */
};
80*1f5207b7SJohn Levon 
/*
 * Kinds of lexical token. The kind of a token is read from its
 * position record (tok->pos.type). Enumerators are numbered
 * consecutively from zero, so their order is significant.
 */
enum token_type {
	TOKEN_EOF = 0,
	TOKEN_ERROR,

	/* identifiers and numbers */
	TOKEN_IDENT,
	TOKEN_ZERO_IDENT,
	TOKEN_NUMBER,

	/* character tokens */
	TOKEN_CHAR,
	TOKEN_CHAR_EMBEDDED_0,
	TOKEN_CHAR_EMBEDDED_1,
	TOKEN_CHAR_EMBEDDED_2,
	TOKEN_CHAR_EMBEDDED_3,

	/* wide character tokens */
	TOKEN_WIDE_CHAR,
	TOKEN_WIDE_CHAR_EMBEDDED_0,
	TOKEN_WIDE_CHAR_EMBEDDED_1,
	TOKEN_WIDE_CHAR_EMBEDDED_2,
	TOKEN_WIDE_CHAR_EMBEDDED_3,

	/* string tokens */
	TOKEN_STRING,
	TOKEN_WIDE_STRING,

	/* operators and punctuation (see enum special_token) */
	TOKEN_SPECIAL,

	/* stream boundaries */
	TOKEN_STREAMBEGIN,
	TOKEN_STREAMEND,

	/* NOTE(review): the rest look preprocessor-internal - confirm */
	TOKEN_MACRO_ARGUMENT,
	TOKEN_STR_ARGUMENT,
	TOKEN_QUOTED_ARGUMENT,
	TOKEN_CONCAT,
	TOKEN_GNU_KLUDGE,
	TOKEN_UNTAINT,
	TOKEN_ARG_COUNT,
	TOKEN_IF,
	TOKEN_SKIP_GROUPS,
	TOKEN_ELSE,
};
113*1f5207b7SJohn Levon 
/*
 * Combination tokens: brace-enclosed initializer listing the spellings
 * of the multi-character operators.
 *
 * The list has one entry per enumerator of enum special_token and the
 * order appears to mirror that enum (starting at SPECIAL_BASE), so the
 * two must be kept in sync. The empty string lines up with
 * SPECIAL_ARG_SEPARATOR, and the final four entries line up with the
 * SPECIAL_UNSIGNED_* comparisons.
 */
#define COMBINATION_STRINGS {	\
	"+=", "++",		\
	"-=", "--", "->",	\
	"*=",			\
	"/=",			\
	"%=",			\
	"<=", ">=",		\
	"==", "!=",		\
	"&&", "&=",		\
	"||", "|=",		\
	"^=", "##",		\
	"<<", ">>", "..",	\
	"<<=", ">>=", "...",	\
	"",			\
	"<", ">", "<=", ">="	\
}

/* Table of 4-byte rows, defined in another file. */
extern unsigned char combinations[][4];
133*1f5207b7SJohn Levon 
/*
 * Codes for multi-character operators, numbered consecutively from
 * SPECIAL_BASE (256). The trailing comment on each enumerator is the
 * COMBINATION_STRINGS entry at the same position.
 */
enum special_token {
	SPECIAL_BASE = 256,

	/* arithmetic assignment, increment/decrement, member access */
	SPECIAL_ADD_ASSIGN = SPECIAL_BASE,	/* "+=" */
	SPECIAL_INCREMENT,			/* "++" */
	SPECIAL_SUB_ASSIGN,			/* "-=" */
	SPECIAL_DECREMENT,			/* "--" */
	SPECIAL_DEREFERENCE,			/* "->" */
	SPECIAL_MUL_ASSIGN,			/* "*=" */
	SPECIAL_DIV_ASSIGN,			/* "/=" */
	SPECIAL_MOD_ASSIGN,			/* "%=" */

	/* comparisons */
	SPECIAL_LTE,				/* "<=" */
	SPECIAL_GTE,				/* ">=" */
	SPECIAL_EQUAL,				/* "==" */
	SPECIAL_NOTEQUAL,			/* "!=" */

	/* logical and bitwise */
	SPECIAL_LOGICAL_AND,			/* "&&" */
	SPECIAL_AND_ASSIGN,			/* "&=" */
	SPECIAL_LOGICAL_OR,			/* "||" */
	SPECIAL_OR_ASSIGN,			/* "|=" */
	SPECIAL_XOR_ASSIGN,			/* "^=" */

	SPECIAL_HASHHASH,			/* "##" */

	/* shifts and dots */
	SPECIAL_LEFTSHIFT,			/* "<<" */
	SPECIAL_RIGHTSHIFT,			/* ">>" */
	SPECIAL_DOTDOT,				/* ".." */
	SPECIAL_SHL_ASSIGN,			/* "<<=" */
	SPECIAL_SHR_ASSIGN,			/* ">>=" */
	SPECIAL_ELLIPSIS,			/* "..." */

	SPECIAL_ARG_SEPARATOR,			/* "" */

	/* unsigned comparisons */
	SPECIAL_UNSIGNED_LT,			/* "<" */
	SPECIAL_UNSIGNED_GT,			/* ">" */
	SPECIAL_UNSIGNED_LTE,			/* "<=" */
	SPECIAL_UNSIGNED_GTE,			/* ">=" */
};
166*1f5207b7SJohn Levon 
/*
 * String payload: a 31-bit length and a one-bit flag packed together,
 * followed by the bytes themselves in a flexible array member.
 */
struct string {
	unsigned length:31;
	unsigned immutable:1;
	char data[];
};
172*1f5207b7SJohn Levon 
/*
 * Three 10-bit counters plus a one-bit vararg flag: 31 bits in all,
 * so the whole structure will fit into 32 bits.
 */
struct argcount {
	unsigned int normal:10;
	unsigned int quoted:10;
	unsigned int str:10;
	unsigned int vararg:1;
};
180*1f5207b7SJohn Levon 
/*
 * This is a very common data structure, it should be kept
 * as small as humanly possible. Big (rare) types go as
 * pointers.
 *
 * A token is a position record, a link to the following token, and an
 * anonymous union holding the payload. Which union member is valid
 * depends on the token kind stored in pos.type.
 */
struct token {
	struct position pos;		/* carries the token kind in pos.type */
	struct token *next;		/* following token in the chain */
	union {
		const char *number;	/* NOTE(review): presumably the text of a TOKEN_NUMBER - confirm */
		struct ident *ident;	/* compared when pos.type is TOKEN_IDENT (see match_ident) */
		unsigned int special;	/* compared when pos.type is TOKEN_SPECIAL (see match_op) */
		struct string *string;	/* NOTE(review): presumably for string tokens - confirm */
		int argnum;
		struct argcount count;
		char embedded[4];
	};
};
199*1f5207b7SJohn Levon 
/* NOTE(review): presumably the longest string the tokenizer accepts - confirm at the use sites. */
#define MAX_STRING 8191
201*1f5207b7SJohn Levon 
202*1f5207b7SJohn Levon static inline struct token *containing_token(struct token **p)
203*1f5207b7SJohn Levon {
204*1f5207b7SJohn Levon 	void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0);
205*1f5207b7SJohn Levon 	return addr;
206*1f5207b7SJohn Levon }
207*1f5207b7SJohn Levon 
/* Token kind accessor: the kind is kept in the token's position record. */
#define token_type(x) ((x)->pos.type)

/*
 * Last token in the stream - points to itself.
 * This allows us to not test for NULL pointers
 * when following the token->next chain.
 */
extern struct token eof_token_entry;
/* Nonzero when x is the address of the eof_token_entry sentinel. */
#define eof_token(x) ((x) == &eof_token_entry)
217*1f5207b7SJohn Levon 
/*
 * Stream, identifier and tokenizer interface; implemented in other files.
 * NOTE(review): the show_*() and quote_token() helpers presumably return
 * printable text for diagnostics - confirm buffer ownership and lifetime
 * at the definitions.
 */
extern int init_stream(const char *, int fd, const char **next_path);
extern const char *stream_name(int stream);
extern struct ident *hash_ident(struct ident *);
extern struct ident *built_in_ident(const char *);
extern struct token *built_in_token(int, struct ident *);
extern const char *show_special(int);
extern const char *show_ident(const struct ident *);
extern const char *show_string(const struct string *string);
extern const char *show_token(const struct token *);
extern const char *quote_token(const struct token *);
extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
extern struct token * tokenize_buffer(void *, unsigned long, struct token **);

/* Statistics, include-path setup and the preprocessor entry point. */
extern void show_identifier_stats(void);
extern void init_include_path(void);
extern struct token *preprocess(struct token *);

/* Lookups keyed by a struct position. */
extern void store_all_tokens(struct token *token);
extern struct token *pos_get_token(struct position pos);
extern char *pos_ident(struct position pos);

/* Macro position bookkeeping. */
extern void store_macro_pos(struct token *);
extern char *get_macro_name(struct position pos);
241*1f5207b7SJohn Levon 
242*1f5207b7SJohn Levon static inline int match_op(struct token *token, unsigned int op)
243*1f5207b7SJohn Levon {
244*1f5207b7SJohn Levon 	return token->pos.type == TOKEN_SPECIAL && token->special == op;
245*1f5207b7SJohn Levon }
246*1f5207b7SJohn Levon 
247*1f5207b7SJohn Levon static inline int match_ident(struct token *token, struct ident *id)
248*1f5207b7SJohn Levon {
249*1f5207b7SJohn Levon 	return token->pos.type == TOKEN_IDENT && token->ident == id;
250*1f5207b7SJohn Levon }
251*1f5207b7SJohn Levon 
252*1f5207b7SJohn Levon #endif
253