xref: /illumos-gate/usr/src/tools/smatch/src/token.h (revision 1f5207b7604fb44407eb4342aff613f7c4508508)
1 #ifndef TOKEN_H
2 #define TOKEN_H
3 /*
4  * Basic tokenization structures. NOTE! Those tokens had better
5  * be pretty small, since we're going to keep them all in memory
6  * indefinitely.
7  *
8  * Copyright (C) 2003 Transmeta Corp.
9  *               2003 Linus Torvalds
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a copy
12  * of this software and associated documentation files (the "Software"), to deal
13  * in the Software without restriction, including without limitation the rights
14  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15  * copies of the Software, and to permit persons to whom the Software is
16  * furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included in
19  * all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27  * THE SOFTWARE.
28  */
29 
#include <stddef.h>
#include <sys/types.h>
#include "lib.h"
32 
33 /*
34  * This describes the pure lexical elements (tokens), with
35  * no semantic meaning. In other words, an identifier doesn't
36  * have a type or meaning, it is only a specific string in
37  * the input stream.
38  *
39  * Semantic meaning is handled elsewhere.
40  */
41 
42 enum constantfile {
43   CONSTANT_FILE_MAYBE,    // To be determined, not inside any #ifs in this file
44   CONSTANT_FILE_IFNDEF,   // To be determined, currently inside #ifndef
45   CONSTANT_FILE_NOPE,     // No
46   CONSTANT_FILE_YES       // Yes
47 };
48 
49 extern const char *includepath[];
50 
51 struct stream {
52 	int fd;
53 	const char *name;
54 	const char *path;    // input-file path - see set_stream_include_path()
55 	const char **next_path;
56 
57 	/* Use these to check for "already parsed" */
58 	enum constantfile constant;
59 	int dirty, next_stream, once;
60 	struct ident *protect;
61 	struct token *ifndef;
62 	struct token *top_if;
63 };
64 
65 extern int input_stream_nr;
66 extern struct stream *input_streams;
67 extern unsigned int tabstop;
68 extern int no_lineno;
69 extern int *hash_stream(const char *name);
70 
71 struct ident {
72 	struct ident *next;	/* Hash chain of identifiers */
73 	struct symbol *symbols;	/* Pointer to semantic meaning list */
74 	unsigned char len;	/* Length of identifier name */
75 	unsigned char tainted:1,
76 	              reserved:1,
77 		      keyword:1;
78 	char name[];		/* Actual identifier */
79 };
80 
81 enum token_type {
82 	TOKEN_EOF,
83 	TOKEN_ERROR,
84 	TOKEN_IDENT,
85 	TOKEN_ZERO_IDENT,
86 	TOKEN_NUMBER,
87 	TOKEN_CHAR,
88 	TOKEN_CHAR_EMBEDDED_0,
89 	TOKEN_CHAR_EMBEDDED_1,
90 	TOKEN_CHAR_EMBEDDED_2,
91 	TOKEN_CHAR_EMBEDDED_3,
92 	TOKEN_WIDE_CHAR,
93 	TOKEN_WIDE_CHAR_EMBEDDED_0,
94 	TOKEN_WIDE_CHAR_EMBEDDED_1,
95 	TOKEN_WIDE_CHAR_EMBEDDED_2,
96 	TOKEN_WIDE_CHAR_EMBEDDED_3,
97 	TOKEN_STRING,
98 	TOKEN_WIDE_STRING,
99 	TOKEN_SPECIAL,
100 	TOKEN_STREAMBEGIN,
101 	TOKEN_STREAMEND,
102 	TOKEN_MACRO_ARGUMENT,
103 	TOKEN_STR_ARGUMENT,
104 	TOKEN_QUOTED_ARGUMENT,
105 	TOKEN_CONCAT,
106 	TOKEN_GNU_KLUDGE,
107 	TOKEN_UNTAINT,
108 	TOKEN_ARG_COUNT,
109 	TOKEN_IF,
110 	TOKEN_SKIP_GROUPS,
111 	TOKEN_ELSE,
112 };
113 
114 /* Combination tokens */
115 #define COMBINATION_STRINGS {	\
116 	"+=", "++",		\
117 	"-=", "--", "->",	\
118 	"*=",			\
119 	"/=",			\
120 	"%=",			\
121 	"<=", ">=",		\
122 	"==", "!=",		\
123 	"&&", "&=",		\
124 	"||", "|=",		\
125 	"^=", "##",		\
126 	"<<", ">>", "..",	\
127 	"<<=", ">>=", "...",	\
128 	"",			\
129 	"<", ">", "<=", ">="	\
130 }
131 
132 extern unsigned char combinations[][4];
133 
134 enum special_token {
135 	SPECIAL_BASE = 256,
136 	SPECIAL_ADD_ASSIGN = SPECIAL_BASE,
137 	SPECIAL_INCREMENT,
138 	SPECIAL_SUB_ASSIGN,
139 	SPECIAL_DECREMENT,
140 	SPECIAL_DEREFERENCE,
141 	SPECIAL_MUL_ASSIGN,
142 	SPECIAL_DIV_ASSIGN,
143 	SPECIAL_MOD_ASSIGN,
144 	SPECIAL_LTE,
145 	SPECIAL_GTE,
146 	SPECIAL_EQUAL,
147 	SPECIAL_NOTEQUAL,
148 	SPECIAL_LOGICAL_AND,
149 	SPECIAL_AND_ASSIGN,
150 	SPECIAL_LOGICAL_OR,
151 	SPECIAL_OR_ASSIGN,
152 	SPECIAL_XOR_ASSIGN,
153 	SPECIAL_HASHHASH,
154 	SPECIAL_LEFTSHIFT,
155 	SPECIAL_RIGHTSHIFT,
156 	SPECIAL_DOTDOT,
157 	SPECIAL_SHL_ASSIGN,
158 	SPECIAL_SHR_ASSIGN,
159 	SPECIAL_ELLIPSIS,
160 	SPECIAL_ARG_SEPARATOR,
161 	SPECIAL_UNSIGNED_LT,
162 	SPECIAL_UNSIGNED_GT,
163 	SPECIAL_UNSIGNED_LTE,
164 	SPECIAL_UNSIGNED_GTE,
165 };
166 
167 struct string {
168 	unsigned int length:31;
169 	unsigned int immutable:1;
170 	char data[];
171 };
172 
173 /* will fit into 32 bits */
174 struct argcount {
175 	unsigned normal:10;
176 	unsigned quoted:10;
177 	unsigned str:10;
178 	unsigned vararg:1;
179 };
180 
181 /*
182  * This is a very common data structure, it should be kept
183  * as small as humanly possible. Big (rare) types go as
184  * pointers.
185  */
186 struct token {
187 	struct position pos;
188 	struct token *next;
189 	union {
190 		const char *number;
191 		struct ident *ident;
192 		unsigned int special;
193 		struct string *string;
194 		int argnum;
195 		struct argcount count;
196 		char embedded[4];
197 	};
198 };
199 
200 #define MAX_STRING 8191
201 
202 static inline struct token *containing_token(struct token **p)
203 {
204 	void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0);
205 	return addr;
206 }
207 
208 #define token_type(x) ((x)->pos.type)
209 
210 /*
211  * Last token in the stream - points to itself.
212  * This allows us to not test for NULL pointers
213  * when following the token->next chain..
214  */
215 extern struct token eof_token_entry;
216 #define eof_token(x) ((x) == &eof_token_entry)
217 
218 extern int init_stream(const char *, int fd, const char **next_path);
219 extern const char *stream_name(int stream);
220 extern struct ident *hash_ident(struct ident *);
221 extern struct ident *built_in_ident(const char *);
222 extern struct token *built_in_token(int, struct ident *);
223 extern const char *show_special(int);
224 extern const char *show_ident(const struct ident *);
225 extern const char *show_string(const struct string *string);
226 extern const char *show_token(const struct token *);
227 extern const char *quote_token(const struct token *);
228 extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
229 extern struct token * tokenize_buffer(void *, unsigned long, struct token **);
230 
231 extern void show_identifier_stats(void);
232 extern void init_include_path(void);
233 extern struct token *preprocess(struct token *);
234 
235 extern void store_all_tokens(struct token *token);
236 extern struct token *pos_get_token(struct position pos);
237 extern char *pos_ident(struct position pos);
238 
239 extern void store_macro_pos(struct token *);
240 extern char *get_macro_name(struct position pos);
241 
242 static inline int match_op(struct token *token, unsigned int op)
243 {
244 	return token->pos.type == TOKEN_SPECIAL && token->special == op;
245 }
246 
247 static inline int match_ident(struct token *token, struct ident *id)
248 {
249 	return token->pos.type == TOKEN_IDENT && token->ident == id;
250 }
251 
252 #endif
253