1#ifndef TOKEN_H
2#define TOKEN_H
3/*
4 * Basic tokenization structures. NOTE! Those tokens had better
5 * be pretty small, since we're going to keep them all in memory
6 * indefinitely.
7 *
8 * Copyright (C) 2003 Transmeta Corp.
9 *               2003 Linus Torvalds
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a copy
12 * of this software and associated documentation files (the "Software"), to deal
13 * in the Software without restriction, including without limitation the rights
14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 * copies of the Software, and to permit persons to whom the Software is
16 * furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included in
19 * all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 * THE SOFTWARE.
28 */
29
#include <stddef.h>
#include <sys/types.h>
#include "lib.h"
32
/*
 * This describes the pure lexical elements (tokens), with
 * no semantic meaning. In other words, an identifier doesn't
 * have a type or meaning; it is only a specific string in
 * the input stream.
 *
 * Semantic meaning is handled elsewhere.
 */
41
/* Values for the 'constant' field of struct stream. */
enum constantfile {
	/* To be determined, not inside any #ifs in this file */
	CONSTANT_FILE_MAYBE,
	/* To be determined, currently inside #ifndef */
	CONSTANT_FILE_IFNDEF,
	/* No */
	CONSTANT_FILE_NOPE,
	/* Yes */
	CONSTANT_FILE_YES
};
48
/*
 * Array of C strings, defined outside this header.
 * NOTE(review): presumably the include search directories - confirm.
 */
extern const char *includepath[];
50
51struct stream {
52	int fd;
53	const char *name;
54	const char *path;    // input-file path - see set_stream_include_path()
55	const char **next_path;
56
57	/* Use these to check for "already parsed" */
58	enum constantfile constant;
59	int dirty, next_stream, once;
60	struct ident *protect;
61	struct token *ifndef;
62	struct token *top_if;
63};
64
/*
 * Global stream state, defined outside this header.
 * NOTE(review): input_stream_nr is presumably the number of entries
 * reachable through input_streams - confirm against the definition.
 */
extern int input_stream_nr;
extern struct stream *input_streams;
extern unsigned int tabstop;
extern int no_lineno;
/* Takes a stream name and returns a pointer to an int. */
extern int *hash_stream(const char *name);
70
/*
 * One identifier. The name is stored inline in the trailing flexible
 * array member and 'len' records its length.
 */
struct ident {
	struct ident *next;		/* Hash chain of identifiers */
	struct symbol *symbols;		/* Pointer to semantic meaning list */
	unsigned char len;		/* Length of identifier name */
	/* Three single-bit flags */
	unsigned char tainted:1;
	unsigned char reserved:1;
	unsigned char keyword:1;
	char name[];			/* Actual identifier */
};
80
/*
 * Type tag of a token, read back through token_type(). Only the first
 * enumerator has an explicit value, so the rest number consecutively
 * from 0 in the order written here.
 * NOTE(review): other code may depend on these numeric values, so do
 * not reorder without checking the users.
 */
enum token_type {
	TOKEN_EOF = 0,
	TOKEN_BAD,
	TOKEN_ERROR,

	TOKEN_IDENT,
	TOKEN_ZERO_IDENT,
	TOKEN_NUMBER,

	/* TOKEN_CHAR family */
	TOKEN_CHAR,
	TOKEN_CHAR_EMBEDDED_0,
	TOKEN_CHAR_EMBEDDED_1,
	TOKEN_CHAR_EMBEDDED_2,
	TOKEN_CHAR_EMBEDDED_3,

	/* TOKEN_WIDE_CHAR family */
	TOKEN_WIDE_CHAR,
	TOKEN_WIDE_CHAR_EMBEDDED_0,
	TOKEN_WIDE_CHAR_EMBEDDED_1,
	TOKEN_WIDE_CHAR_EMBEDDED_2,
	TOKEN_WIDE_CHAR_EMBEDDED_3,

	TOKEN_STRING,
	TOKEN_WIDE_STRING,
	TOKEN_SPECIAL,
	TOKEN_STREAMBEGIN,
	TOKEN_STREAMEND,

	TOKEN_MACRO_ARGUMENT,
	TOKEN_STR_ARGUMENT,
	TOKEN_QUOTED_ARGUMENT,
	TOKEN_CONCAT,
	TOKEN_GNU_KLUDGE,
	TOKEN_UNTAINT,
	TOKEN_ARG_COUNT,

	TOKEN_IF,
	TOKEN_SKIP_GROUPS,
	TOKEN_ELSE,
};
114
/*
 * Combination tokens: brace-enclosed list of the multi-character
 * operator spellings. The strings appear in the same order as the
 * enumerators of enum special_token, so entries must not be moved.
 */
#define COMBINATION_STRINGS {					\
	"+=", "++", "-=", "--", "->",				\
	"*=", "/=", "%=",					\
	"<=", ">=", "==", "!=",					\
	"&&", "&=", "||", "|=",					\
	"^=", "##",						\
	"<<", ">>", "..",					\
	"<<=", ">>=", "...",					\
	"",							\
	"<", ">", "<=", ">="					\
}
132
/*
 * Table of 4-byte rows, defined outside this header.
 * NOTE(review): presumably built from COMBINATION_STRINGS - confirm.
 */
extern unsigned char combinations[][4];
134
/*
 * Codes for the combination tokens. Numbering starts at SPECIAL_BASE
 * (256) and the enumerators follow the order of the strings in
 * COMBINATION_STRINGS; the spelling quoted beside each name is the
 * string at the matching position of that list.
 */
enum special_token {
	SPECIAL_BASE = 256,

	SPECIAL_ADD_ASSIGN = SPECIAL_BASE,	/* "+=" */
	SPECIAL_INCREMENT,			/* "++" */
	SPECIAL_SUB_ASSIGN,			/* "-=" */
	SPECIAL_DECREMENT,			/* "--" */
	SPECIAL_DEREFERENCE,			/* "->" */
	SPECIAL_MUL_ASSIGN,			/* "*=" */
	SPECIAL_DIV_ASSIGN,			/* "/=" */
	SPECIAL_MOD_ASSIGN,			/* "%=" */
	SPECIAL_LTE,				/* "<=" */
	SPECIAL_GTE,				/* ">=" */
	SPECIAL_EQUAL,				/* "==" */
	SPECIAL_NOTEQUAL,			/* "!=" */
	SPECIAL_LOGICAL_AND,			/* "&&" */
	SPECIAL_AND_ASSIGN,			/* "&=" */
	SPECIAL_LOGICAL_OR,			/* "||" */
	SPECIAL_OR_ASSIGN,			/* "|=" */
	SPECIAL_XOR_ASSIGN,			/* "^=" */
	SPECIAL_HASHHASH,			/* "##" */
	SPECIAL_LEFTSHIFT,			/* "<<" */
	SPECIAL_RIGHTSHIFT,			/* ">>" */
	SPECIAL_DOTDOT,				/* ".." */
	SPECIAL_SHL_ASSIGN,			/* "<<=" */
	SPECIAL_SHR_ASSIGN,			/* ">>=" */
	SPECIAL_ELLIPSIS,			/* "..." */
	SPECIAL_ARG_SEPARATOR,			/* "" */
	SPECIAL_UNSIGNED_LT,			/* "<" */
	SPECIAL_UNSIGNED_GT,			/* ">" */
	SPECIAL_UNSIGNED_LTE,			/* "<=" */
	SPECIAL_UNSIGNED_GTE,			/* ">=" */
};
167
/*
 * Counted string: a 31-bit length and a 1-bit immutable flag,
 * followed by the character data in a flexible array member.
 */
struct string {
	unsigned int length:31, immutable:1;
	char data[];
};
173
/*
 * Three 10-bit counters plus a 1-bit vararg flag: 31 bits in all,
 * so the whole thing will fit into 32 bits.
 */
struct argcount {
	unsigned int normal:10;
	unsigned int quoted:10;
	unsigned int str:10;
	unsigned int vararg:1;
};
181
/*
 * This is a very common data structure, so it should be kept
 * as small as humanly possible. Big (rare) types go in as
 * pointers.
 *
 * The payload is an anonymous union, so its members overlap and only
 * one is meaningful at a time. The type tag lives in pos.type:
 * match_op() reads 'special' for TOKEN_SPECIAL tokens and
 * match_ident() reads 'ident' for TOKEN_IDENT tokens.
 */
struct token {
	struct position pos;
	struct token *next;	/* following token in the token->next chain */
	union {
		const char *number;
		struct ident *ident;
		unsigned int special;
		struct string *string;
		int argnum;
		struct argcount count;
		char embedded[4];
	};
};
200
/* NOTE(review): presumably the longest string the tokenizer accepts - confirm at the users. */
#define MAX_STRING 8191
202
203static inline struct token *containing_token(struct token **p)
204{
205	void *addr = (char *)p - ((char *)&((struct token *)0)->next - (char *)0);
206	return addr;
207}
208
/* Read the type tag of token x, which is stored in its position: (x)->pos.type. */
#define token_type(x) ((x)->pos.type)
210
/*
 * Last token in the stream - points to itself.
 * This allows us to not test for NULL pointers
 * when following the token->next chain.
 *
 * eof_token(x) is true when x is the address of this sentinel.
 */
extern struct token eof_token_entry;
#define eof_token(x) ((x) == &eof_token_entry)
218
/*
 * Stream, identifier and token helpers. Only the prototypes are
 * visible here; the definitions live outside this header.
 */
extern int init_stream(const char *, int fd, const char **next_path);
extern const char *stream_name(int stream);
extern struct ident *hash_ident(struct ident *);
extern struct ident *built_in_ident(const char *);
extern struct token *built_in_token(int, struct ident *);
/*
 * The show_*() functions and quote_token() each return a const string
 * for their argument.
 * NOTE(review): storage and lifetime of the returned string are not
 * visible from this header - check the definitions before keeping
 * the pointer.
 */
extern const char *show_special(int);
extern const char *show_ident(const struct ident *);
extern const char *show_string(const struct string *string);
extern const char *show_token(const struct token *);
extern const char *quote_token(const struct token *);
/* Tokenizer entry points; both return a token pointer. */
extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
extern struct token * tokenize_buffer(void *, unsigned long, struct token **);
231
/* Defined outside this header. */
extern void show_identifier_stats(void);
extern void init_include_path(void);
/* Takes a token pointer and returns a token pointer. */
extern struct token *preprocess(struct token *);
235
/*
 * Position-keyed lookups; struct position is passed by value.
 * NOTE(review): presumably pos_get_token()/pos_ident() only find tokens
 * previously handed to store_all_tokens() - confirm in the definitions.
 */
extern void store_all_tokens(struct token *token);
extern struct token *pos_get_token(struct position pos);
extern char *pos_ident(struct position pos);
239
/*
 * Macro queries keyed by source position.
 * NOTE(review): ownership of the returned char * and string_list is
 * not visible from this header - check the definitions before freeing.
 */
extern void store_macro_pos(struct token *);
extern char *get_macro_name(struct position pos);
extern char *get_inner_macro(struct position pos);
extern struct string_list *get_all_macros(struct position pos);
244
245static inline int match_op(struct token *token, unsigned int op)
246{
247	return token->pos.type == TOKEN_SPECIAL && token->special == op;
248}
249
250static inline int match_ident(struct token *token, struct ident *id)
251{
252	return token->pos.type == TOKEN_IDENT && token->ident == id;
253}
254
255#endif
256