1/*
2 * Do C preprocessing, based on a token list gathered by
3 * the tokenizer.
4 *
5 * This may not be the smartest preprocessor on the planet.
6 *
7 * Copyright (C) 2003 Transmeta Corp.
8 *               2003-2004 Linus Torvalds
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
28#include <stdio.h>
29#include <stdlib.h>
30#include <stdarg.h>
31#include <stddef.h>
32#include <string.h>
33#include <ctype.h>
34#include <unistd.h>
35#include <fcntl.h>
36#include <limits.h>
37#include <time.h>
38#include <dirent.h>
39#include <sys/stat.h>
40
41#include "lib.h"
42#include "allocate.h"
43#include "parse.h"
44#include "token.h"
45#include "symbol.h"
46#include "expression.h"
47#include "scope.h"
48
static struct ident_list *macros;	// only needed for -dD
/* While non-zero, collect_arg() frees the tokens it scans instead of keeping them. */
static int false_nesting = 0;
static int counter_macro = 0;		// __COUNTER__ expansion
/* expand_include_level() reports this value minus one. */
static int include_level = 0;

/*
 * Number of usable slots in includepath[]; the array has one extra
 * element (presumably for the terminating NULL -- TODO confirm).
 */
#define INCLUDEPATHS 300
/*
 * Single array holding all include search directories, terminated by NULL.
 * Slot 0 is rewritten by set_stream_include_path() and by
 * handle_include_path() before a search starts.
 */
const char *includepath[INCLUDEPATHS+1] = {
	"",
	"/usr/include",
	"/usr/local/include",
	NULL
};

/*
 * Cursors into includepath[] marking where each class of search
 * directories begins (see the layout comment further down).
 */
static const char **quote_includepath = includepath;
static const char **angle_includepath = includepath + 1;
static const char **isys_includepath   = includepath + 1;
static const char **sys_includepath   = includepath + 1;
static const char **dirafter_includepath = includepath + 3;
67
/*
 * Mark 'stream' as dirty the first time this runs for it.  If no
 * ifndef is being tracked at that moment, the recorded 'protect' entry
 * is dropped as well.
 *
 * 'stream' is a pointer expression; it is evaluated more than once, so
 * it must not have side effects.
 */
#define dirty_stream(stream)				\
	do {						\
		if (!(stream)->dirty) {			\
			(stream)->dirty = 1;		\
			if (!(stream)->ifndef)		\
				(stream)->protect = NULL;	\
		}					\
	} while(0)
76
/*
 * Called when a conditional group of 'stream' ends.  Only acts when the
 * tracked 'ifndef' is the current 'top_if': tracking stops, and then
 * either 'protect' is dropped (the stream was never dirtied) or, when a
 * 'protect' entry exists, the dirty flag is cleared again.
 *
 * 'stream' is a pointer expression; it is evaluated more than once, so
 * it must not have side effects.
 */
#define end_group(stream)					\
	do {							\
		if ((stream)->ifndef == (stream)->top_if) {	\
			(stream)->ifndef = NULL;		\
			if (!(stream)->dirty)			\
				(stream)->protect = NULL;	\
			else if ((stream)->protect)		\
				(stream)->dirty = 0;		\
		}						\
	} while(0)
87
/*
 * Reset the tracking state of 'stream' after a conditional nesting
 * error: the stream becomes dirty and both 'ifndef' and 'protect' are
 * cleared.
 *
 * 'stream' is a pointer expression; it is evaluated more than once, so
 * it must not have side effects.
 */
#define nesting_error(stream)		\
	do {				\
		(stream)->dirty = 1;	\
		(stream)->ifndef = NULL;	\
		(stream)->protect = NULL;	\
	} while(0)
94
95static struct token *alloc_token(struct position *pos)
96{
97	struct token *token = __alloc_token(0);
98
99	token->pos.stream = pos->stream;
100	token->pos.line = pos->line;
101	token->pos.pos = pos->pos;
102	token->pos.whitespace = 1;
103	return token;
104}
105
/*
 * Expand symbol 'sym' at '*list'.
 * Returns 0 when the expansion was spliced into the list and non-zero
 * when the token at '*list' was left where it is.
 */
static int expand(struct token **, struct symbol *);
108
109static void replace_with_string(struct token *token, const char *str)
110{
111	int size = strlen(str) + 1;
112	struct string *s = __alloc_string(size);
113
114	s->length = size;
115	memcpy(s->data, str, size);
116	token_type(token) = TOKEN_STRING;
117	token->string = s;
118}
119
120static void replace_with_integer(struct token *token, unsigned int val)
121{
122	char *buf = __alloc_bytes(11);
123	sprintf(buf, "%u", val);
124	token_type(token) = TOKEN_NUMBER;
125	token->number = buf;
126}
127
128static struct symbol *lookup_macro(struct ident *ident)
129{
130	struct symbol *sym = lookup_symbol(ident, NS_MACRO | NS_UNDEF);
131	if (sym && sym->namespace != NS_MACRO)
132		sym = NULL;
133	return sym;
134}
135
136static int token_defined(struct token *token)
137{
138	if (token_type(token) == TOKEN_IDENT) {
139		struct symbol *sym = lookup_macro(token->ident);
140		if (sym) {
141			sym->used_in = file_scope;
142			return 1;
143		}
144		return 0;
145	}
146
147	sparse_error(token->pos, "expected preprocessor identifier");
148	return 0;
149}
150
151static void replace_with_bool(struct token *token, bool val)
152{
153	static const char *string[] = { "0", "1" };
154
155	token_type(token) = TOKEN_NUMBER;
156	token->number = string[val];
157}
158
/* Replace 'token' with 1 or 0 depending on whether it names a macro. */
static void replace_with_defined(struct token *token)
{
	int is_macro = token_defined(token);

	replace_with_bool(token, is_macro);
}
163
164static void replace_with_has_builtin(struct token *token)
165{
166	struct symbol *sym = lookup_symbol(token->ident, NS_SYMBOL);
167	replace_with_bool(token, sym && sym->builtin);
168}
169
170static void replace_with_has_attribute(struct token *token)
171{
172	struct symbol *sym = lookup_symbol(token->ident, NS_KEYWORD);
173	replace_with_bool(token, sym && sym->op && sym->op->attribute);
174}
175
176static void expand_line(struct token *token)
177{
178	replace_with_integer(token, token->pos.line);
179}
180
181static void expand_file(struct token *token)
182{
183	replace_with_string(token, stream_name(token->pos.stream));
184}
185
186static void expand_basefile(struct token *token)
187{
188	replace_with_string(token, base_filename);
189}
190
/* Timestamp shared by expand_date() and expand_time(); fetched on first use. */
static time_t t = 0;

/*
 * Expander: replace 'token' with the date string "Mmm dd yyyy".
 *
 * If the time cannot be broken down or does not fit the buffer, the
 * placeholder "??? ?? ????" is produced instead of passing a NULL
 * 'struct tm' to strftime() or using an indeterminate buffer.
 */
static void expand_date(struct token *token)
{
	static char buffer[12]; /* __DATE__: 3 + ' ' + 2 + ' ' + 4 + '\0' */
	struct tm *tm;

	if (!t)
		time(&t);
	tm = localtime(&t);
	if (!tm || !strftime(buffer, sizeof(buffer), "%b %e %Y", tm))
		strcpy(buffer, "??? ?? ????");
	replace_with_string(token, buffer);
}
201
202static void expand_time(struct token *token)
203{
204	static char buffer[9]; /* __TIME__: 2 + ':' + 2 + ':' + 2 + '\0' */
205
206	if (!t)
207		time(&t);
208	strftime(buffer, 9, "%T", localtime(&t));
209	replace_with_string(token, buffer);
210}
211
212static void expand_counter(struct token *token)
213{
214	replace_with_integer(token, counter_macro++);
215}
216
217static void expand_include_level(struct token *token)
218{
219	replace_with_integer(token, include_level - 1);
220}
221
222static int expand_one_symbol(struct token **list)
223{
224	struct token *token = *list;
225	struct symbol *sym;
226
227	if (token->pos.noexpand)
228		return 1;
229
230	sym = lookup_macro(token->ident);
231	if (!sym)
232		return 1;
233	store_macro_pos(token);
234	if (sym->expander) {
235		sym->expander(token);
236		return 1;
237	} else {
238		sym->used_in = file_scope;
239		return expand(list, sym);
240	}
241}
242
243static inline struct token *scan_next(struct token **where)
244{
245	struct token *token = *where;
246	if (token_type(token) != TOKEN_UNTAINT)
247		return token;
248	do {
249		token->ident->tainted = 0;
250		token = token->next;
251	} while (token_type(token) == TOKEN_UNTAINT);
252	*where = token;
253	return token;
254}
255
256static void expand_list(struct token **list)
257{
258	struct token *next;
259	while (!eof_token(next = scan_next(list))) {
260		if (token_type(next) != TOKEN_IDENT || expand_one_symbol(list))
261			list = &next->next;
262	}
263}
264
/* Forward declaration; collect_arg() calls this for a directive met inside a macro argument list. */
static void preprocessor_line(struct stream *stream, struct token **line);
266
/*
 * Collect one macro argument from the tokens following 'prev'.
 *
 * Scanning stops at a ')' outside nested parentheses, at a ',' outside
 * nested parentheses (unless 'vararg' is set), at a stream boundary
 * token, or at eof.  The collected list, which starts at prev->next, is
 * terminated with eof_token_entry and the token that stopped the scan
 * is returned.
 *
 * Each collected token gets the stream/line/column of 'pos'.  When
 * 'count' is greater than one, string tokens are flagged immutable.
 */
static struct token *collect_arg(struct token *prev, int vararg, struct position *pos, int count)
{
	struct stream *stream = input_streams + prev->pos.stream;
	struct token **p = &prev->next;
	struct token *next;
	int nesting = 0;	/* depth of '(' seen inside this argument */

	while (!eof_token(next = scan_next(p))) {
		/* a '#' at the start of a line is a directive, unless noexpand */
		if (next->pos.newline && match_op(next, '#')) {
			if (!next->pos.noexpand) {
				sparse_error(next->pos,
					     "directive in argument list");
				preprocessor_line(stream, p);
				__free_token(next);	/* Free the '#' token */
				continue;
			}
		}
		switch (token_type(next)) {
		case TOKEN_STREAMEND:
		case TOKEN_STREAMBEGIN:
			/* stream boundary: terminate the argument here */
			*p = &eof_token_entry;
			return next;
		case TOKEN_STRING:
		case TOKEN_WIDE_STRING:
			if (count > 1)
				next->string->immutable = 1;
			break;
		}
		/* while false_nesting is set, unlink and free the token */
		if (false_nesting) {
			*p = next->next;
			__free_token(next);
			continue;
		}
		if (match_op(next, '(')) {
			nesting++;
		} else if (match_op(next, ')')) {
			/* ')' at depth 0 closes the argument list */
			if (!nesting--)
				break;
		} else if (match_op(next, ',') && !nesting && !vararg) {
			break;
		}
		next->pos.stream = pos->stream;
		next->pos.line = pos->line;
		next->pos.pos = pos->pos;
		p = &next->next;
	}
	*p = &eof_token_entry;
	return next;
}
316
/*
 * We store arglist as <counter> [arg1] <number of uses for arg1> ... eof
 */

/* Per-argument state of one macro invocation. */
struct arg {
	struct token *arg;	/* tokens as collected by collect_arguments(); may be NULL */
	struct token *expanded;	/* macro-expanded list, filled in by expand_arguments() */
	struct token *str;	/* stringified form, filled in by expand_arguments() */
	int n_normal;		/* copied from the arglist entry's count.normal */
	int n_quoted;		/* copied from the arglist entry's count.quoted */
	int n_str;		/* copied from the arglist entry's count.str */
};
329
/*
 * Collect the arguments of a function-like macro invocation.
 *
 * 'start' is the token the first argument follows, 'arglist' the
 * macro's stored argument list (beginning with its counter token),
 * 'args' the array to fill in and 'what' the macro name token.
 *
 * Returns 1 on success and 0 after reporting an error; in both cases
 * what->next is set to the token after the one that ended collection.
 */
static int collect_arguments(struct token *start, struct token *arglist, struct arg *args, struct token *what)
{
	int wanted = arglist->count.normal;	/* number of declared parameters */
	struct token *next = NULL;
	int count = 0;

	arglist = arglist->next;	/* skip counter */

	if (!wanted) {
		/* no parameters: only an empty "()" is acceptable */
		next = collect_arg(start, 0, &what->pos, 0);
		if (eof_token(next))
			goto Eclosing;
		if (!eof_token(start->next) || !match_op(next, ')')) {
			count++;
			goto Emany;
		}
	} else {
		for (count = 0; count < wanted; count++) {
			struct argcount *p = &arglist->next->count;
			next = collect_arg(start, p->vararg, &what->pos, p->normal);
			if (eof_token(next))
				goto Eclosing;
			/* sole vararg parameter given nothing: leave count at 0 */
			if (p->vararg && wanted == 1 && eof_token(start->next))
				break;
			arglist = arglist->next->next;
			args[count].arg = start->next;
			args[count].n_normal = p->normal;
			args[count].n_quoted = p->quoted;
			args[count].n_str = p->str;
			if (match_op(next, ')')) {
				count++;
				break;
			}
			start = next;
		}
		if (count == wanted && !match_op(next, ')'))
			goto Emany;
		if (count == wanted - 1) {
			/* one short is fine only if the missing one is the vararg */
			struct argcount *p = &arglist->next->count;
			if (!p->vararg)
				goto Efew;
			args[count].arg = NULL;
			args[count].n_normal = p->normal;
			args[count].n_quoted = p->quoted;
			args[count].n_str = p->str;
		}
		if (count < wanted - 1)
			goto Efew;
	}
	what->next = next->next;
	return 1;

Efew:
	sparse_error(what->pos, "macro \"%s\" requires %d arguments, but only %d given",
		show_token(what), wanted, count);
	goto out;
Emany:
	/* consume the surplus arguments so the count in the message is right */
	while (match_op(next, ',')) {
		next = collect_arg(next, 0, &what->pos, 0);
		count++;
	}
	if (eof_token(next))
		goto Eclosing;
	sparse_error(what->pos, "macro \"%s\" passed %d arguments, but takes just %d",
		show_token(what), count, wanted);
	goto out;
Eclosing:
	sparse_error(what->pos, "unterminated argument list invoking macro \"%s\"",
		show_token(what));
out:
	what->next = next->next;
	return 0;
}
403
404static struct token *dup_list(struct token *list)
405{
406	struct token *res = NULL;
407	struct token **p = &res;
408
409	while (!eof_token(list)) {
410		struct token *newtok = __alloc_token(0);
411		*newtok = *list;
412		*p = newtok;
413		p = &newtok->next;
414		list = list->next;
415	}
416	return res;
417}
418
419static const char *show_token_sequence(struct token *token, int quote)
420{
421	static char buffer[MAX_STRING];
422	char *ptr = buffer;
423	int whitespace = 0;
424
425	if (!token && !quote)
426		return "<none>";
427	while (!eof_token(token)) {
428		const char *val = quote ? quote_token(token) : show_token(token);
429		int len = strlen(val);
430
431		if (ptr + whitespace + len >= buffer + sizeof(buffer)) {
432			sparse_error(token->pos, "too long token expansion");
433			break;
434		}
435
436		if (whitespace)
437			*ptr++ = ' ';
438		memcpy(ptr, val, len);
439		ptr += len;
440		token = token->next;
441		whitespace = token->pos.whitespace;
442	}
443	*ptr = 0;
444	return buffer;
445}
446
447static struct token *stringify(struct token *arg)
448{
449	const char *s = show_token_sequence(arg, 1);
450	int size = strlen(s)+1;
451	struct token *token = __alloc_token(0);
452	struct string *string = __alloc_string(size);
453
454	memcpy(string->data, s, size);
455	string->length = size;
456	token->pos = arg->pos;
457	token_type(token) = TOKEN_STRING;
458	token->string = string;
459	token->next = &eof_token_entry;
460	return token;
461}
462
463static void expand_arguments(int count, struct arg *args)
464{
465	int i;
466	for (i = 0; i < count; i++) {
467		struct token *arg = args[i].arg;
468		if (!arg)
469			arg = &eof_token_entry;
470		if (args[i].n_str)
471			args[i].str = stringify(arg);
472		if (args[i].n_normal) {
473			if (!args[i].n_quoted) {
474				args[i].expanded = arg;
475				args[i].arg = NULL;
476			} else if (eof_token(arg)) {
477				args[i].expanded = arg;
478			} else {
479				args[i].expanded = dup_list(arg);
480			}
481			expand_list(&args[i].expanded);
482		}
483	}
484}
485
486/*
487 * Possibly valid combinations:
488 *  - ident + ident -> ident
489 *  - ident + number -> ident unless number contains '.', '+' or '-'.
490 *  - 'L' + char constant -> wide char constant
491 *  - 'L' + string literal -> wide string literal
492 *  - number + number -> number
493 *  - number + ident -> number
494 *  - number + '.' -> number
495 *  - number + '+' or '-' -> number, if number used to end on [eEpP].
496 *  - '.' + number -> number, if number used to start with a digit.
497 *  - special + special -> either special or an error.
498 */
499static enum token_type combine(struct token *left, struct token *right, char *p)
500{
501	int len;
502	enum token_type t1 = token_type(left), t2 = token_type(right);
503
504	if (t1 != TOKEN_IDENT && t1 != TOKEN_NUMBER && t1 != TOKEN_SPECIAL)
505		return TOKEN_ERROR;
506
507	if (t1 == TOKEN_IDENT && left->ident == &L_ident) {
508		if (t2 >= TOKEN_CHAR && t2 < TOKEN_WIDE_CHAR)
509			return t2 + TOKEN_WIDE_CHAR - TOKEN_CHAR;
510		if (t2 == TOKEN_STRING)
511			return TOKEN_WIDE_STRING;
512	}
513
514	if (t2 != TOKEN_IDENT && t2 != TOKEN_NUMBER && t2 != TOKEN_SPECIAL)
515		return TOKEN_ERROR;
516
517	strcpy(p, show_token(left));
518	strcat(p, show_token(right));
519	len = strlen(p);
520
521	if (len >= 256)
522		return TOKEN_ERROR;
523
524	if (t1 == TOKEN_IDENT) {
525		if (t2 == TOKEN_SPECIAL)
526			return TOKEN_ERROR;
527		if (t2 == TOKEN_NUMBER && strpbrk(p, "+-."))
528			return TOKEN_ERROR;
529		return TOKEN_IDENT;
530	}
531
532	if (t1 == TOKEN_NUMBER) {
533		if (t2 == TOKEN_SPECIAL) {
534			switch (right->special) {
535			case '.':
536				break;
537			case '+': case '-':
538				if (strchr("eEpP", p[len - 2]))
539					break;
540			default:
541				return TOKEN_ERROR;
542			}
543		}
544		return TOKEN_NUMBER;
545	}
546
547	if (p[0] == '.' && isdigit((unsigned char)p[1]))
548		return TOKEN_NUMBER;
549
550	return TOKEN_SPECIAL;
551}
552
553static int merge(struct token *left, struct token *right)
554{
555	static char buffer[512];
556	enum token_type res = combine(left, right, buffer);
557	int n;
558
559	switch (res) {
560	case TOKEN_IDENT:
561		left->ident = built_in_ident(buffer);
562		left->pos.noexpand = 0;
563		return 1;
564
565	case TOKEN_NUMBER:
566		token_type(left) = TOKEN_NUMBER;	/* could be . + num */
567		left->number = xstrdup(buffer);
568		return 1;
569
570	case TOKEN_SPECIAL:
571		if (buffer[2] && buffer[3])
572			break;
573		for (n = SPECIAL_BASE; n < SPECIAL_ARG_SEPARATOR; n++) {
574			if (!memcmp(buffer, combinations[n-SPECIAL_BASE], 3)) {
575				left->special = n;
576				return 1;
577			}
578		}
579		break;
580
581	case TOKEN_WIDE_CHAR:
582	case TOKEN_WIDE_STRING:
583		token_type(left) = res;
584		left->pos.noexpand = 0;
585		left->string = right->string;
586		return 1;
587
588	case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3:
589		token_type(left) = res;
590		left->pos.noexpand = 0;
591		memcpy(left->embedded, right->embedded, 4);
592		return 1;
593
594	default:
595		;
596	}
597	sparse_error(left->pos, "'##' failed: concatenation is not a valid token");
598	return 0;
599}
600
601static struct token *dup_token(struct token *token, struct position *streampos)
602{
603	struct token *alloc = alloc_token(streampos);
604	token_type(alloc) = token_type(token);
605	alloc->pos.newline = token->pos.newline;
606	alloc->pos.whitespace = token->pos.whitespace;
607	alloc->number = token->number;
608	alloc->pos.noexpand = token->pos.noexpand;
609	return alloc;
610}
611
612static struct token **copy(struct token **where, struct token *list, int *count)
613{
614	int need_copy = --*count;
615	while (!eof_token(list)) {
616		struct token *token;
617		if (need_copy)
618			token = dup_token(list, &list->pos);
619		else
620			token = list;
621		if (token_type(token) == TOKEN_IDENT && token->ident->tainted)
622			token->pos.noexpand = 1;
623		*where = token;
624		where = &token->next;
625		list = list->next;
626	}
627	*where = &eof_token_entry;
628	return where;
629}
630
/*
 * Decide how the GNU ", ## <arg> [## <arg> ...]" kludge at '*p' behaves
 * for the given arguments.
 *
 * Returns 1 when the whole construct is to be skipped; '*p' then points
 * at its last argument token.  Returns 0 otherwise: either '*p' was
 * advanced by one token so that the first ## is ignored, or '*p' is
 * untouched because a non-empty argument appears before the last one.
 */
static int handle_kludge(struct token **p, struct arg *args)
{
	/* skip the kludge token and the ## after it: t is the first argument */
	struct token *t = (*p)->next->next;
	while (1) {
		struct arg *v = &args[t->argnum];
		if (token_type(t->next) != TOKEN_CONCAT) {
			/* t is the last argument of the chain */
			if (v->arg) {
				/* ignore the first ## */
				*p = (*p)->next;
				return 0;
			}
			/* skip the entire thing */
			*p = t;
			return 1;
		}
		if (v->arg && !eof_token(v->arg))
			return 0; /* no magic */
		/* empty argument followed by ##: look at the next argument */
		t = t->next->next;
	}
}
651
/*
 * Write the expansion of a macro at '*list': walk the macro 'body',
 * replacing argument placeholders with the prepared tokens from 'args'
 * and performing ## concatenation.  Plain body tokens are duplicated at
 * the position of the token originally at '*list'.
 *
 * Returns the address of the final link of the produced list, which is
 * set to the eof token.
 *
 * 'state' tracks ## handling: Concat means the next produced token is
 * to be merged with the previous one; Placeholder means the previous
 * operand was empty.
 */
static struct token **substitute(struct token **list, struct token *body, struct arg *args)
{
	struct position *base_pos = &(*list)->pos;
	int *count;
	enum {Normal, Placeholder, Concat} state = Normal;

	for (; !eof_token(body); body = body->next) {
		struct token *added, *arg;
		struct token **tail;
		struct token *t;

		switch (token_type(body)) {
		case TOKEN_GNU_KLUDGE:
			/*
			 * GNU kludge: if we had <comma>##<vararg>, behaviour
			 * depends on whether we had enough arguments to have
			 * a vararg.  If we did, ## is just ignored.  Otherwise
			 * both , and ## are ignored.  Worse, there can be
			 * an arbitrary number of ##<arg> in between; if all of
			 * those are empty, we act as if they hadn't been there,
			 * otherwise we act as if the kludge didn't exist.
			 */
			t = body;
			if (handle_kludge(&body, args)) {
				if (state == Concat)
					state = Normal;
				else
					state = Placeholder;
				continue;
			}
			/* emit the kludge token itself as an ordinary special */
			added = dup_token(t, base_pos);
			token_type(added) = TOKEN_SPECIAL;
			tail = &added->next;
			break;

		case TOKEN_STR_ARGUMENT:
			arg = args[body->argnum].str;
			count = &args[body->argnum].n_str;
			goto copy_arg;

		case TOKEN_QUOTED_ARGUMENT:
			arg = args[body->argnum].arg;
			count = &args[body->argnum].n_quoted;
			/* empty operand of ##: nothing to add */
			if (!arg || eof_token(arg)) {
				if (state == Concat)
					state = Normal;
				else
					state = Placeholder;
				continue;
			}
			goto copy_arg;

		case TOKEN_MACRO_ARGUMENT:
			arg = args[body->argnum].expanded;
			count = &args[body->argnum].n_normal;
			if (eof_token(arg)) {
				state = Normal;
				continue;
			}
		copy_arg:
			tail = copy(&added, arg, count);
			/* the first copied token takes the placeholder's spacing */
			added->pos.newline = body->pos.newline;
			added->pos.whitespace = body->pos.whitespace;
			break;

		case TOKEN_CONCAT:
			if (state == Placeholder)
				state = Normal;
			else
				state = Concat;
			continue;

		case TOKEN_IDENT:
			added = dup_token(body, base_pos);
			if (added->ident->tainted)
				added->pos.noexpand = 1;
			tail = &added->next;
			break;

		default:
			added = dup_token(body, base_pos);
			tail = &added->next;
			break;
		}

		/*
		 * if we got to doing real concatenation, we already have
		 * added something into the list, so containing_token() is OK.
		 */
		if (state == Concat && merge(containing_token(list), added)) {
			/* 'added' was folded into the previous token; link what follows it */
			*list = added->next;
			if (tail != &added->next)
				list = tail;
		} else {
			*list = added;
			list = tail;
		}
		state = Normal;
	}
	*list = &eof_token_entry;
	return list;
}
754
755static int expand(struct token **list, struct symbol *sym)
756{
757	struct token *last;
758	struct token *token = *list;
759	struct ident *expanding = token->ident;
760	struct token **tail;
761	int nargs = sym->arglist ? sym->arglist->count.normal : 0;
762	struct arg args[nargs];
763
764	if (expanding->tainted) {
765		token->pos.noexpand = 1;
766		return 1;
767	}
768
769	if (sym->arglist) {
770		if (!match_op(scan_next(&token->next), '('))
771			return 1;
772		if (!collect_arguments(token->next, sym->arglist, args, token))
773			return 1;
774		expand_arguments(nargs, args);
775	}
776
777	expanding->tainted = 1;
778
779	last = token->next;
780	tail = substitute(list, sym->expansion, args);
781	/*
782	 * Note that it won't be eof - at least TOKEN_UNTAINT will be there.
783	 * We still can lose the newline flag if the sucker expands to nothing,
784	 * but the price of dealing with that is probably too high (we'd need
785	 * to collect the flags during scan_next())
786	 */
787	(*list)->pos.newline = token->pos.newline;
788	(*list)->pos.whitespace = token->pos.whitespace;
789	*tail = last;
790
791	return 0;
792}
793
794static const char *token_name_sequence(struct token *token, int endop, struct token *start)
795{
796	static char buffer[256];
797	char *ptr = buffer;
798
799	while (!eof_token(token) && !match_op(token, endop)) {
800		int len;
801		const char *val = token->string->data;
802		if (token_type(token) != TOKEN_STRING)
803			val = show_token(token);
804		len = strlen(val);
805		memcpy(ptr, val, len);
806		ptr += len;
807		token = token->next;
808	}
809	*ptr = 0;
810	if (endop && !match_op(token, endop))
811		sparse_error(start->pos, "expected '>' at end of filename");
812	return buffer;
813}
814
815static int already_tokenized(const char *path)
816{
817	int stream, next;
818
819	for (stream = *hash_stream(path); stream >= 0 ; stream = next) {
820		struct stream *s = input_streams + stream;
821
822		next = s->next_stream;
823		if (s->once) {
824			if (strcmp(path, s->name))
825				continue;
826			return 1;
827		}
828		if (s->constant != CONSTANT_FILE_YES)
829			continue;
830		if (strcmp(path, s->name))
831			continue;
832		if (s->protect && !lookup_macro(s->protect))
833			continue;
834		return 1;
835	}
836	return 0;
837}
838
839/* Handle include of header files.
 * The relevant options are made compatible with gcc. The only options that
 * are not supported are -iwithprefix and friends.
 *
 * Five sets of include paths are known:
844 * quote_includepath:	Path to search when using #include "file.h"
845 * angle_includepath:	Paths to search when using #include <file.h>
846 * isys_includepath:	Paths specified with -isystem, come before the
847 *			built-in system include paths. Gcc would suppress
848 *			warnings from system headers. Here we separate
849 *			them from the angle_ ones to keep search ordering.
850 *
851 * sys_includepath:	Built-in include paths.
852 * dirafter_includepath Paths added with -dirafter.
853 *
854 * The above is implemented as one array with pointers
855 *                         +--------------+
856 * quote_includepath --->  |              |
857 *                         +--------------+
858 *                         |              |
859 *                         +--------------+
860 * angle_includepath --->  |              |
861 *                         +--------------+
862 * isys_includepath  --->  |              |
863 *                         +--------------+
864 * sys_includepath   --->  |              |
865 *                         +--------------+
866 * dirafter_includepath -> |              |
867 *                         +--------------+
868 *
869 * -I dir insert dir just before isys_includepath and move the rest
870 * -I- makes all dirs specified with -I before to quote dirs only and
871 *   angle_includepath is set equal to isys_includepath.
872 * -nostdinc removes all sys dirs by storing NULL in entry pointed
873 *   to by * sys_includepath. Note that this will reset all dirs built-in
874 *   and added before -nostdinc by -isystem and -idirafter.
875 * -isystem dir adds dir where isys_includepath points adding this dir as
876 *   first systemdir
877 * -idirafter dir adds dir to the end of the list
878 */
879
880static void set_stream_include_path(struct stream *stream)
881{
882	const char *path = stream->path;
883	if (!path) {
884		const char *p = strrchr(stream->name, '/');
885		path = "";
886		if (p) {
887			int len = p - stream->name + 1;
888			char *m = malloc(len+1);
889			/* This includes the final "/" */
890			memcpy(m, stream->name, len);
891			m[len] = 0;
892			path = m;
893		}
894		stream->path = path;
895	}
896	includepath[0] = path;
897}
898
#ifndef PATH_MAX
#define PATH_MAX 4096	// for Hurd where it's not defined
#endif

/*
 * Try to include 'filename' from directory 'path'.
 *
 * 'flen' is the number of bytes of 'filename' to use and must cover the
 * terminating NUL.  A '/' is inserted between 'path' and 'filename'
 * when 'path' is non-empty and does not already end in one.
 *
 * Returns 1 when the file was handled (tokenized into '*where', or
 * skipped because already_tokenized() says so) and 0 when it could not
 * be opened or the combined name does not fit in PATH_MAX bytes.
 */
static int try_include(const char *path, const char *filename, int flen, struct token **where, const char **next_path)
{
	int fd;
	size_t plen = strlen(path);
	static char fullname[PATH_MAX];

	/* +1 for the '/' that may have to be inserted */
	if (flen < 0 || plen + 1 + (size_t)flen > sizeof(fullname))
		return 0;

	memcpy(fullname, path, plen);
	if (plen && path[plen-1] != '/') {
		fullname[plen] = '/';
		plen++;
	}
	memcpy(fullname+plen, filename, flen);
	if (already_tokenized(fullname))
		return 1;
	fd = open(fullname, O_RDONLY);
	if (fd >= 0) {
		char *streamname = xmemdup(fullname, plen + flen);
		*where = tokenize(streamname, fd, *where, next_path);
		close(fd);
		return 1;
	}
	return 0;
}
926
/*
 * Try each directory of the NULL-terminated list 'pptr' in turn.
 * try_include() is handed the position just past the directory being
 * tried.  Returns 1 as soon as one directory works, 0 when the list is
 * exhausted.  'token' is not used.
 */
static int do_include_path(const char **pptr, struct token **list, struct token *token, const char *filename, int flen)
{
	for (;;) {
		const char *dir = *pptr++;

		if (dir == NULL)
			return 0;
		if (try_include(dir, filename, flen, list, pptr))
			return 1;
	}
}
938
939static int free_preprocessor_line(struct token *token)
940{
941	while (token_type(token) != TOKEN_EOF) {
942		struct token *free = token;
943		token = token->next;
944		__free_token(free);
945	};
946	return 1;
947}
948
949const char *find_include(const char *skip, const char *look_for)
950{
951	DIR *dp;
952	struct dirent *entry;
953	struct stat statbuf;
954	const char *ret;
955	char cwd[PATH_MAX];
956	static char buf[PATH_MAX + 1];
957
958	dp = opendir(".");
959	if (!dp)
960		return NULL;
961
962	if (!getcwd(cwd, sizeof(cwd)))
963		goto close;
964
965	while ((entry = readdir(dp))) {
966		lstat(entry->d_name, &statbuf);
967
968		if (strcmp(entry->d_name, look_for) == 0) {
969			snprintf(buf, sizeof(buf), "%s/%s", cwd, entry->d_name);
970			closedir(dp);
971			return buf;
972		}
973
974		if (S_ISDIR(statbuf.st_mode)) {
975			/* Found a directory, but ignore . and .. */
976			if (strcmp(".", entry->d_name) == 0 ||
977			    strcmp("..", entry->d_name) == 0 ||
978			    strcmp(skip, entry->d_name) == 0)
979				continue;
980
981			chdir(entry->d_name);
982			ret = find_include("", look_for);
983			chdir("..");
984			if (ret) {
985				closedir(dp);
986				return ret;
987			}
988		}
989	}
990close:
991	closedir(dp);
992
993	return NULL;
994}
995
996const char *search_dir(const char *stop, const char *look_for)
997{
998	char cwd[PATH_MAX];
999	int len;
1000	const char *ret;
1001	int cnt = 0;
1002
1003	if (!getcwd(cwd, sizeof(cwd)))
1004		return NULL;
1005
1006	len = strlen(cwd);
1007	while (len >= 0) {
1008		ret = find_include(cnt++ ? cwd + len + 1 : "", look_for);
1009		if (ret)
1010			return ret;
1011
1012		if (strcmp(cwd, stop) == 0 ||
1013		    strcmp(cwd, "/usr/include") == 0 ||
1014		    strcmp(cwd, "/usr/local/include") == 0 ||
1015		    strlen(cwd) <= 10 ||  /* heck...  don't search /usr/lib/ */
1016		    strcmp(cwd, "/") == 0)
1017			return NULL;
1018
1019		while (--len >= 0) {
1020			if (cwd[len] == '/') {
1021				cwd[len] = '\0';
1022				break;
1023			}
1024		}
1025
1026		chdir("..");
1027	}
1028	return NULL;
1029}
1030
1031static void use_best_guess_header_file(struct token *token, const char *filename, struct token **list)
1032{
1033	char cwd[PATH_MAX];
1034	char dir_part[PATH_MAX];
1035	const char *file_part;
1036	const char *include_name;
1037	static int cnt;
1038	int len;
1039
1040	/* Avoid guessing includes recursively. */
1041	if (cnt++ > 1000)
1042		return;
1043
1044	if (!filename || filename[0] == '\0')
1045		return;
1046
1047	file_part = filename;
1048	while ((filename = strchr(filename, '/'))) {
1049		++filename;
1050		if (filename[0])
1051			file_part = filename;
1052	}
1053
1054	snprintf(dir_part, sizeof(dir_part), "%s", stream_name(token->pos.stream));
1055	len = strlen(dir_part);
1056	while (--len >= 0) {
1057		if (dir_part[len] == '/') {
1058			dir_part[len] = '\0';
1059			break;
1060		}
1061	}
1062	if (len < 0)
1063		sprintf(dir_part, ".");
1064
1065	if (!getcwd(cwd, sizeof(cwd)))
1066		return;
1067
1068	chdir(dir_part);
1069	include_name = search_dir(cwd, file_part);
1070	chdir(cwd);
1071	if (!include_name)
1072		return;
1073	sparse_error(token->pos, "using '%s'", include_name);
1074
1075	try_include("", include_name, strlen(include_name), list, includepath);
1076}
1077
1078static int handle_include_path(struct stream *stream, struct token **list, struct token *token, int how)
1079{
1080	const char *filename;
1081	struct token *next;
1082	const char **path;
1083	int expect;
1084	int flen;
1085
1086	next = token->next;
1087	expect = '>';
1088	if (!match_op(next, '<')) {
1089		expand_list(&token->next);
1090		expect = 0;
1091		next = token;
1092		if (match_op(token->next, '<')) {
1093			next = token->next;
1094			expect = '>';
1095		}
1096	}
1097
1098	token = next->next;
1099	filename = token_name_sequence(token, expect, token);
1100	flen = strlen(filename) + 1;
1101
1102	/* Absolute path? */
1103	if (filename[0] == '/') {
1104		if (try_include("", filename, flen, list, includepath))
1105			return 0;
1106		goto out;
1107	}
1108
1109	switch (how) {
1110	case 1:
1111		path = stream->next_path;
1112		break;
1113	case 2:
1114		includepath[0] = "";
1115		path = includepath;
1116		break;
1117	default:
1118		/* Dir of input file is first dir to search for quoted includes */
1119		set_stream_include_path(stream);
1120		path = expect ? angle_includepath : quote_includepath;
1121		break;
1122	}
1123	/* Check the standard include paths.. */
1124	if (do_include_path(path, list, token, filename, flen))
1125		return 0;
1126out:
1127	sparse_error(token->pos, "unable to open '%s'", filename);
1128	use_best_guess_header_file(token, filename, list);
1129	return 0;
1130}
1131
/* Include handler using search mode 0 (angle or quote list, stream directory first). */
static int handle_include(struct stream *stream, struct token **list, struct token *token)
{
	const int how = 0;

	return handle_include_path(stream, list, token, how);
}
1136
/* Include handler using search mode 1 (continue from the stream's next_path). */
static int handle_include_next(struct stream *stream, struct token **list, struct token *token)
{
	const int how = 1;

	return handle_include_path(stream, list, token, how);
}
1141
/* Include handler using search mode 2 (includepath[] with an empty first entry). */
static int handle_argv_include(struct stream *stream, struct token **list, struct token *token)
{
	const int how = 2;

	return handle_include_path(stream, list, token, how);
}
1146
1147static int token_different(struct token *t1, struct token *t2)
1148{
1149	int different;
1150
1151	if (token_type(t1) != token_type(t2))
1152		return 1;
1153
1154	switch (token_type(t1)) {
1155	case TOKEN_IDENT:
1156		different = t1->ident != t2->ident;
1157		break;
1158	case TOKEN_ARG_COUNT:
1159	case TOKEN_UNTAINT:
1160	case TOKEN_CONCAT:
1161	case TOKEN_GNU_KLUDGE:
1162		different = 0;
1163		break;
1164	case TOKEN_NUMBER:
1165		different = strcmp(t1->number, t2->number);
1166		break;
1167	case TOKEN_SPECIAL:
1168		different = t1->special != t2->special;
1169		break;
1170	case TOKEN_MACRO_ARGUMENT:
1171	case TOKEN_QUOTED_ARGUMENT:
1172	case TOKEN_STR_ARGUMENT:
1173		different = t1->argnum != t2->argnum;
1174		break;
1175	case TOKEN_CHAR_EMBEDDED_0 ... TOKEN_CHAR_EMBEDDED_3:
1176	case TOKEN_WIDE_CHAR_EMBEDDED_0 ... TOKEN_WIDE_CHAR_EMBEDDED_3:
1177		different = memcmp(t1->embedded, t2->embedded, 4);
1178		break;
1179	case TOKEN_CHAR:
1180	case TOKEN_WIDE_CHAR:
1181	case TOKEN_STRING:
1182	case TOKEN_WIDE_STRING: {
1183		struct string *s1, *s2;
1184
1185		s1 = t1->string;
1186		s2 = t2->string;
1187		different = 1;
1188		if (s1->length != s2->length)
1189			break;
1190		different = memcmp(s1->data, s2->data, s1->length);
1191		break;
1192	}
1193	default:
1194		different = 1;
1195		break;
1196	}
1197	return different;
1198}
1199
1200static int token_list_different(struct token *list1, struct token *list2)
1201{
1202	for (;;) {
1203		if (list1 == list2)
1204			return 0;
1205		if (!list1 || !list2)
1206			return 1;
1207		if (token_different(list1, list2))
1208			return 1;
1209		list1 = list1->next;
1210		list2 = list2->next;
1211	}
1212}
1213
1214static inline void set_arg_count(struct token *token)
1215{
1216	token_type(token) = TOKEN_ARG_COUNT;
1217	token->count.normal = token->count.quoted =
1218	token->count.str = token->count.vararg = 0;
1219}
1220
/*
 * Parse the parameter list of a function-like macro definition.
 *
 * @list is the first token of the parameter list (the caller passes the
 * '(' token).  It is rewritten in place into a TOKEN_ARG_COUNT token
 * whose 'normal' counter is incremented once per parameter.  The ','
 * or ')' (or '...') following each parameter name is rewritten into a
 * TOKEN_ARG_COUNT token as well, and the list is cut off with
 * eof_token_entry after the last such token.
 *
 * Returns the token following the closing ')', or NULL after an error
 * has been reported.
 */
static struct token *parse_arguments(struct token *list)
{
	struct token *arg = list->next, *next = list;
	struct argcount *count = &list->count;

	set_arg_count(list);

	/* Empty parameter list: "()" */
	if (match_op(arg, ')')) {
		next = arg->next;
		list->next = &eof_token_entry;
		return next;
	}

	while (token_type(arg) == TOKEN_IDENT) {
		if (arg->ident == &__VA_ARGS___ident)
			goto Eva_args;
		/* the counter becoming zero after the increment means it wrapped */
		if (!++count->normal)
			goto Eargs;
		next = arg->next;

		if (match_op(next, ',')) {
			set_arg_count(next);
			arg = next->next;
			continue;
		}

		if (match_op(next, ')')) {
			set_arg_count(next);
			next = next->next;
			/* terminate the parameter list after the rewritten ')' */
			arg->next->next = &eof_token_entry;
			return next;
		}

		/* normal cases are finished here */

		/* named variadic parameter: "name..." must be followed by ')' */
		if (match_op(next, SPECIAL_ELLIPSIS)) {
			if (match_op(next->next, ')')) {
				set_arg_count(next);
				next->count.vararg = 1;
				next = next->next;
				arg->next->next = &eof_token_entry;
				/* next is the ')' here; return what follows it */
				return next->next;
			}

			arg = next;
			goto Enotclosed;
		}

		if (eof_token(next)) {
			goto Enotclosed;
		} else {
			arg = next;
			goto Ebadstuff;
		}
	}

	/* bare "...": turn it into a parameter named __VA_ARGS__ */
	if (match_op(arg, SPECIAL_ELLIPSIS)) {
		next = arg->next;
		token_type(arg) = TOKEN_IDENT;
		arg->ident = &__VA_ARGS___ident;
		if (!match_op(next, ')'))
			goto Enotclosed;
		if (!++count->normal)
			goto Eargs;
		set_arg_count(next);
		next->count.vararg = 1;
		next = next->next;
		arg->next->next = &eof_token_entry;
		return next;
	}

	if (eof_token(arg)) {
		/* report at the last token seen before the end of the line */
		arg = next;
		goto Enotclosed;
	}
	if (match_op(arg, ','))
		goto Emissing;
	else
		goto Ebadstuff;


Emissing:
	sparse_error(arg->pos, "parameter name missing");
	return NULL;
Ebadstuff:
	sparse_error(arg->pos, "\"%s\" may not appear in macro parameter list",
		show_token(arg));
	return NULL;
Enotclosed:
	sparse_error(arg->pos, "missing ')' in macro parameter list");
	return NULL;
Eva_args:
	sparse_error(arg->pos, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
	return NULL;
Eargs:
	sparse_error(arg->pos, "too many arguments in macro definition");
	return NULL;
}
1319
/*
 * If @token is an identifier naming one of the macro parameters in
 * @arglist, rewrite it into a parameter reference of kind @type.
 *
 * @arglist is walked two tokens at a time starting at its second
 * token: a parameter name followed by its TOKEN_ARG_COUNT token.  On a
 * match, token->argnum is set to the parameter index and the use
 * counter matching @type is incremented on that count token.
 *
 * Returns:
 *   0  - no arglist, @token is not an identifier, or no parameter matches
 *   1  - matched a parameter whose count token has vararg clear
 *   2  - matched a parameter whose count token has vararg set
 *  -1  - the use counter became zero after the increment; @token is
 *        turned into TOKEN_ERROR
 */
static int try_arg(struct token *token, enum token_type type, struct token *arglist)
{
	struct ident *ident = token->ident;
	int nr;

	if (!arglist || token_type(token) != TOKEN_IDENT)
		return 0;

	/* skip the leading count token */
	arglist = arglist->next;

	for (nr = 0; !eof_token(arglist); nr++, arglist = arglist->next->next) {
		if (arglist->ident == ident) {
			struct argcount *count = &arglist->next->count;
			int n;

			token->argnum = nr;
			token_type(token) = type;
			switch (type) {
			case TOKEN_MACRO_ARGUMENT:
				n = ++count->normal;
				break;
			case TOKEN_QUOTED_ARGUMENT:
				n = ++count->quoted;
				break;
			default:
				n = ++count->str;
			}
			if (n)
				return count->vararg ? 2 : 1;
			/*
			 * XXX - need saner handling of that
			 * (>= 1024 instances of argument)
			 */
			token_type(token) = TOKEN_ERROR;
			return -1;
		}
	}
	return 0;
}
1359
1360static struct token *handle_hash(struct token **p, struct token *arglist)
1361{
1362	struct token *token = *p;
1363	if (arglist) {
1364		struct token *next = token->next;
1365		if (!try_arg(next, TOKEN_STR_ARGUMENT, arglist))
1366			goto Equote;
1367		next->pos.whitespace = token->pos.whitespace;
1368		__free_token(token);
1369		token = *p = next;
1370	} else {
1371		token->pos.noexpand = 1;
1372	}
1373	return token;
1374
1375Equote:
1376	sparse_error(token->pos, "'#' is not followed by a macro parameter");
1377	return NULL;
1378}
1379
/*
 * Process a chain of '##' concatenations in a macro body.
 *
 * On entry token->next is ##.  @token is the left operand of the first
 * '##'.  Every operand that names a macro parameter becomes a
 * TOKEN_QUOTED_ARGUMENT, runs of consecutive '##' are collapsed into
 * one, and each remaining '##' becomes a TOKEN_CONCAT.
 *
 * Returns the last operand of the chain, or NULL after reporting an
 * error.
 */
static struct token *handle_hashhash(struct token *token, struct token *arglist)
{
	struct token *last = token;
	struct token *concat;
	/*
	 * state is 1 when the previous operand was a ',', and takes the
	 * try_arg() result when a parameter directly follows such a ','
	 * (2 meaning the parameter is the variadic one).
	 */
	int state = match_op(token, ',');

	try_arg(token, TOKEN_QUOTED_ARGUMENT, arglist);

	while (1) {
		struct token *t;
		int is_arg;

		/* eat duplicate ## */
		concat = token->next;
		while (match_op(t = concat->next, SPECIAL_HASHHASH)) {
			token->next = t;
			__free_token(concat);
			concat = t;
		}
		token_type(concat) = TOKEN_CONCAT;

		/* '##' needs a right-hand operand */
		if (eof_token(t))
			goto Econcat;

		/* right-hand operand of the form "#param" */
		if (match_op(t, '#')) {
			t = handle_hash(&concat->next, arglist);
			if (!t)
				return NULL;
		}

		is_arg = try_arg(t, TOKEN_QUOTED_ARGUMENT, arglist);

		if (state == 1 && is_arg) {
			/* ", ## param": keep 'last' pointing at the ',' */
			state = is_arg;
		} else {
			last = t;
			state = match_op(t, ',');
		}

		token = t;
		if (!match_op(token->next, SPECIAL_HASHHASH))
			break;
	}
	/* handle GNU ,##__VA_ARGS__ kludge, in all its weirdness */
	if (state == 2)
		token_type(last) = TOKEN_GNU_KLUDGE;
	return token;

Econcat:
	sparse_error(concat->pos, "'##' cannot appear at the ends of macro expansion");
	return NULL;
}
1433
/*
 * Prepare the body of a macro definition for later expansion.
 *
 * Walks @expansion, processing '#' and '##' operators and rewriting
 * identifiers that name a parameter of @arglist into parameter
 * reference tokens.  String tokens in the body are flagged immutable.
 * A TOKEN_UNTAINT token carrying @name is inserted in front of the
 * token that terminates the body.
 *
 * Returns the (possibly new) head of the body, or NULL after
 * reporting an error.
 */
static struct token *parse_expansion(struct token *expansion, struct token *arglist, struct ident *name)
{
	struct token *token = expansion;
	struct token **p;

	/* '##' may not start the body */
	if (match_op(token, SPECIAL_HASHHASH))
		goto Econcat;

	/* p always points at the link through which 'token' was reached */
	for (p = &expansion; !eof_token(token); p = &token->next, token = *p) {
		if (match_op(token, '#')) {
			token = handle_hash(p, arglist);
			if (!token)
				return NULL;
		}
		if (match_op(token->next, SPECIAL_HASHHASH)) {
			/* returns the last operand of the '##' chain */
			token = handle_hashhash(token, arglist);
			if (!token)
				return NULL;
		} else {
			try_arg(token, TOKEN_MACRO_ARGUMENT, arglist);
		}
		switch (token_type(token)) {
		case TOKEN_ERROR:
			/* set by try_arg() when a use counter overflowed */
			goto Earg;

		case TOKEN_STRING:
		case TOKEN_WIDE_STRING:
			token->string->immutable = 1;
			break;
		}
	}
	/* insert the untaint marker just before the terminating token */
	token = alloc_token(&expansion->pos);
	token_type(token) = TOKEN_UNTAINT;
	token->ident = name;
	token->next = *p;
	*p = token;
	return expansion;

Econcat:
	sparse_error(token->pos, "'##' cannot appear at the ends of macro expansion");
	return NULL;
Earg:
	sparse_error(token->pos, "too many instances of argument in body");
	return NULL;
}
1479
/*
 * Install a macro definition.
 *
 * @pos:       position recorded for a newly allocated symbol and used
 *             for the redefinition warning
 * @token:     the "define" directive token, or NULL; freed when the
 *             definition is stored
 * @name:      macro name
 * @arglist:   parsed parameter list, or NULL
 * @expansion: macro body; it is run through parse_expansion() first
 * @attr:      one of the SYM_ATTR_* levels; a definition with a lower
 *             attr than the existing symbol is ignored
 *
 * Returns 0 when the symbol now refers to @expansion/@arglist, and 1
 * when nothing was stored (parse error, ignored definition, or an
 * identical definition on an existing file-scope symbol).
 */
static int do_define(struct position pos, struct token *token, struct ident *name,
		     struct token *arglist, struct token *expansion, int attr)
{
	struct symbol *sym;
	int ret = 1;

	expansion = parse_expansion(expansion, arglist, name);
	if (!expansion)
		return 1;

	sym = lookup_symbol(name, NS_MACRO | NS_UNDEF);
	if (sym) {
		int clean;

		/* an existing entry with a higher attr wins */
		if (attr < sym->attr)
			goto out;

		/* same attr level and currently defined (not undefined) */
		clean = (attr == sym->attr && sym->namespace == NS_MACRO);

		if (token_list_different(sym->expansion, expansion) ||
		    token_list_different(sym->arglist, arglist)) {
			ret = 0;
			if ((clean && attr == SYM_ATTR_NORMAL)
					|| sym->used_in == file_scope) {
				warning(pos, "preprocessor token %.*s redefined",
						name->len, name->name);
				info(sym->pos, "this was the original definition");
			}
		} else if (clean)
			/* identical redefinition: nothing to do */
			goto out;
	}

	/* no symbol yet, or the existing one belongs to another scope */
	if (!sym || sym->scope != file_scope) {
		sym = alloc_symbol(pos, SYM_NODE);
		bind_symbol(sym, name, NS_MACRO);
		add_ident(&macros, name);
		ret = 0;
	}

	if (!ret) {
		sym->expansion = expansion;
		sym->arglist = arglist;
		if (token) /* Free the "define" token, but not the rest of the line */
			__free_token(token);
	}

	sym->namespace = NS_MACRO;
	sym->used_in = NULL;
	sym->attr = attr;
out:
	return ret;
}
1532
1533///
1534// predefine a macro with a printf-formatted value
1535// @name: the name of the macro
1536// @weak: 0/1 for a normal or a weak define
1537// @fmt: the printf format followed by it's arguments.
1538//
1539// The type of the value is automatically infered:
1540// TOKEN_NUMBER if it starts by a digit, TOKEN_IDENT otherwise.
1541// If @fmt is null or empty, the macro is defined with an empty definition.
1542void predefine(const char *name, int weak, const char *fmt, ...)
1543{
1544	struct ident *ident = built_in_ident(name);
1545	struct token *value = &eof_token_entry;
1546	int attr = weak ? SYM_ATTR_WEAK : SYM_ATTR_NORMAL;
1547
1548	if (fmt && fmt[0]) {
1549		static char buf[256];
1550		va_list ap;
1551
1552		va_start(ap, fmt);
1553		vsnprintf(buf, sizeof(buf), fmt, ap);
1554		va_end(ap);
1555
1556		value = __alloc_token(0);
1557		if (isdigit(buf[0])) {
1558			token_type(value) = TOKEN_NUMBER;
1559			value->number = xstrdup(buf);
1560		} else {
1561			token_type(value) = TOKEN_IDENT;
1562			value->ident = built_in_ident(buf);
1563		}
1564		value->pos.whitespace = 1;
1565		value->next = &eof_token_entry;
1566	}
1567
1568	do_define(value->pos, NULL, ident, NULL, value, attr);
1569}
1570
1571///
1572// like predefine() but only if one of the non-standard dialect is chosen
1573void predefine_nostd(const char *name)
1574{
1575	if ((standard & STANDARD_GNU) || (standard == STANDARD_NONE))
1576		predefine(name, 1, "1");
1577}
1578
1579static int do_handle_define(struct stream *stream, struct token **line, struct token *token, int attr)
1580{
1581	struct token *arglist, *expansion;
1582	struct token *left = token->next;
1583	struct ident *name;
1584
1585	if (token_type(left) != TOKEN_IDENT) {
1586		sparse_error(token->pos, "expected identifier to 'define'");
1587		return 1;
1588	}
1589
1590	name = left->ident;
1591
1592	arglist = NULL;
1593	expansion = left->next;
1594	if (!expansion->pos.whitespace) {
1595		if (match_op(expansion, '(')) {
1596			arglist = expansion;
1597			expansion = parse_arguments(expansion);
1598			if (!expansion)
1599				return 1;
1600		} else if (!eof_token(expansion)) {
1601			warning(expansion->pos,
1602				"no whitespace before object-like macro body");
1603		}
1604	}
1605
1606	return do_define(left->pos, token, name, arglist, expansion, attr);
1607}
1608
1609static int handle_define(struct stream *stream, struct token **line, struct token *token)
1610{
1611	return do_handle_define(stream, line, token, SYM_ATTR_NORMAL);
1612}
1613
1614static int handle_weak_define(struct stream *stream, struct token **line, struct token *token)
1615{
1616	return do_handle_define(stream, line, token, SYM_ATTR_WEAK);
1617}
1618
1619static int handle_strong_define(struct stream *stream, struct token **line, struct token *token)
1620{
1621	return do_handle_define(stream, line, token, SYM_ATTR_STRONG);
1622}
1623
1624static int do_handle_undef(struct stream *stream, struct token **line, struct token *token, int attr)
1625{
1626	struct token *left = token->next;
1627	struct symbol *sym;
1628
1629	if (token_type(left) != TOKEN_IDENT) {
1630		sparse_error(token->pos, "expected identifier to 'undef'");
1631		return 1;
1632	}
1633
1634	sym = lookup_symbol(left->ident, NS_MACRO | NS_UNDEF);
1635	if (sym) {
1636		if (attr < sym->attr)
1637			return 1;
1638		if (attr == sym->attr && sym->namespace == NS_UNDEF)
1639			return 1;
1640	} else if (attr <= SYM_ATTR_NORMAL)
1641		return 1;
1642
1643	if (!sym || sym->scope != file_scope) {
1644		sym = alloc_symbol(left->pos, SYM_NODE);
1645		bind_symbol(sym, left->ident, NS_MACRO);
1646	}
1647
1648	sym->namespace = NS_UNDEF;
1649	sym->used_in = NULL;
1650	sym->attr = attr;
1651
1652	return 1;
1653}
1654
1655static int handle_undef(struct stream *stream, struct token **line, struct token *token)
1656{
1657	return do_handle_undef(stream, line, token, SYM_ATTR_NORMAL);
1658}
1659
1660static int handle_strong_undef(struct stream *stream, struct token **line, struct token *token)
1661{
1662	return do_handle_undef(stream, line, token, SYM_ATTR_STRONG);
1663}
1664
1665static int preprocessor_if(struct stream *stream, struct token *token, int cond)
1666{
1667	token_type(token) = false_nesting ? TOKEN_SKIP_GROUPS : TOKEN_IF;
1668	free_preprocessor_line(token->next);
1669	token->next = stream->top_if;
1670	stream->top_if = token;
1671	if (false_nesting || cond != 1)
1672		false_nesting++;
1673	return 0;
1674}
1675
1676static int handle_ifdef(struct stream *stream, struct token **line, struct token *token)
1677{
1678	struct token *next = token->next;
1679	int arg;
1680	if (token_type(next) == TOKEN_IDENT) {
1681		arg = token_defined(next);
1682	} else {
1683		dirty_stream(stream);
1684		if (!false_nesting)
1685			sparse_error(token->pos, "expected preprocessor identifier");
1686		arg = -1;
1687	}
1688	return preprocessor_if(stream, token, arg);
1689}
1690
1691static int handle_ifndef(struct stream *stream, struct token **line, struct token *token)
1692{
1693	struct token *next = token->next;
1694	int arg;
1695	if (token_type(next) == TOKEN_IDENT) {
1696		if (!stream->dirty && !stream->ifndef) {
1697			if (!stream->protect) {
1698				stream->ifndef = token;
1699				stream->protect = next->ident;
1700			} else if (stream->protect == next->ident) {
1701				stream->ifndef = token;
1702				stream->dirty = 1;
1703			}
1704		}
1705		arg = !token_defined(next);
1706	} else {
1707		dirty_stream(stream);
1708		if (!false_nesting)
1709			sparse_error(token->pos, "expected preprocessor identifier");
1710		arg = -1;
1711	}
1712
1713	return preprocessor_if(stream, token, arg);
1714}
1715
1716static const char *show_token_sequence(struct token *token, int quote);
1717
/*
 * Expression handling for #if and #elif; it differs from normal expansion
 * due to special treatment of "defined".
 *
 * The token list at *where is first rewritten in place by a small
 * state machine, then parsed as a constant expression.  Returns 1 if
 * the expression evaluates to non-zero, 0 otherwise.
 *
 * States:
 *   0    - ordinary tokens; identifiers are macro-expanded, and those
 *          left unexpanded become TOKEN_ZERO_IDENT
 *   1-3  - "defined X" / "defined ( X )"
 *   4,5  - "__has_builtin ( X )"
 *   6,7  - "__has_attribute ( X )"
 *   8    - dropping the token after X of a __has_*() query
 */
static int expression_value(struct token **where)
{
	struct expression *expr;
	struct token *p;
	/* 'beginning' remembers the link pointing at the operator keyword */
	struct token **list = where, **beginning = NULL;
	long long value;
	int state = 0;

	while (!eof_token(p = scan_next(list))) {
		switch (state) {
		case 0:
			if (token_type(p) != TOKEN_IDENT)
				break;
			if (p->ident == &defined_ident) {
				state = 1;
				beginning = list;
				break;
			} else if (p->ident == &__has_builtin_ident) {
				state = 4;
				beginning = list;
				break;
			} else if (p->ident == &__has_attribute_ident) {
				state = 6;
				beginning = list;
				break;
			}
			/* zero result: rescan from the same link */
			if (!expand_one_symbol(list))
				continue;
			if (token_type(p) != TOKEN_IDENT)
				break;
			token_type(p) = TOKEN_ZERO_IDENT;
			break;
		case 1:
			/* token after "defined": either '(' or the operand itself */
			if (match_op(p, '(')) {
				state = 2;
			} else {
				state = 0;
				replace_with_defined(p);
				/* unlink the "defined" keyword */
				*beginning = p;
			}
			break;
		case 2:
			/* operand inside the parentheses */
			if (token_type(p) == TOKEN_IDENT)
				state = 3;
			else
				state = 0;
			replace_with_defined(p);
			/* unlink "defined" and '(' */
			*beginning = p;
			break;
		case 3:
			state = 0;
			if (!match_op(p, ')'))
				sparse_error(p->pos, "missing ')' after \"defined\"");
			/* drop this token from the list */
			*list = p->next;
			continue;

		// __has_builtin(x) or __has_attribute(x)
		case 4: case 6:
			if (match_op(p, '(')) {
				state++;
			} else {
				sparse_error(p->pos, "missing '(' after \"__has_%s\"",
					state == 4 ? "builtin" : "attribute");
				state = 0;
			}
			/* unlink the __has_*() keyword */
			*beginning = p;
			break;
		case 5: case 7:
			if (token_type(p) != TOKEN_IDENT) {
				sparse_error(p->pos, "identifier expected");
				state = 0;
				break;
			}
			if (!match_op(p->next, ')'))
				sparse_error(p->pos, "missing ')' after \"__has_%s\"",
					state == 5 ? "builtin" : "attribute");
			if (state == 5)
				replace_with_has_builtin(p);
			else
				replace_with_has_attribute(p);
			state = 8;
			/* unlink the '(' */
			*beginning = p;
			break;
		case 8:
			state = 0;
			/* drop the token following the operand */
			*list = p->next;
			continue;
		}
		list = &p->next;
	}

	p = constant_expression(*where, &expr);
	if (!eof_token(p))
		sparse_error(p->pos, "garbage at end: %s", show_token_sequence(p, 0));
	value = get_expression_value(expr);
	return value != 0;
}
1819
1820static int handle_if(struct stream *stream, struct token **line, struct token *token)
1821{
1822	int value = 0;
1823	if (!false_nesting)
1824		value = expression_value(&token->next);
1825
1826	dirty_stream(stream);
1827	return preprocessor_if(stream, token, value);
1828}
1829
1830static int handle_elif(struct stream * stream, struct token **line, struct token *token)
1831{
1832	struct token *top_if = stream->top_if;
1833	end_group(stream);
1834
1835	if (!top_if) {
1836		nesting_error(stream);
1837		sparse_error(token->pos, "unmatched #elif within stream");
1838		return 1;
1839	}
1840
1841	if (token_type(top_if) == TOKEN_ELSE) {
1842		nesting_error(stream);
1843		sparse_error(token->pos, "#elif after #else");
1844		if (!false_nesting)
1845			false_nesting = 1;
1846		return 1;
1847	}
1848
1849	dirty_stream(stream);
1850	if (token_type(top_if) != TOKEN_IF)
1851		return 1;
1852	if (false_nesting) {
1853		false_nesting = 0;
1854		if (!expression_value(&token->next))
1855			false_nesting = 1;
1856	} else {
1857		false_nesting = 1;
1858		token_type(top_if) = TOKEN_SKIP_GROUPS;
1859	}
1860	return 1;
1861}
1862
1863static int handle_else(struct stream *stream, struct token **line, struct token *token)
1864{
1865	struct token *top_if = stream->top_if;
1866	end_group(stream);
1867
1868	if (!top_if) {
1869		nesting_error(stream);
1870		sparse_error(token->pos, "unmatched #else within stream");
1871		return 1;
1872	}
1873
1874	if (token_type(top_if) == TOKEN_ELSE) {
1875		nesting_error(stream);
1876		sparse_error(token->pos, "#else after #else");
1877	}
1878	if (false_nesting) {
1879		if (token_type(top_if) == TOKEN_IF)
1880			false_nesting = 0;
1881	} else {
1882		false_nesting = 1;
1883	}
1884	token_type(top_if) = TOKEN_ELSE;
1885	return 1;
1886}
1887
1888static int handle_endif(struct stream *stream, struct token **line, struct token *token)
1889{
1890	struct token *top_if = stream->top_if;
1891	end_group(stream);
1892	if (!top_if) {
1893		nesting_error(stream);
1894		sparse_error(token->pos, "unmatched #endif in stream");
1895		return 1;
1896	}
1897	if (false_nesting)
1898		false_nesting--;
1899	stream->top_if = top_if->next;
1900	__free_token(top_if);
1901	return 1;
1902}
1903
1904static int handle_warning(struct stream *stream, struct token **line, struct token *token)
1905{
1906	warning(token->pos, "%s", show_token_sequence(token->next, 0));
1907	return 1;
1908}
1909
1910static int handle_error(struct stream *stream, struct token **line, struct token *token)
1911{
1912	sparse_error(token->pos, "%s", show_token_sequence(token->next, 0));
1913	return 1;
1914}
1915
1916static int handle_nostdinc(struct stream *stream, struct token **line, struct token *token)
1917{
1918	/*
1919	 * Do we have any non-system includes?
1920	 * Clear them out if so..
1921	 */
1922	*sys_includepath = NULL;
1923	return 1;
1924}
1925
/*
 * Adjust the include path section pointers for an entry about to be
 * inserted at position *where.
 *
 * Each section pointer located at or after the insertion position is
 * moved up by one slot.  The pointers are visited in the order
 * dirafter, sys, isys, angle, and the walk stops right after the
 * pointer that @where itself designates.
 */
static inline void update_inc_ptrs(const char ***where)
{

	if (*where <= dirafter_includepath) {
		dirafter_includepath++;
		/* If this was the entry that we prepend, don't
		 * rise the lower entries, even if they are at
		 * the same level. */
		if (where == &dirafter_includepath)
			return;
	}
	if (*where <= sys_includepath) {
		sys_includepath++;
		if (where == &sys_includepath)
			return;
	}
	if (*where <= isys_includepath) {
		isys_includepath++;
		if (where == &isys_includepath)
			return;
	}

	/* angle_includepath is actually never updated, since we
	 * don't support -iquote right now. May change some day. */
	if (*where <= angle_includepath) {
		angle_includepath++;
		if (where == &angle_includepath)
			return;
	}
}
1956
1957/* Add a path before 'where' and update the pointers associated with the
1958 * includepath array */
1959static void add_path_entry(struct token *token, const char *path,
1960	const char ***where)
1961{
1962	const char **dst;
1963	const char *next;
1964
1965	/* Need one free entry.. */
1966	if (includepath[INCLUDEPATHS-2])
1967		error_die(token->pos, "too many include path entries");
1968
1969	/* check that this is not a duplicate */
1970	dst = includepath;
1971	while (*dst) {
1972		if (strcmp(*dst, path) == 0)
1973			return;
1974		dst++;
1975	}
1976	next = path;
1977	dst = *where;
1978
1979	update_inc_ptrs(where);
1980
1981	/*
1982	 * Move them all up starting at dst,
1983	 * insert the new entry..
1984	 */
1985	do {
1986		const char *tmp = *dst;
1987		*dst = next;
1988		next = tmp;
1989		dst++;
1990	} while (next);
1991}
1992
1993static int handle_add_include(struct stream *stream, struct token **line, struct token *token)
1994{
1995	for (;;) {
1996		token = token->next;
1997		if (eof_token(token))
1998			return 1;
1999		if (token_type(token) != TOKEN_STRING) {
2000			warning(token->pos, "expected path string");
2001			return 1;
2002		}
2003		add_path_entry(token, token->string->data, &isys_includepath);
2004	}
2005}
2006
2007static int handle_add_isystem(struct stream *stream, struct token **line, struct token *token)
2008{
2009	for (;;) {
2010		token = token->next;
2011		if (eof_token(token))
2012			return 1;
2013		if (token_type(token) != TOKEN_STRING) {
2014			sparse_error(token->pos, "expected path string");
2015			return 1;
2016		}
2017		add_path_entry(token, token->string->data, &sys_includepath);
2018	}
2019}
2020
2021static int handle_add_system(struct stream *stream, struct token **line, struct token *token)
2022{
2023	for (;;) {
2024		token = token->next;
2025		if (eof_token(token))
2026			return 1;
2027		if (token_type(token) != TOKEN_STRING) {
2028			sparse_error(token->pos, "expected path string");
2029			return 1;
2030		}
2031		add_path_entry(token, token->string->data, &dirafter_includepath);
2032	}
2033}
2034
2035/* Add to end on includepath list - no pointer updates */
2036static void add_dirafter_entry(struct token *token, const char *path)
2037{
2038	const char **dst = includepath;
2039
2040	/* Need one free entry.. */
2041	if (includepath[INCLUDEPATHS-2])
2042		error_die(token->pos, "too many include path entries");
2043
2044	/* Add to the end */
2045	while (*dst)
2046		dst++;
2047	*dst = path;
2048	dst++;
2049	*dst = NULL;
2050}
2051
2052static int handle_add_dirafter(struct stream *stream, struct token **line, struct token *token)
2053{
2054	for (;;) {
2055		token = token->next;
2056		if (eof_token(token))
2057			return 1;
2058		if (token_type(token) != TOKEN_STRING) {
2059			sparse_error(token->pos, "expected path string");
2060			return 1;
2061		}
2062		add_dirafter_entry(token, token->string->data);
2063	}
2064}
2065
2066static int handle_split_include(struct stream *stream, struct token **line, struct token *token)
2067{
2068	/*
2069	 * -I-
2070	 *  From info gcc:
2071	 *  Split the include path.  Any directories specified with `-I'
2072	 *  options before `-I-' are searched only for headers requested with
2073	 *  `#include "FILE"'; they are not searched for `#include <FILE>'.
2074	 *  If additional directories are specified with `-I' options after
2075	 *  the `-I-', those directories are searched for all `#include'
2076	 *  directives.
2077	 *  In addition, `-I-' inhibits the use of the directory of the current
2078	 *  file directory as the first search directory for `#include "FILE"'.
2079	 */
2080	quote_includepath = includepath+1;
2081	angle_includepath = sys_includepath;
2082	return 1;
2083}
2084
2085/*
2086 * We replace "#pragma xxx" with "__pragma__" in the token
2087 * stream. Just as an example.
2088 *
2089 * We'll just #define that away for now, but the theory here
2090 * is that we can use this to insert arbitrary token sequences
2091 * to turn the pragmas into internal front-end sequences for
2092 * when we actually start caring about them.
2093 *
2094 * So eventually this will turn into some kind of extended
2095 * __attribute__() like thing, except called __pragma__(xxx).
2096 */
2097static int handle_pragma(struct stream *stream, struct token **line, struct token *token)
2098{
2099	struct token *next = *line;
2100
2101	if (match_ident(token->next, &once_ident) && eof_token(token->next->next)) {
2102		stream->once = 1;
2103		return 1;
2104	}
2105	token->ident = &pragma_ident;
2106	token->pos.newline = 1;
2107	token->pos.whitespace = 1;
2108	token->pos.pos = 1;
2109	*line = token;
2110	token->next = next;
2111	return 0;
2112}
2113
/*
 * "#line" is accepted but has no effect for now.
 */
static int handle_line(struct stream *stream, struct token **line, struct token *token)
{
	(void)stream;
	(void)line;
	(void)token;

	return 1;
}
2121
/* "#ident" is accepted but has no effect. */
static int handle_ident(struct stream *stream, struct token **line, struct token *token)
{
	(void)stream;
	(void)line;
	(void)token;

	return 1;
}
2126
2127static int handle_nondirective(struct stream *stream, struct token **line, struct token *token)
2128{
2129	sparse_error(token->pos, "unrecognized preprocessor line '%s'", show_token_sequence(token, 0));
2130	return 1;
2131}
2132
2133
/*
 * Register the preprocessor's built-in names.
 *
 * Three tables are installed as symbols of a stream named
 * "preprocessor":
 *  - normal[]:  directive handlers, flagged sym->normal = 1
 *  - special[]: the conditional directives, flagged sym->normal = 0
 *  - dynamic[]: macros whose expansion is produced by an expander
 *               callback
 * The __COUNTER__ value is reset as well.
 */
static void init_preprocessor(void)
{
	int i;
	int stream = init_stream("preprocessor", -1, includepath);
	static struct {
		const char *name;
		int (*handler)(struct stream *, struct token **, struct token *);
	} normal[] = {
		{ "define",		handle_define },
		{ "weak_define",	handle_weak_define },
		{ "strong_define",	handle_strong_define },
		{ "undef",		handle_undef },
		{ "strong_undef",	handle_strong_undef },
		{ "warning",		handle_warning },
		{ "error",		handle_error },
		{ "include",		handle_include },
		{ "include_next",	handle_include_next },
		{ "pragma",		handle_pragma },
		{ "line",		handle_line },
		{ "ident",		handle_ident },

		// our internal preprocessor tokens
		{ "nostdinc",	   handle_nostdinc },
		{ "add_include",   handle_add_include },
		{ "add_isystem",   handle_add_isystem },
		{ "add_system",    handle_add_system },
		{ "add_dirafter",  handle_add_dirafter },
		{ "split_include", handle_split_include },
		{ "argv_include",  handle_argv_include },
	}, special[] = {
		{ "ifdef",	handle_ifdef },
		{ "ifndef",	handle_ifndef },
		{ "else",	handle_else },
		{ "endif",	handle_endif },
		{ "if",		handle_if },
		{ "elif",	handle_elif },
	};
	static struct {
		const char *name;
		void (*expander)(struct token *);
	} dynamic[] = {
		{ "__LINE__",		expand_line },
		{ "__FILE__",		expand_file },
		{ "__BASE_FILE__",	expand_basefile },
		{ "__DATE__",		expand_date },
		{ "__TIME__",		expand_time },
		{ "__COUNTER__",	expand_counter },
		{ "__INCLUDE_LEVEL__",	expand_include_level },
	};

	/* directives that are ignored inside a skipped group */
	for (i = 0; i < ARRAY_SIZE(normal); i++) {
		struct symbol *sym;
		sym = create_symbol(stream, normal[i].name, SYM_PREPROCESSOR, NS_PREPROCESSOR);
		sym->handler = normal[i].handler;
		sym->normal = 1;
	}
	/* conditional directives: these run even while skipping */
	for (i = 0; i < ARRAY_SIZE(special); i++) {
		struct symbol *sym;
		sym = create_symbol(stream, special[i].name, SYM_PREPROCESSOR, NS_PREPROCESSOR);
		sym->handler = special[i].handler;
		sym->normal = 0;
	}
	/* macros expanded through a callback */
	for (i = 0; i < ARRAY_SIZE(dynamic); i++) {
		struct symbol *sym;
		sym = create_symbol(stream, dynamic[i].name, SYM_NODE, NS_MACRO);
		sym->expander = dynamic[i].expander;
	}

	counter_macro = 0;
}
2204
2205static void handle_preprocessor_line(struct stream *stream, struct token **line, struct token *start)
2206{
2207	int (*handler)(struct stream *, struct token **, struct token *);
2208	struct token *token = start->next;
2209	int is_normal = 1;
2210
2211	if (eof_token(token))
2212		return;
2213
2214	if (token_type(token) == TOKEN_IDENT) {
2215		struct symbol *sym = lookup_symbol(token->ident, NS_PREPROCESSOR);
2216		if (sym) {
2217			handler = sym->handler;
2218			is_normal = sym->normal;
2219		} else {
2220			handler = handle_nondirective;
2221		}
2222	} else if (token_type(token) == TOKEN_NUMBER) {
2223		handler = handle_line;
2224	} else {
2225		handler = handle_nondirective;
2226	}
2227
2228	if (is_normal) {
2229		dirty_stream(stream);
2230		if (false_nesting)
2231			goto out;
2232	}
2233	if (!handler(stream, line, token))	/* all set */
2234		return;
2235
2236out:
2237	free_preprocessor_line(token);
2238}
2239
2240static void preprocessor_line(struct stream *stream, struct token **line)
2241{
2242	struct token *start = *line, *next;
2243	struct token **tp = &start->next;
2244
2245	for (;;) {
2246		next = *tp;
2247		if (next->pos.newline)
2248			break;
2249		tp = &next->next;
2250	}
2251	*line = next;
2252	*tp = &eof_token_entry;
2253	handle_preprocessor_line(stream, line, start);
2254}
2255
/*
 * Main preprocessing loop over the token list at *list.
 *
 * Directive lines are cut out and dispatched, stream begin/end
 * markers are removed while include_level is kept up to date, tokens
 * inside skipped groups are freed, and identifiers are macro-expanded.
 */
static void do_preprocess(struct token **list)
{
	struct token *next;

	while (!eof_token(next = scan_next(list))) {
		struct stream *stream = input_streams + next->pos.stream;

		/* a '#' starting a line begins a directive, unless flagged noexpand */
		if (next->pos.newline && match_op(next, '#')) {
			if (!next->pos.noexpand) {
				preprocessor_line(stream, list);
				__free_token(next);	/* Free the '#' token */
				continue;
			}
		}

		switch (token_type(next)) {
		case TOKEN_STREAMEND:
			/* conditionals may not stay open across the end of a stream */
			if (stream->top_if) {
				nesting_error(stream);
				sparse_error(stream->top_if->pos, "unterminated preprocessor conditional");
				stream->top_if = NULL;
				false_nesting = 0;
			}
			if (!stream->dirty)
				stream->constant = CONSTANT_FILE_YES;
			*list = next->next;
			include_level--;
			continue;
		case TOKEN_STREAMBEGIN:
			*list = next->next;
			include_level++;
			continue;

		default:
			dirty_stream(stream);
			/* inside a skipped group: drop the token */
			if (false_nesting) {
				*list = next->next;
				__free_token(next);
				continue;
			}

			/*
			 * Step past the token unless it is an identifier for
			 * which expand_one_symbol() returned zero; in that
			 * case the same link is scanned again.
			 */
			if (token_type(next) != TOKEN_IDENT ||
			    expand_one_symbol(list))
				list = &next->next;
		}
	}
}
2303
/*
 * Add the architecture specific system include directory,
 * "/usr/include/<arch>-linux-gnu/", to the include path.
 *
 * <arch> is the output of "uname -m"; the directory is only added when
 * "uname -o" reports "GNU/Linux".  Any failure to run or read from
 * either command silently leaves the include path unchanged.
 */
void init_include_path(void)
{
	FILE *fp;
	char path[256];
	char arch[32];
	char os[32];
	char *line;
	size_t len;

	fp = popen("/bin/uname -m", "r");
	if (!fp)
		return;
	line = fgets(arch, sizeof(arch) - 1, fp);
	/* close the pipe on every path, including a failed read */
	pclose(fp);
	if (!line)
		return;
	/* strip the trailing newline, guarding against an empty string */
	len = strlen(arch);
	if (len && arch[len - 1] == '\n')
		arch[len - 1] = '\0';

	fp = popen("/bin/uname -o", "r");
	if (!fp)
		return;
	line = fgets(os, sizeof(os) - 1, fp);
	pclose(fp);
	/* without this check 'os' would be compared uninitialized */
	if (!line)
		return;

	if (strcmp(os, "GNU/Linux\n") != 0)
		return;
	strcpy(os, "linux-gnu");

	snprintf(path, sizeof(path), "/usr/include/%s-%s/", arch, os);
	add_pre_buffer("#add_system \"%s/\"\n", path);
}
2333
2334struct token * preprocess(struct token *token)
2335{
2336	preprocessing = 1;
2337	init_preprocessor();
2338	do_preprocess(&token);
2339
2340	// Drop all expressions from preprocessing, they're not used any more.
2341	// This is not true when we have multiple files, though ;/
2342	// clear_expression_alloc();
2343	preprocessing = 0;
2344
2345	return token;
2346}
2347
2348static int is_VA_ARGS_token(struct token *token)
2349{
2350	return (token_type(token) == TOKEN_IDENT) &&
2351		(token->ident == &__VA_ARGS___ident);
2352}
2353
2354static void dump_macro(struct symbol *sym)
2355{
2356	int nargs = sym->arglist ? sym->arglist->count.normal : 0;
2357	struct token *args[nargs];
2358	struct token *token;
2359
2360	printf("#define %s", show_ident(sym->ident));
2361	token = sym->arglist;
2362	if (token) {
2363		const char *sep = "";
2364		int narg = 0;
2365		putchar('(');
2366		for (; !eof_token(token); token = token->next) {
2367			if (token_type(token) == TOKEN_ARG_COUNT)
2368				continue;
2369			if (is_VA_ARGS_token(token))
2370				printf("%s...", sep);
2371			else
2372				printf("%s%s", sep, show_token(token));
2373			args[narg++] = token;
2374			sep = ",";
2375		}
2376		putchar(')');
2377	}
2378
2379	token = sym->expansion;
2380	while (token_type(token) != TOKEN_UNTAINT) {
2381		struct token *next = token->next;
2382		if (token->pos.whitespace)
2383			putchar(' ');
2384		switch (token_type(token)) {
2385		case TOKEN_CONCAT:
2386			printf("##");
2387			break;
2388		case TOKEN_STR_ARGUMENT:
2389			printf("#");
2390			/* fall-through */
2391		case TOKEN_QUOTED_ARGUMENT:
2392		case TOKEN_MACRO_ARGUMENT:
2393			token = args[token->argnum];
2394			/* fall-through */
2395		default:
2396			printf("%s", show_token(token));
2397		}
2398		token = next;
2399	}
2400	putchar('\n');
2401}
2402
2403void dump_macro_definitions(void)
2404{
2405	struct ident *name;
2406
2407	FOR_EACH_PTR(macros, name) {
2408		struct symbol *sym = lookup_macro(name);
2409		if (sym)
2410			dump_macro(sym);
2411	} END_FOR_EACH_PTR(name);
2412}
2413