1/*
2 * Copyright (C) 2019 ARM.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt
16 */
17
18#include "smatch.h"
19#include "smatch_extra.h"
20#include "smatch_function_hashtable.h"
21
/*
 * Forward declaration: match_assign() below calls this before its
 * definition later in the file.
 */
static bool expr_has_memory_addr(struct expression *expr);

/*
 * Lookup and insert helpers for the 'symbols' table. In this file a
 * variable name is stored as both the key and the value (see
 * match_assign()) and looked up again in expr_has_untagged_symbol().
 */
static DEFINE_HASHTABLE_SEARCH(search_symbol, char, char);
static DEFINE_HASHTABLE_INSERT(insert_symbol, char, char);
/*
 * Names of variables that were assigned an expression for which
 * expr_has_memory_addr() returned true. Created in check_arm64_tagged()
 * and destroyed and recreated by match_endfunc().
 */
static struct hashtable *symbols;
27
28static void match_assign(struct expression *expr)
29{
30	char *left_name;
31	struct symbol *left_sym;
32
33	left_name = expr_to_var_sym(expr->left, &left_sym);
34	if (!left_name || !left_sym)
35		return;
36
37	/*
38	 * Once we have spotted a symbol of interest (one that may hold
39	 * an untagged memory address), we keep track of any assignments
40	 * made, such that we can also treat the assigned symbol as something
41	 * of interest. This tracking is limited in scope to the function.
42	 */
43	if (expr_has_memory_addr(expr->right))
44		insert_symbol(symbols, left_name, left_name);
45}
46
47static void match_endfunc(struct symbol *sym)
48{
49	destroy_function_hashtable(symbols);
50	symbols = create_function_hashtable(4000);
51}
52
53static bool expr_has_untagged_symbol(struct expression *expr)
54{
55	char *name;
56	struct symbol *sym;
57
58	if (expr->type != EXPR_SYMBOL)
59		return false;
60
61	name = expr_to_var_sym(expr, &sym);
62	if (!name || !sym)
63		return false;
64
65	/* See if this is something we already know is of interest */
66	if (search_symbol(symbols, name))
67		return true;
68
69	return false;
70}
71
72static bool expr_has_untagged_member(struct expression *expr)
73{
74	if (expr->type != EXPR_DEREF)
75		return false;
76
77	if (!strcmp(expr->member->name, "vm_start") ||
78	    !strcmp(expr->member->name, "vm_end") ||
79	    !strcmp(expr->member->name, "addr_limit"))
80		return true;
81
82	return false;
83}
84
85static bool expr_has_macro_with_name(struct expression *expr, const char *macro_name)
86{
87	char *name;
88
89	name = get_macro_name(expr->pos);
90	return (name && !strcmp(name, macro_name));
91}
92
/*
 * Return true when @expr comes from one of the macros that this check
 * treats as producing an untagged value.
 */
static bool expr_has_untagged_macro(struct expression *expr)
{
	/*
	 * The entries are tried in this order. A macro (such as PAGE_MASK)
	 * that is used inside another macro (such as offset_in_page) cannot
	 * be detected, so the outer macro is listed as well.
	 */
	static const char *const untagged_macros[] = {
		"PAGE_SIZE",
		"PAGE_MASK",
		"TASK_SIZE",
		"offset_in_page",
	};
	size_t i;

	for (i = 0; i < sizeof(untagged_macros) / sizeof(untagged_macros[0]); i++) {
		if (expr_has_macro_with_name(expr, untagged_macros[i]))
			return true;
	}

	return false;
}
110
111/*
112 * Identify expressions that contain memory addresses, in the future
113 * we may use annotations on symbols or function parameters.
114 */
115static bool expr_has_memory_addr(struct expression *expr)
116{
117	if (expr->type == EXPR_PREOP || expr->type == EXPR_POSTOP)
118		expr = strip_expr(expr->unop);
119
120	if (expr_has_untagged_member(expr))
121		return true;
122
123	if (expr_has_untagged_macro(expr))
124		return true;
125
126	if (expr_has_untagged_symbol(expr))
127		return true;
128
129	return false;
130}
131
132int rl_is_larger_or_equal(struct range_list *rl, sval_t sval)
133{
134	struct data_range *tmp;
135
136	FOR_EACH_PTR(rl, tmp) {
137		if (sval_cmp(tmp->max, sval) >= 0)
138			return 1;
139	} END_FOR_EACH_PTR(tmp);
140	return 0;
141}
142
143int rl_range_has_min_value(struct range_list *rl, sval_t sval)
144{
145	struct data_range *tmp;
146
147	FOR_EACH_PTR(rl, tmp) {
148		if (!sval_cmp(tmp->min, sval)) {
149			return 1;
150		}
151	} END_FOR_EACH_PTR(tmp);
152	return 0;
153}
154
155static bool rl_is_tagged(struct range_list *rl)
156{
157	sval_t invalid;
158	sval_t invalid_kernel;
159
160	invalid.type = &ullong_ctype;
161	invalid.value = 1ULL << 56;
162	invalid_kernel.type = &ullong_ctype;
163	invalid_kernel.value = 0xff8ULL << 52;
164
165	/*
166	 * We only care for tagged addresses, thus ignore anything where the
167	 * ranges of potential values cannot possibly have any of the top byte
168	 * bits set.
169	 */
170	if (!rl_is_larger_or_equal(rl, invalid))
171		return false;
172
173	/*
174	 * Tagged addresses are untagged in the kernel by using sign_extend64 in
175	 * the untagged_addr macro. For userspace addresses bit 55 will always
176	 * be 0 and thus this has the effect of clearing the top byte. However
177	 * for kernel addresses this is not true and the top bits end up set to
178	 * all 1s. The untagged_addr macro results in leaving a gap in the range
179	 * of possible values which can exist, thus let's look for a tell-tale
180	 * range which starts from (0xff8ULL << 52).
181	 */
182	if (rl_range_has_min_value(rl, invalid_kernel))
183		return false;
184
185	return true;
186}
187
188static void match_condition(struct expression *expr)
189{
190	struct range_list *rl = NULL;
191	struct expression *val = NULL;
192        struct symbol *type;
193	char *var_name;
194
195	/*
196	 * Match instances where something is compared against something
197	 * else - we include binary operators as these are commonly used
198	 * to make a comparison, e.g. if (start & ~PAGE_MASK).
199	 */
200	if (expr->type != EXPR_COMPARE &&
201	    expr->type != EXPR_BINOP)
202		return;
203
204	/*
205	 * Look on both sides of the comparison for something that shouldn't
206	 * be compared with a tagged address, e.g. macros such as PAGE_MASK
207	 * or struct members named .vm_start.
208	 */
209	if (expr_has_memory_addr(expr->left))
210		val = expr->right;
211
212	/*
213	 * The macro 'offset_in_page' has the PAGE_MASK macro inside it, this
214	 * results in 'expr_has_memory_addr' returning true for both sides. To
215	 * work around this we assume PAGE_MASK (or similar) is on the right
216	 * side, thus we do the following test last.
217	 */
218	if (expr_has_memory_addr(expr->right))
219		val = expr->left;
220
221	if (!val)
222		return;
223
224	/* We only care about memory addresses which are 64 bits */
225        type = get_type(val);
226	if (!type || type_bits(type) != 64)
227		return;
228
229	/* We only care for comparison against user originated data */
230	if (!get_user_rl(val, &rl))
231		return;
232
233	/* We only care for tagged addresses */
234	if (!rl_is_tagged(rl))
235		return;
236
237	/* Finally, we believe we may have spotted a risky comparison */
238	var_name = expr_to_var(val);
239	if (var_name)
240		sm_warning("comparison of a potentially tagged address (%s, %d, %s)", get_function(), get_param_num(val), var_name);
241}
242
243void check_arm64_tagged(int id)
244{
245	char *arch;
246
247	if (option_project != PROJ_KERNEL)
248		return;
249
250	/* Limit to aarch64 */
251	arch = getenv("ARCH");
252	if (!arch || strcmp(arch, "arm64"))
253		return;
254
255	symbols = create_function_hashtable(4000);
256
257	add_hook(&match_assign, ASSIGNMENT_HOOK);
258	add_hook(&match_condition, CONDITION_HOOK);
259	add_hook(&match_endfunc, END_FUNC_HOOK);
260}
261