1 /*
2  * Copyright (C) 2019 ARM.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt
16  */
17 
18 #include "smatch.h"
19 #include "smatch_extra.h"
20 #include "smatch_function_hashtable.h"
21 
22 static bool expr_has_memory_addr(struct expression *expr);
23 
24 static DEFINE_HASHTABLE_SEARCH(search_symbol, char, char);
25 static DEFINE_HASHTABLE_INSERT(insert_symbol, char, char);
26 static struct hashtable *symbols;
27 
match_assign(struct expression * expr)28 static void match_assign(struct expression *expr)
29 {
30 	char *left_name;
31 	struct symbol *left_sym;
32 
33 	left_name = expr_to_var_sym(expr->left, &left_sym);
34 	if (!left_name || !left_sym)
35 		return;
36 
37 	/*
38 	 * Once we have spotted a symbol of interest (one that may hold
39 	 * an untagged memory address), we keep track of any assignments
40 	 * made, such that we can also treat the assigned symbol as something
41 	 * of interest. This tracking is limited in scope to the function.
42 	 */
43 	if (expr_has_memory_addr(expr->right))
44 		insert_symbol(symbols, left_name, left_name);
45 }
46 
match_endfunc(struct symbol * sym)47 static void match_endfunc(struct symbol *sym)
48 {
49 	destroy_function_hashtable(symbols);
50 	symbols = create_function_hashtable(4000);
51 }
52 
expr_has_untagged_symbol(struct expression * expr)53 static bool expr_has_untagged_symbol(struct expression *expr)
54 {
55 	char *name;
56 	struct symbol *sym;
57 
58 	if (expr->type != EXPR_SYMBOL)
59 		return false;
60 
61 	name = expr_to_var_sym(expr, &sym);
62 	if (!name || !sym)
63 		return false;
64 
65 	/* See if this is something we already know is of interest */
66 	if (search_symbol(symbols, name))
67 		return true;
68 
69 	return false;
70 }
71 
expr_has_untagged_member(struct expression * expr)72 static bool expr_has_untagged_member(struct expression *expr)
73 {
74 	if (expr->type != EXPR_DEREF)
75 		return false;
76 
77 	if (!strcmp(expr->member->name, "vm_start") ||
78 	    !strcmp(expr->member->name, "vm_end") ||
79 	    !strcmp(expr->member->name, "addr_limit"))
80 		return true;
81 
82 	return false;
83 }
84 
expr_has_macro_with_name(struct expression * expr,const char * macro_name)85 static bool expr_has_macro_with_name(struct expression *expr, const char *macro_name)
86 {
87 	char *name;
88 
89 	name = get_macro_name(expr->pos);
90 	return (name && !strcmp(name, macro_name));
91 }
92 
/*
 * Report whether @expr comes from one of the macros we treat as
 * producing an untagged value.
 */
static bool expr_has_untagged_macro(struct expression *expr)
{
	static const char *const untagged_macros[] = {
		"PAGE_SIZE",
		"PAGE_MASK",
		"TASK_SIZE",
		/*
		 * We can't detect a macro (such as PAGE_MASK) inside
		 * another macro such as offset_in_page, therefore we have
		 * to detect the outer macro instead.
		 */
		"offset_in_page",
	};
	unsigned int i;

	for (i = 0; i < sizeof(untagged_macros) / sizeof(untagged_macros[0]); i++) {
		if (expr_has_macro_with_name(expr, untagged_macros[i]))
			return true;
	}

	return false;
}
110 
111 /*
112  * Identify expressions that contain memory addresses, in the future
113  * we may use annotations on symbols or function parameters.
114  */
expr_has_memory_addr(struct expression * expr)115 static bool expr_has_memory_addr(struct expression *expr)
116 {
117 	if (expr->type == EXPR_PREOP || expr->type == EXPR_POSTOP)
118 		expr = strip_expr(expr->unop);
119 
120 	if (expr_has_untagged_member(expr))
121 		return true;
122 
123 	if (expr_has_untagged_macro(expr))
124 		return true;
125 
126 	if (expr_has_untagged_symbol(expr))
127 		return true;
128 
129 	return false;
130 }
131 
/*
 * Return 1 if at least one range in @rl has a maximum that sval_cmp()
 * orders at or above @sval, otherwise 0 (also for an empty list).
 */
int rl_is_larger_or_equal(struct range_list *rl, sval_t sval)
{
	struct data_range *range;

	FOR_EACH_PTR(rl, range) {
		if (sval_cmp(range->max, sval) < 0)
			continue;
		return 1;
	} END_FOR_EACH_PTR(range);

	return 0;
}
142 
/*
 * Return 1 if some range in @rl starts exactly at @sval (its minimum
 * compares equal under sval_cmp()), otherwise 0.
 */
int rl_range_has_min_value(struct range_list *rl, sval_t sval)
{
	struct data_range *range;

	FOR_EACH_PTR(rl, range) {
		if (sval_cmp(range->min, sval) == 0)
			return 1;
	} END_FOR_EACH_PTR(range);

	return 0;
}
154 
rl_is_tagged(struct range_list * rl)155 static bool rl_is_tagged(struct range_list *rl)
156 {
157 	sval_t invalid;
158 	sval_t invalid_kernel;
159 
160 	invalid.type = &ullong_ctype;
161 	invalid.value = 1ULL << 56;
162 	invalid_kernel.type = &ullong_ctype;
163 	invalid_kernel.value = 0xff8ULL << 52;
164 
165 	/*
166 	 * We only care for tagged addresses, thus ignore anything where the
167 	 * ranges of potential values cannot possibly have any of the top byte
168 	 * bits set.
169 	 */
170 	if (!rl_is_larger_or_equal(rl, invalid))
171 		return false;
172 
173 	/*
174 	 * Tagged addresses are untagged in the kernel by using sign_extend64 in
175 	 * the untagged_addr macro. For userspace addresses bit 55 will always
176 	 * be 0 and thus this has the effect of clearing the top byte. However
177 	 * for kernel addresses this is not true and the top bits end up set to
178 	 * all 1s. The untagged_addr macro results in leaving a gap in the range
179 	 * of possible values which can exist, thus let's look for a tell-tale
180 	 * range which starts from (0xff8ULL << 52).
181 	 */
182 	if (rl_range_has_min_value(rl, invalid_kernel))
183 		return false;
184 
185 	return true;
186 }
187 
match_condition(struct expression * expr)188 static void match_condition(struct expression *expr)
189 {
190 	struct range_list *rl = NULL;
191 	struct expression *val = NULL;
192         struct symbol *type;
193 	char *var_name;
194 
195 	/*
196 	 * Match instances where something is compared against something
197 	 * else - we include binary operators as these are commonly used
198 	 * to make a comparison, e.g. if (start & ~PAGE_MASK).
199 	 */
200 	if (expr->type != EXPR_COMPARE &&
201 	    expr->type != EXPR_BINOP)
202 		return;
203 
204 	/*
205 	 * Look on both sides of the comparison for something that shouldn't
206 	 * be compared with a tagged address, e.g. macros such as PAGE_MASK
207 	 * or struct members named .vm_start.
208 	 */
209 	if (expr_has_memory_addr(expr->left))
210 		val = expr->right;
211 
212 	/*
213 	 * The macro 'offset_in_page' has the PAGE_MASK macro inside it, this
214 	 * results in 'expr_has_memory_addr' returning true for both sides. To
215 	 * work around this we assume PAGE_MASK (or similar) is on the right
216 	 * side, thus we do the following test last.
217 	 */
218 	if (expr_has_memory_addr(expr->right))
219 		val = expr->left;
220 
221 	if (!val)
222 		return;
223 
224 	/* We only care about memory addresses which are 64 bits */
225         type = get_type(val);
226 	if (!type || type_bits(type) != 64)
227 		return;
228 
229 	/* We only care for comparison against user originated data */
230 	if (!get_user_rl(val, &rl))
231 		return;
232 
233 	/* We only care for tagged addresses */
234 	if (!rl_is_tagged(rl))
235 		return;
236 
237 	/* Finally, we believe we may have spotted a risky comparison */
238 	var_name = expr_to_var(val);
239 	if (var_name)
240 		sm_warning("comparison of a potentially tagged address (%s, %d, %s)", get_function(), get_param_num(val), var_name);
241 }
242 
check_arm64_tagged(int id)243 void check_arm64_tagged(int id)
244 {
245 	char *arch;
246 
247 	if (option_project != PROJ_KERNEL)
248 		return;
249 
250 	/* Limit to aarch64 */
251 	arch = getenv("ARCH");
252 	if (!arch || strcmp(arch, "arm64"))
253 		return;
254 
255 	symbols = create_function_hashtable(4000);
256 
257 	add_hook(&match_assign, ASSIGNMENT_HOOK);
258 	add_hook(&match_condition, CONDITION_HOOK);
259 	add_hook(&match_endfunc, END_FUNC_HOOK);
260 }
261