/* xref: /illumos-gate/usr/src/uts/common/fs/zfs/range_tree.c (revision c4ab0d3f) */
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
27  */
28 
29 #include <sys/zfs_context.h>
30 #include <sys/spa.h>
31 #include <sys/dmu.h>
32 #include <sys/dnode.h>
33 #include <sys/zio.h>
34 #include <sys/range_tree.h>
35 
/* kmem cache for range_seg_t allocations; created in range_tree_init(). */
kmem_cache_t *range_seg_cache;
37 
/*
 * One-time module initialization: create the kmem cache used for all
 * range_seg_t allocations.  Must be called exactly once before any
 * range tree is created (the ASSERT guards against double init).
 */
void
range_tree_init(void)
{
	ASSERT(range_seg_cache == NULL);
	range_seg_cache = kmem_cache_create("range_seg_cache",
	    sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}
45 
/*
 * Module teardown: destroy the segment cache.  Resetting the pointer
 * to NULL allows range_tree_init() to be called again later.
 */
void
range_tree_fini(void)
{
	kmem_cache_destroy(range_seg_cache);
	range_seg_cache = NULL;
}
52 
53 void
54 range_tree_stat_verify(range_tree_t *rt)
55 {
56 	range_seg_t *rs;
57 	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
58 	int i;
59 
60 	for (rs = avl_first(&rt->rt_root); rs != NULL;
61 	    rs = AVL_NEXT(&rt->rt_root, rs)) {
62 		uint64_t size = rs->rs_end - rs->rs_start;
63 		int idx	= highbit64(size) - 1;
64 
65 		hist[idx]++;
66 		ASSERT3U(hist[idx], !=, 0);
67 	}
68 
69 	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
70 		if (hist[i] != rt->rt_histogram[i]) {
71 			zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
72 			    i, hist, hist[i], rt->rt_histogram[i]);
73 		}
74 		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
75 	}
76 }
77 
78 static void
79 range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
80 {
81 	uint64_t size = rs->rs_end - rs->rs_start;
82 	int idx = highbit64(size) - 1;
83 
84 	ASSERT(size != 0);
85 	ASSERT3U(idx, <,
86 	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
87 
88 	rt->rt_histogram[idx]++;
89 	ASSERT3U(rt->rt_histogram[idx], !=, 0);
90 }
91 
92 static void
93 range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
94 {
95 	uint64_t size = rs->rs_end - rs->rs_start;
96 	int idx = highbit64(size) - 1;
97 
98 	ASSERT(size != 0);
99 	ASSERT3U(idx, <,
100 	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
101 
102 	ASSERT3U(rt->rt_histogram[idx], !=, 0);
103 	rt->rt_histogram[idx]--;
104 }
105 
106 /*
107  * NOTE: caller is responsible for all locking.
108  */
109 static int
110 range_tree_seg_compare(const void *x1, const void *x2)
111 {
112 	const range_seg_t *r1 = (const range_seg_t *)x1;
113 	const range_seg_t *r2 = (const range_seg_t *)x2;
114 
115 	ASSERT3U(r1->rs_start, <=, r1->rs_end);
116 	ASSERT3U(r2->rs_start, <=, r2->rs_end);
117 
118 	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
119 }
120 
121 range_tree_t *
122 range_tree_create(range_tree_ops_t *ops, void *arg)
123 {
124 	range_tree_t *rt;
125 
126 	rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
127 
128 	avl_create(&rt->rt_root, range_tree_seg_compare,
129 	    sizeof (range_seg_t), offsetof(range_seg_t, rs_node));
130 
131 	rt->rt_ops = ops;
132 	rt->rt_arg = arg;
133 
134 	if (rt->rt_ops != NULL)
135 		rt->rt_ops->rtop_create(rt, rt->rt_arg);
136 
137 	return (rt);
138 }
139 
/*
 * Free a range tree.  The tree must already be empty (vacated); the
 * VERIFY enforces that no tracked space remains.
 */
void
range_tree_destroy(range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	/* Let the client tear down its shadow state before the tree goes. */
	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	avl_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}
151 
152 void
153 range_tree_add(void *arg, uint64_t start, uint64_t size)
154 {
155 	range_tree_t *rt = arg;
156 	avl_index_t where;
157 	range_seg_t rsearch, *rs_before, *rs_after, *rs;
158 	uint64_t end = start + size;
159 	boolean_t merge_before, merge_after;
160 
161 	VERIFY(size != 0);
162 
163 	rsearch.rs_start = start;
164 	rsearch.rs_end = end;
165 	rs = avl_find(&rt->rt_root, &rsearch, &where);
166 
167 	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
168 		zfs_panic_recover("zfs: allocating allocated segment"
169 		    "(offset=%llu size=%llu)\n",
170 		    (longlong_t)start, (longlong_t)size);
171 		return;
172 	}
173 
174 	/* Make sure we don't overlap with either of our neighbors */
175 	VERIFY3P(rs, ==, NULL);
176 
177 	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
178 	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);
179 
180 	merge_before = (rs_before != NULL && rs_before->rs_end == start);
181 	merge_after = (rs_after != NULL && rs_after->rs_start == end);
182 
183 	if (merge_before && merge_after) {
184 		avl_remove(&rt->rt_root, rs_before);
185 		if (rt->rt_ops != NULL) {
186 			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
187 			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
188 		}
189 
190 		range_tree_stat_decr(rt, rs_before);
191 		range_tree_stat_decr(rt, rs_after);
192 
193 		rs_after->rs_start = rs_before->rs_start;
194 		kmem_cache_free(range_seg_cache, rs_before);
195 		rs = rs_after;
196 	} else if (merge_before) {
197 		if (rt->rt_ops != NULL)
198 			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
199 
200 		range_tree_stat_decr(rt, rs_before);
201 
202 		rs_before->rs_end = end;
203 		rs = rs_before;
204 	} else if (merge_after) {
205 		if (rt->rt_ops != NULL)
206 			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
207 
208 		range_tree_stat_decr(rt, rs_after);
209 
210 		rs_after->rs_start = start;
211 		rs = rs_after;
212 	} else {
213 		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
214 		rs->rs_start = start;
215 		rs->rs_end = end;
216 		avl_insert(&rt->rt_root, rs, where);
217 	}
218 
219 	if (rt->rt_ops != NULL)
220 		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
221 
222 	range_tree_stat_incr(rt, rs);
223 	rt->rt_space += size;
224 }
225 
/*
 * Remove the segment [start, start + size) from the range tree.  The
 * range must be entirely contained within a single existing segment;
 * that segment is trimmed, split in two, or deleted as appropriate.
 */
void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs, *newseg;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	VERIFY3U(size, !=, 0);
	VERIFY3U(size, <=, rt->rt_space);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	/* Make sure we completely overlap with someone */
	if (rs == NULL) {
		zfs_panic_recover("zfs: freeing free segment "
		    "(offset=%llu size=%llu)",
		    (longlong_t)start, (longlong_t)size);
		return;
	}
	VERIFY3U(rs->rs_start, <=, start);
	VERIFY3U(rs->rs_end, >=, end);

	/* Will the found segment have leftover space on either side? */
	left_over = (rs->rs_start != start);
	right_over = (rs->rs_end != end);

	/*
	 * The segment's size (and possibly its existence) is about to
	 * change, so unaccount it now; anything that remains is
	 * re-accounted below.
	 */
	range_tree_stat_decr(rt, rs);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	if (left_over && right_over) {
		/* Split: rs keeps the left piece, newseg gets the right. */
		newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		newseg->rs_start = end;
		newseg->rs_end = rs->rs_end;
		range_tree_stat_incr(rt, newseg);

		rs->rs_end = start;

		avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
	} else if (left_over) {
		/* Trim the tail of the segment. */
		rs->rs_end = start;
	} else if (right_over) {
		/* Trim the head of the segment. */
		rs->rs_start = end;
	} else {
		/* Exact match: delete the segment outright. */
		avl_remove(&rt->rt_root, rs);
		kmem_cache_free(range_seg_cache, rs);
		rs = NULL;
	}

	if (rs != NULL) {
		/* Re-account the resized remainder. */
		range_tree_stat_incr(rt, rs);

		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
	}

	rt->rt_space -= size;
}
290 
291 static range_seg_t *
292 range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
293 {
294 	range_seg_t rsearch;
295 	uint64_t end = start + size;
296 
297 	VERIFY(size != 0);
298 
299 	rsearch.rs_start = start;
300 	rsearch.rs_end = end;
301 	return (avl_find(&rt->rt_root, &rsearch, NULL));
302 }
303 
304 static range_seg_t *
305 range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
306 {
307 	range_seg_t *rs = range_tree_find_impl(rt, start, size);
308 	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
309 		return (rs);
310 	return (NULL);
311 }
312 
313 void
314 range_tree_verify_not_present(range_tree_t *rt, uint64_t off, uint64_t size)
315 {
316 	range_seg_t *rs = range_tree_find(rt, off, size);
317 	if (rs != NULL)
318 		panic("segment already in tree; rs=%p", (void *)rs);
319 }
320 
321 boolean_t
322 range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
323 {
324 	return (range_tree_find(rt, start, size) != NULL);
325 }
326 
327 /*
328  * Ensure that this range is not in the tree, regardless of whether
329  * it is currently in the tree.
330  */
331 void
332 range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
333 {
334 	range_seg_t *rs;
335 
336 	if (size == 0)
337 		return;
338 
339 	while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
340 		uint64_t free_start = MAX(rs->rs_start, start);
341 		uint64_t free_end = MIN(rs->rs_end, start + size);
342 		range_tree_remove(rt, free_start, free_end - free_start);
343 	}
344 }
345 
346 void
347 range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
348 {
349 	range_tree_t *rt;
350 
351 	ASSERT0(range_tree_space(*rtdst));
352 	ASSERT0(avl_numnodes(&(*rtdst)->rt_root));
353 
354 	rt = *rtsrc;
355 	*rtsrc = *rtdst;
356 	*rtdst = rt;
357 }
358 
359 void
360 range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
361 {
362 	range_seg_t *rs;
363 	void *cookie = NULL;
364 
365 
366 	if (rt->rt_ops != NULL)
367 		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);
368 
369 	while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
370 		if (func != NULL)
371 			func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
372 		kmem_cache_free(range_seg_cache, rs);
373 	}
374 
375 	bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
376 	rt->rt_space = 0;
377 }
378 
379 void
380 range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
381 {
382 	range_seg_t *rs;
383 
384 	for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
385 		func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
386 }
387 
/*
 * Return the total number of bytes currently tracked by the tree.
 */
uint64_t
range_tree_space(range_tree_t *rt)
{
	return (rt->rt_space);
}
393 
394 boolean_t
395 range_tree_is_empty(range_tree_t *rt)
396 {
397 	ASSERT(rt != NULL);
398 	return (range_tree_space(rt) == 0);
399 }
400 
401 uint64_t
402 range_tree_min(range_tree_t *rt)
403 {
404 	range_seg_t *rs = avl_first(&rt->rt_root);
405 	return (rs != NULL ? rs->rs_start : 0);
406 }
407 
408 uint64_t
409 range_tree_max(range_tree_t *rt)
410 {
411 	range_seg_t *rs = avl_last(&rt->rt_root);
412 	return (rs != NULL ? rs->rs_end : 0);
413 }
414 
/*
 * Return the distance between the lowest and highest offsets in the
 * tree (0 for an empty tree, since min and max are then both 0).
 * This includes any gaps between segments, so span >= rt_space.
 */
uint64_t
range_tree_span(range_tree_t *rt)
{
	return (range_tree_max(rt) - range_tree_min(rt));
}
420