xref: /illumos-gate/usr/src/uts/common/fs/zfs/range_tree.c (revision af1d63aba5cec023f92214c1f1faec9b489ac517)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
27  */
28 
29 #include <sys/zfs_context.h>
30 #include <sys/spa.h>
31 #include <sys/dmu.h>
32 #include <sys/dnode.h>
33 #include <sys/zio.h>
34 #include <sys/range_tree.h>
35 
36 /*
37  * Range trees are tree-based data structures that can be used to
38  * track free space or generally any space allocation information.
39  * A range tree keeps track of individual segments and automatically
40  * provides facilities such as adjacent extent merging and extent
41  * splitting in response to range add/remove requests.
42  *
43  * A range tree starts out completely empty, with no segments in it.
44  * Adding an allocation via range_tree_add to the range tree can either:
45  * 1) create a new extent
46  * 2) extend an adjacent extent
47  * 3) merge two adjacent extents
48  * Conversely, removing an allocation via range_tree_remove can:
49  * 1) completely remove an extent
50  * 2) shorten an extent (if the allocation was near one of its ends)
51  * 3) split an extent into two extents, in effect punching a hole
52  *
53  * A range tree is also capable of 'bridging' gaps when adding
54  * allocations. This is useful for cases when close proximity of
55  * allocations is an important detail that needs to be represented
56  * in the range tree. See range_tree_set_gap(). The default behavior
57  * is not to bridge gaps (i.e. the maximum allowed gap size is 0).
58  *
59  * In order to traverse a range tree, use either the range_tree_walk()
60  * or range_tree_vacate() functions.
61  *
62  * To obtain more accurate information on individual segment
63  * operations that the range tree performs "under the hood", you can
64  * specify a set of callbacks by passing a range_tree_ops_t structure
65  * to the range_tree_create function. Any callbacks that are non-NULL
66  * are then called at the appropriate times.
67  *
68  * The range tree code also supports a special variant of range trees
69  * that can bridge small gaps between segments. This kind of tree is used
70  * by the dsl scanning code to group I/Os into mostly sequential chunks to
71  * optimize disk performance. The code here attempts to do this with as
72  * little memory and computational overhead as possible. One limitation of
73  * this implementation is that segments of range trees with gaps can only
74  * support removing complete segments.
75  */
76 
77 kmem_cache_t *range_seg_cache;
78 
79 /* Generic ops for managing an AVL tree alongside a range tree */
80 struct range_tree_ops rt_avl_ops = {
81 	.rtop_create = rt_avl_create,
82 	.rtop_destroy = rt_avl_destroy,
83 	.rtop_add = rt_avl_add,
84 	.rtop_remove = rt_avl_remove,
85 	.rtop_vacate = rt_avl_vacate,
86 };
87 
88 void
89 range_tree_init(void)
90 {
91 	ASSERT(range_seg_cache == NULL);
92 	range_seg_cache = kmem_cache_create("range_seg_cache",
93 	    sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
94 }
95 
96 void
97 range_tree_fini(void)
98 {
99 	kmem_cache_destroy(range_seg_cache);
100 	range_seg_cache = NULL;
101 }
102 
103 void
104 range_tree_stat_verify(range_tree_t *rt)
105 {
106 	range_seg_t *rs;
107 	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
108 	int i;
109 
110 	for (rs = avl_first(&rt->rt_root); rs != NULL;
111 	    rs = AVL_NEXT(&rt->rt_root, rs)) {
112 		uint64_t size = rs->rs_end - rs->rs_start;
113 		int idx	= highbit64(size) - 1;
114 
115 		hist[idx]++;
116 		ASSERT3U(hist[idx], !=, 0);
117 	}
118 
119 	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
120 		if (hist[i] != rt->rt_histogram[i]) {
121 			zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
122 			    i, hist, hist[i], rt->rt_histogram[i]);
123 		}
124 		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
125 	}
126 }
127 
128 static void
129 range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
130 {
131 	uint64_t size = rs->rs_end - rs->rs_start;
132 	int idx = highbit64(size) - 1;
133 
134 	ASSERT(size != 0);
135 	ASSERT3U(idx, <,
136 	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
137 
138 	rt->rt_histogram[idx]++;
139 	ASSERT3U(rt->rt_histogram[idx], !=, 0);
140 }
141 
142 static void
143 range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
144 {
145 	uint64_t size = rs->rs_end - rs->rs_start;
146 	int idx = highbit64(size) - 1;
147 
148 	ASSERT(size != 0);
149 	ASSERT3U(idx, <,
150 	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
151 
152 	ASSERT3U(rt->rt_histogram[idx], !=, 0);
153 	rt->rt_histogram[idx]--;
154 }
155 
156 /*
157  * NOTE: caller is responsible for all locking.
158  */
159 static int
160 range_tree_seg_compare(const void *x1, const void *x2)
161 {
162 	const range_seg_t *r1 = (const range_seg_t *)x1;
163 	const range_seg_t *r2 = (const range_seg_t *)x2;
164 
165 	ASSERT3U(r1->rs_start, <=, r1->rs_end);
166 	ASSERT3U(r2->rs_start, <=, r2->rs_end);
167 
168 	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
169 }
170 
171 range_tree_t *
172 range_tree_create_impl(range_tree_ops_t *ops, void *arg,
173     int (*avl_compare) (const void *, const void *), uint64_t gap)
174 {
175 	range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);
176 
177 	avl_create(&rt->rt_root, range_tree_seg_compare,
178 	    sizeof (range_seg_t), offsetof(range_seg_t, rs_node));
179 
180 	rt->rt_ops = ops;
181 	rt->rt_arg = arg;
182 	rt->rt_gap = gap;
183 	rt->rt_avl_compare = avl_compare;
184 
185 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
186 		rt->rt_ops->rtop_create(rt, rt->rt_arg);
187 
188 	return (rt);
189 }
190 
191 range_tree_t *
192 range_tree_create(range_tree_ops_t *ops, void *arg)
193 {
194 	return (range_tree_create_impl(ops, arg, NULL, 0));
195 }
196 
197 void
198 range_tree_destroy(range_tree_t *rt)
199 {
200 	VERIFY0(rt->rt_space);
201 
202 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
203 		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);
204 
205 	avl_destroy(&rt->rt_root);
206 	kmem_free(rt, sizeof (*rt));
207 }
208 
209 void
210 range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta)
211 {
212 	ASSERT3U(rs->rs_fill + delta, !=, 0);
213 	ASSERT3U(rs->rs_fill + delta, <=, rs->rs_end - rs->rs_start);
214 
215 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
216 		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
217 	rs->rs_fill += delta;
218 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
219 		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
220 }
221 
222 static void
223 range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
224 {
225 	range_tree_t *rt = arg;
226 	avl_index_t where;
227 	range_seg_t rsearch, *rs_before, *rs_after, *rs;
228 	uint64_t end = start + size, gap = rt->rt_gap;
229 	uint64_t bridge_size = 0;
230 	boolean_t merge_before, merge_after;
231 
232 	ASSERT3U(size, !=, 0);
233 	ASSERT3U(fill, <=, size);
234 
235 	rsearch.rs_start = start;
236 	rsearch.rs_end = end;
237 	rs = avl_find(&rt->rt_root, &rsearch, &where);
238 
239 	if (gap == 0 && rs != NULL &&
240 	    rs->rs_start <= start && rs->rs_end >= end) {
241 		zfs_panic_recover("zfs: allocating allocated segment"
242 		    "(offset=%llu size=%llu) of (offset=%llu size=%llu)\n",
243 		    (longlong_t)start, (longlong_t)size,
244 		    (longlong_t)rs->rs_start,
245 		    (longlong_t)rs->rs_end - rs->rs_start);
246 		return;
247 	}
248 
249 	/*
250 	 * If this is a gap-supporting range tree, it is possible that we
251 	 * are inserting into an existing segment. In this case simply
252 	 * bump the fill count and call the remove / add callbacks. If the
253 	 * new range will extend an existing segment, we remove the
254 	 * existing one, apply the new extent to it and re-insert it using
255 	 * the normal code paths.
256 	 */
257 	if (rs != NULL) {
258 		ASSERT3U(gap, !=, 0);
259 		if (rs->rs_start <= start && rs->rs_end >= end) {
260 			range_tree_adjust_fill(rt, rs, fill);
261 			return;
262 		}
263 
264 		avl_remove(&rt->rt_root, rs);
265 		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
266 			rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
267 
268 		range_tree_stat_decr(rt, rs);
269 		rt->rt_space -= rs->rs_end - rs->rs_start;
270 
271 		fill += rs->rs_fill;
272 		start = MIN(start, rs->rs_start);
273 		end = MAX(end, rs->rs_end);
274 		size = end - start;
275 
276 		range_tree_add_impl(rt, start, size, fill);
277 
278 		kmem_cache_free(range_seg_cache, rs);
279 		return;
280 	}
281 
282 	ASSERT3P(rs, ==, NULL);
283 
284 	/*
285 	 * Determine whether or not we will have to merge with our neighbors.
286 	 * If gap != 0, we might need to merge with our neighbors even if we
287 	 * aren't directly touching.
288 	 */
289 	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
290 	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);
291 
292 	merge_before = (rs_before != NULL && rs_before->rs_end >= start - gap);
293 	merge_after = (rs_after != NULL && rs_after->rs_start <= end + gap);
294 
295 	if (merge_before && gap != 0)
296 		bridge_size += start - rs_before->rs_end;
297 	if (merge_after && gap != 0)
298 		bridge_size += rs_after->rs_start - end;
299 
300 	if (merge_before && merge_after) {
301 		avl_remove(&rt->rt_root, rs_before);
302 		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) {
303 			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
304 			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
305 		}
306 
307 		range_tree_stat_decr(rt, rs_before);
308 		range_tree_stat_decr(rt, rs_after);
309 
310 		rs_after->rs_fill += rs_before->rs_fill + fill;
311 		rs_after->rs_start = rs_before->rs_start;
312 		kmem_cache_free(range_seg_cache, rs_before);
313 		rs = rs_after;
314 	} else if (merge_before) {
315 		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
316 			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
317 
318 		range_tree_stat_decr(rt, rs_before);
319 
320 		rs_before->rs_fill += fill;
321 		rs_before->rs_end = end;
322 		rs = rs_before;
323 	} else if (merge_after) {
324 		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
325 			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
326 
327 		range_tree_stat_decr(rt, rs_after);
328 
329 		rs_after->rs_fill += fill;
330 		rs_after->rs_start = start;
331 		rs = rs_after;
332 	} else {
333 		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
334 
335 		rs->rs_fill = fill;
336 		rs->rs_start = start;
337 		rs->rs_end = end;
338 		avl_insert(&rt->rt_root, rs, where);
339 	}
340 
341 	if (gap != 0)
342 		ASSERT3U(rs->rs_fill, <=, rs->rs_end - rs->rs_start);
343 	else
344 		ASSERT3U(rs->rs_fill, ==, rs->rs_end - rs->rs_start);
345 
346 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
347 		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
348 
349 	range_tree_stat_incr(rt, rs);
350 	rt->rt_space += size + bridge_size;
351 }
352 
353 void
354 range_tree_add(void *arg, uint64_t start, uint64_t size)
355 {
356 	range_tree_add_impl(arg, start, size, size);
357 }
358 
359 static void
360 range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size,
361     boolean_t do_fill)
362 {
363 	avl_index_t where;
364 	range_seg_t rsearch, *rs, *newseg;
365 	uint64_t end = start + size;
366 	boolean_t left_over, right_over;
367 
368 	VERIFY3U(size, !=, 0);
369 	VERIFY3U(size, <=, rt->rt_space);
370 
371 	rsearch.rs_start = start;
372 	rsearch.rs_end = end;
373 	rs = avl_find(&rt->rt_root, &rsearch, &where);
374 
375 	/* Make sure we completely overlap with someone */
376 	if (rs == NULL) {
377 		zfs_panic_recover("zfs: freeing free segment "
378 		    "(offset=%llu size=%llu)",
379 		    (longlong_t)start, (longlong_t)size);
380 		return;
381 	}
382 
383 	/*
384 	 * Range trees with gap support must only remove complete segments
385 	 * from the tree. This allows us to maintain accurate fill accounting
386 	 * and to ensure that bridged sections are not leaked. If we need to
387 	 * remove less than the full segment, we can only adjust the fill count.
388 	 */
389 	if (rt->rt_gap != 0) {
390 		if (do_fill) {
391 			if (rs->rs_fill == size) {
392 				start = rs->rs_start;
393 				end = rs->rs_end;
394 				size = end - start;
395 			} else {
396 				range_tree_adjust_fill(rt, rs, -size);
397 				return;
398 			}
399 		} else if (rs->rs_start != start || rs->rs_end != end) {
400 			zfs_panic_recover("zfs: freeing partial segment of "
401 			    "gap tree (offset=%llu size=%llu) of "
402 			    "(offset=%llu size=%llu)",
403 			    (longlong_t)start, (longlong_t)size,
404 			    (longlong_t)rs->rs_start,
405 			    (longlong_t)rs->rs_end - rs->rs_start);
406 			return;
407 		}
408 	}
409 
410 	VERIFY3U(rs->rs_start, <=, start);
411 	VERIFY3U(rs->rs_end, >=, end);
412 
413 	left_over = (rs->rs_start != start);
414 	right_over = (rs->rs_end != end);
415 
416 	range_tree_stat_decr(rt, rs);
417 
418 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
419 		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
420 
421 	if (left_over && right_over) {
422 		newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
423 		newseg->rs_start = end;
424 		newseg->rs_end = rs->rs_end;
425 		newseg->rs_fill = newseg->rs_end - newseg->rs_start;
426 		range_tree_stat_incr(rt, newseg);
427 
428 		rs->rs_end = start;
429 
430 		avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
431 		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
432 			rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
433 	} else if (left_over) {
434 		rs->rs_end = start;
435 	} else if (right_over) {
436 		rs->rs_start = end;
437 	} else {
438 		avl_remove(&rt->rt_root, rs);
439 		kmem_cache_free(range_seg_cache, rs);
440 		rs = NULL;
441 	}
442 
443 	if (rs != NULL) {
444 		/*
445 		 * The fill of the leftover segment will always be equal to
446 		 * the size, since we do not support removing partial segments
447 		 * of range trees with gaps.
448 		 */
449 		rs->rs_fill = rs->rs_end - rs->rs_start;
450 		range_tree_stat_incr(rt, rs);
451 
452 		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
453 			rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
454 	}
455 
456 	rt->rt_space -= size;
457 }
458 
459 void
460 range_tree_remove(void *arg, uint64_t start, uint64_t size)
461 {
462 	range_tree_remove_impl(arg, start, size, B_FALSE);
463 }
464 
465 void
466 range_tree_remove_fill(range_tree_t *rt, uint64_t start, uint64_t size)
467 {
468 	range_tree_remove_impl(rt, start, size, B_TRUE);
469 }
470 
471 void
472 range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
473     uint64_t newstart, uint64_t newsize)
474 {
475 	int64_t delta = newsize - (rs->rs_end - rs->rs_start);
476 
477 	range_tree_stat_decr(rt, rs);
478 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
479 		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
480 
481 	rs->rs_start = newstart;
482 	rs->rs_end = newstart + newsize;
483 
484 	range_tree_stat_incr(rt, rs);
485 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
486 		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
487 
488 	rt->rt_space += delta;
489 }
490 
491 static range_seg_t *
492 range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
493 {
494 	range_seg_t rsearch;
495 	uint64_t end = start + size;
496 
497 	VERIFY(size != 0);
498 
499 	rsearch.rs_start = start;
500 	rsearch.rs_end = end;
501 	return (avl_find(&rt->rt_root, &rsearch, NULL));
502 }
503 
504 range_seg_t *
505 range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
506 {
507 	range_seg_t *rs = range_tree_find_impl(rt, start, size);
508 	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
509 		return (rs);
510 	return (NULL);
511 }
512 
513 void
514 range_tree_verify_not_present(range_tree_t *rt, uint64_t off, uint64_t size)
515 {
516 	range_seg_t *rs = range_tree_find(rt, off, size);
517 	if (rs != NULL)
518 		panic("segment already in tree; rs=%p", (void *)rs);
519 }
520 
521 boolean_t
522 range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
523 {
524 	return (range_tree_find(rt, start, size) != NULL);
525 }
526 
527 /*
528  * Returns the first subset of the given range which overlaps with the range
529  * tree. Returns true if there is a segment in the range, and false if there
530  * isn't.
531  */
532 boolean_t
533 range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size,
534     uint64_t *ostart, uint64_t *osize)
535 {
536 	range_seg_t rsearch;
537 	rsearch.rs_start = start;
538 	rsearch.rs_end = start + 1;
539 
540 	avl_index_t where;
541 	range_seg_t *rs = avl_find(&rt->rt_root, &rsearch, &where);
542 	if (rs != NULL) {
543 		*ostart = start;
544 		*osize = MIN(size, rs->rs_end - start);
545 		return (B_TRUE);
546 	}
547 
548 	rs = avl_nearest(&rt->rt_root, where, AVL_AFTER);
549 	if (rs == NULL || rs->rs_start > start + size)
550 		return (B_FALSE);
551 
552 	*ostart = rs->rs_start;
553 	*osize = MIN(start + size, rs->rs_end) - rs->rs_start;
554 	return (B_TRUE);
555 }
556 
557 /*
558  * Ensure that this range is not in the tree, regardless of whether
559  * it is currently in the tree.
560  */
561 void
562 range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
563 {
564 	range_seg_t *rs;
565 
566 	if (size == 0)
567 		return;
568 
569 	while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
570 		uint64_t free_start = MAX(rs->rs_start, start);
571 		uint64_t free_end = MIN(rs->rs_end, start + size);
572 		range_tree_remove(rt, free_start, free_end - free_start);
573 	}
574 }
575 
576 void
577 range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
578 {
579 	range_tree_t *rt;
580 
581 	ASSERT0(range_tree_space(*rtdst));
582 	ASSERT0(avl_numnodes(&(*rtdst)->rt_root));
583 
584 	rt = *rtsrc;
585 	*rtsrc = *rtdst;
586 	*rtdst = rt;
587 }
588 
589 void
590 range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
591 {
592 	range_seg_t *rs;
593 	void *cookie = NULL;
594 
595 
596 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_vacate != NULL)
597 		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);
598 
599 	while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
600 		if (func != NULL)
601 			func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
602 		kmem_cache_free(range_seg_cache, rs);
603 	}
604 
605 	bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
606 	rt->rt_space = 0;
607 }
608 
609 void
610 range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
611 {
612 	for (range_seg_t *rs = avl_first(&rt->rt_root); rs;
613 	    rs = AVL_NEXT(&rt->rt_root, rs)) {
614 		func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
615 	}
616 }
617 
618 range_seg_t *
619 range_tree_first(range_tree_t *rt)
620 {
621 	return (avl_first(&rt->rt_root));
622 }
623 
624 uint64_t
625 range_tree_space(range_tree_t *rt)
626 {
627 	return (rt->rt_space);
628 }
629 
630 uint64_t
631 range_tree_numsegs(range_tree_t *rt)
632 {
633 	return ((rt == NULL) ? 0 : avl_numnodes(&rt->rt_root));
634 }
635 
636 /* Generic range tree functions for maintaining segments in an AVL tree. */
637 void
638 rt_avl_create(range_tree_t *rt, void *arg)
639 {
640 	avl_tree_t *tree = arg;
641 
642 	avl_create(tree, rt->rt_avl_compare, sizeof (range_seg_t),
643 	    offsetof(range_seg_t, rs_pp_node));
644 }
645 
646 void
647 rt_avl_destroy(range_tree_t *rt, void *arg)
648 {
649 	avl_tree_t *tree = arg;
650 
651 	ASSERT0(avl_numnodes(tree));
652 	avl_destroy(tree);
653 }
654 
655 void
656 rt_avl_add(range_tree_t *rt, range_seg_t *rs, void *arg)
657 {
658 	avl_tree_t *tree = arg;
659 	avl_add(tree, rs);
660 }
661 
662 void
663 rt_avl_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
664 {
665 	avl_tree_t *tree = arg;
666 	avl_remove(tree, rs);
667 }
668 
669 void
670 rt_avl_vacate(range_tree_t *rt, void *arg)
671 {
672 	/*
673 	 * Normally one would walk the tree freeing nodes along the way.
674 	 * Since the nodes are shared with the range trees we can avoid
675 	 * walking all nodes and just reinitialize the avl tree. The nodes
676 	 * will be freed by the range tree, so we don't want to free them here.
677 	 */
678 	rt_avl_create(rt, arg);
679 }
680 
681 boolean_t
682 range_tree_is_empty(range_tree_t *rt)
683 {
684 	ASSERT(rt != NULL);
685 	return (range_tree_space(rt) == 0);
686 }
687 
688 uint64_t
689 range_tree_min(range_tree_t *rt)
690 {
691 	range_seg_t *rs = avl_first(&rt->rt_root);
692 	return (rs != NULL ? rs->rs_start : 0);
693 }
694 
695 uint64_t
696 range_tree_max(range_tree_t *rt)
697 {
698 	range_seg_t *rs = avl_last(&rt->rt_root);
699 	return (rs != NULL ? rs->rs_end : 0);
700 }
701 
702 uint64_t
703 range_tree_span(range_tree_t *rt)
704 {
705 	return (range_tree_max(rt) - range_tree_min(rt));
706 }
707 
708 /*
709  * Remove any overlapping ranges between the given segment [start, end)
710  * from removefrom. Add non-overlapping leftovers to addto.
711  */
712 void
713 range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
714     range_tree_t *removefrom, range_tree_t *addto)
715 {
716 	avl_index_t where;
717 	range_seg_t starting_rs = {
718 		.rs_start = start,
719 		.rs_end = start + 1
720 	};
721 
722 	range_seg_t *curr = avl_find(&removefrom->rt_root,
723 	    &starting_rs, &where);
724 
725 	if (curr == NULL)
726 		curr = avl_nearest(&removefrom->rt_root, where, AVL_AFTER);
727 
728 	range_seg_t *next;
729 	for (; curr != NULL; curr = next) {
730 		next = AVL_NEXT(&removefrom->rt_root, curr);
731 
732 		if (start == end)
733 			return;
734 		VERIFY3U(start, <, end);
735 
736 		/* there is no overlap */
737 		if (end <= curr->rs_start) {
738 			range_tree_add(addto, start, end - start);
739 			return;
740 		}
741 
742 		uint64_t overlap_start = MAX(curr->rs_start, start);
743 		uint64_t overlap_end = MIN(curr->rs_end, end);
744 		uint64_t overlap_size = overlap_end - overlap_start;
745 		ASSERT3S(overlap_size, >, 0);
746 		range_tree_remove(removefrom, overlap_start, overlap_size);
747 
748 		if (start < overlap_start)
749 			range_tree_add(addto, start, overlap_start - start);
750 
751 		start = overlap_end;
752 	}
753 	VERIFY3P(curr, ==, NULL);
754 
755 	if (start != end) {
756 		VERIFY3U(start, <, end);
757 		range_tree_add(addto, start, end - start);
758 	} else {
759 		VERIFY3U(start, ==, end);
760 	}
761 }
762 
763 /*
764  * For each entry in rt, if it exists in removefrom, remove it
765  * from removefrom. Otherwise, add it to addto.
766  */
767 void
768 range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
769     range_tree_t *addto)
770 {
771 	for (range_seg_t *rs = avl_first(&rt->rt_root); rs;
772 	    rs = AVL_NEXT(&rt->rt_root, rs)) {
773 		range_tree_remove_xor_add_segment(rs->rs_start, rs->rs_end,
774 		    removefrom, addto);
775 	}
776 }
777