/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>

kmem_cache_t *range_seg_cache;

void
range_tree_init(void)
{
	ASSERT(range_seg_cache == NULL);
	range_seg_cache = kmem_cache_create("range_seg_cache",
	    sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
range_tree_fini(void)
{
	kmem_cache_destroy(range_seg_cache);
	range_seg_cache = NULL;
}

void
range_tree_stat_verify(range_tree_t *rt)
{
	range_seg_t *rs;
	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
	int i;

	for (rs = avl_first(&rt->rt_root); rs != NULL;
	    rs = AVL_NEXT(&rt->rt_root, rs)) {
		uint64_t size = rs->rs_end - rs->rs_start;
		int idx = highbit64(size) - 1;

		hist[idx]++;
		ASSERT3U(hist[idx], !=, 0);
	}

	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
		if (hist[i] != rt->rt_histogram[i]) {
			zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
			    i, hist, hist[i], rt->rt_histogram[i]);
		}
		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
	}
}

static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	rt->rt_histogram[idx]++;
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT3U(rt->rt_histogram[idx], !=, 0);
	rt->rt_histogram[idx]--;
}

/*
 * NOTE: caller is responsible for all locking.
 */
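/*
 * The comparator below treats any two overlapping segments as equal:
 * it returns nonzero only when one segment lies entirely at or past
 * the other's end. As a result, avl_find() with a [start, end) search
 * key returns some segment overlapping that key, if one exists.
 * Illustrative sketch (hypothetical values, not part of this file):
 *
 *	range_seg_t a = { .rs_start = 0, .rs_end = 10 };
 *	range_seg_t b = { .rs_start = 5, .rs_end = 15 };
 *	range_tree_seg_compare(&a, &b);		returns 0 (overlap)
 *	b.rs_start = 10;
 *	range_tree_seg_compare(&a, &b);		returns -1 (a before b)
 */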
static int
range_tree_seg_compare(const void *x1, const void *x2)
{
	const range_seg_t *r1 = (const range_seg_t *)x1;
	const range_seg_t *r2 = (const range_seg_t *)x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}

range_tree_t *
range_tree_create(range_tree_ops_t *ops, void *arg)
{
	range_tree_t *rt;

	rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);

	avl_create(&rt->rt_root, range_tree_seg_compare,
	    sizeof (range_seg_t), offsetof(range_seg_t, rs_node));

	rt->rt_ops = ops;
	rt->rt_arg = arg;

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_create(rt, rt->rt_arg);

	return (rt);
}

void
range_tree_destroy(range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	avl_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}

void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs_before, *rs_after, *rs;
	uint64_t end = start + size;
	boolean_t merge_before, merge_after;

	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
		zfs_panic_recover("zfs: allocating allocated segment "
		    "(offset=%llu size=%llu)\n",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/* Make sure we don't overlap with either of our neighbors */
	VERIFY3P(rs, ==, NULL);

	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);

	merge_before = (rs_before != NULL && rs_before->rs_end == start);
	merge_after = (rs_after != NULL && rs_after->rs_start == end);

	if (merge_before && merge_after) {
		avl_remove(&rt->rt_root, rs_before);
		if (rt->rt_ops != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		range_tree_stat_decr(rt, rs_before);
		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = rs_before->rs_start;
		kmem_cache_free(range_seg_cache, rs_before);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		range_tree_stat_decr(rt, rs_before);

		rs_before->rs_end = end;
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = start;
		rs = rs_after;
	} else {
		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		rs->rs_start = start;
		rs->rs_end = end;
		avl_insert(&rt->rt_root, rs, where);
	}

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	range_tree_stat_incr(rt, rs);
	rt->rt_space += size;
}

void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs, *newseg;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	VERIFY3U(size, !=, 0);
	VERIFY3U(size, <=, rt->rt_space);

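	/*
	 * Look up a segment overlapping [start, start + size).  Because
	 * the comparator treats overlap as equality, avl_find() returns
	 * an overlapping segment here rather than NULL if one exists.
	 */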
	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	/* Make sure we completely overlap with someone */
	if (rs == NULL) {
		zfs_panic_recover("zfs: freeing free segment "
		    "(offset=%llu size=%llu)",
		    (longlong_t)start, (longlong_t)size);
		return;
	}
	VERIFY3U(rs->rs_start, <=, start);
	VERIFY3U(rs->rs_end, >=, end);

	left_over = (rs->rs_start != start);
	right_over = (rs->rs_end != end);

	range_tree_stat_decr(rt, rs);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	if (left_over && right_over) {
		newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		newseg->rs_start = end;
		newseg->rs_end = rs->rs_end;
		range_tree_stat_incr(rt, newseg);

		rs->rs_end = start;

		avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
	} else if (left_over) {
		rs->rs_end = start;
	} else if (right_over) {
		rs->rs_start = end;
	} else {
		avl_remove(&rt->rt_root, rs);
		kmem_cache_free(range_seg_cache, rs);
		rs = NULL;
	}

	if (rs != NULL) {
		range_tree_stat_incr(rt, rs);

		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
	}

	rt->rt_space -= size;
}

static range_seg_t *
range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t rsearch;
	uint64_t end = start + size;

	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	return (avl_find(&rt->rt_root, &rsearch, NULL));
}

static range_seg_t *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs = range_tree_find_impl(rt, start, size);
	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
		return (rs);
	return (NULL);
}

void
range_tree_verify_not_present(range_tree_t *rt, uint64_t off, uint64_t size)
{
	range_seg_t *rs = range_tree_find(rt, off, size);
	if (rs != NULL)
		panic("segment already in tree; rs=%p", (void *)rs);
}

boolean_t
range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
{
	return (range_tree_find(rt, start, size) != NULL);
}

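/*
 * Illustrative sketch of the lookup interfaces above (hypothetical
 * offsets, not part of this file).  range_tree_find_impl() matches any
 * overlap, while range_tree_find(), and therefore range_tree_contains(),
 * requires the queried range to lie entirely within one segment:
 *
 *	range_tree_add(rt, 0, 100);		tree holds [0, 100)
 *	range_tree_contains(rt, 50, 50);	B_TRUE: [50, 100) is
 *						fully inside [0, 100)
 *	range_tree_contains(rt, 50, 100);	B_FALSE: [50, 150) is
 *						only partially present
 */
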
330 */ 331 void 332 range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size) 333 { 334 range_seg_t *rs; 335 336 if (size == 0) 337 return; 338 339 while ((rs = range_tree_find_impl(rt, start, size)) != NULL) { 340 uint64_t free_start = MAX(rs->rs_start, start); 341 uint64_t free_end = MIN(rs->rs_end, start + size); 342 range_tree_remove(rt, free_start, free_end - free_start); 343 } 344 } 345 346 void 347 range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst) 348 { 349 range_tree_t *rt; 350 351 ASSERT0(range_tree_space(*rtdst)); 352 ASSERT0(avl_numnodes(&(*rtdst)->rt_root)); 353 354 rt = *rtsrc; 355 *rtsrc = *rtdst; 356 *rtdst = rt; 357 } 358 359 void 360 range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg) 361 { 362 range_seg_t *rs; 363 void *cookie = NULL; 364 365 366 if (rt->rt_ops != NULL) 367 rt->rt_ops->rtop_vacate(rt, rt->rt_arg); 368 369 while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) { 370 if (func != NULL) 371 func(arg, rs->rs_start, rs->rs_end - rs->rs_start); 372 kmem_cache_free(range_seg_cache, rs); 373 } 374 375 bzero(rt->rt_histogram, sizeof (rt->rt_histogram)); 376 rt->rt_space = 0; 377 } 378 379 void 380 range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg) 381 { 382 range_seg_t *rs; 383 384 for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs)) 385 func(arg, rs->rs_start, rs->rs_end - rs->rs_start); 386 } 387 388 uint64_t 389 range_tree_space(range_tree_t *rt) 390 { 391 return (rt->rt_space); 392 } 393 394 boolean_t 395 range_tree_is_empty(range_tree_t *rt) 396 { 397 ASSERT(rt != NULL); 398 return (range_tree_space(rt) == 0); 399 } 400 401 uint64_t 402 range_tree_min(range_tree_t *rt) 403 { 404 range_seg_t *rs = avl_first(&rt->rt_root); 405 return (rs != NULL ? rs->rs_start : 0); 406 } 407 408 uint64_t 409 range_tree_max(range_tree_t *rt) 410 { 411 range_seg_t *rs = avl_last(&rt->rt_root); 412 return (rs != NULL ? rs->rs_end : 0); 413 } 414 415 uint64_t 416 range_tree_span(range_tree_t *rt) 417 { 418 return (range_tree_max(rt) - range_tree_min(rt)); 419 } 420