1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/dsl_dataset.h> 26 #include <sys/dmu.h> 27 #include <sys/refcount.h> 28 #include <sys/zap.h> 29 #include <sys/zfs_context.h> 30 #include <sys/dsl_pool.h> 31 32 static int 33 dsl_deadlist_compare(const void *arg1, const void *arg2) 34 { 35 const dsl_deadlist_entry_t *dle1 = arg1; 36 const dsl_deadlist_entry_t *dle2 = arg2; 37 38 if (dle1->dle_mintxg < dle2->dle_mintxg) 39 return (-1); 40 else if (dle1->dle_mintxg > dle2->dle_mintxg) 41 return (+1); 42 else 43 return (0); 44 } 45 46 static void 47 dsl_deadlist_load_tree(dsl_deadlist_t *dl) 48 { 49 zap_cursor_t zc; 50 zap_attribute_t za; 51 52 ASSERT(!dl->dl_oldfmt); 53 if (dl->dl_havetree) 54 return; 55 56 avl_create(&dl->dl_tree, dsl_deadlist_compare, 57 sizeof (dsl_deadlist_entry_t), 58 offsetof(dsl_deadlist_entry_t, dle_node)); 59 for (zap_cursor_init(&zc, dl->dl_os, dl->dl_object); 60 zap_cursor_retrieve(&zc, &za) == 0; 61 zap_cursor_advance(&zc)) { 62 dsl_deadlist_entry_t *dle = kmem_alloc(sizeof (*dle), KM_SLEEP); 63 dle->dle_mintxg = strtonum(za.za_name, NULL); 64 VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, 65 za.za_first_integer)); 66 avl_add(&dl->dl_tree, dle); 67 } 68 zap_cursor_fini(&zc); 69 dl->dl_havetree = B_TRUE; 70 } 71 72 void 73 dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object) 74 { 75 dmu_object_info_t doi; 76 77 mutex_init(&dl->dl_lock, NULL, MUTEX_DEFAULT, NULL); 78 dl->dl_os = os; 79 dl->dl_object = object; 80 VERIFY3U(0, ==, dmu_bonus_hold(os, object, dl, &dl->dl_dbuf)); 81 dmu_object_info_from_db(dl->dl_dbuf, &doi); 82 if (doi.doi_type == DMU_OT_BPOBJ) { 83 dmu_buf_rele(dl->dl_dbuf, dl); 84 dl->dl_dbuf = NULL; 85 dl->dl_oldfmt = B_TRUE; 86 VERIFY3U(0, ==, bpobj_open(&dl->dl_bpobj, os, object)); 87 return; 88 } 89 90 dl->dl_oldfmt = B_FALSE; 91 dl->dl_phys = dl->dl_dbuf->db_data; 92 dl->dl_havetree = B_FALSE; 93 } 94 95 void 96 dsl_deadlist_close(dsl_deadlist_t *dl) 97 { 98 void *cookie = NULL; 99 dsl_deadlist_entry_t *dle; 100 101 if (dl->dl_oldfmt) { 102 dl->dl_oldfmt = B_FALSE; 103 bpobj_close(&dl->dl_bpobj); 104 return; 105 } 106 107 if (dl->dl_havetree) { 108 while ((dle = avl_destroy_nodes(&dl->dl_tree, &cookie)) 109 != NULL) { 110 bpobj_close(&dle->dle_bpobj); 111 kmem_free(dle, sizeof (*dle)); 112 } 113 avl_destroy(&dl->dl_tree); 114 } 115 dmu_buf_rele(dl->dl_dbuf, dl); 116 mutex_destroy(&dl->dl_lock); 117 dl->dl_dbuf = NULL; 118 dl->dl_phys = NULL; 119 } 120 121 uint64_t 122 dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx) 123 { 124 if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS) 125 return (bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx)); 126 return (zap_create(os, DMU_OT_DEADLIST, DMU_OT_DEADLIST_HDR, 127 sizeof (dsl_deadlist_phys_t), tx)); 128 } 129 130 void 131 dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx) 132 { 133 dmu_object_info_t doi; 134 zap_cursor_t zc; 135 zap_attribute_t za; 136 137 VERIFY3U(0, ==, dmu_object_info(os, dlobj, &doi)); 138 if (doi.doi_type == DMU_OT_BPOBJ) { 139 bpobj_free(os, dlobj, tx); 140 return; 141 } 142 143 for (zap_cursor_init(&zc, os, dlobj); 144 zap_cursor_retrieve(&zc, &za) == 0; 145 zap_cursor_advance(&zc)) 146 bpobj_free(os, za.za_first_integer, tx); 147 zap_cursor_fini(&zc); 148 VERIFY3U(0, ==, dmu_object_free(os, dlobj, tx)); 149 } 150 151 void 152 dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx) 153 { 154 dsl_deadlist_entry_t dle_tofind; 155 dsl_deadlist_entry_t *dle; 156 avl_index_t where; 157 158 if (dl->dl_oldfmt) { 159 bpobj_enqueue(&dl->dl_bpobj, bp, tx); 160 return; 161 } 162 163 dsl_deadlist_load_tree(dl); 164 165 dmu_buf_will_dirty(dl->dl_dbuf, tx); 166 mutex_enter(&dl->dl_lock); 167 dl->dl_phys->dl_used += 168 bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp); 169 dl->dl_phys->dl_comp += BP_GET_PSIZE(bp); 170 dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp); 171 mutex_exit(&dl->dl_lock); 172 173 dle_tofind.dle_mintxg = bp->blk_birth; 174 dle = avl_find(&dl->dl_tree, &dle_tofind, &where); 175 if (dle == NULL) 176 dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); 177 else 178 dle = AVL_PREV(&dl->dl_tree, dle); 179 bpobj_enqueue(&dle->dle_bpobj, bp, tx); 180 } 181 182 /* 183 * Insert new key in deadlist, which must be > all current entries. 184 * mintxg is not inclusive. 185 */ 186 void 187 dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx) 188 { 189 uint64_t obj; 190 dsl_deadlist_entry_t *dle; 191 192 if (dl->dl_oldfmt) 193 return; 194 195 dsl_deadlist_load_tree(dl); 196 197 dle = kmem_alloc(sizeof (*dle), KM_SLEEP); 198 dle->dle_mintxg = mintxg; 199 obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx); 200 VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj)); 201 avl_add(&dl->dl_tree, dle); 202 203 VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object, 204 mintxg, obj, tx)); 205 } 206 207 /* 208 * Remove this key, merging its entries into the previous key. 209 */ 210 void 211 dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx) 212 { 213 dsl_deadlist_entry_t dle_tofind; 214 dsl_deadlist_entry_t *dle, *dle_prev; 215 216 if (dl->dl_oldfmt) 217 return; 218 219 dsl_deadlist_load_tree(dl); 220 221 dle_tofind.dle_mintxg = mintxg; 222 dle = avl_find(&dl->dl_tree, &dle_tofind, NULL); 223 dle_prev = AVL_PREV(&dl->dl_tree, dle); 224 225 bpobj_enqueue_subobj(&dle_prev->dle_bpobj, 226 dle->dle_bpobj.bpo_object, tx); 227 228 avl_remove(&dl->dl_tree, dle); 229 bpobj_close(&dle->dle_bpobj); 230 kmem_free(dle, sizeof (*dle)); 231 232 VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx)); 233 } 234 235 /* 236 * Walk ds's snapshots to regenerate generate ZAP & AVL. 237 */ 238 static void 239 dsl_deadlist_regenerate(objset_t *os, uint64_t dlobj, 240 uint64_t mrs_obj, dmu_tx_t *tx) 241 { 242 dsl_deadlist_t dl; 243 dsl_pool_t *dp = dmu_objset_pool(os); 244 245 dsl_deadlist_open(&dl, os, dlobj); 246 if (dl.dl_oldfmt) { 247 dsl_deadlist_close(&dl); 248 return; 249 } 250 251 while (mrs_obj != 0) { 252 dsl_dataset_t *ds; 253 VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, mrs_obj, FTAG, &ds)); 254 dsl_deadlist_add_key(&dl, ds->ds_phys->ds_prev_snap_txg, tx); 255 mrs_obj = ds->ds_phys->ds_prev_snap_obj; 256 dsl_dataset_rele(ds, FTAG); 257 } 258 dsl_deadlist_close(&dl); 259 } 260 261 uint64_t 262 dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg, 263 uint64_t mrs_obj, dmu_tx_t *tx) 264 { 265 dsl_deadlist_entry_t *dle; 266 uint64_t newobj; 267 268 newobj = dsl_deadlist_alloc(dl->dl_os, tx); 269 270 if (dl->dl_oldfmt) { 271 dsl_deadlist_regenerate(dl->dl_os, newobj, mrs_obj, tx); 272 return (newobj); 273 } 274 275 dsl_deadlist_load_tree(dl); 276 277 for (dle = avl_first(&dl->dl_tree); dle; 278 dle = AVL_NEXT(&dl->dl_tree, dle)) { 279 uint64_t obj; 280 281 if (dle->dle_mintxg >= maxtxg) 282 break; 283 284 obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx); 285 VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj, 286 dle->dle_mintxg, obj, tx)); 287 } 288 return (newobj); 289 } 290 291 void 292 dsl_deadlist_space(dsl_deadlist_t *dl, 293 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 294 { 295 if (dl->dl_oldfmt) { 296 VERIFY3U(0, ==, bpobj_space(&dl->dl_bpobj, 297 usedp, compp, uncompp)); 298 return; 299 } 300 301 mutex_enter(&dl->dl_lock); 302 *usedp = dl->dl_phys->dl_used; 303 *compp = dl->dl_phys->dl_comp; 304 *uncompp = dl->dl_phys->dl_uncomp; 305 mutex_exit(&dl->dl_lock); 306 } 307 308 /* 309 * return space used in the range (mintxg, maxtxg]. 310 * Includes maxtxg, does not include mintxg. 311 * mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is 312 * UINT64_MAX). 313 */ 314 void 315 dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg, 316 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 317 { 318 dsl_deadlist_entry_t dle_tofind; 319 dsl_deadlist_entry_t *dle; 320 avl_index_t where; 321 322 if (dl->dl_oldfmt) { 323 VERIFY3U(0, ==, bpobj_space_range(&dl->dl_bpobj, 324 mintxg, maxtxg, usedp, compp, uncompp)); 325 return; 326 } 327 328 dsl_deadlist_load_tree(dl); 329 *usedp = *compp = *uncompp = 0; 330 331 dle_tofind.dle_mintxg = mintxg; 332 dle = avl_find(&dl->dl_tree, &dle_tofind, &where); 333 /* 334 * If we don't find this mintxg, there shouldn't be anything 335 * after it either. 336 */ 337 ASSERT(dle != NULL || 338 avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL); 339 for (; dle && dle->dle_mintxg < maxtxg; 340 dle = AVL_NEXT(&dl->dl_tree, dle)) { 341 uint64_t used, comp, uncomp; 342 343 VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj, 344 &used, &comp, &uncomp)); 345 346 *usedp += used; 347 *compp += comp; 348 *uncompp += uncomp; 349 } 350 } 351 352 static void 353 dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth, 354 dmu_tx_t *tx) 355 { 356 dsl_deadlist_entry_t dle_tofind; 357 dsl_deadlist_entry_t *dle; 358 avl_index_t where; 359 uint64_t used, comp, uncomp; 360 bpobj_t bpo; 361 362 VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj)); 363 VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp)); 364 bpobj_close(&bpo); 365 366 dsl_deadlist_load_tree(dl); 367 368 dmu_buf_will_dirty(dl->dl_dbuf, tx); 369 mutex_enter(&dl->dl_lock); 370 dl->dl_phys->dl_used += used; 371 dl->dl_phys->dl_comp += comp; 372 dl->dl_phys->dl_uncomp += uncomp; 373 mutex_exit(&dl->dl_lock); 374 375 dle_tofind.dle_mintxg = birth; 376 dle = avl_find(&dl->dl_tree, &dle_tofind, &where); 377 if (dle == NULL) 378 dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); 379 bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx); 380 } 381 382 static int 383 dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 384 { 385 dsl_deadlist_t *dl = arg; 386 dsl_deadlist_insert(dl, bp, tx); 387 return (0); 388 } 389 390 /* 391 * Merge the deadlist pointed to by 'obj' into dl. obj will be left as 392 * an empty deadlist. 393 */ 394 void 395 dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx) 396 { 397 zap_cursor_t zc; 398 zap_attribute_t za; 399 dmu_buf_t *bonus; 400 dsl_deadlist_phys_t *dlp; 401 dmu_object_info_t doi; 402 403 VERIFY3U(0, ==, dmu_object_info(dl->dl_os, obj, &doi)); 404 if (doi.doi_type == DMU_OT_BPOBJ) { 405 bpobj_t bpo; 406 VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj)); 407 VERIFY3U(0, ==, bpobj_iterate(&bpo, 408 dsl_deadlist_insert_cb, dl, tx)); 409 bpobj_close(&bpo); 410 return; 411 } 412 413 for (zap_cursor_init(&zc, dl->dl_os, obj); 414 zap_cursor_retrieve(&zc, &za) == 0; 415 zap_cursor_advance(&zc)) { 416 uint64_t mintxg = strtonum(za.za_name, NULL); 417 dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx); 418 VERIFY3U(0, ==, zap_remove_int(dl->dl_os, obj, mintxg, tx)); 419 } 420 zap_cursor_fini(&zc); 421 422 VERIFY3U(0, ==, dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus)); 423 dlp = bonus->db_data; 424 dmu_buf_will_dirty(bonus, tx); 425 bzero(dlp, sizeof (*dlp)); 426 dmu_buf_rele(bonus, FTAG); 427 } 428 429 /* 430 * Remove entries on dl that are >= mintxg, and put them on the bpobj. 431 */ 432 void 433 dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, 434 dmu_tx_t *tx) 435 { 436 dsl_deadlist_entry_t dle_tofind; 437 dsl_deadlist_entry_t *dle; 438 avl_index_t where; 439 440 ASSERT(!dl->dl_oldfmt); 441 dmu_buf_will_dirty(dl->dl_dbuf, tx); 442 dsl_deadlist_load_tree(dl); 443 444 dle_tofind.dle_mintxg = mintxg; 445 dle = avl_find(&dl->dl_tree, &dle_tofind, &where); 446 if (dle == NULL) 447 dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER); 448 while (dle) { 449 uint64_t used, comp, uncomp; 450 dsl_deadlist_entry_t *dle_next; 451 452 bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx); 453 454 VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj, 455 &used, &comp, &uncomp)); 456 mutex_enter(&dl->dl_lock); 457 ASSERT3U(dl->dl_phys->dl_used, >=, used); 458 ASSERT3U(dl->dl_phys->dl_comp, >=, comp); 459 ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp); 460 dl->dl_phys->dl_used -= used; 461 dl->dl_phys->dl_comp -= comp; 462 dl->dl_phys->dl_uncomp -= uncomp; 463 mutex_exit(&dl->dl_lock); 464 465 VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, 466 dle->dle_mintxg, tx)); 467 468 dle_next = AVL_NEXT(&dl->dl_tree, dle); 469 avl_remove(&dl->dl_tree, dle); 470 bpobj_close(&dle->dle_bpobj); 471 kmem_free(dle, sizeof (*dle)); 472 dle = dle_next; 473 } 474 } 475