1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
25  * Copyright (c) 2014 Integros [integros.com]
26  */
27 
28 #include <sys/dsl_dataset.h>
29 #include <sys/dmu.h>
30 #include <sys/refcount.h>
31 #include <sys/zap.h>
32 #include <sys/zfs_context.h>
33 #include <sys/dsl_pool.h>
34 
/*
 * Deadlist concurrency:
 *
 * Deadlists can only be modified from the syncing thread.
 *
 * Except for dsl_deadlist_insert(), a deadlist can only be modified with
 * the dp_config_rwlock held with RW_WRITER.
 *
 * The accessors (dsl_deadlist_space() and dsl_deadlist_space_range()) can
 * be called concurrently, from open context, with the dp_config_rwlock held
 * with RW_READER.
 *
 * Therefore, we only need to provide locking between dsl_deadlist_insert() and
 * the accessors, protecting:
 *     dl_phys->dl_used,comp,uncomp
 *     and protecting the dl_tree from being loaded.
 * The locking is provided by dl_lock.  Note that the bpobj_t provides its
 * own locking, and dl_oldfmt is immutable.
 */
54 
55 static int
dsl_deadlist_compare(const void * arg1,const void * arg2)56 dsl_deadlist_compare(const void *arg1, const void *arg2)
57 {
58 	const dsl_deadlist_entry_t *dle1 = (const dsl_deadlist_entry_t *)arg1;
59 	const dsl_deadlist_entry_t *dle2 = (const dsl_deadlist_entry_t *)arg2;
60 
61 	return (TREE_CMP(dle1->dle_mintxg, dle2->dle_mintxg));
62 }
63 
64 static void
dsl_deadlist_load_tree(dsl_deadlist_t * dl)65 dsl_deadlist_load_tree(dsl_deadlist_t *dl)
66 {
67 	zap_cursor_t zc;
68 	zap_attribute_t za;
69 
70 	ASSERT(MUTEX_HELD(&dl->dl_lock));
71 
72 	ASSERT(!dl->dl_oldfmt);
73 	if (dl->dl_havetree)
74 		return;
75 
76 	avl_create(&dl->dl_tree, dsl_deadlist_compare,
77 	    sizeof (dsl_deadlist_entry_t),
78 	    offsetof(dsl_deadlist_entry_t, dle_node));
79 	for (zap_cursor_init(&zc, dl->dl_os, dl->dl_object);
80 	    zap_cursor_retrieve(&zc, &za) == 0;
81 	    zap_cursor_advance(&zc)) {
82 		dsl_deadlist_entry_t *dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
83 		dle->dle_mintxg = zfs_strtonum(za.za_name, NULL);
84 		VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os,
85 		    za.za_first_integer));
86 		avl_add(&dl->dl_tree, dle);
87 	}
88 	zap_cursor_fini(&zc);
89 	dl->dl_havetree = B_TRUE;
90 }
91 
92 void
dsl_deadlist_open(dsl_deadlist_t * dl,objset_t * os,uint64_t object)93 dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object)
94 {
95 	dmu_object_info_t doi;
96 
97 	ASSERT(!dsl_deadlist_is_open(dl));
98 
99 	mutex_init(&dl->dl_lock, NULL, MUTEX_DEFAULT, NULL);
100 	dl->dl_os = os;
101 	dl->dl_object = object;
102 	VERIFY3U(0, ==, dmu_bonus_hold(os, object, dl, &dl->dl_dbuf));
103 	dmu_object_info_from_db(dl->dl_dbuf, &doi);
104 	if (doi.doi_type == DMU_OT_BPOBJ) {
105 		dmu_buf_rele(dl->dl_dbuf, dl);
106 		dl->dl_dbuf = NULL;
107 		dl->dl_oldfmt = B_TRUE;
108 		VERIFY3U(0, ==, bpobj_open(&dl->dl_bpobj, os, object));
109 		return;
110 	}
111 
112 	dl->dl_oldfmt = B_FALSE;
113 	dl->dl_phys = dl->dl_dbuf->db_data;
114 	dl->dl_havetree = B_FALSE;
115 }
116 
117 boolean_t
dsl_deadlist_is_open(dsl_deadlist_t * dl)118 dsl_deadlist_is_open(dsl_deadlist_t *dl)
119 {
120 	return (dl->dl_os != NULL);
121 }
122 
123 void
dsl_deadlist_close(dsl_deadlist_t * dl)124 dsl_deadlist_close(dsl_deadlist_t *dl)
125 {
126 	void *cookie = NULL;
127 	dsl_deadlist_entry_t *dle;
128 
129 	ASSERT(dsl_deadlist_is_open(dl));
130 
131 	if (dl->dl_oldfmt) {
132 		dl->dl_oldfmt = B_FALSE;
133 		bpobj_close(&dl->dl_bpobj);
134 		dl->dl_os = NULL;
135 		dl->dl_object = 0;
136 		return;
137 	}
138 
139 	if (dl->dl_havetree) {
140 		while ((dle = avl_destroy_nodes(&dl->dl_tree, &cookie))
141 		    != NULL) {
142 			bpobj_close(&dle->dle_bpobj);
143 			kmem_free(dle, sizeof (*dle));
144 		}
145 		avl_destroy(&dl->dl_tree);
146 	}
147 	dmu_buf_rele(dl->dl_dbuf, dl);
148 	mutex_destroy(&dl->dl_lock);
149 	dl->dl_dbuf = NULL;
150 	dl->dl_phys = NULL;
151 	dl->dl_os = NULL;
152 	dl->dl_object = 0;
153 }
154 
155 uint64_t
dsl_deadlist_alloc(objset_t * os,dmu_tx_t * tx)156 dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx)
157 {
158 	if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS)
159 		return (bpobj_alloc(os, SPA_OLD_MAXBLOCKSIZE, tx));
160 	return (zap_create(os, DMU_OT_DEADLIST, DMU_OT_DEADLIST_HDR,
161 	    sizeof (dsl_deadlist_phys_t), tx));
162 }
163 
164 void
dsl_deadlist_free(objset_t * os,uint64_t dlobj,dmu_tx_t * tx)165 dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx)
166 {
167 	dmu_object_info_t doi;
168 	zap_cursor_t zc;
169 	zap_attribute_t za;
170 
171 	VERIFY3U(0, ==, dmu_object_info(os, dlobj, &doi));
172 	if (doi.doi_type == DMU_OT_BPOBJ) {
173 		bpobj_free(os, dlobj, tx);
174 		return;
175 	}
176 
177 	for (zap_cursor_init(&zc, os, dlobj);
178 	    zap_cursor_retrieve(&zc, &za) == 0;
179 	    zap_cursor_advance(&zc)) {
180 		uint64_t obj = za.za_first_integer;
181 		if (obj == dmu_objset_pool(os)->dp_empty_bpobj)
182 			bpobj_decr_empty(os, tx);
183 		else
184 			bpobj_free(os, obj, tx);
185 	}
186 	zap_cursor_fini(&zc);
187 	VERIFY3U(0, ==, dmu_object_free(os, dlobj, tx));
188 }
189 
190 static void
dle_enqueue(dsl_deadlist_t * dl,dsl_deadlist_entry_t * dle,const blkptr_t * bp,dmu_tx_t * tx)191 dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
192     const blkptr_t *bp, dmu_tx_t *tx)
193 {
194 	ASSERT(MUTEX_HELD(&dl->dl_lock));
195 	if (dle->dle_bpobj.bpo_object ==
196 	    dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
197 		uint64_t obj = bpobj_alloc(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
198 		bpobj_close(&dle->dle_bpobj);
199 		bpobj_decr_empty(dl->dl_os, tx);
200 		VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
201 		VERIFY3U(0, ==, zap_update_int_key(dl->dl_os, dl->dl_object,
202 		    dle->dle_mintxg, obj, tx));
203 	}
204 	bpobj_enqueue(&dle->dle_bpobj, bp, tx);
205 }
206 
207 static void
dle_enqueue_subobj(dsl_deadlist_t * dl,dsl_deadlist_entry_t * dle,uint64_t obj,dmu_tx_t * tx)208 dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
209     uint64_t obj, dmu_tx_t *tx)
210 {
211 	ASSERT(MUTEX_HELD(&dl->dl_lock));
212 	if (dle->dle_bpobj.bpo_object !=
213 	    dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
214 		bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx);
215 	} else {
216 		bpobj_close(&dle->dle_bpobj);
217 		bpobj_decr_empty(dl->dl_os, tx);
218 		VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
219 		VERIFY3U(0, ==, zap_update_int_key(dl->dl_os, dl->dl_object,
220 		    dle->dle_mintxg, obj, tx));
221 	}
222 }
223 
224 void
dsl_deadlist_insert(dsl_deadlist_t * dl,const blkptr_t * bp,dmu_tx_t * tx)225 dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
226 {
227 	dsl_deadlist_entry_t dle_tofind;
228 	dsl_deadlist_entry_t *dle;
229 	avl_index_t where;
230 
231 	if (dl->dl_oldfmt) {
232 		bpobj_enqueue(&dl->dl_bpobj, bp, tx);
233 		return;
234 	}
235 
236 	mutex_enter(&dl->dl_lock);
237 	dsl_deadlist_load_tree(dl);
238 
239 	dmu_buf_will_dirty(dl->dl_dbuf, tx);
240 	dl->dl_phys->dl_used +=
241 	    bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp);
242 	dl->dl_phys->dl_comp += BP_GET_PSIZE(bp);
243 	dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp);
244 
245 	dle_tofind.dle_mintxg = bp->blk_birth;
246 	dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
247 	if (dle == NULL)
248 		dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
249 	else
250 		dle = AVL_PREV(&dl->dl_tree, dle);
251 	dle_enqueue(dl, dle, bp, tx);
252 	mutex_exit(&dl->dl_lock);
253 }
254 
255 /*
256  * Insert new key in deadlist, which must be > all current entries.
257  * mintxg is not inclusive.
258  */
259 void
dsl_deadlist_add_key(dsl_deadlist_t * dl,uint64_t mintxg,dmu_tx_t * tx)260 dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
261 {
262 	uint64_t obj;
263 	dsl_deadlist_entry_t *dle;
264 
265 	if (dl->dl_oldfmt)
266 		return;
267 
268 	dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
269 	dle->dle_mintxg = mintxg;
270 
271 	mutex_enter(&dl->dl_lock);
272 	dsl_deadlist_load_tree(dl);
273 
274 	obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
275 	VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
276 	avl_add(&dl->dl_tree, dle);
277 
278 	VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object,
279 	    mintxg, obj, tx));
280 	mutex_exit(&dl->dl_lock);
281 }
282 
283 /*
284  * Remove this key, merging its entries into the previous key.
285  */
286 void
dsl_deadlist_remove_key(dsl_deadlist_t * dl,uint64_t mintxg,dmu_tx_t * tx)287 dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
288 {
289 	dsl_deadlist_entry_t dle_tofind;
290 	dsl_deadlist_entry_t *dle, *dle_prev;
291 
292 	if (dl->dl_oldfmt)
293 		return;
294 
295 	mutex_enter(&dl->dl_lock);
296 	dsl_deadlist_load_tree(dl);
297 
298 	dle_tofind.dle_mintxg = mintxg;
299 	dle = avl_find(&dl->dl_tree, &dle_tofind, NULL);
300 	dle_prev = AVL_PREV(&dl->dl_tree, dle);
301 
302 	dle_enqueue_subobj(dl, dle_prev, dle->dle_bpobj.bpo_object, tx);
303 
304 	avl_remove(&dl->dl_tree, dle);
305 	bpobj_close(&dle->dle_bpobj);
306 	kmem_free(dle, sizeof (*dle));
307 
308 	VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
309 	mutex_exit(&dl->dl_lock);
310 }
311 
312 /*
313  * Walk ds's snapshots to regenerate generate ZAP & AVL.
314  */
315 static void
dsl_deadlist_regenerate(objset_t * os,uint64_t dlobj,uint64_t mrs_obj,dmu_tx_t * tx)316 dsl_deadlist_regenerate(objset_t *os, uint64_t dlobj,
317     uint64_t mrs_obj, dmu_tx_t *tx)
318 {
319 	dsl_deadlist_t dl = { 0 };
320 	dsl_pool_t *dp = dmu_objset_pool(os);
321 
322 	dsl_deadlist_open(&dl, os, dlobj);
323 	if (dl.dl_oldfmt) {
324 		dsl_deadlist_close(&dl);
325 		return;
326 	}
327 
328 	while (mrs_obj != 0) {
329 		dsl_dataset_t *ds;
330 		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, mrs_obj, FTAG, &ds));
331 		dsl_deadlist_add_key(&dl,
332 		    dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);
333 		mrs_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
334 		dsl_dataset_rele(ds, FTAG);
335 	}
336 	dsl_deadlist_close(&dl);
337 }
338 
339 uint64_t
dsl_deadlist_clone(dsl_deadlist_t * dl,uint64_t maxtxg,uint64_t mrs_obj,dmu_tx_t * tx)340 dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
341     uint64_t mrs_obj, dmu_tx_t *tx)
342 {
343 	dsl_deadlist_entry_t *dle;
344 	uint64_t newobj;
345 
346 	newobj = dsl_deadlist_alloc(dl->dl_os, tx);
347 
348 	if (dl->dl_oldfmt) {
349 		dsl_deadlist_regenerate(dl->dl_os, newobj, mrs_obj, tx);
350 		return (newobj);
351 	}
352 
353 	mutex_enter(&dl->dl_lock);
354 	dsl_deadlist_load_tree(dl);
355 
356 	for (dle = avl_first(&dl->dl_tree); dle;
357 	    dle = AVL_NEXT(&dl->dl_tree, dle)) {
358 		uint64_t obj;
359 
360 		if (dle->dle_mintxg >= maxtxg)
361 			break;
362 
363 		obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
364 		VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
365 		    dle->dle_mintxg, obj, tx));
366 	}
367 	mutex_exit(&dl->dl_lock);
368 	return (newobj);
369 }
370 
371 void
dsl_deadlist_space(dsl_deadlist_t * dl,uint64_t * usedp,uint64_t * compp,uint64_t * uncompp)372 dsl_deadlist_space(dsl_deadlist_t *dl,
373     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
374 {
375 	ASSERT(dsl_deadlist_is_open(dl));
376 	if (dl->dl_oldfmt) {
377 		VERIFY3U(0, ==, bpobj_space(&dl->dl_bpobj,
378 		    usedp, compp, uncompp));
379 		return;
380 	}
381 
382 	mutex_enter(&dl->dl_lock);
383 	*usedp = dl->dl_phys->dl_used;
384 	*compp = dl->dl_phys->dl_comp;
385 	*uncompp = dl->dl_phys->dl_uncomp;
386 	mutex_exit(&dl->dl_lock);
387 }
388 
389 /*
390  * return space used in the range (mintxg, maxtxg].
391  * Includes maxtxg, does not include mintxg.
392  * mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is
393  * larger than any bp in the deadlist (eg. UINT64_MAX)).
394  */
395 void
dsl_deadlist_space_range(dsl_deadlist_t * dl,uint64_t mintxg,uint64_t maxtxg,uint64_t * usedp,uint64_t * compp,uint64_t * uncompp)396 dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
397     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
398 {
399 	dsl_deadlist_entry_t *dle;
400 	dsl_deadlist_entry_t dle_tofind;
401 	avl_index_t where;
402 
403 	if (dl->dl_oldfmt) {
404 		VERIFY3U(0, ==, bpobj_space_range(&dl->dl_bpobj,
405 		    mintxg, maxtxg, usedp, compp, uncompp));
406 		return;
407 	}
408 
409 	*usedp = *compp = *uncompp = 0;
410 
411 	mutex_enter(&dl->dl_lock);
412 	dsl_deadlist_load_tree(dl);
413 	dle_tofind.dle_mintxg = mintxg;
414 	dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
415 	/*
416 	 * If we don't find this mintxg, there shouldn't be anything
417 	 * after it either.
418 	 */
419 	ASSERT(dle != NULL ||
420 	    avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL);
421 
422 	for (; dle && dle->dle_mintxg < maxtxg;
423 	    dle = AVL_NEXT(&dl->dl_tree, dle)) {
424 		uint64_t used, comp, uncomp;
425 
426 		VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
427 		    &used, &comp, &uncomp));
428 
429 		*usedp += used;
430 		*compp += comp;
431 		*uncompp += uncomp;
432 	}
433 	mutex_exit(&dl->dl_lock);
434 }
435 
436 static void
dsl_deadlist_insert_bpobj(dsl_deadlist_t * dl,uint64_t obj,uint64_t birth,dmu_tx_t * tx)437 dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
438     dmu_tx_t *tx)
439 {
440 	dsl_deadlist_entry_t dle_tofind;
441 	dsl_deadlist_entry_t *dle;
442 	avl_index_t where;
443 	uint64_t used, comp, uncomp;
444 	bpobj_t bpo;
445 
446 	ASSERT(MUTEX_HELD(&dl->dl_lock));
447 
448 	VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
449 	VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp));
450 	bpobj_close(&bpo);
451 
452 	dsl_deadlist_load_tree(dl);
453 
454 	dmu_buf_will_dirty(dl->dl_dbuf, tx);
455 	dl->dl_phys->dl_used += used;
456 	dl->dl_phys->dl_comp += comp;
457 	dl->dl_phys->dl_uncomp += uncomp;
458 
459 	dle_tofind.dle_mintxg = birth;
460 	dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
461 	if (dle == NULL)
462 		dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
463 	dle_enqueue_subobj(dl, dle, obj, tx);
464 }
465 
466 static int
dsl_deadlist_insert_cb(void * arg,const blkptr_t * bp,dmu_tx_t * tx)467 dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
468 {
469 	dsl_deadlist_t *dl = arg;
470 	dsl_deadlist_insert(dl, bp, tx);
471 	return (0);
472 }
473 
474 /*
475  * Merge the deadlist pointed to by 'obj' into dl.  obj will be left as
476  * an empty deadlist.
477  */
478 void
dsl_deadlist_merge(dsl_deadlist_t * dl,uint64_t obj,dmu_tx_t * tx)479 dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
480 {
481 	zap_cursor_t zc;
482 	zap_attribute_t za;
483 	dmu_buf_t *bonus;
484 	dsl_deadlist_phys_t *dlp;
485 	dmu_object_info_t doi;
486 
487 	VERIFY3U(0, ==, dmu_object_info(dl->dl_os, obj, &doi));
488 	if (doi.doi_type == DMU_OT_BPOBJ) {
489 		bpobj_t bpo;
490 		VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
491 		VERIFY3U(0, ==, bpobj_iterate(&bpo,
492 		    dsl_deadlist_insert_cb, dl, tx));
493 		bpobj_close(&bpo);
494 		return;
495 	}
496 
497 	mutex_enter(&dl->dl_lock);
498 	for (zap_cursor_init(&zc, dl->dl_os, obj);
499 	    zap_cursor_retrieve(&zc, &za) == 0;
500 	    zap_cursor_advance(&zc)) {
501 		uint64_t mintxg = zfs_strtonum(za.za_name, NULL);
502 		dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx);
503 		VERIFY3U(0, ==, zap_remove_int(dl->dl_os, obj, mintxg, tx));
504 	}
505 	zap_cursor_fini(&zc);
506 
507 	VERIFY3U(0, ==, dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus));
508 	dlp = bonus->db_data;
509 	dmu_buf_will_dirty(bonus, tx);
510 	bzero(dlp, sizeof (*dlp));
511 	dmu_buf_rele(bonus, FTAG);
512 	mutex_exit(&dl->dl_lock);
513 }
514 
515 /*
516  * Remove entries on dl that are >= mintxg, and put them on the bpobj.
517  */
518 void
dsl_deadlist_move_bpobj(dsl_deadlist_t * dl,bpobj_t * bpo,uint64_t mintxg,dmu_tx_t * tx)519 dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
520     dmu_tx_t *tx)
521 {
522 	dsl_deadlist_entry_t dle_tofind;
523 	dsl_deadlist_entry_t *dle;
524 	avl_index_t where;
525 
526 	ASSERT(!dl->dl_oldfmt);
527 
528 	mutex_enter(&dl->dl_lock);
529 	dmu_buf_will_dirty(dl->dl_dbuf, tx);
530 	dsl_deadlist_load_tree(dl);
531 
532 	dle_tofind.dle_mintxg = mintxg;
533 	dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
534 	if (dle == NULL)
535 		dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER);
536 	while (dle) {
537 		uint64_t used, comp, uncomp;
538 		dsl_deadlist_entry_t *dle_next;
539 
540 		bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx);
541 
542 		VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
543 		    &used, &comp, &uncomp));
544 		ASSERT3U(dl->dl_phys->dl_used, >=, used);
545 		ASSERT3U(dl->dl_phys->dl_comp, >=, comp);
546 		ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp);
547 		dl->dl_phys->dl_used -= used;
548 		dl->dl_phys->dl_comp -= comp;
549 		dl->dl_phys->dl_uncomp -= uncomp;
550 
551 		VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object,
552 		    dle->dle_mintxg, tx));
553 
554 		dle_next = AVL_NEXT(&dl->dl_tree, dle);
555 		avl_remove(&dl->dl_tree, dle);
556 		bpobj_close(&dle->dle_bpobj);
557 		kmem_free(dle, sizeof (*dle));
558 		dle = dle_next;
559 	}
560 	mutex_exit(&dl->dl_lock);
561 }
562