1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
2255434c77Sek  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23fa9e4066Sahrens  * Use is subject to license terms.
24fa9e4066Sahrens  */
25fa9e4066Sahrens 
26fa9e4066Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27fa9e4066Sahrens 
28fa9e4066Sahrens #include <sys/dmu_objset.h>
29fa9e4066Sahrens #include <sys/dsl_dataset.h>
30fa9e4066Sahrens #include <sys/dsl_dir.h>
3199653d4eSeschrock #include <sys/dsl_prop.h>
321d452cf5Sahrens #include <sys/dsl_synctask.h>
33fa9e4066Sahrens #include <sys/dmu_traverse.h>
34fa9e4066Sahrens #include <sys/dmu_tx.h>
35fa9e4066Sahrens #include <sys/arc.h>
36fa9e4066Sahrens #include <sys/zio.h>
37fa9e4066Sahrens #include <sys/zap.h>
38fa9e4066Sahrens #include <sys/unique.h>
39fa9e4066Sahrens #include <sys/zfs_context.h>
40cdf5b4caSmmusante #include <sys/zfs_ioctl.h>
41ecd6cf80Smarks #include <sys/spa.h>
42ecd6cf80Smarks #include <sys/sunddi.h>
43fa9e4066Sahrens 
441d452cf5Sahrens static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
451d452cf5Sahrens static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
461d452cf5Sahrens static dsl_checkfunc_t dsl_dataset_rollback_check;
471d452cf5Sahrens static dsl_syncfunc_t dsl_dataset_rollback_sync;
48e1930233Sbonwick 
4955434c77Sek #define	DS_REF_MAX	(1ULL << 62)
50fa9e4066Sahrens 
51fa9e4066Sahrens #define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
52fa9e4066Sahrens 
53fa9e4066Sahrens /*
54fa9e4066Sahrens  * We use weighted reference counts to express the various forms of exclusion
55fa9e4066Sahrens  * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
5655434c77Sek  * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
57fa9e4066Sahrens  * This makes the exclusion logic simple: the total refcnt for all opens cannot
5855434c77Sek  * exceed DS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
5955434c77Sek  * weight (DS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
60fa9e4066Sahrens  * just over half of the refcnt space, so there can't be more than one, but it
61fa9e4066Sahrens  * can peacefully coexist with any number of STANDARD opens.
62fa9e4066Sahrens  */
63fa9e4066Sahrens static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
6455434c77Sek 	0,			/* DS_MODE_NONE - invalid		*/
6555434c77Sek 	1,			/* DS_MODE_STANDARD - unlimited number	*/
6655434c77Sek 	(DS_REF_MAX >> 1) + 1,	/* DS_MODE_PRIMARY - only one of these	*/
6755434c77Sek 	DS_REF_MAX		/* DS_MODE_EXCLUSIVE - no other opens	*/
68fa9e4066Sahrens };
69fa9e4066Sahrens 
70fa9e4066Sahrens 
71fa9e4066Sahrens void
72fa9e4066Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
73fa9e4066Sahrens {
7499653d4eSeschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
75fa9e4066Sahrens 	int compressed = BP_GET_PSIZE(bp);
76fa9e4066Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
77fa9e4066Sahrens 
78fa9e4066Sahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
79fa9e4066Sahrens 
80fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
81fa9e4066Sahrens 	/* It could have been compressed away to nothing */
82fa9e4066Sahrens 	if (BP_IS_HOLE(bp))
83fa9e4066Sahrens 		return;
84fa9e4066Sahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
85fa9e4066Sahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
86fa9e4066Sahrens 	if (ds == NULL) {
87fa9e4066Sahrens 		/*
88fa9e4066Sahrens 		 * Account for the meta-objset space in its placeholder
89fa9e4066Sahrens 		 * dsl_dir.
90fa9e4066Sahrens 		 */
91fa9e4066Sahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
92fa9e4066Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
93fa9e4066Sahrens 		    used, compressed, uncompressed, tx);
94fa9e4066Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
95fa9e4066Sahrens 		return;
96fa9e4066Sahrens 	}
97fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
98fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
99fa9e4066Sahrens 	ds->ds_phys->ds_used_bytes += used;
100fa9e4066Sahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
101fa9e4066Sahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
102fa9e4066Sahrens 	ds->ds_phys->ds_unique_bytes += used;
103fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
104fa9e4066Sahrens 	dsl_dir_diduse_space(ds->ds_dir,
105fa9e4066Sahrens 	    used, compressed, uncompressed, tx);
106fa9e4066Sahrens }
107fa9e4066Sahrens 
108fa9e4066Sahrens void
109c717a561Smaybee dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
110c717a561Smaybee     dmu_tx_t *tx)
111fa9e4066Sahrens {
11299653d4eSeschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
113fa9e4066Sahrens 	int compressed = BP_GET_PSIZE(bp);
114fa9e4066Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
115fa9e4066Sahrens 
116fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
117c717a561Smaybee 	/* No block pointer => nothing to free */
118fa9e4066Sahrens 	if (BP_IS_HOLE(bp))
119fa9e4066Sahrens 		return;
120fa9e4066Sahrens 
121fa9e4066Sahrens 	ASSERT(used > 0);
122fa9e4066Sahrens 	if (ds == NULL) {
123c717a561Smaybee 		int err;
124fa9e4066Sahrens 		/*
125fa9e4066Sahrens 		 * Account for the meta-objset space in its placeholder
126fa9e4066Sahrens 		 * dataset.
127fa9e4066Sahrens 		 */
128c717a561Smaybee 		err = arc_free(pio, tx->tx_pool->dp_spa,
129c717a561Smaybee 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
130c717a561Smaybee 		ASSERT(err == 0);
131fa9e4066Sahrens 
132fa9e4066Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
133fa9e4066Sahrens 		    -used, -compressed, -uncompressed, tx);
134fa9e4066Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
135fa9e4066Sahrens 		return;
136fa9e4066Sahrens 	}
137fa9e4066Sahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
138fa9e4066Sahrens 
139fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
140fa9e4066Sahrens 
141fa9e4066Sahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
142c717a561Smaybee 		int err;
143c717a561Smaybee 
144fa9e4066Sahrens 		dprintf_bp(bp, "freeing: %s", "");
145c717a561Smaybee 		err = arc_free(pio, tx->tx_pool->dp_spa,
146c717a561Smaybee 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
147c717a561Smaybee 		ASSERT(err == 0);
148fa9e4066Sahrens 
149fa9e4066Sahrens 		mutex_enter(&ds->ds_lock);
150fa9e4066Sahrens 		/* XXX unique_bytes is not accurate for head datasets */
151fa9e4066Sahrens 		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
152fa9e4066Sahrens 		ds->ds_phys->ds_unique_bytes -= used;
153fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
154fa9e4066Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
155fa9e4066Sahrens 		    -used, -compressed, -uncompressed, tx);
156fa9e4066Sahrens 	} else {
157fa9e4066Sahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
158ea8dc4b6Seschrock 		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
159fa9e4066Sahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
160fa9e4066Sahrens 		if (ds->ds_phys->ds_prev_snap_obj != 0) {
161fa9e4066Sahrens 			ASSERT3U(ds->ds_prev->ds_object, ==,
162fa9e4066Sahrens 			    ds->ds_phys->ds_prev_snap_obj);
163fa9e4066Sahrens 			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
164fa9e4066Sahrens 			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
16599653d4eSeschrock 			    ds->ds_object && bp->blk_birth >
166fa9e4066Sahrens 			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
167fa9e4066Sahrens 				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
168fa9e4066Sahrens 				mutex_enter(&ds->ds_prev->ds_lock);
169fa9e4066Sahrens 				ds->ds_prev->ds_phys->ds_unique_bytes +=
170fa9e4066Sahrens 				    used;
171fa9e4066Sahrens 				mutex_exit(&ds->ds_prev->ds_lock);
172fa9e4066Sahrens 			}
173fa9e4066Sahrens 		}
174fa9e4066Sahrens 	}
175fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
176fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
177fa9e4066Sahrens 	ds->ds_phys->ds_used_bytes -= used;
178fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
179fa9e4066Sahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
180fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
181fa9e4066Sahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
182fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
183fa9e4066Sahrens }
184fa9e4066Sahrens 
185ea8dc4b6Seschrock uint64_t
186ea8dc4b6Seschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
187fa9e4066Sahrens {
188a2eea2e1Sahrens 	uint64_t trysnap = 0;
189a2eea2e1Sahrens 
190fa9e4066Sahrens 	if (ds == NULL)
191ea8dc4b6Seschrock 		return (0);
192fa9e4066Sahrens 	/*
193fa9e4066Sahrens 	 * The snapshot creation could fail, but that would cause an
194fa9e4066Sahrens 	 * incorrect FALSE return, which would only result in an
195fa9e4066Sahrens 	 * overestimation of the amount of space that an operation would
196fa9e4066Sahrens 	 * consume, which is OK.
197fa9e4066Sahrens 	 *
198fa9e4066Sahrens 	 * There's also a small window where we could miss a pending
199fa9e4066Sahrens 	 * snapshot, because we could set the sync task in the quiescing
200fa9e4066Sahrens 	 * phase.  So this should only be used as a guess.
201fa9e4066Sahrens 	 */
202a2eea2e1Sahrens 	if (ds->ds_trysnap_txg >
203a2eea2e1Sahrens 	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
204a2eea2e1Sahrens 		trysnap = ds->ds_trysnap_txg;
205a2eea2e1Sahrens 	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
206ea8dc4b6Seschrock }
207ea8dc4b6Seschrock 
208ea8dc4b6Seschrock int
209ea8dc4b6Seschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
210ea8dc4b6Seschrock {
211ea8dc4b6Seschrock 	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
212fa9e4066Sahrens }
213fa9e4066Sahrens 
214fa9e4066Sahrens /* ARGSUSED */
215fa9e4066Sahrens static void
216fa9e4066Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
217fa9e4066Sahrens {
218fa9e4066Sahrens 	dsl_dataset_t *ds = dsv;
219fa9e4066Sahrens 
22055434c77Sek 	/* open_refcount == DS_REF_MAX when deleting */
221fa9e4066Sahrens 	ASSERT(ds->ds_open_refcount == 0 ||
22255434c77Sek 	    ds->ds_open_refcount == DS_REF_MAX);
223fa9e4066Sahrens 
224fa9e4066Sahrens 	dprintf_ds(ds, "evicting %s\n", "");
225fa9e4066Sahrens 
22691ebeef5Sahrens 	unique_remove(ds->ds_fsid_guid);
227fa9e4066Sahrens 
228fa9e4066Sahrens 	if (ds->ds_user_ptr != NULL)
229fa9e4066Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
230fa9e4066Sahrens 
231fa9e4066Sahrens 	if (ds->ds_prev) {
232fa9e4066Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
233fa9e4066Sahrens 		ds->ds_prev = NULL;
234fa9e4066Sahrens 	}
235fa9e4066Sahrens 
236fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
237fa9e4066Sahrens 	dsl_dir_close(ds->ds_dir, ds);
238fa9e4066Sahrens 
23991ebeef5Sahrens 	ASSERT(!list_link_active(&ds->ds_synced_link));
240fa9e4066Sahrens 
2415ad82045Snd 	mutex_destroy(&ds->ds_lock);
24291ebeef5Sahrens 	mutex_destroy(&ds->ds_opening_lock);
2435ad82045Snd 	mutex_destroy(&ds->ds_deadlist.bpl_lock);
2445ad82045Snd 
245fa9e4066Sahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
246fa9e4066Sahrens }
247fa9e4066Sahrens 
248ea8dc4b6Seschrock static int
249fa9e4066Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
250fa9e4066Sahrens {
251fa9e4066Sahrens 	dsl_dataset_phys_t *headphys;
252fa9e4066Sahrens 	int err;
253fa9e4066Sahrens 	dmu_buf_t *headdbuf;
254fa9e4066Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
255fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
256fa9e4066Sahrens 
257fa9e4066Sahrens 	if (ds->ds_snapname[0])
258ea8dc4b6Seschrock 		return (0);
259fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
260ea8dc4b6Seschrock 		return (0);
261fa9e4066Sahrens 
262ea8dc4b6Seschrock 	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
263ea8dc4b6Seschrock 	    FTAG, &headdbuf);
264ea8dc4b6Seschrock 	if (err)
265ea8dc4b6Seschrock 		return (err);
266fa9e4066Sahrens 	headphys = headdbuf->db_data;
267fa9e4066Sahrens 	err = zap_value_search(dp->dp_meta_objset,
268e7437265Sahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
269ea8dc4b6Seschrock 	dmu_buf_rele(headdbuf, FTAG);
270ea8dc4b6Seschrock 	return (err);
271fa9e4066Sahrens }
272fa9e4066Sahrens 
273ea8dc4b6Seschrock int
274fa9e4066Sahrens dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
275ea8dc4b6Seschrock     int mode, void *tag, dsl_dataset_t **dsp)
276fa9e4066Sahrens {
277fa9e4066Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
278fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
279fa9e4066Sahrens 	dmu_buf_t *dbuf;
280fa9e4066Sahrens 	dsl_dataset_t *ds;
281ea8dc4b6Seschrock 	int err;
282fa9e4066Sahrens 
283fa9e4066Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
284fa9e4066Sahrens 	    dsl_pool_sync_context(dp));
285fa9e4066Sahrens 
286ea8dc4b6Seschrock 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
287ea8dc4b6Seschrock 	if (err)
288ea8dc4b6Seschrock 		return (err);
289fa9e4066Sahrens 	ds = dmu_buf_get_user(dbuf);
290fa9e4066Sahrens 	if (ds == NULL) {
291fa9e4066Sahrens 		dsl_dataset_t *winner;
292fa9e4066Sahrens 
293fa9e4066Sahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
294fa9e4066Sahrens 		ds->ds_dbuf = dbuf;
295fa9e4066Sahrens 		ds->ds_object = dsobj;
296fa9e4066Sahrens 		ds->ds_phys = dbuf->db_data;
297fa9e4066Sahrens 
2985ad82045Snd 		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
29991ebeef5Sahrens 		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
3005ad82045Snd 		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
3015ad82045Snd 		    NULL);
3025ad82045Snd 
303ea8dc4b6Seschrock 		err = bplist_open(&ds->ds_deadlist,
304fa9e4066Sahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
305ea8dc4b6Seschrock 		if (err == 0) {
306ea8dc4b6Seschrock 			err = dsl_dir_open_obj(dp,
307ea8dc4b6Seschrock 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
308ea8dc4b6Seschrock 		}
309ea8dc4b6Seschrock 		if (err) {
310ea8dc4b6Seschrock 			/*
311ea8dc4b6Seschrock 			 * we don't really need to close the blist if we
312ea8dc4b6Seschrock 			 * just opened it.
313ea8dc4b6Seschrock 			 */
3145ad82045Snd 			mutex_destroy(&ds->ds_lock);
31591ebeef5Sahrens 			mutex_destroy(&ds->ds_opening_lock);
3165ad82045Snd 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
317ea8dc4b6Seschrock 			kmem_free(ds, sizeof (dsl_dataset_t));
318ea8dc4b6Seschrock 			dmu_buf_rele(dbuf, tag);
319ea8dc4b6Seschrock 			return (err);
320ea8dc4b6Seschrock 		}
321fa9e4066Sahrens 
322fa9e4066Sahrens 		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
323fa9e4066Sahrens 			ds->ds_snapname[0] = '\0';
324fa9e4066Sahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
325ea8dc4b6Seschrock 				err = dsl_dataset_open_obj(dp,
326fa9e4066Sahrens 				    ds->ds_phys->ds_prev_snap_obj, NULL,
327ea8dc4b6Seschrock 				    DS_MODE_NONE, ds, &ds->ds_prev);
328fa9e4066Sahrens 			}
329fa9e4066Sahrens 		} else {
330fa9e4066Sahrens 			if (snapname) {
331fa9e4066Sahrens #ifdef ZFS_DEBUG
332fa9e4066Sahrens 				dsl_dataset_phys_t *headphys;
333ea8dc4b6Seschrock 				dmu_buf_t *headdbuf;
334ea8dc4b6Seschrock 				err = dmu_bonus_hold(mos,
335ea8dc4b6Seschrock 				    ds->ds_dir->dd_phys->dd_head_dataset_obj,
336ea8dc4b6Seschrock 				    FTAG, &headdbuf);
337ea8dc4b6Seschrock 				if (err == 0) {
338ea8dc4b6Seschrock 					headphys = headdbuf->db_data;
339ea8dc4b6Seschrock 					uint64_t foundobj;
340ea8dc4b6Seschrock 					err = zap_lookup(dp->dp_meta_objset,
341ea8dc4b6Seschrock 					    headphys->ds_snapnames_zapobj,
342ea8dc4b6Seschrock 					    snapname, sizeof (foundobj), 1,
343ea8dc4b6Seschrock 					    &foundobj);
344ea8dc4b6Seschrock 					ASSERT3U(foundobj, ==, dsobj);
345ea8dc4b6Seschrock 					dmu_buf_rele(headdbuf, FTAG);
346ea8dc4b6Seschrock 				}
347fa9e4066Sahrens #endif
348fa9e4066Sahrens 				(void) strcat(ds->ds_snapname, snapname);
349fa9e4066Sahrens 			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
350ea8dc4b6Seschrock 				err = dsl_dataset_get_snapname(ds);
351fa9e4066Sahrens 			}
352fa9e4066Sahrens 		}
353fa9e4066Sahrens 
354ea8dc4b6Seschrock 		if (err == 0) {
355ea8dc4b6Seschrock 			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
356ea8dc4b6Seschrock 			    dsl_dataset_evict);
357ea8dc4b6Seschrock 		}
358ea8dc4b6Seschrock 		if (err || winner) {
359fa9e4066Sahrens 			bplist_close(&ds->ds_deadlist);
360fa9e4066Sahrens 			if (ds->ds_prev) {
361fa9e4066Sahrens 				dsl_dataset_close(ds->ds_prev,
362fa9e4066Sahrens 				    DS_MODE_NONE, ds);
363fa9e4066Sahrens 			}
364fa9e4066Sahrens 			dsl_dir_close(ds->ds_dir, ds);
3655ad82045Snd 			mutex_destroy(&ds->ds_lock);
36691ebeef5Sahrens 			mutex_destroy(&ds->ds_opening_lock);
3675ad82045Snd 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
368fa9e4066Sahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
369ea8dc4b6Seschrock 			if (err) {
370ea8dc4b6Seschrock 				dmu_buf_rele(dbuf, tag);
371ea8dc4b6Seschrock 				return (err);
372ea8dc4b6Seschrock 			}
373fa9e4066Sahrens 			ds = winner;
374fa9e4066Sahrens 		} else {
37591ebeef5Sahrens 			ds->ds_fsid_guid =
376fa9e4066Sahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
377fa9e4066Sahrens 		}
378fa9e4066Sahrens 	}
379fa9e4066Sahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
380fa9e4066Sahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
381fa9e4066Sahrens 
382fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
383fa9e4066Sahrens 	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
38499653d4eSeschrock 	    (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
38599653d4eSeschrock 	    !DS_MODE_IS_INCONSISTENT(mode)) ||
38655434c77Sek 	    (ds->ds_open_refcount + weight > DS_REF_MAX)) {
387fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
388fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
389ea8dc4b6Seschrock 		return (EBUSY);
390fa9e4066Sahrens 	}
391fa9e4066Sahrens 	ds->ds_open_refcount += weight;
392fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
393fa9e4066Sahrens 
394ea8dc4b6Seschrock 	*dsp = ds;
395ea8dc4b6Seschrock 	return (0);
396fa9e4066Sahrens }
397fa9e4066Sahrens 
398fa9e4066Sahrens int
399fa9e4066Sahrens dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
400fa9e4066Sahrens     void *tag, dsl_dataset_t **dsp)
401fa9e4066Sahrens {
402fa9e4066Sahrens 	dsl_dir_t *dd;
403fa9e4066Sahrens 	dsl_pool_t *dp;
404fa9e4066Sahrens 	const char *tail;
405fa9e4066Sahrens 	uint64_t obj;
406fa9e4066Sahrens 	dsl_dataset_t *ds = NULL;
407fa9e4066Sahrens 	int err = 0;
408fa9e4066Sahrens 
409ea8dc4b6Seschrock 	err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
410ea8dc4b6Seschrock 	if (err)
411ea8dc4b6Seschrock 		return (err);
412fa9e4066Sahrens 
413fa9e4066Sahrens 	dp = dd->dd_pool;
414fa9e4066Sahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
415fa9e4066Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
416fa9e4066Sahrens 	if (obj == 0) {
417fa9e4066Sahrens 		/* A dataset with no associated objset */
418fa9e4066Sahrens 		err = ENOENT;
419fa9e4066Sahrens 		goto out;
420fa9e4066Sahrens 	}
421fa9e4066Sahrens 
422fa9e4066Sahrens 	if (tail != NULL) {
423fa9e4066Sahrens 		objset_t *mos = dp->dp_meta_objset;
424fa9e4066Sahrens 
425ea8dc4b6Seschrock 		err = dsl_dataset_open_obj(dp, obj, NULL,
426ea8dc4b6Seschrock 		    DS_MODE_NONE, tag, &ds);
427ea8dc4b6Seschrock 		if (err)
428ea8dc4b6Seschrock 			goto out;
429fa9e4066Sahrens 		obj = ds->ds_phys->ds_snapnames_zapobj;
430fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
431fa9e4066Sahrens 		ds = NULL;
432fa9e4066Sahrens 
433fa9e4066Sahrens 		if (tail[0] != '@') {
434fa9e4066Sahrens 			err = ENOENT;
435fa9e4066Sahrens 			goto out;
436fa9e4066Sahrens 		}
437fa9e4066Sahrens 		tail++;
438fa9e4066Sahrens 
439fa9e4066Sahrens 		/* Look for a snapshot */
440fa9e4066Sahrens 		if (!DS_MODE_IS_READONLY(mode)) {
441fa9e4066Sahrens 			err = EROFS;
442fa9e4066Sahrens 			goto out;
443fa9e4066Sahrens 		}
444fa9e4066Sahrens 		dprintf("looking for snapshot '%s'\n", tail);
445fa9e4066Sahrens 		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
446fa9e4066Sahrens 		if (err)
447fa9e4066Sahrens 			goto out;
448fa9e4066Sahrens 	}
449ea8dc4b6Seschrock 	err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);
450fa9e4066Sahrens 
451fa9e4066Sahrens out:
452fa9e4066Sahrens 	rw_exit(&dp->dp_config_rwlock);
453fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
454fa9e4066Sahrens 
455fa9e4066Sahrens 	ASSERT3U((err == 0), ==, (ds != NULL));
456fa9e4066Sahrens 	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
457fa9e4066Sahrens 
458fa9e4066Sahrens 	*dsp = ds;
459fa9e4066Sahrens 	return (err);
460fa9e4066Sahrens }
461fa9e4066Sahrens 
462fa9e4066Sahrens int
463fa9e4066Sahrens dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
464fa9e4066Sahrens {
465fa9e4066Sahrens 	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
466fa9e4066Sahrens }
467fa9e4066Sahrens 
468fa9e4066Sahrens void
469fa9e4066Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
470fa9e4066Sahrens {
471fa9e4066Sahrens 	if (ds == NULL) {
472fa9e4066Sahrens 		(void) strcpy(name, "mos");
473fa9e4066Sahrens 	} else {
474fa9e4066Sahrens 		dsl_dir_name(ds->ds_dir, name);
475ea8dc4b6Seschrock 		VERIFY(0 == dsl_dataset_get_snapname(ds));
476fa9e4066Sahrens 		if (ds->ds_snapname[0]) {
477fa9e4066Sahrens 			(void) strcat(name, "@");
478fa9e4066Sahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
479fa9e4066Sahrens 				/*
480fa9e4066Sahrens 				 * We use a "recursive" mutex so that we
481fa9e4066Sahrens 				 * can call dprintf_ds() with ds_lock held.
482fa9e4066Sahrens 				 */
483fa9e4066Sahrens 				mutex_enter(&ds->ds_lock);
484fa9e4066Sahrens 				(void) strcat(name, ds->ds_snapname);
485fa9e4066Sahrens 				mutex_exit(&ds->ds_lock);
486fa9e4066Sahrens 			} else {
487fa9e4066Sahrens 				(void) strcat(name, ds->ds_snapname);
488fa9e4066Sahrens 			}
489fa9e4066Sahrens 		}
490fa9e4066Sahrens 	}
491fa9e4066Sahrens }
492fa9e4066Sahrens 
493b7661cccSmmusante static int
494b7661cccSmmusante dsl_dataset_namelen(dsl_dataset_t *ds)
495b7661cccSmmusante {
496b7661cccSmmusante 	int result;
497b7661cccSmmusante 
498b7661cccSmmusante 	if (ds == NULL) {
499b7661cccSmmusante 		result = 3;	/* "mos" */
500b7661cccSmmusante 	} else {
501b7661cccSmmusante 		result = dsl_dir_namelen(ds->ds_dir);
502b7661cccSmmusante 		VERIFY(0 == dsl_dataset_get_snapname(ds));
503b7661cccSmmusante 		if (ds->ds_snapname[0]) {
504b7661cccSmmusante 			++result;	/* adding one for the @-sign */
505b7661cccSmmusante 			if (!MUTEX_HELD(&ds->ds_lock)) {
506b7661cccSmmusante 				/* see dsl_datset_name */
507b7661cccSmmusante 				mutex_enter(&ds->ds_lock);
508b7661cccSmmusante 				result += strlen(ds->ds_snapname);
509b7661cccSmmusante 				mutex_exit(&ds->ds_lock);
510b7661cccSmmusante 			} else {
511b7661cccSmmusante 				result += strlen(ds->ds_snapname);
512b7661cccSmmusante 			}
513b7661cccSmmusante 		}
514b7661cccSmmusante 	}
515b7661cccSmmusante 
516b7661cccSmmusante 	return (result);
517b7661cccSmmusante }
518b7661cccSmmusante 
519fa9e4066Sahrens void
520fa9e4066Sahrens dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
521fa9e4066Sahrens {
522fa9e4066Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
523fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
524fa9e4066Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, weight);
525fa9e4066Sahrens 	ds->ds_open_refcount -= weight;
526fa9e4066Sahrens 	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
527fa9e4066Sahrens 	    mode, ds->ds_open_refcount);
528fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
529fa9e4066Sahrens 
530ea8dc4b6Seschrock 	dmu_buf_rele(ds->ds_dbuf, tag);
531fa9e4066Sahrens }
532fa9e4066Sahrens 
533*3cb34c60Sahrens void
534*3cb34c60Sahrens dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode)
535*3cb34c60Sahrens {
536*3cb34c60Sahrens 	uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)];
537*3cb34c60Sahrens 	uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)];
538*3cb34c60Sahrens 	mutex_enter(&ds->ds_lock);
539*3cb34c60Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, oldweight);
540*3cb34c60Sahrens 	ASSERT3U(oldweight, >=, newweight);
541*3cb34c60Sahrens 	ds->ds_open_refcount -= oldweight;
542*3cb34c60Sahrens 	ds->ds_open_refcount += newweight;
543*3cb34c60Sahrens 	mutex_exit(&ds->ds_lock);
544*3cb34c60Sahrens }
545*3cb34c60Sahrens 
546*3cb34c60Sahrens boolean_t
547*3cb34c60Sahrens dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode)
548*3cb34c60Sahrens {
549*3cb34c60Sahrens 	boolean_t rv;
550*3cb34c60Sahrens 	uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)];
551*3cb34c60Sahrens 	uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)];
552*3cb34c60Sahrens 	mutex_enter(&ds->ds_lock);
553*3cb34c60Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, oldweight);
554*3cb34c60Sahrens 	ASSERT3U(newweight, >=, oldweight);
555*3cb34c60Sahrens 	if (ds->ds_open_refcount - oldweight + newweight > DS_REF_MAX) {
556*3cb34c60Sahrens 		rv = B_FALSE;
557*3cb34c60Sahrens 	} else {
558*3cb34c60Sahrens 		ds->ds_open_refcount -= oldweight;
559*3cb34c60Sahrens 		ds->ds_open_refcount += newweight;
560*3cb34c60Sahrens 		rv = B_TRUE;
561*3cb34c60Sahrens 	}
562*3cb34c60Sahrens 	mutex_exit(&ds->ds_lock);
563*3cb34c60Sahrens 	return (rv);
564*3cb34c60Sahrens }
565*3cb34c60Sahrens 
566fa9e4066Sahrens void
567fa9e4066Sahrens dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
568fa9e4066Sahrens {
569fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
570fa9e4066Sahrens 	dmu_buf_t *dbuf;
571fa9e4066Sahrens 	dsl_dataset_phys_t *dsphys;
572fa9e4066Sahrens 	dsl_dataset_t *ds;
573fa9e4066Sahrens 	uint64_t dsobj;
574fa9e4066Sahrens 	dsl_dir_t *dd;
575fa9e4066Sahrens 
576fa9e4066Sahrens 	dsl_dir_create_root(mos, ddobjp, tx);
577ea8dc4b6Seschrock 	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));
578fa9e4066Sahrens 
5791649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
5801649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
581ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
582fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
583fa9e4066Sahrens 	dsphys = dbuf->db_data;
584fa9e4066Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
585fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
586fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
587fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
588fa9e4066Sahrens 	dsphys->ds_snapnames_zapobj =
58987e5029aSahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
590fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
591fa9e4066Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
592fa9e4066Sahrens 	dsphys->ds_deadlist_obj =
593fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
594ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
595fa9e4066Sahrens 
596fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
597fa9e4066Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
598fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
599fa9e4066Sahrens 
600ea8dc4b6Seschrock 	VERIFY(0 ==
601ea8dc4b6Seschrock 	    dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
602c717a561Smaybee 	(void) dmu_objset_create_impl(dp->dp_spa, ds,
603c717a561Smaybee 	    &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx);
604fa9e4066Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
605fa9e4066Sahrens }
606fa9e4066Sahrens 
6071d452cf5Sahrens uint64_t
608*3cb34c60Sahrens dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin, dmu_tx_t *tx)
609fa9e4066Sahrens {
610*3cb34c60Sahrens 	dsl_pool_t *dp = dd->dd_pool;
611fa9e4066Sahrens 	dmu_buf_t *dbuf;
612fa9e4066Sahrens 	dsl_dataset_phys_t *dsphys;
613*3cb34c60Sahrens 	uint64_t dsobj;
614fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
615fa9e4066Sahrens 
616*3cb34c60Sahrens 	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
617*3cb34c60Sahrens 	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
618fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
619*3cb34c60Sahrens 	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
620fa9e4066Sahrens 
6211649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
6221649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
623ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
624fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
625fa9e4066Sahrens 	dsphys = dbuf->db_data;
626fa9e4066Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
627fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
628fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
629fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
630fa9e4066Sahrens 	dsphys->ds_snapnames_zapobj =
63187e5029aSahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
632fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
633fa9e4066Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
634fa9e4066Sahrens 	dsphys->ds_deadlist_obj =
635fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
636*3cb34c60Sahrens 	if (origin) {
637*3cb34c60Sahrens 		dsphys->ds_prev_snap_obj = origin->ds_object;
638fa9e4066Sahrens 		dsphys->ds_prev_snap_txg =
639*3cb34c60Sahrens 		    origin->ds_phys->ds_creation_txg;
640fa9e4066Sahrens 		dsphys->ds_used_bytes =
641*3cb34c60Sahrens 		    origin->ds_phys->ds_used_bytes;
642fa9e4066Sahrens 		dsphys->ds_compressed_bytes =
643*3cb34c60Sahrens 		    origin->ds_phys->ds_compressed_bytes;
644fa9e4066Sahrens 		dsphys->ds_uncompressed_bytes =
645*3cb34c60Sahrens 		    origin->ds_phys->ds_uncompressed_bytes;
646*3cb34c60Sahrens 		dsphys->ds_bp = origin->ds_phys->ds_bp;
647fa9e4066Sahrens 
648*3cb34c60Sahrens 		dmu_buf_will_dirty(origin->ds_dbuf, tx);
649*3cb34c60Sahrens 		origin->ds_phys->ds_num_children++;
650fa9e4066Sahrens 
651fa9e4066Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
652*3cb34c60Sahrens 		dd->dd_phys->dd_origin_obj = origin->ds_object;
653fa9e4066Sahrens 	}
654ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
655fa9e4066Sahrens 
656fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
657fa9e4066Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
658*3cb34c60Sahrens 
659*3cb34c60Sahrens 	return (dsobj);
660*3cb34c60Sahrens }
661*3cb34c60Sahrens 
662*3cb34c60Sahrens uint64_t
663*3cb34c60Sahrens dsl_dataset_create_sync(dsl_dir_t *pdd,
664*3cb34c60Sahrens     const char *lastname, dsl_dataset_t *origin, cred_t *cr, dmu_tx_t *tx)
665*3cb34c60Sahrens {
666*3cb34c60Sahrens 	dsl_pool_t *dp = pdd->dd_pool;
667*3cb34c60Sahrens 	uint64_t dsobj, ddobj;
668*3cb34c60Sahrens 	dsl_dir_t *dd;
669*3cb34c60Sahrens 
670*3cb34c60Sahrens 	ASSERT(lastname[0] != '@');
671*3cb34c60Sahrens 
672*3cb34c60Sahrens 	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
673*3cb34c60Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
674*3cb34c60Sahrens 
675*3cb34c60Sahrens 	dsobj = dsl_dataset_create_sync_impl(dd, origin, tx);
676*3cb34c60Sahrens 
677*3cb34c60Sahrens 	dsl_deleg_set_create_perms(dd, tx, cr);
678*3cb34c60Sahrens 
679fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
680fa9e4066Sahrens 
6811d452cf5Sahrens 	return (dsobj);
682fa9e4066Sahrens }
683fa9e4066Sahrens 
6841d452cf5Sahrens struct destroyarg {
6851d452cf5Sahrens 	dsl_sync_task_group_t *dstg;
6861d452cf5Sahrens 	char *snapname;
6871d452cf5Sahrens 	char *failed;
6881d452cf5Sahrens };
6891d452cf5Sahrens 
6901d452cf5Sahrens static int
6911d452cf5Sahrens dsl_snapshot_destroy_one(char *name, void *arg)
692fa9e4066Sahrens {
6931d452cf5Sahrens 	struct destroyarg *da = arg;
6941d452cf5Sahrens 	dsl_dataset_t *ds;
6951d452cf5Sahrens 	char *cp;
696fa9e4066Sahrens 	int err;
697fa9e4066Sahrens 
6981d452cf5Sahrens 	(void) strcat(name, "@");
6991d452cf5Sahrens 	(void) strcat(name, da->snapname);
7001d452cf5Sahrens 	err = dsl_dataset_open(name,
7011d452cf5Sahrens 	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
702cdf5b4caSmmusante 	    da->dstg, &ds);
7031d452cf5Sahrens 	cp = strchr(name, '@');
7041d452cf5Sahrens 	*cp = '\0';
7051d452cf5Sahrens 	if (err == ENOENT)
7061d452cf5Sahrens 		return (0);
7071d452cf5Sahrens 	if (err) {
7081d452cf5Sahrens 		(void) strcpy(da->failed, name);
709ea8dc4b6Seschrock 		return (err);
7101d452cf5Sahrens 	}
711fa9e4066Sahrens 
7121d452cf5Sahrens 	dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
713cdf5b4caSmmusante 	    dsl_dataset_destroy_sync, ds, da->dstg, 0);
7141d452cf5Sahrens 	return (0);
7151d452cf5Sahrens }
71631fd60d3Sahrens 
7171d452cf5Sahrens /*
7181d452cf5Sahrens  * Destroy 'snapname' in all descendants of 'fsname'.
7191d452cf5Sahrens  */
7201d452cf5Sahrens #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
7211d452cf5Sahrens int
7221d452cf5Sahrens dsl_snapshots_destroy(char *fsname, char *snapname)
7231d452cf5Sahrens {
7241d452cf5Sahrens 	int err;
7251d452cf5Sahrens 	struct destroyarg da;
7261d452cf5Sahrens 	dsl_sync_task_t *dst;
7271d452cf5Sahrens 	spa_t *spa;
7281d452cf5Sahrens 
72940feaa91Sahrens 	err = spa_open(fsname, &spa, FTAG);
7301d452cf5Sahrens 	if (err)
7311d452cf5Sahrens 		return (err);
7321d452cf5Sahrens 	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
7331d452cf5Sahrens 	da.snapname = snapname;
7341d452cf5Sahrens 	da.failed = fsname;
7351d452cf5Sahrens 
7361d452cf5Sahrens 	err = dmu_objset_find(fsname,
7370b69c2f0Sahrens 	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
7381d452cf5Sahrens 
7391d452cf5Sahrens 	if (err == 0)
7401d452cf5Sahrens 		err = dsl_sync_task_group_wait(da.dstg);
7411d452cf5Sahrens 
7421d452cf5Sahrens 	for (dst = list_head(&da.dstg->dstg_tasks); dst;
7431d452cf5Sahrens 	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
7441d452cf5Sahrens 		dsl_dataset_t *ds = dst->dst_arg1;
7451d452cf5Sahrens 		if (dst->dst_err) {
7461d452cf5Sahrens 			dsl_dataset_name(ds, fsname);
74740feaa91Sahrens 			*strchr(fsname, '@') = '\0';
748e1930233Sbonwick 		}
749fa9e4066Sahrens 		/*
7501d452cf5Sahrens 		 * If it was successful, destroy_sync would have
7511d452cf5Sahrens 		 * closed the ds
752fa9e4066Sahrens 		 */
753ea8dc4b6Seschrock 		if (err)
754cdf5b4caSmmusante 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg);
755fa9e4066Sahrens 	}
756fa9e4066Sahrens 
7571d452cf5Sahrens 	dsl_sync_task_group_destroy(da.dstg);
7581d452cf5Sahrens 	spa_close(spa, FTAG);
759fa9e4066Sahrens 	return (err);
760fa9e4066Sahrens }
761fa9e4066Sahrens 
762*3cb34c60Sahrens /*
763*3cb34c60Sahrens  * ds must be opened EXCLUSIVE or PRIMARY.  on return (whether
764*3cb34c60Sahrens  * successful or not), ds will be closed and caller can no longer
765*3cb34c60Sahrens  * dereference it.
766*3cb34c60Sahrens  */
767fa9e4066Sahrens int
768*3cb34c60Sahrens dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
769fa9e4066Sahrens {
770fa9e4066Sahrens 	int err;
7711d452cf5Sahrens 	dsl_sync_task_group_t *dstg;
7721d452cf5Sahrens 	objset_t *os;
773fa9e4066Sahrens 	dsl_dir_t *dd;
7741d452cf5Sahrens 	uint64_t obj;
7751d452cf5Sahrens 
776*3cb34c60Sahrens 	if (ds->ds_open_refcount != DS_REF_MAX) {
777*3cb34c60Sahrens 		if (dsl_dataset_tryupgrade(ds, DS_MODE_PRIMARY,
778*3cb34c60Sahrens 		    DS_MODE_EXCLUSIVE) == 0) {
779*3cb34c60Sahrens 			dsl_dataset_close(ds, DS_MODE_PRIMARY, tag);
780*3cb34c60Sahrens 			return (EBUSY);
781*3cb34c60Sahrens 		}
782*3cb34c60Sahrens 	}
783*3cb34c60Sahrens 
784*3cb34c60Sahrens 	if (dsl_dataset_is_snapshot(ds)) {
7851d452cf5Sahrens 		/* Destroying a snapshot is simpler */
7861d452cf5Sahrens 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
7871d452cf5Sahrens 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
788*3cb34c60Sahrens 		    ds, tag, 0);
789*3cb34c60Sahrens 		goto out;
7901d452cf5Sahrens 	}
791fa9e4066Sahrens 
7921d452cf5Sahrens 	dd = ds->ds_dir;
793fa9e4066Sahrens 
7941d452cf5Sahrens 	/*
7951d452cf5Sahrens 	 * Check for errors and mark this ds as inconsistent, in
7961d452cf5Sahrens 	 * case we crash while freeing the objects.
7971d452cf5Sahrens 	 */
7981d452cf5Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
7991d452cf5Sahrens 	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
800*3cb34c60Sahrens 	if (err)
801*3cb34c60Sahrens 		goto out;
802*3cb34c60Sahrens 
803*3cb34c60Sahrens 	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
804*3cb34c60Sahrens 	if (err)
805*3cb34c60Sahrens 		goto out;
806fa9e4066Sahrens 
8071d452cf5Sahrens 	/*
8081d452cf5Sahrens 	 * remove the objects in open context, so that we won't
8091d452cf5Sahrens 	 * have too much to do in syncing context.
8101d452cf5Sahrens 	 */
8116754306eSahrens 	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
8126754306eSahrens 	    ds->ds_phys->ds_prev_snap_txg)) {
8131d452cf5Sahrens 		dmu_tx_t *tx = dmu_tx_create(os);
8141d452cf5Sahrens 		dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
8151d452cf5Sahrens 		dmu_tx_hold_bonus(tx, obj);
8161d452cf5Sahrens 		err = dmu_tx_assign(tx, TXG_WAIT);
8171d452cf5Sahrens 		if (err) {
8181d452cf5Sahrens 			/*
8191d452cf5Sahrens 			 * Perhaps there is not enough disk
8201d452cf5Sahrens 			 * space.  Just deal with it from
8211d452cf5Sahrens 			 * dsl_dataset_destroy_sync().
8221d452cf5Sahrens 			 */
8231d452cf5Sahrens 			dmu_tx_abort(tx);
8241d452cf5Sahrens 			continue;
8251d452cf5Sahrens 		}
8261d452cf5Sahrens 		VERIFY(0 == dmu_object_free(os, obj, tx));
8271d452cf5Sahrens 		dmu_tx_commit(tx);
8281d452cf5Sahrens 	}
8291d452cf5Sahrens 	/* Make sure it's not dirty before we finish destroying it. */
8301d452cf5Sahrens 	txg_wait_synced(dd->dd_pool, 0);
8311d452cf5Sahrens 
8321d452cf5Sahrens 	dmu_objset_close(os);
8331d452cf5Sahrens 	if (err != ESRCH)
834*3cb34c60Sahrens 		goto out;
8351d452cf5Sahrens 
836*3cb34c60Sahrens 	if (ds->ds_user_ptr) {
837*3cb34c60Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
838*3cb34c60Sahrens 		ds->ds_user_ptr = NULL;
8391d452cf5Sahrens 	}
8401d452cf5Sahrens 
841*3cb34c60Sahrens 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
842*3cb34c60Sahrens 	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
843*3cb34c60Sahrens 	rw_exit(&dd->dd_pool->dp_config_rwlock);
844*3cb34c60Sahrens 
845*3cb34c60Sahrens 	if (err)
846*3cb34c60Sahrens 		goto out;
847*3cb34c60Sahrens 
8481d452cf5Sahrens 	/*
8491d452cf5Sahrens 	 * Blow away the dsl_dir + head dataset.
8501d452cf5Sahrens 	 */
8511d452cf5Sahrens 	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
8521d452cf5Sahrens 	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
853*3cb34c60Sahrens 	    dsl_dataset_destroy_sync, ds, tag, 0);
8541d452cf5Sahrens 	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
8551d452cf5Sahrens 	    dsl_dir_destroy_sync, dd, FTAG, 0);
8561d452cf5Sahrens 	err = dsl_sync_task_group_wait(dstg);
8571d452cf5Sahrens 	dsl_sync_task_group_destroy(dstg);
8581d452cf5Sahrens 	/* if it is successful, *destroy_sync will close the ds+dd */
859*3cb34c60Sahrens 	if (err)
8601d452cf5Sahrens 		dsl_dir_close(dd, FTAG);
861*3cb34c60Sahrens out:
862*3cb34c60Sahrens 	if (err)
863*3cb34c60Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
864fa9e4066Sahrens 	return (err);
865fa9e4066Sahrens }
866fa9e4066Sahrens 
8671d452cf5Sahrens int
868*3cb34c60Sahrens dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
8691d452cf5Sahrens {
87055434c77Sek 	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
871*3cb34c60Sahrens 
8721d452cf5Sahrens 	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
8731d452cf5Sahrens 	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
874*3cb34c60Sahrens 	    ds, &ost, 0));
8751d452cf5Sahrens }
8761d452cf5Sahrens 
877fa9e4066Sahrens void *
878fa9e4066Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
879fa9e4066Sahrens     void *p, dsl_dataset_evict_func_t func)
880fa9e4066Sahrens {
881fa9e4066Sahrens 	void *old;
882fa9e4066Sahrens 
883fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
884fa9e4066Sahrens 	old = ds->ds_user_ptr;
885fa9e4066Sahrens 	if (old == NULL) {
886fa9e4066Sahrens 		ds->ds_user_ptr = p;
887fa9e4066Sahrens 		ds->ds_user_evict_func = func;
888fa9e4066Sahrens 	}
889fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
890fa9e4066Sahrens 	return (old);
891fa9e4066Sahrens }
892fa9e4066Sahrens 
893fa9e4066Sahrens void *
894fa9e4066Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
895fa9e4066Sahrens {
896fa9e4066Sahrens 	return (ds->ds_user_ptr);
897fa9e4066Sahrens }
898fa9e4066Sahrens 
899fa9e4066Sahrens 
900c717a561Smaybee blkptr_t *
901c717a561Smaybee dsl_dataset_get_blkptr(dsl_dataset_t *ds)
902fa9e4066Sahrens {
903c717a561Smaybee 	return (&ds->ds_phys->ds_bp);
904fa9e4066Sahrens }
905fa9e4066Sahrens 
906fa9e4066Sahrens void
907fa9e4066Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
908fa9e4066Sahrens {
909fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
910fa9e4066Sahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
911fa9e4066Sahrens 	if (ds == NULL) {
912fa9e4066Sahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
913fa9e4066Sahrens 	} else {
914fa9e4066Sahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
915fa9e4066Sahrens 		ds->ds_phys->ds_bp = *bp;
916fa9e4066Sahrens 	}
917fa9e4066Sahrens }
918fa9e4066Sahrens 
919fa9e4066Sahrens spa_t *
920fa9e4066Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
921fa9e4066Sahrens {
922fa9e4066Sahrens 	return (ds->ds_dir->dd_pool->dp_spa);
923fa9e4066Sahrens }
924fa9e4066Sahrens 
925fa9e4066Sahrens void
926fa9e4066Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
927fa9e4066Sahrens {
928fa9e4066Sahrens 	dsl_pool_t *dp;
929fa9e4066Sahrens 
930fa9e4066Sahrens 	if (ds == NULL) /* this is the meta-objset */
931fa9e4066Sahrens 		return;
932fa9e4066Sahrens 
933fa9e4066Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
934a2eea2e1Sahrens 
935a2eea2e1Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
936a2eea2e1Sahrens 		panic("dirtying snapshot!");
937fa9e4066Sahrens 
938fa9e4066Sahrens 	dp = ds->ds_dir->dd_pool;
939fa9e4066Sahrens 
940fa9e4066Sahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
941fa9e4066Sahrens 		/* up the hold count until we can be written out */
942fa9e4066Sahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
943fa9e4066Sahrens 	}
944fa9e4066Sahrens }
945fa9e4066Sahrens 
946fa9e4066Sahrens struct killarg {
947fa9e4066Sahrens 	uint64_t *usedp;
948fa9e4066Sahrens 	uint64_t *compressedp;
949fa9e4066Sahrens 	uint64_t *uncompressedp;
950fa9e4066Sahrens 	zio_t *zio;
951fa9e4066Sahrens 	dmu_tx_t *tx;
952fa9e4066Sahrens };
953fa9e4066Sahrens 
954fa9e4066Sahrens static int
955fa9e4066Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
956fa9e4066Sahrens {
957fa9e4066Sahrens 	struct killarg *ka = arg;
958fa9e4066Sahrens 	blkptr_t *bp = &bc->bc_blkptr;
959fa9e4066Sahrens 
960fa9e4066Sahrens 	ASSERT3U(bc->bc_errno, ==, 0);
961fa9e4066Sahrens 
962fa9e4066Sahrens 	/*
963fa9e4066Sahrens 	 * Since this callback is not called concurrently, no lock is
964fa9e4066Sahrens 	 * needed on the accounting values.
965fa9e4066Sahrens 	 */
96699653d4eSeschrock 	*ka->usedp += bp_get_dasize(spa, bp);
967fa9e4066Sahrens 	*ka->compressedp += BP_GET_PSIZE(bp);
968fa9e4066Sahrens 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
969fa9e4066Sahrens 	/* XXX check for EIO? */
970fa9e4066Sahrens 	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
971fa9e4066Sahrens 	    ARC_NOWAIT);
972fa9e4066Sahrens 	return (0);
973fa9e4066Sahrens }
974fa9e4066Sahrens 
975fa9e4066Sahrens /* ARGSUSED */
9761d452cf5Sahrens static int
9771d452cf5Sahrens dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
978fa9e4066Sahrens {
9791d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
980*3cb34c60Sahrens 	dmu_objset_type_t *ost = arg2;
981fa9e4066Sahrens 
9821d452cf5Sahrens 	/*
983*3cb34c60Sahrens 	 * We can only roll back to emptyness if it is a ZPL objset.
9841d452cf5Sahrens 	 */
985*3cb34c60Sahrens 	if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0)
986fa9e4066Sahrens 		return (EINVAL);
987fa9e4066Sahrens 
9881d452cf5Sahrens 	/*
9891d452cf5Sahrens 	 * This must not be a snapshot.
9901d452cf5Sahrens 	 */
9911d452cf5Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
992fa9e4066Sahrens 		return (EINVAL);
993fa9e4066Sahrens 
994fa9e4066Sahrens 	/*
995fa9e4066Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
996fa9e4066Sahrens 	 * them.  Try again.
997fa9e4066Sahrens 	 */
9981d452cf5Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
999fa9e4066Sahrens 		return (EAGAIN);
1000fa9e4066Sahrens 
10011d452cf5Sahrens 	return (0);
10021d452cf5Sahrens }
10031d452cf5Sahrens 
10041d452cf5Sahrens /* ARGSUSED */
10051d452cf5Sahrens static void
1006ecd6cf80Smarks dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
10071d452cf5Sahrens {
10081d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1009*3cb34c60Sahrens 	dmu_objset_type_t *ost = arg2;
10101d452cf5Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1011fa9e4066Sahrens 
1012fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1013fa9e4066Sahrens 
101486ccc033Sperrin 	/*
101586ccc033Sperrin 	 * Before the roll back destroy the zil.
101686ccc033Sperrin 	 */
101786ccc033Sperrin 	if (ds->ds_user_ptr != NULL) {
101886ccc033Sperrin 		zil_rollback_destroy(
101986ccc033Sperrin 		    ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx);
1020*3cb34c60Sahrens 
1021*3cb34c60Sahrens 		/*
1022*3cb34c60Sahrens 		 * We need to make sure that the objset_impl_t is reopened after
1023*3cb34c60Sahrens 		 * we do the rollback, otherwise it will have the wrong
1024*3cb34c60Sahrens 		 * objset_phys_t.  Normally this would happen when this
1025*3cb34c60Sahrens 		 * DS_MODE_EXCLUSIVE dataset-open is closed, thus causing the
1026*3cb34c60Sahrens 		 * dataset to be immediately evicted.  But when doing "zfs recv
1027*3cb34c60Sahrens 		 * -F", we reopen the objset before that, so that there is no
1028*3cb34c60Sahrens 		 * window where the dataset is closed and inconsistent.
1029*3cb34c60Sahrens 		 */
1030*3cb34c60Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
1031*3cb34c60Sahrens 		ds->ds_user_ptr = NULL;
103286ccc033Sperrin 	}
10333a8a1de4Sperrin 
1034fa9e4066Sahrens 	/* Zero out the deadlist. */
1035fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
1036fa9e4066Sahrens 	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1037fa9e4066Sahrens 	ds->ds_phys->ds_deadlist_obj =
1038fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
1039ea8dc4b6Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
1040ea8dc4b6Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1041fa9e4066Sahrens 
1042fa9e4066Sahrens 	{
1043fa9e4066Sahrens 		/* Free blkptrs that we gave birth to */
1044fa9e4066Sahrens 		zio_t *zio;
1045fa9e4066Sahrens 		uint64_t used = 0, compressed = 0, uncompressed = 0;
1046fa9e4066Sahrens 		struct killarg ka;
1047fa9e4066Sahrens 
1048fa9e4066Sahrens 		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
1049fa9e4066Sahrens 		    ZIO_FLAG_MUSTSUCCEED);
1050fa9e4066Sahrens 		ka.usedp = &used;
1051fa9e4066Sahrens 		ka.compressedp = &compressed;
1052fa9e4066Sahrens 		ka.uncompressedp = &uncompressed;
1053fa9e4066Sahrens 		ka.zio = zio;
1054fa9e4066Sahrens 		ka.tx = tx;
1055fa9e4066Sahrens 		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1056fa9e4066Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1057fa9e4066Sahrens 		(void) zio_wait(zio);
1058fa9e4066Sahrens 
10591d452cf5Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
1060fa9e4066Sahrens 		    -used, -compressed, -uncompressed, tx);
1061fa9e4066Sahrens 	}
1062fa9e4066Sahrens 
1063*3cb34c60Sahrens 	if (ds->ds_prev) {
1064*3cb34c60Sahrens 		/* Change our contents to that of the prev snapshot */
1065*3cb34c60Sahrens 		ASSERT3U(ds->ds_prev->ds_object, ==,
1066*3cb34c60Sahrens 		    ds->ds_phys->ds_prev_snap_obj);
1067*3cb34c60Sahrens 		ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
1068*3cb34c60Sahrens 		ds->ds_phys->ds_used_bytes =
1069*3cb34c60Sahrens 		    ds->ds_prev->ds_phys->ds_used_bytes;
1070*3cb34c60Sahrens 		ds->ds_phys->ds_compressed_bytes =
1071*3cb34c60Sahrens 		    ds->ds_prev->ds_phys->ds_compressed_bytes;
1072*3cb34c60Sahrens 		ds->ds_phys->ds_uncompressed_bytes =
1073*3cb34c60Sahrens 		    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
1074*3cb34c60Sahrens 		ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
1075*3cb34c60Sahrens 		ds->ds_phys->ds_unique_bytes = 0;
1076fa9e4066Sahrens 
1077*3cb34c60Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
1078*3cb34c60Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1079*3cb34c60Sahrens 			ds->ds_prev->ds_phys->ds_unique_bytes = 0;
1080*3cb34c60Sahrens 		}
1081*3cb34c60Sahrens 	} else {
1082*3cb34c60Sahrens 		/* Zero out our contents, recreate objset */
1083*3cb34c60Sahrens 		bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
1084*3cb34c60Sahrens 		ds->ds_phys->ds_used_bytes = 0;
1085*3cb34c60Sahrens 		ds->ds_phys->ds_compressed_bytes = 0;
1086*3cb34c60Sahrens 		ds->ds_phys->ds_uncompressed_bytes = 0;
1087*3cb34c60Sahrens 		ds->ds_phys->ds_flags = 0;
1088*3cb34c60Sahrens 		ds->ds_phys->ds_unique_bytes = 0;
1089*3cb34c60Sahrens 		(void) dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
1090*3cb34c60Sahrens 		    &ds->ds_phys->ds_bp, *ost, tx);
109185edac42Sahrens 	}
1092ecd6cf80Smarks 
1093ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
1094ecd6cf80Smarks 	    tx, cr, "dataset = %llu", ds->ds_object);
1095fa9e4066Sahrens }
1096fa9e4066Sahrens 
1097e1930233Sbonwick /* ARGSUSED */
1098e1930233Sbonwick static int
10991d452cf5Sahrens dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
1100e1930233Sbonwick {
11011d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1102*3cb34c60Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1103*3cb34c60Sahrens 	uint64_t count;
1104*3cb34c60Sahrens 	int err;
1105e1930233Sbonwick 
1106e1930233Sbonwick 	/*
1107e1930233Sbonwick 	 * Can't delete a head dataset if there are snapshots of it.
1108e1930233Sbonwick 	 * (Except if the only snapshots are from the branch we cloned
1109e1930233Sbonwick 	 * from.)
1110e1930233Sbonwick 	 */
1111e1930233Sbonwick 	if (ds->ds_prev != NULL &&
1112e1930233Sbonwick 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1113e1930233Sbonwick 		return (EINVAL);
1114e1930233Sbonwick 
1115*3cb34c60Sahrens 	/*
1116*3cb34c60Sahrens 	 * This is really a dsl_dir thing, but check it here so that
1117*3cb34c60Sahrens 	 * we'll be less likely to leave this dataset inconsistent &
1118*3cb34c60Sahrens 	 * nearly destroyed.
1119*3cb34c60Sahrens 	 */
1120*3cb34c60Sahrens 	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
1121*3cb34c60Sahrens 	if (err)
1122*3cb34c60Sahrens 		return (err);
1123*3cb34c60Sahrens 	if (count != 0)
1124*3cb34c60Sahrens 		return (EEXIST);
1125*3cb34c60Sahrens 
1126e1930233Sbonwick 	return (0);
1127e1930233Sbonwick }
1128e1930233Sbonwick 
11291d452cf5Sahrens /* ARGSUSED */
11301d452cf5Sahrens static void
1131ecd6cf80Smarks dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
1132fa9e4066Sahrens {
11331d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1134ecd6cf80Smarks 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1135fa9e4066Sahrens 
11361d452cf5Sahrens 	/* Mark it as inconsistent on-disk, in case we crash */
11371d452cf5Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
11381d452cf5Sahrens 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
1139ecd6cf80Smarks 
1140ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
1141ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
11421d452cf5Sahrens }
1143fa9e4066Sahrens 
11441d452cf5Sahrens /* ARGSUSED */
1145*3cb34c60Sahrens int
11461d452cf5Sahrens dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
11471d452cf5Sahrens {
11481d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1149fa9e4066Sahrens 
1150fa9e4066Sahrens 	/* Can't delete a branch point. */
11511d452cf5Sahrens 	if (ds->ds_phys->ds_num_children > 1)
11521d452cf5Sahrens 		return (EEXIST);
1153fa9e4066Sahrens 
1154fa9e4066Sahrens 	/*
1155fa9e4066Sahrens 	 * Can't delete a head dataset if there are snapshots of it.
1156fa9e4066Sahrens 	 * (Except if the only snapshots are from the branch we cloned
1157fa9e4066Sahrens 	 * from.)
1158fa9e4066Sahrens 	 */
1159fa9e4066Sahrens 	if (ds->ds_prev != NULL &&
11601d452cf5Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1161fa9e4066Sahrens 		return (EINVAL);
1162fa9e4066Sahrens 
1163fa9e4066Sahrens 	/*
1164fa9e4066Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
1165fa9e4066Sahrens 	 * them.  Try again.
1166fa9e4066Sahrens 	 */
11671d452cf5Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1168fa9e4066Sahrens 		return (EAGAIN);
11691d452cf5Sahrens 
11701d452cf5Sahrens 	/* XXX we should do some i/o error checking... */
11711d452cf5Sahrens 	return (0);
11721d452cf5Sahrens }
11731d452cf5Sahrens 
1174*3cb34c60Sahrens void
1175ecd6cf80Smarks dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
11761d452cf5Sahrens {
11771d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
11781d452cf5Sahrens 	uint64_t used = 0, compressed = 0, uncompressed = 0;
11791d452cf5Sahrens 	zio_t *zio;
11801d452cf5Sahrens 	int err;
11811d452cf5Sahrens 	int after_branch_point = FALSE;
11821d452cf5Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
11831d452cf5Sahrens 	objset_t *mos = dp->dp_meta_objset;
11841d452cf5Sahrens 	dsl_dataset_t *ds_prev = NULL;
11851d452cf5Sahrens 	uint64_t obj;
11861d452cf5Sahrens 
118755434c77Sek 	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
11881d452cf5Sahrens 	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
11891d452cf5Sahrens 	ASSERT(ds->ds_prev == NULL ||
11901d452cf5Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
11911d452cf5Sahrens 	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
11921d452cf5Sahrens 
11931d452cf5Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
11941d452cf5Sahrens 
11951d452cf5Sahrens 	obj = ds->ds_object;
1196fa9e4066Sahrens 
1197fa9e4066Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
1198fa9e4066Sahrens 		if (ds->ds_prev) {
1199fa9e4066Sahrens 			ds_prev = ds->ds_prev;
1200fa9e4066Sahrens 		} else {
12011d452cf5Sahrens 			VERIFY(0 == dsl_dataset_open_obj(dp,
1202fa9e4066Sahrens 			    ds->ds_phys->ds_prev_snap_obj, NULL,
12031d452cf5Sahrens 			    DS_MODE_NONE, FTAG, &ds_prev));
1204fa9e4066Sahrens 		}
1205fa9e4066Sahrens 		after_branch_point =
1206fa9e4066Sahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
1207fa9e4066Sahrens 
1208fa9e4066Sahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1209fa9e4066Sahrens 		if (after_branch_point &&
1210fa9e4066Sahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
1211fa9e4066Sahrens 			/* This clone is toast. */
1212fa9e4066Sahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
1213fa9e4066Sahrens 			ds_prev->ds_phys->ds_num_children--;
1214fa9e4066Sahrens 		} else if (!after_branch_point) {
1215fa9e4066Sahrens 			ds_prev->ds_phys->ds_next_snap_obj =
1216fa9e4066Sahrens 			    ds->ds_phys->ds_next_snap_obj;
1217fa9e4066Sahrens 		}
1218fa9e4066Sahrens 	}
1219fa9e4066Sahrens 
1220fa9e4066Sahrens 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
1221fa9e4066Sahrens 
1222fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
12231d452cf5Sahrens 		blkptr_t bp;
1224fa9e4066Sahrens 		dsl_dataset_t *ds_next;
1225fa9e4066Sahrens 		uint64_t itor = 0;
1226fa9e4066Sahrens 
1227fa9e4066Sahrens 		spa_scrub_restart(dp->dp_spa, tx->tx_txg);
1228fa9e4066Sahrens 
12291d452cf5Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
1230ea8dc4b6Seschrock 		    ds->ds_phys->ds_next_snap_obj, NULL,
1231ea8dc4b6Seschrock 		    DS_MODE_NONE, FTAG, &ds_next));
1232fa9e4066Sahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
1233fa9e4066Sahrens 
1234fa9e4066Sahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
1235fa9e4066Sahrens 		ds_next->ds_phys->ds_prev_snap_obj =
1236fa9e4066Sahrens 		    ds->ds_phys->ds_prev_snap_obj;
1237fa9e4066Sahrens 		ds_next->ds_phys->ds_prev_snap_txg =
1238fa9e4066Sahrens 		    ds->ds_phys->ds_prev_snap_txg;
1239fa9e4066Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1240fa9e4066Sahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
1241fa9e4066Sahrens 
1242fa9e4066Sahrens 		/*
1243fa9e4066Sahrens 		 * Transfer to our deadlist (which will become next's
1244fa9e4066Sahrens 		 * new deadlist) any entries from next's current
1245fa9e4066Sahrens 		 * deadlist which were born before prev, and free the
1246fa9e4066Sahrens 		 * other entries.
1247fa9e4066Sahrens 		 *
1248fa9e4066Sahrens 		 * XXX we're doing this long task with the config lock held
1249fa9e4066Sahrens 		 */
1250fa9e4066Sahrens 		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
1251fa9e4066Sahrens 		    &bp) == 0) {
1252fa9e4066Sahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
1253ea8dc4b6Seschrock 				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
1254ea8dc4b6Seschrock 				    &bp, tx));
1255fa9e4066Sahrens 				if (ds_prev && !after_branch_point &&
1256fa9e4066Sahrens 				    bp.blk_birth >
1257fa9e4066Sahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
1258fa9e4066Sahrens 					ds_prev->ds_phys->ds_unique_bytes +=
125999653d4eSeschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1260fa9e4066Sahrens 				}
1261fa9e4066Sahrens 			} else {
126299653d4eSeschrock 				used += bp_get_dasize(dp->dp_spa, &bp);
1263fa9e4066Sahrens 				compressed += BP_GET_PSIZE(&bp);
1264fa9e4066Sahrens 				uncompressed += BP_GET_UCSIZE(&bp);
1265fa9e4066Sahrens 				/* XXX check return value? */
1266fa9e4066Sahrens 				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
1267fa9e4066Sahrens 				    &bp, NULL, NULL, ARC_NOWAIT);
1268fa9e4066Sahrens 			}
1269fa9e4066Sahrens 		}
1270fa9e4066Sahrens 
1271fa9e4066Sahrens 		/* free next's deadlist */
1272fa9e4066Sahrens 		bplist_close(&ds_next->ds_deadlist);
1273fa9e4066Sahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
1274fa9e4066Sahrens 
1275fa9e4066Sahrens 		/* set next's deadlist to our deadlist */
1276fa9e4066Sahrens 		ds_next->ds_phys->ds_deadlist_obj =
1277fa9e4066Sahrens 		    ds->ds_phys->ds_deadlist_obj;
1278ea8dc4b6Seschrock 		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
1279ea8dc4b6Seschrock 		    ds_next->ds_phys->ds_deadlist_obj));
1280fa9e4066Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1281fa9e4066Sahrens 
1282fa9e4066Sahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
1283fa9e4066Sahrens 			/*
1284fa9e4066Sahrens 			 * Update next's unique to include blocks which
1285fa9e4066Sahrens 			 * were previously shared by only this snapshot
1286fa9e4066Sahrens 			 * and it.  Those blocks will be born after the
1287fa9e4066Sahrens 			 * prev snap and before this snap, and will have
1288fa9e4066Sahrens 			 * died after the next snap and before the one
1289fa9e4066Sahrens 			 * after that (ie. be on the snap after next's
1290fa9e4066Sahrens 			 * deadlist).
1291fa9e4066Sahrens 			 *
1292fa9e4066Sahrens 			 * XXX we're doing this long task with the
1293fa9e4066Sahrens 			 * config lock held
1294fa9e4066Sahrens 			 */
1295fa9e4066Sahrens 			dsl_dataset_t *ds_after_next;
1296fa9e4066Sahrens 
12971d452cf5Sahrens 			VERIFY(0 == dsl_dataset_open_obj(dp,
1298fa9e4066Sahrens 			    ds_next->ds_phys->ds_next_snap_obj, NULL,
1299ea8dc4b6Seschrock 			    DS_MODE_NONE, FTAG, &ds_after_next));
1300fa9e4066Sahrens 			itor = 0;
1301fa9e4066Sahrens 			while (bplist_iterate(&ds_after_next->ds_deadlist,
1302fa9e4066Sahrens 			    &itor, &bp) == 0) {
1303fa9e4066Sahrens 				if (bp.blk_birth >
1304fa9e4066Sahrens 				    ds->ds_phys->ds_prev_snap_txg &&
1305fa9e4066Sahrens 				    bp.blk_birth <=
1306fa9e4066Sahrens 				    ds->ds_phys->ds_creation_txg) {
1307fa9e4066Sahrens 					ds_next->ds_phys->ds_unique_bytes +=
130899653d4eSeschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1309fa9e4066Sahrens 				}
1310fa9e4066Sahrens 			}
1311fa9e4066Sahrens 
1312fa9e4066Sahrens 			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
1313fa9e4066Sahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
1314fa9e4066Sahrens 		} else {
1315fa9e4066Sahrens 			/*
1316fa9e4066Sahrens 			 * It would be nice to update the head dataset's
1317fa9e4066Sahrens 			 * unique.  To do so we would have to traverse
1318fa9e4066Sahrens 			 * it for blocks born after ds_prev, which is
1319fa9e4066Sahrens 			 * pretty expensive just to maintain something
1320fa9e4066Sahrens 			 * for debugging purposes.
1321fa9e4066Sahrens 			 */
1322fa9e4066Sahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
1323fa9e4066Sahrens 			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
1324fa9e4066Sahrens 			    ds_next);
1325fa9e4066Sahrens 			if (ds_prev) {
13261d452cf5Sahrens 				VERIFY(0 == dsl_dataset_open_obj(dp,
1327ea8dc4b6Seschrock 				    ds->ds_phys->ds_prev_snap_obj, NULL,
1328ea8dc4b6Seschrock 				    DS_MODE_NONE, ds_next, &ds_next->ds_prev));
1329fa9e4066Sahrens 			} else {
1330fa9e4066Sahrens 				ds_next->ds_prev = NULL;
1331fa9e4066Sahrens 			}
1332fa9e4066Sahrens 		}
1333fa9e4066Sahrens 		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);
1334fa9e4066Sahrens 
1335fa9e4066Sahrens 		/*
1336fa9e4066Sahrens 		 * NB: unique_bytes is not accurate for head objsets
1337fa9e4066Sahrens 		 * because we don't update it when we delete the most
1338fa9e4066Sahrens 		 * recent snapshot -- see above comment.
1339fa9e4066Sahrens 		 */
1340fa9e4066Sahrens 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
1341fa9e4066Sahrens 	} else {
1342fa9e4066Sahrens 		/*
1343fa9e4066Sahrens 		 * There's no next snapshot, so this is a head dataset.
1344fa9e4066Sahrens 		 * Destroy the deadlist.  Unless it's a clone, the
1345fa9e4066Sahrens 		 * deadlist should be empty.  (If it's a clone, it's
1346fa9e4066Sahrens 		 * safe to ignore the deadlist contents.)
1347fa9e4066Sahrens 		 */
1348fa9e4066Sahrens 		struct killarg ka;
1349fa9e4066Sahrens 
1350fa9e4066Sahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
1351fa9e4066Sahrens 		bplist_close(&ds->ds_deadlist);
1352fa9e4066Sahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1353fa9e4066Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1354fa9e4066Sahrens 
1355fa9e4066Sahrens 		/*
1356fa9e4066Sahrens 		 * Free everything that we point to (that's born after
1357fa9e4066Sahrens 		 * the previous snapshot, if we are a clone)
1358fa9e4066Sahrens 		 *
1359fa9e4066Sahrens 		 * XXX we're doing this long task with the config lock held
1360fa9e4066Sahrens 		 */
1361fa9e4066Sahrens 		ka.usedp = &used;
1362fa9e4066Sahrens 		ka.compressedp = &compressed;
1363fa9e4066Sahrens 		ka.uncompressedp = &uncompressed;
1364fa9e4066Sahrens 		ka.zio = zio;
1365fa9e4066Sahrens 		ka.tx = tx;
1366fa9e4066Sahrens 		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1367fa9e4066Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1368fa9e4066Sahrens 		ASSERT3U(err, ==, 0);
1369fa9e4066Sahrens 	}
1370fa9e4066Sahrens 
1371fa9e4066Sahrens 	err = zio_wait(zio);
1372fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1373fa9e4066Sahrens 
13741d452cf5Sahrens 	dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);
1375fa9e4066Sahrens 
1376fa9e4066Sahrens 	if (ds->ds_phys->ds_snapnames_zapobj) {
1377fa9e4066Sahrens 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1378fa9e4066Sahrens 		ASSERT(err == 0);
1379fa9e4066Sahrens 	}
1380fa9e4066Sahrens 
13811d452cf5Sahrens 	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1382fa9e4066Sahrens 		/* Erase the link in the dataset */
13831d452cf5Sahrens 		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
13841d452cf5Sahrens 		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
1385fa9e4066Sahrens 		/*
1386fa9e4066Sahrens 		 * dsl_dir_sync_destroy() called us, they'll destroy
1387fa9e4066Sahrens 		 * the dataset.
1388fa9e4066Sahrens 		 */
1389fa9e4066Sahrens 	} else {
1390fa9e4066Sahrens 		/* remove from snapshot namespace */
1391fa9e4066Sahrens 		dsl_dataset_t *ds_head;
13921d452cf5Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
13931d452cf5Sahrens 		    ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL,
1394ea8dc4b6Seschrock 		    DS_MODE_NONE, FTAG, &ds_head));
13958660574dSahrens 		VERIFY(0 == dsl_dataset_get_snapname(ds));
1396fa9e4066Sahrens #ifdef ZFS_DEBUG
1397fa9e4066Sahrens 		{
1398fa9e4066Sahrens 			uint64_t val;
1399fa9e4066Sahrens 			err = zap_lookup(mos,
1400fa9e4066Sahrens 			    ds_head->ds_phys->ds_snapnames_zapobj,
14011d452cf5Sahrens 			    ds->ds_snapname, 8, 1, &val);
1402fa9e4066Sahrens 			ASSERT3U(err, ==, 0);
1403fa9e4066Sahrens 			ASSERT3U(val, ==, obj);
1404fa9e4066Sahrens 		}
1405fa9e4066Sahrens #endif
1406fa9e4066Sahrens 		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
14071d452cf5Sahrens 		    ds->ds_snapname, tx);
1408fa9e4066Sahrens 		ASSERT(err == 0);
1409fa9e4066Sahrens 		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
1410fa9e4066Sahrens 	}
1411fa9e4066Sahrens 
1412fa9e4066Sahrens 	if (ds_prev && ds->ds_prev != ds_prev)
1413fa9e4066Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1414fa9e4066Sahrens 
1415990b4856Slling 	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
1416ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
1417ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
1418ecd6cf80Smarks 
14191d452cf5Sahrens 	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
14201d452cf5Sahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
1421b1b8ab34Slling 
1422fa9e4066Sahrens }
1423fa9e4066Sahrens 
14241d452cf5Sahrens /* ARGSUSED */
1425fa9e4066Sahrens int
14261d452cf5Sahrens dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
1427fa9e4066Sahrens {
1428*3cb34c60Sahrens 	dsl_dataset_t *ds = arg1;
14291d452cf5Sahrens 	const char *snapname = arg2;
14301d452cf5Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1431fa9e4066Sahrens 	int err;
14321d452cf5Sahrens 	uint64_t value;
1433fa9e4066Sahrens 
14341d452cf5Sahrens 	/*
14351d452cf5Sahrens 	 * We don't allow multiple snapshots of the same txg.  If there
14361d452cf5Sahrens 	 * is already one, try again.
14371d452cf5Sahrens 	 */
14381d452cf5Sahrens 	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
14391d452cf5Sahrens 		return (EAGAIN);
1440fa9e4066Sahrens 
14411d452cf5Sahrens 	/*
14421d452cf5Sahrens 	 * Check for conflicting name snapshot name.
14431d452cf5Sahrens 	 */
1444fa9e4066Sahrens 	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
1445fa9e4066Sahrens 	    snapname, 8, 1, &value);
14461d452cf5Sahrens 	if (err == 0)
1447fa9e4066Sahrens 		return (EEXIST);
14481d452cf5Sahrens 	if (err != ENOENT)
14491d452cf5Sahrens 		return (err);
1450fa9e4066Sahrens 
1451b7661cccSmmusante 	/*
1452b7661cccSmmusante 	 * Check that the dataset's name is not too long.  Name consists
1453b7661cccSmmusante 	 * of the dataset's length + 1 for the @-sign + snapshot name's length
1454b7661cccSmmusante 	 */
1455b7661cccSmmusante 	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
1456b7661cccSmmusante 		return (ENAMETOOLONG);
1457b7661cccSmmusante 
14581d452cf5Sahrens 	ds->ds_trysnap_txg = tx->tx_txg;
14591d452cf5Sahrens 	return (0);
14601d452cf5Sahrens }
1461fa9e4066Sahrens 
14621d452cf5Sahrens void
1463ecd6cf80Smarks dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
14641d452cf5Sahrens {
1465*3cb34c60Sahrens 	dsl_dataset_t *ds = arg1;
14661d452cf5Sahrens 	const char *snapname = arg2;
14671d452cf5Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
14681d452cf5Sahrens 	dmu_buf_t *dbuf;
14691d452cf5Sahrens 	dsl_dataset_phys_t *dsphys;
14701d452cf5Sahrens 	uint64_t dsobj;
14711d452cf5Sahrens 	objset_t *mos = dp->dp_meta_objset;
14721d452cf5Sahrens 	int err;
1473fa9e4066Sahrens 
1474fa9e4066Sahrens 	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
14751d452cf5Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
1476fa9e4066Sahrens 
14771649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
14781649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
1479ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
1480fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
1481fa9e4066Sahrens 	dsphys = dbuf->db_data;
14821d452cf5Sahrens 	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
1483fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
1484fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
1485fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
1486fa9e4066Sahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
1487fa9e4066Sahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
1488fa9e4066Sahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
1489fa9e4066Sahrens 	dsphys->ds_num_children = 1;
1490fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
1491fa9e4066Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
1492fa9e4066Sahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
1493fa9e4066Sahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
1494fa9e4066Sahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
1495fa9e4066Sahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
149699653d4eSeschrock 	dsphys->ds_flags = ds->ds_phys->ds_flags;
1497fa9e4066Sahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
1498ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
1499fa9e4066Sahrens 
15001d452cf5Sahrens 	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
15011d452cf5Sahrens 	if (ds->ds_prev) {
15021d452cf5Sahrens 		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
1503fa9e4066Sahrens 		    ds->ds_object ||
15041d452cf5Sahrens 		    ds->ds_prev->ds_phys->ds_num_children > 1);
15051d452cf5Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
15061d452cf5Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1507fa9e4066Sahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
15081d452cf5Sahrens 			    ds->ds_prev->ds_phys->ds_creation_txg);
15091d452cf5Sahrens 			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
1510fa9e4066Sahrens 		}
1511fa9e4066Sahrens 	}
1512fa9e4066Sahrens 
1513fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
1514fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1515fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
1516fa9e4066Sahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
1517fa9e4066Sahrens 	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
1518fa9e4066Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
1519fa9e4066Sahrens 	ds->ds_phys->ds_deadlist_obj =
1520fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
1521ea8dc4b6Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
1522ea8dc4b6Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1523fa9e4066Sahrens 
1524fa9e4066Sahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
1525fa9e4066Sahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
1526fa9e4066Sahrens 	    snapname, 8, 1, &dsobj, tx);
1527fa9e4066Sahrens 	ASSERT(err == 0);
1528fa9e4066Sahrens 
1529fa9e4066Sahrens 	if (ds->ds_prev)
1530fa9e4066Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
1531ea8dc4b6Seschrock 	VERIFY(0 == dsl_dataset_open_obj(dp,
1532ea8dc4b6Seschrock 	    ds->ds_phys->ds_prev_snap_obj, snapname,
1533ea8dc4b6Seschrock 	    DS_MODE_NONE, ds, &ds->ds_prev));
1534ecd6cf80Smarks 
1535ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
153640feaa91Sahrens 	    "dataset = %llu", dsobj);
1537fa9e4066Sahrens }
1538fa9e4066Sahrens 
1539fa9e4066Sahrens void
1540c717a561Smaybee dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
1541fa9e4066Sahrens {
1542fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1543fa9e4066Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1544fa9e4066Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1545fa9e4066Sahrens 
154691ebeef5Sahrens 	/*
154791ebeef5Sahrens 	 * in case we had to change ds_fsid_guid when we opened it,
154891ebeef5Sahrens 	 * sync it out now.
154991ebeef5Sahrens 	 */
155091ebeef5Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
155191ebeef5Sahrens 	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
155291ebeef5Sahrens 
1553fa9e4066Sahrens 	dsl_dir_dirty(ds->ds_dir, tx);
1554c717a561Smaybee 	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
1555fa9e4066Sahrens }
1556fa9e4066Sahrens 
1557fa9e4066Sahrens void
1558a2eea2e1Sahrens dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1559fa9e4066Sahrens {
1560a2eea2e1Sahrens 	dsl_dir_stats(ds->ds_dir, nv);
1561fa9e4066Sahrens 
1562a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
1563a2eea2e1Sahrens 	    ds->ds_phys->ds_creation_time);
1564a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
1565a2eea2e1Sahrens 	    ds->ds_phys->ds_creation_txg);
1566a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
1567a2eea2e1Sahrens 	    ds->ds_phys->ds_used_bytes);
1568fa9e4066Sahrens 
1569fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1570fa9e4066Sahrens 		/*
1571fa9e4066Sahrens 		 * This is a snapshot; override the dd's space used with
1572a2eea2e1Sahrens 		 * our unique space and compression ratio.
1573fa9e4066Sahrens 		 */
1574a2eea2e1Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
1575a2eea2e1Sahrens 		    ds->ds_phys->ds_unique_bytes);
1576a2eea2e1Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
1577a2eea2e1Sahrens 		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
1578a2eea2e1Sahrens 		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
1579a2eea2e1Sahrens 		    ds->ds_phys->ds_compressed_bytes));
1580fa9e4066Sahrens 	}
1581fa9e4066Sahrens }
1582fa9e4066Sahrens 
1583a2eea2e1Sahrens void
1584a2eea2e1Sahrens dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1585a2eea2e1Sahrens {
1586a2eea2e1Sahrens 	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
1587a2eea2e1Sahrens 	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
1588*3cb34c60Sahrens 	stat->dds_guid = ds->ds_phys->ds_guid;
1589a2eea2e1Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1590a2eea2e1Sahrens 		stat->dds_is_snapshot = B_TRUE;
1591a2eea2e1Sahrens 		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
1592a2eea2e1Sahrens 	}
1593a2eea2e1Sahrens 
1594a2eea2e1Sahrens 	/* clone origin is really a dsl_dir thing... */
1595*3cb34c60Sahrens 	if (ds->ds_dir->dd_phys->dd_origin_obj) {
1596a2eea2e1Sahrens 		dsl_dataset_t *ods;
1597a2eea2e1Sahrens 
1598a2eea2e1Sahrens 		rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
1599a2eea2e1Sahrens 		VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
1600*3cb34c60Sahrens 		    ds->ds_dir->dd_phys->dd_origin_obj,
1601a2eea2e1Sahrens 		    NULL, DS_MODE_NONE, FTAG, &ods));
1602*3cb34c60Sahrens 		dsl_dataset_name(ods, stat->dds_origin);
1603a2eea2e1Sahrens 		dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
1604a2eea2e1Sahrens 		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
1605a2eea2e1Sahrens 	}
1606a2eea2e1Sahrens }
1607a2eea2e1Sahrens 
1608a2eea2e1Sahrens uint64_t
1609a2eea2e1Sahrens dsl_dataset_fsid_guid(dsl_dataset_t *ds)
1610a2eea2e1Sahrens {
161191ebeef5Sahrens 	return (ds->ds_fsid_guid);
1612a2eea2e1Sahrens }
1613a2eea2e1Sahrens 
1614a2eea2e1Sahrens void
1615a2eea2e1Sahrens dsl_dataset_space(dsl_dataset_t *ds,
1616a2eea2e1Sahrens     uint64_t *refdbytesp, uint64_t *availbytesp,
1617a2eea2e1Sahrens     uint64_t *usedobjsp, uint64_t *availobjsp)
1618fa9e4066Sahrens {
1619a2eea2e1Sahrens 	*refdbytesp = ds->ds_phys->ds_used_bytes;
1620a2eea2e1Sahrens 	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
1621a2eea2e1Sahrens 	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
1622a2eea2e1Sahrens 	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
1623fa9e4066Sahrens }
1624fa9e4066Sahrens 
1625f18faf3fSek boolean_t
1626f18faf3fSek dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
1627f18faf3fSek {
1628f18faf3fSek 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1629f18faf3fSek 
1630f18faf3fSek 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
1631f18faf3fSek 	    dsl_pool_sync_context(dp));
1632f18faf3fSek 	if (ds->ds_prev == NULL)
1633f18faf3fSek 		return (B_FALSE);
1634f18faf3fSek 	if (ds->ds_phys->ds_bp.blk_birth >
1635f18faf3fSek 	    ds->ds_prev->ds_phys->ds_creation_txg)
1636f18faf3fSek 		return (B_TRUE);
1637f18faf3fSek 	return (B_FALSE);
1638f18faf3fSek }
1639f18faf3fSek 
16401d452cf5Sahrens /* ARGSUSED */
1641fa9e4066Sahrens static int
16421d452cf5Sahrens dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1643fa9e4066Sahrens {
16441d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
16451d452cf5Sahrens 	char *newsnapname = arg2;
16461d452cf5Sahrens 	dsl_dir_t *dd = ds->ds_dir;
1647fa9e4066Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
16481d452cf5Sahrens 	dsl_dataset_t *hds;
1649fa9e4066Sahrens 	uint64_t val;
16501d452cf5Sahrens 	int err;
1651fa9e4066Sahrens 
16521d452cf5Sahrens 	err = dsl_dataset_open_obj(dd->dd_pool,
16531d452cf5Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds);
1654fa9e4066Sahrens 	if (err)
1655fa9e4066Sahrens 		return (err);
1656fa9e4066Sahrens 
16571d452cf5Sahrens 	/* new name better not be in use */
16581d452cf5Sahrens 	err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj,
16591d452cf5Sahrens 	    newsnapname, 8, 1, &val);
16601d452cf5Sahrens 	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
16611d452cf5Sahrens 
16621d452cf5Sahrens 	if (err == 0)
16631d452cf5Sahrens 		err = EEXIST;
16641d452cf5Sahrens 	else if (err == ENOENT)
16651d452cf5Sahrens 		err = 0;
1666cdf5b4caSmmusante 
1667cdf5b4caSmmusante 	/* dataset name + 1 for the "@" + the new snapshot name must fit */
1668cdf5b4caSmmusante 	if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
1669cdf5b4caSmmusante 		err = ENAMETOOLONG;
1670cdf5b4caSmmusante 
16711d452cf5Sahrens 	return (err);
16721d452cf5Sahrens }
1673fa9e4066Sahrens 
16741d452cf5Sahrens static void
1675ecd6cf80Smarks dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2,
1676ecd6cf80Smarks     cred_t *cr, dmu_tx_t *tx)
16771d452cf5Sahrens {
16781d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1679ecd6cf80Smarks 	const char *newsnapname = arg2;
16801d452cf5Sahrens 	dsl_dir_t *dd = ds->ds_dir;
16811d452cf5Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
16821d452cf5Sahrens 	dsl_dataset_t *hds;
16831d452cf5Sahrens 	int err;
1684fa9e4066Sahrens 
16851d452cf5Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
1686fa9e4066Sahrens 
16871d452cf5Sahrens 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
16881d452cf5Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds));
1689fa9e4066Sahrens 
16901d452cf5Sahrens 	VERIFY(0 == dsl_dataset_get_snapname(ds));
16911d452cf5Sahrens 	err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj,
16921d452cf5Sahrens 	    ds->ds_snapname, tx);
1693fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
16941d452cf5Sahrens 	mutex_enter(&ds->ds_lock);
16951d452cf5Sahrens 	(void) strcpy(ds->ds_snapname, newsnapname);
16961d452cf5Sahrens 	mutex_exit(&ds->ds_lock);
16971d452cf5Sahrens 	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
16981d452cf5Sahrens 	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
1699fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1700fa9e4066Sahrens 
1701ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
1702ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
17031d452cf5Sahrens 	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
1704fa9e4066Sahrens }
1705fa9e4066Sahrens 
/*
 * State shared between dsl_recursive_rename() and its per-dataset
 * callback, dsl_snapshot_rename_one().
 */
struct renamesnaparg {
	/* task group the rename tasks join; also the dataset hold tag */
	dsl_sync_task_group_t *dstg;
	/* name of a snapshot that could not be renamed; "" if none */
	char failed[MAXPATHLEN];
	/* old snapshot name: the part after the '@' */
	char *oldsnap;
	/* new snapshot name: the part after the '@' */
	char *newsnap;
};
1712cdf5b4caSmmusante 
1713cdf5b4caSmmusante static int
1714cdf5b4caSmmusante dsl_snapshot_rename_one(char *name, void *arg)
1715cdf5b4caSmmusante {
1716f18faf3fSek 	struct renamesnaparg *ra = arg;
1717cdf5b4caSmmusante 	dsl_dataset_t *ds = NULL;
1718cdf5b4caSmmusante 	char *cp;
1719cdf5b4caSmmusante 	int err;
1720cdf5b4caSmmusante 
1721cdf5b4caSmmusante 	cp = name + strlen(name);
1722cdf5b4caSmmusante 	*cp = '@';
1723cdf5b4caSmmusante 	(void) strcpy(cp + 1, ra->oldsnap);
1724ecd6cf80Smarks 
1725ecd6cf80Smarks 	/*
1726ecd6cf80Smarks 	 * For recursive snapshot renames the parent won't be changing
1727ecd6cf80Smarks 	 * so we just pass name for both the to/from argument.
1728ecd6cf80Smarks 	 */
1729ecd6cf80Smarks 	if (err = zfs_secpolicy_rename_perms(name, name, CRED())) {
1730ecd6cf80Smarks 		(void) strcpy(ra->failed, name);
1731ecd6cf80Smarks 		return (err);
1732ecd6cf80Smarks 	}
1733ecd6cf80Smarks 
1734cdf5b4caSmmusante 	err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD,
1735cdf5b4caSmmusante 	    ra->dstg, &ds);
1736cdf5b4caSmmusante 	if (err == ENOENT) {
1737cdf5b4caSmmusante 		*cp = '\0';
1738cdf5b4caSmmusante 		return (0);
1739cdf5b4caSmmusante 	}
1740cdf5b4caSmmusante 	if (err) {
1741cdf5b4caSmmusante 		(void) strcpy(ra->failed, name);
1742cdf5b4caSmmusante 		*cp = '\0';
1743cdf5b4caSmmusante 		dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
1744cdf5b4caSmmusante 		return (err);
1745cdf5b4caSmmusante 	}
1746cdf5b4caSmmusante 
1747cdf5b4caSmmusante #ifdef _KERNEL
1748cdf5b4caSmmusante 	/* for all filesystems undergoing rename, we'll need to unmount it */
1749cdf5b4caSmmusante 	(void) zfs_unmount_snap(name, NULL);
1750cdf5b4caSmmusante #endif
1751cdf5b4caSmmusante 
1752cdf5b4caSmmusante 	*cp = '\0';
1753cdf5b4caSmmusante 
1754cdf5b4caSmmusante 	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
1755cdf5b4caSmmusante 	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
1756cdf5b4caSmmusante 
1757cdf5b4caSmmusante 	return (0);
1758cdf5b4caSmmusante }
1759cdf5b4caSmmusante 
1760cdf5b4caSmmusante static int
1761cdf5b4caSmmusante dsl_recursive_rename(char *oldname, const char *newname)
1762cdf5b4caSmmusante {
1763cdf5b4caSmmusante 	int err;
1764f18faf3fSek 	struct renamesnaparg *ra;
1765cdf5b4caSmmusante 	dsl_sync_task_t *dst;
1766cdf5b4caSmmusante 	spa_t *spa;
1767cdf5b4caSmmusante 	char *cp, *fsname = spa_strdup(oldname);
1768cdf5b4caSmmusante 	int len = strlen(oldname);
1769cdf5b4caSmmusante 
1770cdf5b4caSmmusante 	/* truncate the snapshot name to get the fsname */
1771cdf5b4caSmmusante 	cp = strchr(fsname, '@');
1772cdf5b4caSmmusante 	*cp = '\0';
1773cdf5b4caSmmusante 
177440feaa91Sahrens 	err = spa_open(fsname, &spa, FTAG);
1775cdf5b4caSmmusante 	if (err) {
1776cdf5b4caSmmusante 		kmem_free(fsname, len + 1);
1777cdf5b4caSmmusante 		return (err);
1778cdf5b4caSmmusante 	}
1779f18faf3fSek 	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
1780cdf5b4caSmmusante 	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
1781cdf5b4caSmmusante 
1782cdf5b4caSmmusante 	ra->oldsnap = strchr(oldname, '@') + 1;
1783cdf5b4caSmmusante 	ra->newsnap = strchr(newname, '@') + 1;
1784cdf5b4caSmmusante 	*ra->failed = '\0';
1785cdf5b4caSmmusante 
1786cdf5b4caSmmusante 	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
1787cdf5b4caSmmusante 	    DS_FIND_CHILDREN);
1788cdf5b4caSmmusante 	kmem_free(fsname, len + 1);
1789cdf5b4caSmmusante 
1790cdf5b4caSmmusante 	if (err == 0) {
1791cdf5b4caSmmusante 		err = dsl_sync_task_group_wait(ra->dstg);
1792cdf5b4caSmmusante 	}
1793cdf5b4caSmmusante 
1794cdf5b4caSmmusante 	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
1795cdf5b4caSmmusante 	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
1796cdf5b4caSmmusante 		dsl_dataset_t *ds = dst->dst_arg1;
1797cdf5b4caSmmusante 		if (dst->dst_err) {
1798cdf5b4caSmmusante 			dsl_dir_name(ds->ds_dir, ra->failed);
17992572aa4eSmmusante 			(void) strcat(ra->failed, "@");
18002572aa4eSmmusante 			(void) strcat(ra->failed, ra->newsnap);
1801cdf5b4caSmmusante 		}
1802cdf5b4caSmmusante 		dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
1803cdf5b4caSmmusante 	}
1804cdf5b4caSmmusante 
1805ecd6cf80Smarks 	if (err)
1806ecd6cf80Smarks 		(void) strcpy(oldname, ra->failed);
1807cdf5b4caSmmusante 
1808cdf5b4caSmmusante 	dsl_sync_task_group_destroy(ra->dstg);
1809f18faf3fSek 	kmem_free(ra, sizeof (struct renamesnaparg));
1810cdf5b4caSmmusante 	spa_close(spa, FTAG);
1811cdf5b4caSmmusante 	return (err);
1812cdf5b4caSmmusante }
1813cdf5b4caSmmusante 
18143a5a36beSmmusante static int
18153a5a36beSmmusante dsl_valid_rename(char *oldname, void *arg)
18163a5a36beSmmusante {
18173a5a36beSmmusante 	int delta = *(int *)arg;
18183a5a36beSmmusante 
18193a5a36beSmmusante 	if (strlen(oldname) + delta >= MAXNAMELEN)
18203a5a36beSmmusante 		return (ENAMETOOLONG);
18213a5a36beSmmusante 
18223a5a36beSmmusante 	return (0);
18233a5a36beSmmusante }
18243a5a36beSmmusante 
1825fa9e4066Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
1826fa9e4066Sahrens int
1827cdf5b4caSmmusante dsl_dataset_rename(char *oldname, const char *newname,
1828cdf5b4caSmmusante     boolean_t recursive)
1829fa9e4066Sahrens {
1830fa9e4066Sahrens 	dsl_dir_t *dd;
18311d452cf5Sahrens 	dsl_dataset_t *ds;
1832fa9e4066Sahrens 	const char *tail;
1833fa9e4066Sahrens 	int err;
1834fa9e4066Sahrens 
18351d452cf5Sahrens 	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
1836ea8dc4b6Seschrock 	if (err)
1837ea8dc4b6Seschrock 		return (err);
1838fa9e4066Sahrens 	if (tail == NULL) {
18393a5a36beSmmusante 		int delta = strlen(newname) - strlen(oldname);
18403a5a36beSmmusante 
18413a5a36beSmmusante 		/* if we're growing, validate child size lengths */
18423a5a36beSmmusante 		if (delta > 0)
18433a5a36beSmmusante 			err = dmu_objset_find(oldname, dsl_valid_rename,
18443a5a36beSmmusante 			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
18453a5a36beSmmusante 
18463a5a36beSmmusante 		if (!err)
18473a5a36beSmmusante 			err = dsl_dir_rename(dd, newname);
1848fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
1849fa9e4066Sahrens 		return (err);
1850fa9e4066Sahrens 	}
1851fa9e4066Sahrens 	if (tail[0] != '@') {
1852fa9e4066Sahrens 		/* the name ended in a nonexistant component */
1853fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
1854fa9e4066Sahrens 		return (ENOENT);
1855fa9e4066Sahrens 	}
1856fa9e4066Sahrens 
1857fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
18581d452cf5Sahrens 
18591d452cf5Sahrens 	/* new name must be snapshot in same filesystem */
18601d452cf5Sahrens 	tail = strchr(newname, '@');
18611d452cf5Sahrens 	if (tail == NULL)
18621d452cf5Sahrens 		return (EINVAL);
18631d452cf5Sahrens 	tail++;
18641d452cf5Sahrens 	if (strncmp(oldname, newname, tail - newname) != 0)
18651d452cf5Sahrens 		return (EXDEV);
18661d452cf5Sahrens 
1867cdf5b4caSmmusante 	if (recursive) {
1868cdf5b4caSmmusante 		err = dsl_recursive_rename(oldname, newname);
1869cdf5b4caSmmusante 	} else {
1870cdf5b4caSmmusante 		err = dsl_dataset_open(oldname,
1871cdf5b4caSmmusante 		    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
1872cdf5b4caSmmusante 		if (err)
1873cdf5b4caSmmusante 			return (err);
18741d452cf5Sahrens 
1875cdf5b4caSmmusante 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
1876cdf5b4caSmmusante 		    dsl_dataset_snapshot_rename_check,
1877cdf5b4caSmmusante 		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
18781d452cf5Sahrens 
1879cdf5b4caSmmusante 		dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
1880cdf5b4caSmmusante 	}
18811d452cf5Sahrens 
1882fa9e4066Sahrens 	return (err);
1883fa9e4066Sahrens }
188499653d4eSeschrock 
/*
 * Results computed by dsl_dataset_promote_check() and handed on to
 * dsl_dataset_promote_sync().
 */
struct promotearg {
	uint64_t used;		/* bytes to transfer to the clone's dir */
	uint64_t comp;		/* same, compressed size */
	uint64_t uncomp;	/* same, uncompressed size */
	uint64_t unique;	/* origin's new unique space */
	uint64_t newnext_obj;	/* object of origin's new next dataset */
	uint64_t snapnames_obj;	/* snapnames ZAP of origin dir's head ds */
};
18891d452cf5Sahrens 
1890ecd6cf80Smarks /* ARGSUSED */
189199653d4eSeschrock static int
18921d452cf5Sahrens dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
189399653d4eSeschrock {
18941d452cf5Sahrens 	dsl_dataset_t *hds = arg1;
18951d452cf5Sahrens 	struct promotearg *pa = arg2;
18961d452cf5Sahrens 	dsl_dir_t *dd = hds->ds_dir;
18971d452cf5Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
1898*3cb34c60Sahrens 	dsl_dir_t *odd = NULL;
189999653d4eSeschrock 	dsl_dataset_t *ds = NULL;
1900*3cb34c60Sahrens 	dsl_dataset_t *origin_ds = NULL;
190199653d4eSeschrock 	dsl_dataset_t *newnext_ds = NULL;
190299653d4eSeschrock 	int err;
190399653d4eSeschrock 	char *name = NULL;
19041d452cf5Sahrens 	uint64_t itor = 0;
190599653d4eSeschrock 	blkptr_t bp;
190699653d4eSeschrock 
19071d452cf5Sahrens 	bzero(pa, sizeof (*pa));
19081d452cf5Sahrens 
190999653d4eSeschrock 	/* Check that it is a clone */
1910*3cb34c60Sahrens 	if (dd->dd_phys->dd_origin_obj == 0)
191199653d4eSeschrock 		return (EINVAL);
191299653d4eSeschrock 
19131d452cf5Sahrens 	/* Since this is so expensive, don't do the preliminary check */
19141d452cf5Sahrens 	if (!dmu_tx_is_syncing(tx))
19151d452cf5Sahrens 		return (0);
19161d452cf5Sahrens 
1917*3cb34c60Sahrens 	if (err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj,
1918*3cb34c60Sahrens 	    NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds))
191999653d4eSeschrock 		goto out;
1920*3cb34c60Sahrens 	odd = origin_ds->ds_dir;
19211d452cf5Sahrens 
19221d452cf5Sahrens 	{
19231d452cf5Sahrens 		dsl_dataset_t *phds;
19241d452cf5Sahrens 		if (err = dsl_dataset_open_obj(dd->dd_pool,
1925*3cb34c60Sahrens 		    odd->dd_phys->dd_head_dataset_obj,
19261d452cf5Sahrens 		    NULL, DS_MODE_NONE, FTAG, &phds))
19271d452cf5Sahrens 			goto out;
19281d452cf5Sahrens 		pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
19291d452cf5Sahrens 		dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
19301d452cf5Sahrens 	}
193199653d4eSeschrock 
193299653d4eSeschrock 	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
193399653d4eSeschrock 		err = EXDEV;
193499653d4eSeschrock 		goto out;
193599653d4eSeschrock 	}
193699653d4eSeschrock 
1937*3cb34c60Sahrens 	/* find origin's new next ds */
193899653d4eSeschrock 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
193999653d4eSeschrock 	    NULL, DS_MODE_NONE, FTAG, &newnext_ds));
1940*3cb34c60Sahrens 	while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) {
194199653d4eSeschrock 		dsl_dataset_t *prev;
194299653d4eSeschrock 
194399653d4eSeschrock 		if (err = dsl_dataset_open_obj(dd->dd_pool,
19441d452cf5Sahrens 		    newnext_ds->ds_phys->ds_prev_snap_obj,
19451d452cf5Sahrens 		    NULL, DS_MODE_NONE, FTAG, &prev))
194699653d4eSeschrock 			goto out;
194799653d4eSeschrock 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
194899653d4eSeschrock 		newnext_ds = prev;
194999653d4eSeschrock 	}
19501d452cf5Sahrens 	pa->newnext_obj = newnext_ds->ds_object;
195199653d4eSeschrock 
1952*3cb34c60Sahrens 	/* compute origin's new unique space */
195399653d4eSeschrock 	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
195499653d4eSeschrock 	    &itor, &bp)) == 0) {
1955*3cb34c60Sahrens 		if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg)
19561d452cf5Sahrens 			pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
195799653d4eSeschrock 	}
195899653d4eSeschrock 	if (err != ENOENT)
195999653d4eSeschrock 		goto out;
196099653d4eSeschrock 
196199653d4eSeschrock 	/* Walk the snapshots that we are moving */
196299653d4eSeschrock 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1963*3cb34c60Sahrens 	ds = origin_ds;
196499653d4eSeschrock 	/* CONSTCOND */
196599653d4eSeschrock 	while (TRUE) {
196699653d4eSeschrock 		uint64_t val, dlused, dlcomp, dluncomp;
196799653d4eSeschrock 		dsl_dataset_t *prev;
196899653d4eSeschrock 
196999653d4eSeschrock 		/* Check that the snapshot name does not conflict */
197099653d4eSeschrock 		dsl_dataset_name(ds, name);
197199653d4eSeschrock 		err = zap_lookup(dd->dd_pool->dp_meta_objset,
197299653d4eSeschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
197399653d4eSeschrock 		    8, 1, &val);
197499653d4eSeschrock 		if (err != ENOENT) {
197599653d4eSeschrock 			if (err == 0)
197699653d4eSeschrock 				err = EEXIST;
197799653d4eSeschrock 			goto out;
197899653d4eSeschrock 		}
197999653d4eSeschrock 
198099653d4eSeschrock 		/*
198199653d4eSeschrock 		 * compute space to transfer.  Each snapshot gave birth to:
198299653d4eSeschrock 		 * (my used) - (prev's used) + (deadlist's used)
198399653d4eSeschrock 		 */
19841d452cf5Sahrens 		pa->used += ds->ds_phys->ds_used_bytes;
19851d452cf5Sahrens 		pa->comp += ds->ds_phys->ds_compressed_bytes;
19861d452cf5Sahrens 		pa->uncomp += ds->ds_phys->ds_uncompressed_bytes;
198799653d4eSeschrock 
198899653d4eSeschrock 		/* If we reach the first snapshot, we're done. */
198999653d4eSeschrock 		if (ds->ds_phys->ds_prev_snap_obj == 0)
199099653d4eSeschrock 			break;
199199653d4eSeschrock 
199299653d4eSeschrock 		if (err = bplist_space(&ds->ds_deadlist,
199399653d4eSeschrock 		    &dlused, &dlcomp, &dluncomp))
199499653d4eSeschrock 			goto out;
199599653d4eSeschrock 		if (err = dsl_dataset_open_obj(dd->dd_pool,
199699653d4eSeschrock 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
199799653d4eSeschrock 		    FTAG, &prev))
199899653d4eSeschrock 			goto out;
19991d452cf5Sahrens 		pa->used += dlused - prev->ds_phys->ds_used_bytes;
20001d452cf5Sahrens 		pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
20011d452cf5Sahrens 		pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;
200299653d4eSeschrock 
200399653d4eSeschrock 		/*
200499653d4eSeschrock 		 * We could be a clone of a clone.  If we reach our
200599653d4eSeschrock 		 * parent's branch point, we're done.
200699653d4eSeschrock 		 */
200799653d4eSeschrock 		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
200899653d4eSeschrock 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
200999653d4eSeschrock 			break;
201099653d4eSeschrock 		}
2011*3cb34c60Sahrens 		if (ds != origin_ds)
201299653d4eSeschrock 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
201399653d4eSeschrock 		ds = prev;
201499653d4eSeschrock 	}
201599653d4eSeschrock 
201699653d4eSeschrock 	/* Check that there is enough space here */
2017*3cb34c60Sahrens 	err = dsl_dir_transfer_possible(odd, dd, pa->used);
20181d452cf5Sahrens 
20191d452cf5Sahrens out:
2020*3cb34c60Sahrens 	if (ds && ds != origin_ds)
20211d452cf5Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
2022*3cb34c60Sahrens 	if (origin_ds)
2023*3cb34c60Sahrens 		dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG);
20241d452cf5Sahrens 	if (newnext_ds)
20251d452cf5Sahrens 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
20261d452cf5Sahrens 	if (name)
20271d452cf5Sahrens 		kmem_free(name, MAXPATHLEN);
20281d452cf5Sahrens 	return (err);
20291d452cf5Sahrens }
203099653d4eSeschrock 
20311d452cf5Sahrens static void
2032ecd6cf80Smarks dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
20331d452cf5Sahrens {
20341d452cf5Sahrens 	dsl_dataset_t *hds = arg1;
20351d452cf5Sahrens 	struct promotearg *pa = arg2;
20361d452cf5Sahrens 	dsl_dir_t *dd = hds->ds_dir;
20371d452cf5Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
2038*3cb34c60Sahrens 	dsl_dir_t *odd = NULL;
2039*3cb34c60Sahrens 	dsl_dataset_t *ds, *origin_ds;
20401d452cf5Sahrens 	char *name;
20411d452cf5Sahrens 
2042*3cb34c60Sahrens 	ASSERT(dd->dd_phys->dd_origin_obj != 0);
20431d452cf5Sahrens 	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
20441d452cf5Sahrens 
2045*3cb34c60Sahrens 	VERIFY(0 == dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj,
2046*3cb34c60Sahrens 	    NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds));
20470b69c2f0Sahrens 	/*
2048*3cb34c60Sahrens 	 * We need to explicitly open odd, since origin_ds's dd will be
20490b69c2f0Sahrens 	 * changing.
20500b69c2f0Sahrens 	 */
2051*3cb34c60Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
2052*3cb34c60Sahrens 	    NULL, FTAG, &odd));
205399653d4eSeschrock 
205499653d4eSeschrock 	/* move snapshots to this dir */
20551d452cf5Sahrens 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2056*3cb34c60Sahrens 	ds = origin_ds;
205799653d4eSeschrock 	/* CONSTCOND */
205899653d4eSeschrock 	while (TRUE) {
205999653d4eSeschrock 		dsl_dataset_t *prev;
206099653d4eSeschrock 
206199653d4eSeschrock 		/* move snap name entry */
206299653d4eSeschrock 		dsl_dataset_name(ds, name);
20631d452cf5Sahrens 		VERIFY(0 == zap_remove(dp->dp_meta_objset,
20641d452cf5Sahrens 		    pa->snapnames_obj, ds->ds_snapname, tx));
20651d452cf5Sahrens 		VERIFY(0 == zap_add(dp->dp_meta_objset,
206699653d4eSeschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
206799653d4eSeschrock 		    8, 1, &ds->ds_object, tx));
206899653d4eSeschrock 
206999653d4eSeschrock 		/* change containing dsl_dir */
207099653d4eSeschrock 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
2071*3cb34c60Sahrens 		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
207299653d4eSeschrock 		ds->ds_phys->ds_dir_obj = dd->dd_object;
2073*3cb34c60Sahrens 		ASSERT3P(ds->ds_dir, ==, odd);
207499653d4eSeschrock 		dsl_dir_close(ds->ds_dir, ds);
20751d452cf5Sahrens 		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
207699653d4eSeschrock 		    NULL, ds, &ds->ds_dir));
207799653d4eSeschrock 
207899653d4eSeschrock 		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
207999653d4eSeschrock 
208099653d4eSeschrock 		if (ds->ds_phys->ds_prev_snap_obj == 0)
208199653d4eSeschrock 			break;
208299653d4eSeschrock 
20831d452cf5Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
208499653d4eSeschrock 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
208599653d4eSeschrock 		    FTAG, &prev));
208699653d4eSeschrock 
208799653d4eSeschrock 		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
208899653d4eSeschrock 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
208999653d4eSeschrock 			break;
209099653d4eSeschrock 		}
2091*3cb34c60Sahrens 		if (ds != origin_ds)
209299653d4eSeschrock 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
209399653d4eSeschrock 		ds = prev;
209499653d4eSeschrock 	}
2095*3cb34c60Sahrens 	if (ds != origin_ds)
20961d452cf5Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
209799653d4eSeschrock 
2098*3cb34c60Sahrens 	/* change origin's next snap */
2099*3cb34c60Sahrens 	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
2100*3cb34c60Sahrens 	origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;
210199653d4eSeschrock 
2102*3cb34c60Sahrens 	/* change origin */
210399653d4eSeschrock 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
2104*3cb34c60Sahrens 	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
2105*3cb34c60Sahrens 	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
2106*3cb34c60Sahrens 	dmu_buf_will_dirty(odd->dd_dbuf, tx);
2107*3cb34c60Sahrens 	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
210899653d4eSeschrock 
210999653d4eSeschrock 	/* change space accounting */
2110*3cb34c60Sahrens 	dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx);
21111d452cf5Sahrens 	dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
2112*3cb34c60Sahrens 	origin_ds->ds_phys->ds_unique_bytes = pa->unique;
211399653d4eSeschrock 
2114ecd6cf80Smarks 	/* log history record */
2115ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
2116ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
2117ecd6cf80Smarks 
2118*3cb34c60Sahrens 	dsl_dir_close(odd, FTAG);
2119*3cb34c60Sahrens 	dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG);
21201d452cf5Sahrens 	kmem_free(name, MAXPATHLEN);
212199653d4eSeschrock }
212299653d4eSeschrock 
212399653d4eSeschrock int
212499653d4eSeschrock dsl_dataset_promote(const char *name)
212599653d4eSeschrock {
212699653d4eSeschrock 	dsl_dataset_t *ds;
212799653d4eSeschrock 	int err;
212899653d4eSeschrock 	dmu_object_info_t doi;
21291d452cf5Sahrens 	struct promotearg pa;
213099653d4eSeschrock 
213199653d4eSeschrock 	err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
213299653d4eSeschrock 	if (err)
213399653d4eSeschrock 		return (err);
213499653d4eSeschrock 
213599653d4eSeschrock 	err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
213699653d4eSeschrock 	    ds->ds_phys->ds_snapnames_zapobj, &doi);
213799653d4eSeschrock 	if (err) {
213899653d4eSeschrock 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
213999653d4eSeschrock 		return (err);
214099653d4eSeschrock 	}
214199653d4eSeschrock 
214299653d4eSeschrock 	/*
214399653d4eSeschrock 	 * Add in 128x the snapnames zapobj size, since we will be moving
214499653d4eSeschrock 	 * a bunch of snapnames to the promoted ds, and dirtying their
214599653d4eSeschrock 	 * bonus buffers.
214699653d4eSeschrock 	 */
21471d452cf5Sahrens 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
21481d452cf5Sahrens 	    dsl_dataset_promote_check,
21491d452cf5Sahrens 	    dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
215099653d4eSeschrock 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
215199653d4eSeschrock 	return (err);
215299653d4eSeschrock }
2153b1b8ab34Slling 
2154*3cb34c60Sahrens struct cloneswaparg {
2155*3cb34c60Sahrens 	dsl_dataset_t *cds; /* clone dataset */
2156*3cb34c60Sahrens 	dsl_dataset_t *ohds; /* origin's head dataset */
2157*3cb34c60Sahrens 	boolean_t force;
2158*3cb34c60Sahrens };
2159f18faf3fSek 
2160f18faf3fSek /* ARGSUSED */
2161f18faf3fSek static int
2162f18faf3fSek dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
2163f18faf3fSek {
2164*3cb34c60Sahrens 	struct cloneswaparg *csa = arg1;
2165f18faf3fSek 
2166*3cb34c60Sahrens 	/* they should both be heads */
2167*3cb34c60Sahrens 	if (dsl_dataset_is_snapshot(csa->cds) ||
2168*3cb34c60Sahrens 	    dsl_dataset_is_snapshot(csa->ohds))
2169f18faf3fSek 		return (EINVAL);
2170f18faf3fSek 
2171*3cb34c60Sahrens 	/* the branch point should be just before them */
2172*3cb34c60Sahrens 	if (csa->cds->ds_prev != csa->ohds->ds_prev)
2173f18faf3fSek 		return (EINVAL);
2174f18faf3fSek 
2175*3cb34c60Sahrens 	/* cds should be the clone */
2176*3cb34c60Sahrens 	if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj !=
2177*3cb34c60Sahrens 	    csa->ohds->ds_object)
2178*3cb34c60Sahrens 		return (EINVAL);
2179f18faf3fSek 
2180*3cb34c60Sahrens 	/* the clone should be a child of the origin */
2181*3cb34c60Sahrens 	if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
2182*3cb34c60Sahrens 		return (EINVAL);
2183f18faf3fSek 
2184*3cb34c60Sahrens 	/* ohds shouldn't be modified unless 'force' */
2185*3cb34c60Sahrens 	if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
2186*3cb34c60Sahrens 		return (ETXTBSY);
2187*3cb34c60Sahrens 	return (0);
2188f18faf3fSek }
2189f18faf3fSek 
2190f18faf3fSek /* ARGSUSED */
2191f18faf3fSek static void
2192f18faf3fSek dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
2193f18faf3fSek {
2194*3cb34c60Sahrens 	struct cloneswaparg *csa = arg1;
2195*3cb34c60Sahrens 	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
2196f18faf3fSek 	uint64_t itor = 0;
2197f18faf3fSek 	blkptr_t bp;
2198f18faf3fSek 	uint64_t unique = 0;
2199f18faf3fSek 	int err;
2200f18faf3fSek 
2201*3cb34c60Sahrens 	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
2202*3cb34c60Sahrens 	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
2203*3cb34c60Sahrens 	dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx);
2204f18faf3fSek 
2205*3cb34c60Sahrens 	if (csa->cds->ds_user_ptr != NULL) {
2206*3cb34c60Sahrens 		csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr);
2207*3cb34c60Sahrens 		csa->cds->ds_user_ptr = NULL;
2208*3cb34c60Sahrens 	}
2209f18faf3fSek 
2210*3cb34c60Sahrens 	if (csa->ohds->ds_user_ptr != NULL) {
2211*3cb34c60Sahrens 		csa->ohds->ds_user_evict_func(csa->ohds,
2212*3cb34c60Sahrens 		    csa->ohds->ds_user_ptr);
2213*3cb34c60Sahrens 		csa->ohds->ds_user_ptr = NULL;
2214*3cb34c60Sahrens 	}
2215f18faf3fSek 
2216f18faf3fSek 	/* compute unique space */
2217*3cb34c60Sahrens 	while ((err = bplist_iterate(&csa->cds->ds_deadlist,
2218*3cb34c60Sahrens 	    &itor, &bp)) == 0) {
2219*3cb34c60Sahrens 		if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg)
2220*3cb34c60Sahrens 			unique += bp_get_dasize(dp->dp_spa, &bp);
2221f18faf3fSek 	}
2222f18faf3fSek 	VERIFY(err == ENOENT);
2223f18faf3fSek 
2224f18faf3fSek 	/* reset origin's unique bytes */
2225*3cb34c60Sahrens 	csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique;
2226f18faf3fSek 
2227f18faf3fSek 	/* swap blkptrs */
2228f18faf3fSek 	{
2229f18faf3fSek 		blkptr_t tmp;
2230*3cb34c60Sahrens 		tmp = csa->ohds->ds_phys->ds_bp;
2231*3cb34c60Sahrens 		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
2232*3cb34c60Sahrens 		csa->cds->ds_phys->ds_bp = tmp;
2233f18faf3fSek 	}
2234f18faf3fSek 
2235f18faf3fSek 	/* set dd_*_bytes */
2236f18faf3fSek 	{
2237f18faf3fSek 		int64_t dused, dcomp, duncomp;
2238f18faf3fSek 		uint64_t cdl_used, cdl_comp, cdl_uncomp;
2239f18faf3fSek 		uint64_t odl_used, odl_comp, odl_uncomp;
2240f18faf3fSek 
2241*3cb34c60Sahrens 		VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used,
2242f18faf3fSek 		    &cdl_comp, &cdl_uncomp));
2243*3cb34c60Sahrens 		VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used,
2244f18faf3fSek 		    &odl_comp, &odl_uncomp));
2245*3cb34c60Sahrens 		dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
2246*3cb34c60Sahrens 		    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
2247*3cb34c60Sahrens 		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
2248*3cb34c60Sahrens 		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
2249*3cb34c60Sahrens 		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
2250*3cb34c60Sahrens 		    cdl_uncomp -
2251*3cb34c60Sahrens 		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
2252*3cb34c60Sahrens 
2253*3cb34c60Sahrens 		dsl_dir_diduse_space(csa->ohds->ds_dir,
2254*3cb34c60Sahrens 		    dused, dcomp, duncomp, tx);
2255*3cb34c60Sahrens 		dsl_dir_diduse_space(csa->cds->ds_dir,
2256*3cb34c60Sahrens 		    -dused, -dcomp, -duncomp, tx);
2257*3cb34c60Sahrens 	}
2258*3cb34c60Sahrens 
2259*3cb34c60Sahrens #define	SWITCH64(x, y) \
2260*3cb34c60Sahrens 	{ \
2261*3cb34c60Sahrens 		uint64_t __tmp = (x); \
2262*3cb34c60Sahrens 		(x) = (y); \
2263*3cb34c60Sahrens 		(y) = __tmp; \
2264f18faf3fSek 	}
2265f18faf3fSek 
2266f18faf3fSek 	/* swap ds_*_bytes */
2267*3cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
2268*3cb34c60Sahrens 	    csa->cds->ds_phys->ds_used_bytes);
2269*3cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
2270*3cb34c60Sahrens 	    csa->cds->ds_phys->ds_compressed_bytes);
2271*3cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
2272*3cb34c60Sahrens 	    csa->cds->ds_phys->ds_uncompressed_bytes);
2273f18faf3fSek 
2274f18faf3fSek 	/* swap deadlists */
2275*3cb34c60Sahrens 	bplist_close(&csa->cds->ds_deadlist);
2276*3cb34c60Sahrens 	bplist_close(&csa->ohds->ds_deadlist);
2277*3cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
2278*3cb34c60Sahrens 	    csa->cds->ds_phys->ds_deadlist_obj);
2279*3cb34c60Sahrens 	VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
2280*3cb34c60Sahrens 	    csa->cds->ds_phys->ds_deadlist_obj));
2281*3cb34c60Sahrens 	VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
2282*3cb34c60Sahrens 	    csa->ohds->ds_phys->ds_deadlist_obj));
2283f18faf3fSek }
2284f18faf3fSek 
2285f18faf3fSek /*
2286f18faf3fSek  * Swap the clone "cosname" with its origin head file system.
2287f18faf3fSek  */
2288f18faf3fSek int
2289*3cb34c60Sahrens dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
2290*3cb34c60Sahrens     boolean_t force)
2291f18faf3fSek {
2292*3cb34c60Sahrens 	struct cloneswaparg csa;
2293f18faf3fSek 
2294*3cb34c60Sahrens 	ASSERT(clone->ds_open_refcount == DS_REF_MAX);
2295*3cb34c60Sahrens 	ASSERT(origin_head->ds_open_refcount == DS_REF_MAX);
2296f18faf3fSek 
2297*3cb34c60Sahrens 	csa.cds = clone;
2298*3cb34c60Sahrens 	csa.ohds = origin_head;
2299*3cb34c60Sahrens 	csa.force = force;
2300*3cb34c60Sahrens 	return (dsl_sync_task_do(clone->ds_dir->dd_pool,
2301f18faf3fSek 	    dsl_dataset_clone_swap_check,
2302*3cb34c60Sahrens 	    dsl_dataset_clone_swap_sync, &csa, NULL, 9));
2303f18faf3fSek }
2304f18faf3fSek 
2305b1b8ab34Slling /*
2306b1b8ab34Slling  * Given a pool name and a dataset object number in that pool,
2307b1b8ab34Slling  * return the name of that dataset.
2308b1b8ab34Slling  */
2309b1b8ab34Slling int
2310b1b8ab34Slling dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
2311b1b8ab34Slling {
2312b1b8ab34Slling 	spa_t *spa;
2313b1b8ab34Slling 	dsl_pool_t *dp;
2314b1b8ab34Slling 	dsl_dataset_t *ds = NULL;
2315b1b8ab34Slling 	int error;
2316b1b8ab34Slling 
2317b1b8ab34Slling 	if ((error = spa_open(pname, &spa, FTAG)) != 0)
2318b1b8ab34Slling 		return (error);
2319b1b8ab34Slling 	dp = spa_get_dsl(spa);
2320b1b8ab34Slling 	rw_enter(&dp->dp_config_rwlock, RW_READER);
2321b1b8ab34Slling 	if ((error = dsl_dataset_open_obj(dp, obj,
2322b1b8ab34Slling 	    NULL, DS_MODE_NONE, FTAG, &ds)) != 0) {
2323b1b8ab34Slling 		rw_exit(&dp->dp_config_rwlock);
2324b1b8ab34Slling 		spa_close(spa, FTAG);
2325b1b8ab34Slling 		return (error);
2326b1b8ab34Slling 	}
2327b1b8ab34Slling 	dsl_dataset_name(ds, buf);
2328b1b8ab34Slling 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
2329b1b8ab34Slling 	rw_exit(&dp->dp_config_rwlock);
2330b1b8ab34Slling 	spa_close(spa, FTAG);
2331b1b8ab34Slling 
2332b1b8ab34Slling 	return (0);
2333b1b8ab34Slling }
2334