xref: /illumos-gate/usr/src/uts/common/fs/zfs/dsl_dataset.c (revision a9799022bd90b13722204e80112efaa5bf573099)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
2255434c77Sek  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23fa9e4066Sahrens  * Use is subject to license terms.
24fa9e4066Sahrens  */
25fa9e4066Sahrens 
26fa9e4066Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27fa9e4066Sahrens 
28fa9e4066Sahrens #include <sys/dmu_objset.h>
29fa9e4066Sahrens #include <sys/dsl_dataset.h>
30fa9e4066Sahrens #include <sys/dsl_dir.h>
3199653d4eSeschrock #include <sys/dsl_prop.h>
321d452cf5Sahrens #include <sys/dsl_synctask.h>
33fa9e4066Sahrens #include <sys/dmu_traverse.h>
34fa9e4066Sahrens #include <sys/dmu_tx.h>
35fa9e4066Sahrens #include <sys/arc.h>
36fa9e4066Sahrens #include <sys/zio.h>
37fa9e4066Sahrens #include <sys/zap.h>
38fa9e4066Sahrens #include <sys/unique.h>
39fa9e4066Sahrens #include <sys/zfs_context.h>
40cdf5b4caSmmusante #include <sys/zfs_ioctl.h>
41ecd6cf80Smarks #include <sys/spa.h>
42ecd6cf80Smarks #include <sys/sunddi.h>
43fa9e4066Sahrens 
/*
 * Forward declarations of static check/sync callbacks (typed as
 * dsl_checkfunc_t / dsl_syncfunc_t); their definitions are not in this
 * part of the file.
 */
441d452cf5Sahrens static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
451d452cf5Sahrens static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
461d452cf5Sahrens static dsl_checkfunc_t dsl_dataset_rollback_check;
471d452cf5Sahrens static dsl_syncfunc_t dsl_dataset_rollback_sync;
48*a9799022Sck static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
49e1930233Sbonwick 
/*
 * Upper bound on a dataset's ds_open_refcount; also the weight of an
 * EXCLUSIVE open (see ds_refcnt_weight).
 */
5055434c77Sek #define	DS_REF_MAX	(1ULL << 62)
51fa9e4066Sahrens 
/* Block size handed to bplist_create() when a dataset's deadlist is made. */
52fa9e4066Sahrens #define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
53fa9e4066Sahrens 
54fa9e4066Sahrens /*
55fa9e4066Sahrens  * We use weighted reference counts to express the various forms of exclusion
56fa9e4066Sahrens  * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
5755434c77Sek  * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
58fa9e4066Sahrens  * This makes the exclusion logic simple: the total refcnt for all opens cannot
5955434c77Sek  * exceed DS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
6055434c77Sek  * weight (DS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
61fa9e4066Sahrens  * just over half of the refcnt space, so there can't be more than one, but it
62fa9e4066Sahrens  * can peacefully coexist with any number of STANDARD opens.
63fa9e4066Sahrens  */
64fa9e4066Sahrens static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
6555434c77Sek 	0,			/* DS_MODE_NONE - invalid		*/
6655434c77Sek 	1,			/* DS_MODE_STANDARD - unlimited number	*/
6755434c77Sek 	(DS_REF_MAX >> 1) + 1,	/* DS_MODE_PRIMARY - only one of these	*/
6855434c77Sek 	DS_REF_MAX		/* DS_MODE_EXCLUSIVE - no other opens	*/
69fa9e4066Sahrens };
70fa9e4066Sahrens 
71*a9799022Sck /*
 * Figure out how much of this delta should be propagated to the dsl_dir
73*a9799022Sck  * layer.  If there's a refreservation, that space has already been
74*a9799022Sck  * partially accounted for in our ancestors.
75*a9799022Sck  */
76*a9799022Sck static int64_t
77*a9799022Sck parent_delta(dsl_dataset_t *ds, int64_t delta)
78*a9799022Sck {
79*a9799022Sck 	uint64_t old_bytes, new_bytes;
80*a9799022Sck 
81*a9799022Sck 	if (ds->ds_reserved == 0)
82*a9799022Sck 		return (delta);
83*a9799022Sck 
84*a9799022Sck 	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
85*a9799022Sck 	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
86*a9799022Sck 
87*a9799022Sck 	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
88*a9799022Sck 	return (new_bytes - old_bytes);
89*a9799022Sck }
90fa9e4066Sahrens 
91fa9e4066Sahrens void
92fa9e4066Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
93fa9e4066Sahrens {
9499653d4eSeschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
95fa9e4066Sahrens 	int compressed = BP_GET_PSIZE(bp);
96fa9e4066Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
97*a9799022Sck 	int64_t delta;
98fa9e4066Sahrens 
99fa9e4066Sahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
100fa9e4066Sahrens 
101fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
102fa9e4066Sahrens 	/* It could have been compressed away to nothing */
103fa9e4066Sahrens 	if (BP_IS_HOLE(bp))
104fa9e4066Sahrens 		return;
105fa9e4066Sahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
106fa9e4066Sahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
107fa9e4066Sahrens 	if (ds == NULL) {
108fa9e4066Sahrens 		/*
109fa9e4066Sahrens 		 * Account for the meta-objset space in its placeholder
110fa9e4066Sahrens 		 * dsl_dir.
111fa9e4066Sahrens 		 */
112fa9e4066Sahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
113fa9e4066Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
114fa9e4066Sahrens 		    used, compressed, uncompressed, tx);
115fa9e4066Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
116fa9e4066Sahrens 		return;
117fa9e4066Sahrens 	}
118fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
119fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
120*a9799022Sck 	delta = parent_delta(ds, used);
121fa9e4066Sahrens 	ds->ds_phys->ds_used_bytes += used;
122fa9e4066Sahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
123fa9e4066Sahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
124fa9e4066Sahrens 	ds->ds_phys->ds_unique_bytes += used;
125fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
126*a9799022Sck 	dsl_dir_diduse_space(ds->ds_dir, delta, compressed, uncompressed, tx);
127fa9e4066Sahrens }
128fa9e4066Sahrens 
129fa9e4066Sahrens void
130c717a561Smaybee dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
131c717a561Smaybee     dmu_tx_t *tx)
132fa9e4066Sahrens {
13399653d4eSeschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
134fa9e4066Sahrens 	int compressed = BP_GET_PSIZE(bp);
135fa9e4066Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
136fa9e4066Sahrens 
137fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
138c717a561Smaybee 	/* No block pointer => nothing to free */
139fa9e4066Sahrens 	if (BP_IS_HOLE(bp))
140fa9e4066Sahrens 		return;
141fa9e4066Sahrens 
142fa9e4066Sahrens 	ASSERT(used > 0);
143fa9e4066Sahrens 	if (ds == NULL) {
144c717a561Smaybee 		int err;
145fa9e4066Sahrens 		/*
146fa9e4066Sahrens 		 * Account for the meta-objset space in its placeholder
147fa9e4066Sahrens 		 * dataset.
148fa9e4066Sahrens 		 */
149c717a561Smaybee 		err = arc_free(pio, tx->tx_pool->dp_spa,
150c717a561Smaybee 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
151c717a561Smaybee 		ASSERT(err == 0);
152fa9e4066Sahrens 
153fa9e4066Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
154fa9e4066Sahrens 		    -used, -compressed, -uncompressed, tx);
155fa9e4066Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
156fa9e4066Sahrens 		return;
157fa9e4066Sahrens 	}
158fa9e4066Sahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
159fa9e4066Sahrens 
160fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
161fa9e4066Sahrens 
162fa9e4066Sahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
163c717a561Smaybee 		int err;
164*a9799022Sck 		int64_t delta;
165c717a561Smaybee 
166fa9e4066Sahrens 		dprintf_bp(bp, "freeing: %s", "");
167c717a561Smaybee 		err = arc_free(pio, tx->tx_pool->dp_spa,
168c717a561Smaybee 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
169c717a561Smaybee 		ASSERT(err == 0);
170fa9e4066Sahrens 
171fa9e4066Sahrens 		mutex_enter(&ds->ds_lock);
172*a9799022Sck 		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
173*a9799022Sck 		    !DS_UNIQUE_IS_ACCURATE(ds));
174*a9799022Sck 		delta = parent_delta(ds, -used);
175fa9e4066Sahrens 		ds->ds_phys->ds_unique_bytes -= used;
176fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
177fa9e4066Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
178*a9799022Sck 		    delta, -compressed, -uncompressed, tx);
179fa9e4066Sahrens 	} else {
180fa9e4066Sahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
181ea8dc4b6Seschrock 		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
182fa9e4066Sahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
183fa9e4066Sahrens 		if (ds->ds_phys->ds_prev_snap_obj != 0) {
184fa9e4066Sahrens 			ASSERT3U(ds->ds_prev->ds_object, ==,
185fa9e4066Sahrens 			    ds->ds_phys->ds_prev_snap_obj);
186fa9e4066Sahrens 			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
187fa9e4066Sahrens 			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
18899653d4eSeschrock 			    ds->ds_object && bp->blk_birth >
189fa9e4066Sahrens 			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
190fa9e4066Sahrens 				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
191fa9e4066Sahrens 				mutex_enter(&ds->ds_prev->ds_lock);
192fa9e4066Sahrens 				ds->ds_prev->ds_phys->ds_unique_bytes +=
193fa9e4066Sahrens 				    used;
194fa9e4066Sahrens 				mutex_exit(&ds->ds_prev->ds_lock);
195fa9e4066Sahrens 			}
196fa9e4066Sahrens 		}
197fa9e4066Sahrens 	}
198fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
199fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
200fa9e4066Sahrens 	ds->ds_phys->ds_used_bytes -= used;
201fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
202fa9e4066Sahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
203fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
204fa9e4066Sahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
205fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
206fa9e4066Sahrens }
207fa9e4066Sahrens 
208ea8dc4b6Seschrock uint64_t
209ea8dc4b6Seschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
210fa9e4066Sahrens {
211a2eea2e1Sahrens 	uint64_t trysnap = 0;
212a2eea2e1Sahrens 
213fa9e4066Sahrens 	if (ds == NULL)
214ea8dc4b6Seschrock 		return (0);
215fa9e4066Sahrens 	/*
216fa9e4066Sahrens 	 * The snapshot creation could fail, but that would cause an
217fa9e4066Sahrens 	 * incorrect FALSE return, which would only result in an
218fa9e4066Sahrens 	 * overestimation of the amount of space that an operation would
219fa9e4066Sahrens 	 * consume, which is OK.
220fa9e4066Sahrens 	 *
221fa9e4066Sahrens 	 * There's also a small window where we could miss a pending
222fa9e4066Sahrens 	 * snapshot, because we could set the sync task in the quiescing
223fa9e4066Sahrens 	 * phase.  So this should only be used as a guess.
224fa9e4066Sahrens 	 */
225a2eea2e1Sahrens 	if (ds->ds_trysnap_txg >
226a2eea2e1Sahrens 	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
227a2eea2e1Sahrens 		trysnap = ds->ds_trysnap_txg;
228a2eea2e1Sahrens 	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
229ea8dc4b6Seschrock }
230ea8dc4b6Seschrock 
231ea8dc4b6Seschrock int
232ea8dc4b6Seschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
233ea8dc4b6Seschrock {
234ea8dc4b6Seschrock 	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
235fa9e4066Sahrens }
236fa9e4066Sahrens 
237fa9e4066Sahrens /* ARGSUSED */
238fa9e4066Sahrens static void
239fa9e4066Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
240fa9e4066Sahrens {
241fa9e4066Sahrens 	dsl_dataset_t *ds = dsv;
242fa9e4066Sahrens 
24355434c77Sek 	/* open_refcount == DS_REF_MAX when deleting */
244fa9e4066Sahrens 	ASSERT(ds->ds_open_refcount == 0 ||
24555434c77Sek 	    ds->ds_open_refcount == DS_REF_MAX);
246fa9e4066Sahrens 
247fa9e4066Sahrens 	dprintf_ds(ds, "evicting %s\n", "");
248fa9e4066Sahrens 
24991ebeef5Sahrens 	unique_remove(ds->ds_fsid_guid);
250fa9e4066Sahrens 
251fa9e4066Sahrens 	if (ds->ds_user_ptr != NULL)
252fa9e4066Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
253fa9e4066Sahrens 
254fa9e4066Sahrens 	if (ds->ds_prev) {
255fa9e4066Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
256fa9e4066Sahrens 		ds->ds_prev = NULL;
257fa9e4066Sahrens 	}
258fa9e4066Sahrens 
259fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
260fa9e4066Sahrens 	dsl_dir_close(ds->ds_dir, ds);
261fa9e4066Sahrens 
26291ebeef5Sahrens 	ASSERT(!list_link_active(&ds->ds_synced_link));
263fa9e4066Sahrens 
2645ad82045Snd 	mutex_destroy(&ds->ds_lock);
26591ebeef5Sahrens 	mutex_destroy(&ds->ds_opening_lock);
2665ad82045Snd 	mutex_destroy(&ds->ds_deadlist.bpl_lock);
2675ad82045Snd 
268fa9e4066Sahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
269fa9e4066Sahrens }
270fa9e4066Sahrens 
271ea8dc4b6Seschrock static int
272fa9e4066Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
273fa9e4066Sahrens {
274fa9e4066Sahrens 	dsl_dataset_phys_t *headphys;
275fa9e4066Sahrens 	int err;
276fa9e4066Sahrens 	dmu_buf_t *headdbuf;
277fa9e4066Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
278fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
279fa9e4066Sahrens 
280fa9e4066Sahrens 	if (ds->ds_snapname[0])
281ea8dc4b6Seschrock 		return (0);
282fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
283ea8dc4b6Seschrock 		return (0);
284fa9e4066Sahrens 
285ea8dc4b6Seschrock 	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
286ea8dc4b6Seschrock 	    FTAG, &headdbuf);
287ea8dc4b6Seschrock 	if (err)
288ea8dc4b6Seschrock 		return (err);
289fa9e4066Sahrens 	headphys = headdbuf->db_data;
290fa9e4066Sahrens 	err = zap_value_search(dp->dp_meta_objset,
291e7437265Sahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
292ea8dc4b6Seschrock 	dmu_buf_rele(headdbuf, FTAG);
293ea8dc4b6Seschrock 	return (err);
294fa9e4066Sahrens }
295fa9e4066Sahrens 
/*
 * Open the dataset stored in MOS object dsobj and return it in *dsp.
 *
 * dp       - pool the dataset lives in; the caller must hold
 *            dp_config_rwlock or be in sync context (asserted below).
 * dsobj    - object number of the dataset.
 * snapname - optional snapshot name to record in ds_snapname (only used
 *            when dsobj is not its dsl_dir's head dataset).
 * mode     - open mode; DS_MODE_LEVEL(mode) selects the weight added to
 *            ds_open_refcount.
 * tag      - tag for the hold on the dataset's bonus buffer.
 *
 * If the bonus buffer has no dsl_dataset_t attached yet, one is built here
 * and installed with dmu_buf_set_user_ie(); if another dataset was
 * installed first, ours is torn down and theirs is used.
 *
 * Returns 0 on success, an error from the underlying opens/lookups, or
 * EBUSY when the requested mode conflicts with existing opens (or when a
 * PRIMARY open is requested on an inconsistent dataset without the
 * inconsistent mode flag).  *dsp is written only on success.
 */
296ea8dc4b6Seschrock int
297fa9e4066Sahrens dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
298ea8dc4b6Seschrock     int mode, void *tag, dsl_dataset_t **dsp)
299fa9e4066Sahrens {
300fa9e4066Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
301fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
302fa9e4066Sahrens 	dmu_buf_t *dbuf;
303fa9e4066Sahrens 	dsl_dataset_t *ds;
304ea8dc4b6Seschrock 	int err;
305fa9e4066Sahrens 
306fa9e4066Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
307fa9e4066Sahrens 	    dsl_pool_sync_context(dp));
308fa9e4066Sahrens 
309ea8dc4b6Seschrock 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
310ea8dc4b6Seschrock 	if (err)
311ea8dc4b6Seschrock 		return (err);
312fa9e4066Sahrens 	ds = dmu_buf_get_user(dbuf);
313fa9e4066Sahrens 	if (ds == NULL) {
	/* No in-core dataset attached to this buffer yet: build one. */
314fa9e4066Sahrens 		dsl_dataset_t *winner;
315fa9e4066Sahrens 
316fa9e4066Sahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
317fa9e4066Sahrens 		ds->ds_dbuf = dbuf;
318fa9e4066Sahrens 		ds->ds_object = dsobj;
319fa9e4066Sahrens 		ds->ds_phys = dbuf->db_data;
320fa9e4066Sahrens 
3215ad82045Snd 		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
32291ebeef5Sahrens 		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
3235ad82045Snd 		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
3245ad82045Snd 		    NULL);
3255ad82045Snd 
326ea8dc4b6Seschrock 		err = bplist_open(&ds->ds_deadlist,
327fa9e4066Sahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
328ea8dc4b6Seschrock 		if (err == 0) {
329ea8dc4b6Seschrock 			err = dsl_dir_open_obj(dp,
330ea8dc4b6Seschrock 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
331ea8dc4b6Seschrock 		}
332ea8dc4b6Seschrock 		if (err) {
333ea8dc4b6Seschrock 			/*
334ea8dc4b6Seschrock 			 * we don't really need to close the bplist if we
335ea8dc4b6Seschrock 			 * just opened it.
336ea8dc4b6Seschrock 			 */
3375ad82045Snd 			mutex_destroy(&ds->ds_lock);
33891ebeef5Sahrens 			mutex_destroy(&ds->ds_opening_lock);
3395ad82045Snd 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
340ea8dc4b6Seschrock 			kmem_free(ds, sizeof (dsl_dataset_t));
341ea8dc4b6Seschrock 			dmu_buf_rele(dbuf, tag);
342ea8dc4b6Seschrock 			return (err);
343ea8dc4b6Seschrock 		}
344fa9e4066Sahrens 
		/*
		 * Head dataset of its dsl_dir: no snapshot name, but keep
		 * the previous snapshot (if any) open, tagged with ds.
		 * Otherwise record or look up the snapshot name.
		 */
345fa9e4066Sahrens 		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
346fa9e4066Sahrens 			ds->ds_snapname[0] = '\0';
347fa9e4066Sahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
348ea8dc4b6Seschrock 				err = dsl_dataset_open_obj(dp,
349fa9e4066Sahrens 				    ds->ds_phys->ds_prev_snap_obj, NULL,
350ea8dc4b6Seschrock 				    DS_MODE_NONE, ds, &ds->ds_prev);
351fa9e4066Sahrens 			}
352fa9e4066Sahrens 		} else {
353fa9e4066Sahrens 			if (snapname) {
354fa9e4066Sahrens #ifdef ZFS_DEBUG
				/*
				 * Debug-only cross-check that snapname maps
				 * to dsobj in the head's snapshot-name ZAP.
				 * Note that err from this check carries over
				 * into the real open below.
				 */
355fa9e4066Sahrens 				dsl_dataset_phys_t *headphys;
356ea8dc4b6Seschrock 				dmu_buf_t *headdbuf;
357ea8dc4b6Seschrock 				err = dmu_bonus_hold(mos,
358ea8dc4b6Seschrock 				    ds->ds_dir->dd_phys->dd_head_dataset_obj,
359ea8dc4b6Seschrock 				    FTAG, &headdbuf);
360ea8dc4b6Seschrock 				if (err == 0) {
361ea8dc4b6Seschrock 					headphys = headdbuf->db_data;
362ea8dc4b6Seschrock 					uint64_t foundobj;
363ea8dc4b6Seschrock 					err = zap_lookup(dp->dp_meta_objset,
364ea8dc4b6Seschrock 					    headphys->ds_snapnames_zapobj,
365ea8dc4b6Seschrock 					    snapname, sizeof (foundobj), 1,
366ea8dc4b6Seschrock 					    &foundobj);
					/*
					 * NOTE(review): foundobj is compared
					 * even when zap_lookup() failed, in
					 * which case it looks uninitialized
					 * - confirm.
					 */
367ea8dc4b6Seschrock 					ASSERT3U(foundobj, ==, dsobj);
368ea8dc4b6Seschrock 					dmu_buf_rele(headdbuf, FTAG);
369ea8dc4b6Seschrock 				}
370fa9e4066Sahrens #endif
				/* ds was zero-allocated, so this is a copy. */
371fa9e4066Sahrens 				(void) strcat(ds->ds_snapname, snapname);
372fa9e4066Sahrens 			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
373ea8dc4b6Seschrock 				err = dsl_dataset_get_snapname(ds);
374fa9e4066Sahrens 			}
375fa9e4066Sahrens 		}
376fa9e4066Sahrens 
		/*
		 * Try to attach ds to the buffer.  A non-NULL return is
		 * treated below as a dataset that was installed first.
		 * winner is left unset when err != 0; the short-circuit in
		 * the next test keeps it from being read in that case.
		 */
377ea8dc4b6Seschrock 		if (err == 0) {
378ea8dc4b6Seschrock 			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
379ea8dc4b6Seschrock 			    dsl_dataset_evict);
380ea8dc4b6Seschrock 		}
381ea8dc4b6Seschrock 		if (err || winner) {
			/* Failed or lost the race: undo everything we built. */
382fa9e4066Sahrens 			bplist_close(&ds->ds_deadlist);
383fa9e4066Sahrens 			if (ds->ds_prev) {
384fa9e4066Sahrens 				dsl_dataset_close(ds->ds_prev,
385fa9e4066Sahrens 				    DS_MODE_NONE, ds);
386fa9e4066Sahrens 			}
387fa9e4066Sahrens 			dsl_dir_close(ds->ds_dir, ds);
3885ad82045Snd 			mutex_destroy(&ds->ds_lock);
38991ebeef5Sahrens 			mutex_destroy(&ds->ds_opening_lock);
3905ad82045Snd 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
391fa9e4066Sahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
392ea8dc4b6Seschrock 			if (err) {
393ea8dc4b6Seschrock 				dmu_buf_rele(dbuf, tag);
394ea8dc4b6Seschrock 				return (err);
395ea8dc4b6Seschrock 			}
396fa9e4066Sahrens 			ds = winner;
397fa9e4066Sahrens 		} else {
39891ebeef5Sahrens 			ds->ds_fsid_guid =
399fa9e4066Sahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
400fa9e4066Sahrens 		}
401*a9799022Sck 
		/*
		 * Cache refreservation/refquota for non-snapshots (snapshots
		 * get 0 for both).  This also runs when ds was replaced by
		 * winner above.
		 * NOTE(review): need_lock is derived from RW_LOCK_HELD(),
		 * which may not say whether *this* thread holds the lock -
		 * confirm against the rwlock documentation.
		 */
402*a9799022Sck 		if (!dsl_dataset_is_snapshot(ds)) {
403*a9799022Sck 			boolean_t need_lock =
404*a9799022Sck 			    !RW_LOCK_HELD(&dp->dp_config_rwlock);
405*a9799022Sck 
406*a9799022Sck 			if (need_lock)
407*a9799022Sck 				rw_enter(&dp->dp_config_rwlock, RW_READER);
408*a9799022Sck 			VERIFY(0 == dsl_prop_get_ds_locked(ds->ds_dir,
409*a9799022Sck 			    "refreservation", sizeof (uint64_t), 1,
410*a9799022Sck 			    &ds->ds_reserved, NULL));
411*a9799022Sck 			VERIFY(0 == dsl_prop_get_ds_locked(ds->ds_dir,
412*a9799022Sck 			    "refquota", sizeof (uint64_t), 1, &ds->ds_quota,
413*a9799022Sck 			    NULL));
414*a9799022Sck 			if (need_lock)
415*a9799022Sck 				rw_exit(&dp->dp_config_rwlock);
416*a9799022Sck 		} else {
417*a9799022Sck 			ds->ds_reserved = ds->ds_quota = 0;
418*a9799022Sck 		}
419fa9e4066Sahrens 	}
420fa9e4066Sahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
421fa9e4066Sahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
422fa9e4066Sahrens 
	/*
	 * Apply the exclusion rules: refuse a PRIMARY open of an
	 * inconsistent dataset unless the mode allows it, and refuse any
	 * open whose weight would push the refcount past DS_REF_MAX.
	 * On refusal the buffer hold taken above is dropped via
	 * dsl_dataset_close() with a zero-weight mode.
	 */
423fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
424fa9e4066Sahrens 	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
42599653d4eSeschrock 	    (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
42699653d4eSeschrock 	    !DS_MODE_IS_INCONSISTENT(mode)) ||
42755434c77Sek 	    (ds->ds_open_refcount + weight > DS_REF_MAX)) {
428fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
429fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
430ea8dc4b6Seschrock 		return (EBUSY);
431fa9e4066Sahrens 	}
432fa9e4066Sahrens 	ds->ds_open_refcount += weight;
433fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
434fa9e4066Sahrens 
435ea8dc4b6Seschrock 	*dsp = ds;
436ea8dc4b6Seschrock 	return (0);
437fa9e4066Sahrens }
438fa9e4066Sahrens 
439fa9e4066Sahrens int
440fa9e4066Sahrens dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
441fa9e4066Sahrens     void *tag, dsl_dataset_t **dsp)
442fa9e4066Sahrens {
443fa9e4066Sahrens 	dsl_dir_t *dd;
444fa9e4066Sahrens 	dsl_pool_t *dp;
445fa9e4066Sahrens 	const char *tail;
446fa9e4066Sahrens 	uint64_t obj;
447fa9e4066Sahrens 	dsl_dataset_t *ds = NULL;
448fa9e4066Sahrens 	int err = 0;
449fa9e4066Sahrens 
450ea8dc4b6Seschrock 	err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
451ea8dc4b6Seschrock 	if (err)
452ea8dc4b6Seschrock 		return (err);
453fa9e4066Sahrens 
454fa9e4066Sahrens 	dp = dd->dd_pool;
455fa9e4066Sahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
456fa9e4066Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
457fa9e4066Sahrens 	if (obj == 0) {
458fa9e4066Sahrens 		/* A dataset with no associated objset */
459fa9e4066Sahrens 		err = ENOENT;
460fa9e4066Sahrens 		goto out;
461fa9e4066Sahrens 	}
462fa9e4066Sahrens 
463fa9e4066Sahrens 	if (tail != NULL) {
464fa9e4066Sahrens 		objset_t *mos = dp->dp_meta_objset;
465fa9e4066Sahrens 
466ea8dc4b6Seschrock 		err = dsl_dataset_open_obj(dp, obj, NULL,
467ea8dc4b6Seschrock 		    DS_MODE_NONE, tag, &ds);
468ea8dc4b6Seschrock 		if (err)
469ea8dc4b6Seschrock 			goto out;
470fa9e4066Sahrens 		obj = ds->ds_phys->ds_snapnames_zapobj;
471fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
472fa9e4066Sahrens 		ds = NULL;
473fa9e4066Sahrens 
474fa9e4066Sahrens 		if (tail[0] != '@') {
475fa9e4066Sahrens 			err = ENOENT;
476fa9e4066Sahrens 			goto out;
477fa9e4066Sahrens 		}
478fa9e4066Sahrens 		tail++;
479fa9e4066Sahrens 
480fa9e4066Sahrens 		/* Look for a snapshot */
481fa9e4066Sahrens 		if (!DS_MODE_IS_READONLY(mode)) {
482fa9e4066Sahrens 			err = EROFS;
483fa9e4066Sahrens 			goto out;
484fa9e4066Sahrens 		}
485fa9e4066Sahrens 		dprintf("looking for snapshot '%s'\n", tail);
486fa9e4066Sahrens 		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
487fa9e4066Sahrens 		if (err)
488fa9e4066Sahrens 			goto out;
489fa9e4066Sahrens 	}
490ea8dc4b6Seschrock 	err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);
491fa9e4066Sahrens 
492fa9e4066Sahrens out:
493fa9e4066Sahrens 	rw_exit(&dp->dp_config_rwlock);
494fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
495fa9e4066Sahrens 
496fa9e4066Sahrens 	ASSERT3U((err == 0), ==, (ds != NULL));
497fa9e4066Sahrens 	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
498fa9e4066Sahrens 
499fa9e4066Sahrens 	*dsp = ds;
500fa9e4066Sahrens 	return (err);
501fa9e4066Sahrens }
502fa9e4066Sahrens 
503fa9e4066Sahrens int
504fa9e4066Sahrens dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
505fa9e4066Sahrens {
506fa9e4066Sahrens 	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
507fa9e4066Sahrens }
508fa9e4066Sahrens 
509fa9e4066Sahrens void
510fa9e4066Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
511fa9e4066Sahrens {
512fa9e4066Sahrens 	if (ds == NULL) {
513fa9e4066Sahrens 		(void) strcpy(name, "mos");
514fa9e4066Sahrens 	} else {
515fa9e4066Sahrens 		dsl_dir_name(ds->ds_dir, name);
516ea8dc4b6Seschrock 		VERIFY(0 == dsl_dataset_get_snapname(ds));
517fa9e4066Sahrens 		if (ds->ds_snapname[0]) {
518fa9e4066Sahrens 			(void) strcat(name, "@");
519fa9e4066Sahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
520fa9e4066Sahrens 				/*
521fa9e4066Sahrens 				 * We use a "recursive" mutex so that we
522fa9e4066Sahrens 				 * can call dprintf_ds() with ds_lock held.
523fa9e4066Sahrens 				 */
524fa9e4066Sahrens 				mutex_enter(&ds->ds_lock);
525fa9e4066Sahrens 				(void) strcat(name, ds->ds_snapname);
526fa9e4066Sahrens 				mutex_exit(&ds->ds_lock);
527fa9e4066Sahrens 			} else {
528fa9e4066Sahrens 				(void) strcat(name, ds->ds_snapname);
529fa9e4066Sahrens 			}
530fa9e4066Sahrens 		}
531fa9e4066Sahrens 	}
532fa9e4066Sahrens }
533fa9e4066Sahrens 
534b7661cccSmmusante static int
535b7661cccSmmusante dsl_dataset_namelen(dsl_dataset_t *ds)
536b7661cccSmmusante {
537b7661cccSmmusante 	int result;
538b7661cccSmmusante 
539b7661cccSmmusante 	if (ds == NULL) {
540b7661cccSmmusante 		result = 3;	/* "mos" */
541b7661cccSmmusante 	} else {
542b7661cccSmmusante 		result = dsl_dir_namelen(ds->ds_dir);
543b7661cccSmmusante 		VERIFY(0 == dsl_dataset_get_snapname(ds));
544b7661cccSmmusante 		if (ds->ds_snapname[0]) {
545b7661cccSmmusante 			++result;	/* adding one for the @-sign */
546b7661cccSmmusante 			if (!MUTEX_HELD(&ds->ds_lock)) {
547b7661cccSmmusante 				/* see dsl_datset_name */
548b7661cccSmmusante 				mutex_enter(&ds->ds_lock);
549b7661cccSmmusante 				result += strlen(ds->ds_snapname);
550b7661cccSmmusante 				mutex_exit(&ds->ds_lock);
551b7661cccSmmusante 			} else {
552b7661cccSmmusante 				result += strlen(ds->ds_snapname);
553b7661cccSmmusante 			}
554b7661cccSmmusante 		}
555b7661cccSmmusante 	}
556b7661cccSmmusante 
557b7661cccSmmusante 	return (result);
558b7661cccSmmusante }
559b7661cccSmmusante 
560fa9e4066Sahrens void
561fa9e4066Sahrens dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
562fa9e4066Sahrens {
563fa9e4066Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
564fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
565fa9e4066Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, weight);
566fa9e4066Sahrens 	ds->ds_open_refcount -= weight;
567fa9e4066Sahrens 	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
568fa9e4066Sahrens 	    mode, ds->ds_open_refcount);
569fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
570fa9e4066Sahrens 
571ea8dc4b6Seschrock 	dmu_buf_rele(ds->ds_dbuf, tag);
572fa9e4066Sahrens }
573fa9e4066Sahrens 
5743cb34c60Sahrens void
5753cb34c60Sahrens dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode)
5763cb34c60Sahrens {
5773cb34c60Sahrens 	uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)];
5783cb34c60Sahrens 	uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)];
5793cb34c60Sahrens 	mutex_enter(&ds->ds_lock);
5803cb34c60Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, oldweight);
5813cb34c60Sahrens 	ASSERT3U(oldweight, >=, newweight);
5823cb34c60Sahrens 	ds->ds_open_refcount -= oldweight;
5833cb34c60Sahrens 	ds->ds_open_refcount += newweight;
5843cb34c60Sahrens 	mutex_exit(&ds->ds_lock);
5853cb34c60Sahrens }
5863cb34c60Sahrens 
5873cb34c60Sahrens boolean_t
5883cb34c60Sahrens dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode)
5893cb34c60Sahrens {
5903cb34c60Sahrens 	boolean_t rv;
5913cb34c60Sahrens 	uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)];
5923cb34c60Sahrens 	uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)];
5933cb34c60Sahrens 	mutex_enter(&ds->ds_lock);
5943cb34c60Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, oldweight);
5953cb34c60Sahrens 	ASSERT3U(newweight, >=, oldweight);
5963cb34c60Sahrens 	if (ds->ds_open_refcount - oldweight + newweight > DS_REF_MAX) {
5973cb34c60Sahrens 		rv = B_FALSE;
5983cb34c60Sahrens 	} else {
5993cb34c60Sahrens 		ds->ds_open_refcount -= oldweight;
6003cb34c60Sahrens 		ds->ds_open_refcount += newweight;
6013cb34c60Sahrens 		rv = B_TRUE;
6023cb34c60Sahrens 	}
6033cb34c60Sahrens 	mutex_exit(&ds->ds_lock);
6043cb34c60Sahrens 	return (rv);
6053cb34c60Sahrens }
6063cb34c60Sahrens 
607fa9e4066Sahrens void
608fa9e4066Sahrens dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
609fa9e4066Sahrens {
610fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
611fa9e4066Sahrens 	dmu_buf_t *dbuf;
612fa9e4066Sahrens 	dsl_dataset_phys_t *dsphys;
613fa9e4066Sahrens 	dsl_dataset_t *ds;
614fa9e4066Sahrens 	uint64_t dsobj;
615fa9e4066Sahrens 	dsl_dir_t *dd;
616fa9e4066Sahrens 
617fa9e4066Sahrens 	dsl_dir_create_root(mos, ddobjp, tx);
618ea8dc4b6Seschrock 	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));
619fa9e4066Sahrens 
6201649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
6211649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
622ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
623fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
624fa9e4066Sahrens 	dsphys = dbuf->db_data;
625fa9e4066Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
626fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
627fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
628fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
629fa9e4066Sahrens 	dsphys->ds_snapnames_zapobj =
63087e5029aSahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
631fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
632fa9e4066Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
633fa9e4066Sahrens 	dsphys->ds_deadlist_obj =
634fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
635*a9799022Sck 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
636*a9799022Sck 		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
637ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
638fa9e4066Sahrens 
639fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
640fa9e4066Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
641fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
642fa9e4066Sahrens 
643ea8dc4b6Seschrock 	VERIFY(0 ==
644ea8dc4b6Seschrock 	    dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
645c717a561Smaybee 	(void) dmu_objset_create_impl(dp->dp_spa, ds,
646c717a561Smaybee 	    &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx);
647fa9e4066Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
648fa9e4066Sahrens }
649fa9e4066Sahrens 
6501d452cf5Sahrens uint64_t
6513cb34c60Sahrens dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin, dmu_tx_t *tx)
652fa9e4066Sahrens {
6533cb34c60Sahrens 	dsl_pool_t *dp = dd->dd_pool;
654fa9e4066Sahrens 	dmu_buf_t *dbuf;
655fa9e4066Sahrens 	dsl_dataset_phys_t *dsphys;
6563cb34c60Sahrens 	uint64_t dsobj;
657fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
658fa9e4066Sahrens 
6593cb34c60Sahrens 	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
6603cb34c60Sahrens 	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
661fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
6623cb34c60Sahrens 	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
663fa9e4066Sahrens 
6641649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
6651649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
666ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
667fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
668fa9e4066Sahrens 	dsphys = dbuf->db_data;
669fa9e4066Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
670fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
671fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
672fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
673fa9e4066Sahrens 	dsphys->ds_snapnames_zapobj =
67487e5029aSahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
675fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
676fa9e4066Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
677fa9e4066Sahrens 	dsphys->ds_deadlist_obj =
678fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
679*a9799022Sck 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
680*a9799022Sck 		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
681*a9799022Sck 
6823cb34c60Sahrens 	if (origin) {
6833cb34c60Sahrens 		dsphys->ds_prev_snap_obj = origin->ds_object;
684fa9e4066Sahrens 		dsphys->ds_prev_snap_txg =
6853cb34c60Sahrens 		    origin->ds_phys->ds_creation_txg;
686fa9e4066Sahrens 		dsphys->ds_used_bytes =
6873cb34c60Sahrens 		    origin->ds_phys->ds_used_bytes;
688fa9e4066Sahrens 		dsphys->ds_compressed_bytes =
6893cb34c60Sahrens 		    origin->ds_phys->ds_compressed_bytes;
690fa9e4066Sahrens 		dsphys->ds_uncompressed_bytes =
6913cb34c60Sahrens 		    origin->ds_phys->ds_uncompressed_bytes;
6923cb34c60Sahrens 		dsphys->ds_bp = origin->ds_phys->ds_bp;
693fa9e4066Sahrens 
6943cb34c60Sahrens 		dmu_buf_will_dirty(origin->ds_dbuf, tx);
6953cb34c60Sahrens 		origin->ds_phys->ds_num_children++;
696fa9e4066Sahrens 
697fa9e4066Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
6983cb34c60Sahrens 		dd->dd_phys->dd_origin_obj = origin->ds_object;
699fa9e4066Sahrens 	}
700ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
701fa9e4066Sahrens 
702fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
703fa9e4066Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
7043cb34c60Sahrens 
7053cb34c60Sahrens 	return (dsobj);
7063cb34c60Sahrens }
7073cb34c60Sahrens 
7083cb34c60Sahrens uint64_t
7093cb34c60Sahrens dsl_dataset_create_sync(dsl_dir_t *pdd,
7103cb34c60Sahrens     const char *lastname, dsl_dataset_t *origin, cred_t *cr, dmu_tx_t *tx)
7113cb34c60Sahrens {
7123cb34c60Sahrens 	dsl_pool_t *dp = pdd->dd_pool;
7133cb34c60Sahrens 	uint64_t dsobj, ddobj;
7143cb34c60Sahrens 	dsl_dir_t *dd;
7153cb34c60Sahrens 
7163cb34c60Sahrens 	ASSERT(lastname[0] != '@');
7173cb34c60Sahrens 
7183cb34c60Sahrens 	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
7193cb34c60Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
7203cb34c60Sahrens 
7213cb34c60Sahrens 	dsobj = dsl_dataset_create_sync_impl(dd, origin, tx);
7223cb34c60Sahrens 
7233cb34c60Sahrens 	dsl_deleg_set_create_perms(dd, tx, cr);
7243cb34c60Sahrens 
725fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
726fa9e4066Sahrens 
7271d452cf5Sahrens 	return (dsobj);
728fa9e4066Sahrens }
729fa9e4066Sahrens 
7301d452cf5Sahrens struct destroyarg {
7311d452cf5Sahrens 	dsl_sync_task_group_t *dstg;
7321d452cf5Sahrens 	char *snapname;
7331d452cf5Sahrens 	char *failed;
7341d452cf5Sahrens };
7351d452cf5Sahrens 
7361d452cf5Sahrens static int
7371d452cf5Sahrens dsl_snapshot_destroy_one(char *name, void *arg)
738fa9e4066Sahrens {
7391d452cf5Sahrens 	struct destroyarg *da = arg;
7401d452cf5Sahrens 	dsl_dataset_t *ds;
7411d452cf5Sahrens 	char *cp;
742fa9e4066Sahrens 	int err;
743fa9e4066Sahrens 
7441d452cf5Sahrens 	(void) strcat(name, "@");
7451d452cf5Sahrens 	(void) strcat(name, da->snapname);
7461d452cf5Sahrens 	err = dsl_dataset_open(name,
7471d452cf5Sahrens 	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
748cdf5b4caSmmusante 	    da->dstg, &ds);
7491d452cf5Sahrens 	cp = strchr(name, '@');
7501d452cf5Sahrens 	*cp = '\0';
7511d452cf5Sahrens 	if (err == ENOENT)
7521d452cf5Sahrens 		return (0);
7531d452cf5Sahrens 	if (err) {
7541d452cf5Sahrens 		(void) strcpy(da->failed, name);
755ea8dc4b6Seschrock 		return (err);
7561d452cf5Sahrens 	}
757fa9e4066Sahrens 
7581d452cf5Sahrens 	dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
759cdf5b4caSmmusante 	    dsl_dataset_destroy_sync, ds, da->dstg, 0);
7601d452cf5Sahrens 	return (0);
7611d452cf5Sahrens }
76231fd60d3Sahrens 
7631d452cf5Sahrens /*
7641d452cf5Sahrens  * Destroy 'snapname' in all descendants of 'fsname'.
7651d452cf5Sahrens  */
7661d452cf5Sahrens #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
7671d452cf5Sahrens int
7681d452cf5Sahrens dsl_snapshots_destroy(char *fsname, char *snapname)
7691d452cf5Sahrens {
7701d452cf5Sahrens 	int err;
7711d452cf5Sahrens 	struct destroyarg da;
7721d452cf5Sahrens 	dsl_sync_task_t *dst;
7731d452cf5Sahrens 	spa_t *spa;
7741d452cf5Sahrens 
77540feaa91Sahrens 	err = spa_open(fsname, &spa, FTAG);
7761d452cf5Sahrens 	if (err)
7771d452cf5Sahrens 		return (err);
7781d452cf5Sahrens 	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
7791d452cf5Sahrens 	da.snapname = snapname;
7801d452cf5Sahrens 	da.failed = fsname;
7811d452cf5Sahrens 
7821d452cf5Sahrens 	err = dmu_objset_find(fsname,
7830b69c2f0Sahrens 	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
7841d452cf5Sahrens 
7851d452cf5Sahrens 	if (err == 0)
7861d452cf5Sahrens 		err = dsl_sync_task_group_wait(da.dstg);
7871d452cf5Sahrens 
7881d452cf5Sahrens 	for (dst = list_head(&da.dstg->dstg_tasks); dst;
7891d452cf5Sahrens 	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
7901d452cf5Sahrens 		dsl_dataset_t *ds = dst->dst_arg1;
7911d452cf5Sahrens 		if (dst->dst_err) {
7921d452cf5Sahrens 			dsl_dataset_name(ds, fsname);
79340feaa91Sahrens 			*strchr(fsname, '@') = '\0';
794e1930233Sbonwick 		}
795fa9e4066Sahrens 		/*
7961d452cf5Sahrens 		 * If it was successful, destroy_sync would have
7971d452cf5Sahrens 		 * closed the ds
798fa9e4066Sahrens 		 */
799ea8dc4b6Seschrock 		if (err)
800cdf5b4caSmmusante 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg);
801fa9e4066Sahrens 	}
802fa9e4066Sahrens 
8031d452cf5Sahrens 	dsl_sync_task_group_destroy(da.dstg);
8041d452cf5Sahrens 	spa_close(spa, FTAG);
805fa9e4066Sahrens 	return (err);
806fa9e4066Sahrens }
807fa9e4066Sahrens 
8083cb34c60Sahrens /*
8093cb34c60Sahrens  * ds must be opened EXCLUSIVE or PRIMARY.  on return (whether
8103cb34c60Sahrens  * successful or not), ds will be closed and caller can no longer
8113cb34c60Sahrens  * dereference it.
8123cb34c60Sahrens  */
813fa9e4066Sahrens int
8143cb34c60Sahrens dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
815fa9e4066Sahrens {
816fa9e4066Sahrens 	int err;
8171d452cf5Sahrens 	dsl_sync_task_group_t *dstg;
8181d452cf5Sahrens 	objset_t *os;
819fa9e4066Sahrens 	dsl_dir_t *dd;
8201d452cf5Sahrens 	uint64_t obj;
8211d452cf5Sahrens 
8223cb34c60Sahrens 	if (ds->ds_open_refcount != DS_REF_MAX) {
8233cb34c60Sahrens 		if (dsl_dataset_tryupgrade(ds, DS_MODE_PRIMARY,
8243cb34c60Sahrens 		    DS_MODE_EXCLUSIVE) == 0) {
8253cb34c60Sahrens 			dsl_dataset_close(ds, DS_MODE_PRIMARY, tag);
8263cb34c60Sahrens 			return (EBUSY);
8273cb34c60Sahrens 		}
8283cb34c60Sahrens 	}
8293cb34c60Sahrens 
8303cb34c60Sahrens 	if (dsl_dataset_is_snapshot(ds)) {
8311d452cf5Sahrens 		/* Destroying a snapshot is simpler */
8321d452cf5Sahrens 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
8331d452cf5Sahrens 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
8343cb34c60Sahrens 		    ds, tag, 0);
8353cb34c60Sahrens 		goto out;
8361d452cf5Sahrens 	}
837fa9e4066Sahrens 
8381d452cf5Sahrens 	dd = ds->ds_dir;
839fa9e4066Sahrens 
8401d452cf5Sahrens 	/*
8411d452cf5Sahrens 	 * Check for errors and mark this ds as inconsistent, in
8421d452cf5Sahrens 	 * case we crash while freeing the objects.
8431d452cf5Sahrens 	 */
8441d452cf5Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
8451d452cf5Sahrens 	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
8463cb34c60Sahrens 	if (err)
8473cb34c60Sahrens 		goto out;
8483cb34c60Sahrens 
8493cb34c60Sahrens 	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
8503cb34c60Sahrens 	if (err)
8513cb34c60Sahrens 		goto out;
852fa9e4066Sahrens 
8531d452cf5Sahrens 	/*
8541d452cf5Sahrens 	 * remove the objects in open context, so that we won't
8551d452cf5Sahrens 	 * have too much to do in syncing context.
8561d452cf5Sahrens 	 */
8576754306eSahrens 	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
8586754306eSahrens 	    ds->ds_phys->ds_prev_snap_txg)) {
8591d452cf5Sahrens 		dmu_tx_t *tx = dmu_tx_create(os);
8601d452cf5Sahrens 		dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
8611d452cf5Sahrens 		dmu_tx_hold_bonus(tx, obj);
8621d452cf5Sahrens 		err = dmu_tx_assign(tx, TXG_WAIT);
8631d452cf5Sahrens 		if (err) {
8641d452cf5Sahrens 			/*
8651d452cf5Sahrens 			 * Perhaps there is not enough disk
8661d452cf5Sahrens 			 * space.  Just deal with it from
8671d452cf5Sahrens 			 * dsl_dataset_destroy_sync().
8681d452cf5Sahrens 			 */
8691d452cf5Sahrens 			dmu_tx_abort(tx);
8701d452cf5Sahrens 			continue;
8711d452cf5Sahrens 		}
8721d452cf5Sahrens 		VERIFY(0 == dmu_object_free(os, obj, tx));
8731d452cf5Sahrens 		dmu_tx_commit(tx);
8741d452cf5Sahrens 	}
8751d452cf5Sahrens 	/* Make sure it's not dirty before we finish destroying it. */
8761d452cf5Sahrens 	txg_wait_synced(dd->dd_pool, 0);
8771d452cf5Sahrens 
8781d452cf5Sahrens 	dmu_objset_close(os);
8791d452cf5Sahrens 	if (err != ESRCH)
8803cb34c60Sahrens 		goto out;
8811d452cf5Sahrens 
8823cb34c60Sahrens 	if (ds->ds_user_ptr) {
8833cb34c60Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
8843cb34c60Sahrens 		ds->ds_user_ptr = NULL;
8851d452cf5Sahrens 	}
8861d452cf5Sahrens 
8873cb34c60Sahrens 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
8883cb34c60Sahrens 	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
8893cb34c60Sahrens 	rw_exit(&dd->dd_pool->dp_config_rwlock);
8903cb34c60Sahrens 
8913cb34c60Sahrens 	if (err)
8923cb34c60Sahrens 		goto out;
8933cb34c60Sahrens 
8941d452cf5Sahrens 	/*
8951d452cf5Sahrens 	 * Blow away the dsl_dir + head dataset.
8961d452cf5Sahrens 	 */
8971d452cf5Sahrens 	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
8981d452cf5Sahrens 	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
8993cb34c60Sahrens 	    dsl_dataset_destroy_sync, ds, tag, 0);
9001d452cf5Sahrens 	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
9011d452cf5Sahrens 	    dsl_dir_destroy_sync, dd, FTAG, 0);
9021d452cf5Sahrens 	err = dsl_sync_task_group_wait(dstg);
9031d452cf5Sahrens 	dsl_sync_task_group_destroy(dstg);
9041d452cf5Sahrens 	/* if it is successful, *destroy_sync will close the ds+dd */
9053cb34c60Sahrens 	if (err)
9061d452cf5Sahrens 		dsl_dir_close(dd, FTAG);
9073cb34c60Sahrens out:
9083cb34c60Sahrens 	if (err)
9093cb34c60Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
910fa9e4066Sahrens 	return (err);
911fa9e4066Sahrens }
912fa9e4066Sahrens 
9131d452cf5Sahrens int
9143cb34c60Sahrens dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
9151d452cf5Sahrens {
91655434c77Sek 	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
9173cb34c60Sahrens 
9181d452cf5Sahrens 	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
9191d452cf5Sahrens 	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
9203cb34c60Sahrens 	    ds, &ost, 0));
9211d452cf5Sahrens }
9221d452cf5Sahrens 
923fa9e4066Sahrens void *
924fa9e4066Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
925fa9e4066Sahrens     void *p, dsl_dataset_evict_func_t func)
926fa9e4066Sahrens {
927fa9e4066Sahrens 	void *old;
928fa9e4066Sahrens 
929fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
930fa9e4066Sahrens 	old = ds->ds_user_ptr;
931fa9e4066Sahrens 	if (old == NULL) {
932fa9e4066Sahrens 		ds->ds_user_ptr = p;
933fa9e4066Sahrens 		ds->ds_user_evict_func = func;
934fa9e4066Sahrens 	}
935fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
936fa9e4066Sahrens 	return (old);
937fa9e4066Sahrens }
938fa9e4066Sahrens 
939fa9e4066Sahrens void *
940fa9e4066Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
941fa9e4066Sahrens {
942fa9e4066Sahrens 	return (ds->ds_user_ptr);
943fa9e4066Sahrens }
944fa9e4066Sahrens 
945fa9e4066Sahrens 
946c717a561Smaybee blkptr_t *
947c717a561Smaybee dsl_dataset_get_blkptr(dsl_dataset_t *ds)
948fa9e4066Sahrens {
949c717a561Smaybee 	return (&ds->ds_phys->ds_bp);
950fa9e4066Sahrens }
951fa9e4066Sahrens 
952fa9e4066Sahrens void
953fa9e4066Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
954fa9e4066Sahrens {
955fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
956fa9e4066Sahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
957fa9e4066Sahrens 	if (ds == NULL) {
958fa9e4066Sahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
959fa9e4066Sahrens 	} else {
960fa9e4066Sahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
961fa9e4066Sahrens 		ds->ds_phys->ds_bp = *bp;
962fa9e4066Sahrens 	}
963fa9e4066Sahrens }
964fa9e4066Sahrens 
965fa9e4066Sahrens spa_t *
966fa9e4066Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
967fa9e4066Sahrens {
968fa9e4066Sahrens 	return (ds->ds_dir->dd_pool->dp_spa);
969fa9e4066Sahrens }
970fa9e4066Sahrens 
971fa9e4066Sahrens void
972fa9e4066Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
973fa9e4066Sahrens {
974fa9e4066Sahrens 	dsl_pool_t *dp;
975fa9e4066Sahrens 
976fa9e4066Sahrens 	if (ds == NULL) /* this is the meta-objset */
977fa9e4066Sahrens 		return;
978fa9e4066Sahrens 
979fa9e4066Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
980a2eea2e1Sahrens 
981a2eea2e1Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
982a2eea2e1Sahrens 		panic("dirtying snapshot!");
983fa9e4066Sahrens 
984fa9e4066Sahrens 	dp = ds->ds_dir->dd_pool;
985fa9e4066Sahrens 
986fa9e4066Sahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
987fa9e4066Sahrens 		/* up the hold count until we can be written out */
988fa9e4066Sahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
989fa9e4066Sahrens 	}
990fa9e4066Sahrens }
991fa9e4066Sahrens 
992*a9799022Sck /*
993*a9799022Sck  * The unique space in the head dataset can be calculated by subtracting
994*a9799022Sck  * the space used in the most recent snapshot, that is still being used
995*a9799022Sck  * in this file system, from the space currently in use.  To figure out
996*a9799022Sck  * the space in the most recent snapshot still in use, we need to take
997*a9799022Sck  * the total space used in the snapshot and subtract out the space that
998*a9799022Sck  * has been freed up since the snapshot was taken.
999*a9799022Sck  */
1000*a9799022Sck static void
1001*a9799022Sck dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
1002*a9799022Sck {
1003*a9799022Sck 	uint64_t mrs_used;
1004*a9799022Sck 	uint64_t dlused, dlcomp, dluncomp;
1005*a9799022Sck 
1006*a9799022Sck 	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);
1007*a9799022Sck 
1008*a9799022Sck 	if (ds->ds_phys->ds_prev_snap_obj != 0)
1009*a9799022Sck 		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
1010*a9799022Sck 	else
1011*a9799022Sck 		mrs_used = 0;
1012*a9799022Sck 
1013*a9799022Sck 	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
1014*a9799022Sck 	    &dluncomp));
1015*a9799022Sck 
1016*a9799022Sck 	ASSERT3U(dlused, <=, mrs_used);
1017*a9799022Sck 	ds->ds_phys->ds_unique_bytes =
1018*a9799022Sck 	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
1019*a9799022Sck 
1020*a9799022Sck 	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
1021*a9799022Sck 	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
1022*a9799022Sck 	    SPA_VERSION_UNIQUE_ACCURATE)
1023*a9799022Sck 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
1024*a9799022Sck }
1025*a9799022Sck 
1026*a9799022Sck static uint64_t
1027*a9799022Sck dsl_dataset_unique(dsl_dataset_t *ds)
1028*a9799022Sck {
1029*a9799022Sck 	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
1030*a9799022Sck 		dsl_dataset_recalc_head_uniq(ds);
1031*a9799022Sck 
1032*a9799022Sck 	return (ds->ds_phys->ds_unique_bytes);
1033*a9799022Sck }
1034*a9799022Sck 
1035fa9e4066Sahrens struct killarg {
1036*a9799022Sck 	int64_t *usedp;
1037*a9799022Sck 	int64_t *compressedp;
1038*a9799022Sck 	int64_t *uncompressedp;
1039fa9e4066Sahrens 	zio_t *zio;
1040fa9e4066Sahrens 	dmu_tx_t *tx;
1041fa9e4066Sahrens };
1042fa9e4066Sahrens 
1043fa9e4066Sahrens static int
1044fa9e4066Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
1045fa9e4066Sahrens {
1046fa9e4066Sahrens 	struct killarg *ka = arg;
1047fa9e4066Sahrens 	blkptr_t *bp = &bc->bc_blkptr;
1048fa9e4066Sahrens 
1049fa9e4066Sahrens 	ASSERT3U(bc->bc_errno, ==, 0);
1050fa9e4066Sahrens 
1051fa9e4066Sahrens 	/*
1052fa9e4066Sahrens 	 * Since this callback is not called concurrently, no lock is
1053fa9e4066Sahrens 	 * needed on the accounting values.
1054fa9e4066Sahrens 	 */
105599653d4eSeschrock 	*ka->usedp += bp_get_dasize(spa, bp);
1056fa9e4066Sahrens 	*ka->compressedp += BP_GET_PSIZE(bp);
1057fa9e4066Sahrens 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
1058fa9e4066Sahrens 	/* XXX check for EIO? */
1059fa9e4066Sahrens 	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
1060fa9e4066Sahrens 	    ARC_NOWAIT);
1061fa9e4066Sahrens 	return (0);
1062fa9e4066Sahrens }
1063fa9e4066Sahrens 
1064fa9e4066Sahrens /* ARGSUSED */
10651d452cf5Sahrens static int
10661d452cf5Sahrens dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
1067fa9e4066Sahrens {
10681d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
10693cb34c60Sahrens 	dmu_objset_type_t *ost = arg2;
1070fa9e4066Sahrens 
10711d452cf5Sahrens 	/*
10723cb34c60Sahrens 	 * We can only roll back to emptyness if it is a ZPL objset.
10731d452cf5Sahrens 	 */
10743cb34c60Sahrens 	if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0)
1075fa9e4066Sahrens 		return (EINVAL);
1076fa9e4066Sahrens 
10771d452cf5Sahrens 	/*
10781d452cf5Sahrens 	 * This must not be a snapshot.
10791d452cf5Sahrens 	 */
10801d452cf5Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
1081fa9e4066Sahrens 		return (EINVAL);
1082fa9e4066Sahrens 
1083fa9e4066Sahrens 	/*
1084fa9e4066Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
1085fa9e4066Sahrens 	 * them.  Try again.
1086fa9e4066Sahrens 	 */
10871d452cf5Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1088fa9e4066Sahrens 		return (EAGAIN);
1089fa9e4066Sahrens 
10901d452cf5Sahrens 	return (0);
10911d452cf5Sahrens }
10921d452cf5Sahrens 
10931d452cf5Sahrens /* ARGSUSED */
10941d452cf5Sahrens static void
1095ecd6cf80Smarks dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
10961d452cf5Sahrens {
10971d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
10983cb34c60Sahrens 	dmu_objset_type_t *ost = arg2;
10991d452cf5Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1100fa9e4066Sahrens 
1101fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1102fa9e4066Sahrens 
110386ccc033Sperrin 	/*
110486ccc033Sperrin 	 * Before the roll back destroy the zil.
110586ccc033Sperrin 	 */
110686ccc033Sperrin 	if (ds->ds_user_ptr != NULL) {
110786ccc033Sperrin 		zil_rollback_destroy(
110886ccc033Sperrin 		    ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx);
11093cb34c60Sahrens 
11103cb34c60Sahrens 		/*
11113cb34c60Sahrens 		 * We need to make sure that the objset_impl_t is reopened after
11123cb34c60Sahrens 		 * we do the rollback, otherwise it will have the wrong
11133cb34c60Sahrens 		 * objset_phys_t.  Normally this would happen when this
11143cb34c60Sahrens 		 * DS_MODE_EXCLUSIVE dataset-open is closed, thus causing the
11153cb34c60Sahrens 		 * dataset to be immediately evicted.  But when doing "zfs recv
11163cb34c60Sahrens 		 * -F", we reopen the objset before that, so that there is no
11173cb34c60Sahrens 		 * window where the dataset is closed and inconsistent.
11183cb34c60Sahrens 		 */
11193cb34c60Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
11203cb34c60Sahrens 		ds->ds_user_ptr = NULL;
112186ccc033Sperrin 	}
11223a8a1de4Sperrin 
1123fa9e4066Sahrens 	/* Zero out the deadlist. */
1124fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
1125fa9e4066Sahrens 	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1126fa9e4066Sahrens 	ds->ds_phys->ds_deadlist_obj =
1127fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
1128ea8dc4b6Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
1129ea8dc4b6Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1130fa9e4066Sahrens 
1131fa9e4066Sahrens 	{
1132fa9e4066Sahrens 		/* Free blkptrs that we gave birth to */
1133fa9e4066Sahrens 		zio_t *zio;
1134*a9799022Sck 		int64_t used = 0, compressed = 0, uncompressed = 0;
1135fa9e4066Sahrens 		struct killarg ka;
1136fa9e4066Sahrens 
1137fa9e4066Sahrens 		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
1138fa9e4066Sahrens 		    ZIO_FLAG_MUSTSUCCEED);
1139fa9e4066Sahrens 		ka.usedp = &used;
1140fa9e4066Sahrens 		ka.compressedp = &compressed;
1141fa9e4066Sahrens 		ka.uncompressedp = &uncompressed;
1142fa9e4066Sahrens 		ka.zio = zio;
1143fa9e4066Sahrens 		ka.tx = tx;
1144fa9e4066Sahrens 		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1145fa9e4066Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1146fa9e4066Sahrens 		(void) zio_wait(zio);
1147fa9e4066Sahrens 
11481d452cf5Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
1149fa9e4066Sahrens 		    -used, -compressed, -uncompressed, tx);
1150fa9e4066Sahrens 	}
1151fa9e4066Sahrens 
11523cb34c60Sahrens 	if (ds->ds_prev) {
11533cb34c60Sahrens 		/* Change our contents to that of the prev snapshot */
11543cb34c60Sahrens 		ASSERT3U(ds->ds_prev->ds_object, ==,
11553cb34c60Sahrens 		    ds->ds_phys->ds_prev_snap_obj);
11563cb34c60Sahrens 		ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
11573cb34c60Sahrens 		ds->ds_phys->ds_used_bytes =
11583cb34c60Sahrens 		    ds->ds_prev->ds_phys->ds_used_bytes;
11593cb34c60Sahrens 		ds->ds_phys->ds_compressed_bytes =
11603cb34c60Sahrens 		    ds->ds_prev->ds_phys->ds_compressed_bytes;
11613cb34c60Sahrens 		ds->ds_phys->ds_uncompressed_bytes =
11623cb34c60Sahrens 		    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
11633cb34c60Sahrens 		ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
11643cb34c60Sahrens 		ds->ds_phys->ds_unique_bytes = 0;
1165fa9e4066Sahrens 
11663cb34c60Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
11673cb34c60Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
11683cb34c60Sahrens 			ds->ds_prev->ds_phys->ds_unique_bytes = 0;
11693cb34c60Sahrens 		}
11703cb34c60Sahrens 	} else {
11713cb34c60Sahrens 		/* Zero out our contents, recreate objset */
11723cb34c60Sahrens 		bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
11733cb34c60Sahrens 		ds->ds_phys->ds_used_bytes = 0;
11743cb34c60Sahrens 		ds->ds_phys->ds_compressed_bytes = 0;
11753cb34c60Sahrens 		ds->ds_phys->ds_uncompressed_bytes = 0;
11763cb34c60Sahrens 		ds->ds_phys->ds_flags = 0;
11773cb34c60Sahrens 		ds->ds_phys->ds_unique_bytes = 0;
11783cb34c60Sahrens 		(void) dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
11793cb34c60Sahrens 		    &ds->ds_phys->ds_bp, *ost, tx);
118085edac42Sahrens 	}
1181ecd6cf80Smarks 
1182ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
1183ecd6cf80Smarks 	    tx, cr, "dataset = %llu", ds->ds_object);
1184fa9e4066Sahrens }
1185fa9e4066Sahrens 
1186e1930233Sbonwick /* ARGSUSED */
1187e1930233Sbonwick static int
11881d452cf5Sahrens dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
1189e1930233Sbonwick {
11901d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
11913cb34c60Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
11923cb34c60Sahrens 	uint64_t count;
11933cb34c60Sahrens 	int err;
1194e1930233Sbonwick 
1195e1930233Sbonwick 	/*
1196e1930233Sbonwick 	 * Can't delete a head dataset if there are snapshots of it.
1197e1930233Sbonwick 	 * (Except if the only snapshots are from the branch we cloned
1198e1930233Sbonwick 	 * from.)
1199e1930233Sbonwick 	 */
1200e1930233Sbonwick 	if (ds->ds_prev != NULL &&
1201e1930233Sbonwick 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1202e1930233Sbonwick 		return (EINVAL);
1203e1930233Sbonwick 
12043cb34c60Sahrens 	/*
12053cb34c60Sahrens 	 * This is really a dsl_dir thing, but check it here so that
12063cb34c60Sahrens 	 * we'll be less likely to leave this dataset inconsistent &
12073cb34c60Sahrens 	 * nearly destroyed.
12083cb34c60Sahrens 	 */
12093cb34c60Sahrens 	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
12103cb34c60Sahrens 	if (err)
12113cb34c60Sahrens 		return (err);
12123cb34c60Sahrens 	if (count != 0)
12133cb34c60Sahrens 		return (EEXIST);
12143cb34c60Sahrens 
1215e1930233Sbonwick 	return (0);
1216e1930233Sbonwick }
1217e1930233Sbonwick 
12181d452cf5Sahrens /* ARGSUSED */
12191d452cf5Sahrens static void
1220ecd6cf80Smarks dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
1221fa9e4066Sahrens {
12221d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1223ecd6cf80Smarks 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1224fa9e4066Sahrens 
12251d452cf5Sahrens 	/* Mark it as inconsistent on-disk, in case we crash */
12261d452cf5Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
12271d452cf5Sahrens 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
1228ecd6cf80Smarks 
1229ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
1230ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
12311d452cf5Sahrens }
1232fa9e4066Sahrens 
12331d452cf5Sahrens /* ARGSUSED */
12343cb34c60Sahrens int
12351d452cf5Sahrens dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
12361d452cf5Sahrens {
12371d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1238fa9e4066Sahrens 
1239fa9e4066Sahrens 	/* Can't delete a branch point. */
12401d452cf5Sahrens 	if (ds->ds_phys->ds_num_children > 1)
12411d452cf5Sahrens 		return (EEXIST);
1242fa9e4066Sahrens 
1243fa9e4066Sahrens 	/*
1244fa9e4066Sahrens 	 * Can't delete a head dataset if there are snapshots of it.
1245fa9e4066Sahrens 	 * (Except if the only snapshots are from the branch we cloned
1246fa9e4066Sahrens 	 * from.)
1247fa9e4066Sahrens 	 */
1248fa9e4066Sahrens 	if (ds->ds_prev != NULL &&
12491d452cf5Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1250fa9e4066Sahrens 		return (EINVAL);
1251fa9e4066Sahrens 
1252fa9e4066Sahrens 	/*
1253fa9e4066Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
1254fa9e4066Sahrens 	 * them.  Try again.
1255fa9e4066Sahrens 	 */
12561d452cf5Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1257fa9e4066Sahrens 		return (EAGAIN);
12581d452cf5Sahrens 
12591d452cf5Sahrens 	/* XXX we should do some i/o error checking... */
12601d452cf5Sahrens 	return (0);
12611d452cf5Sahrens }
12621d452cf5Sahrens 
12633cb34c60Sahrens void
1264ecd6cf80Smarks dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
12651d452cf5Sahrens {
12661d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1267*a9799022Sck 	int64_t used = 0, compressed = 0, uncompressed = 0;
12681d452cf5Sahrens 	zio_t *zio;
12691d452cf5Sahrens 	int err;
12701d452cf5Sahrens 	int after_branch_point = FALSE;
12711d452cf5Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
12721d452cf5Sahrens 	objset_t *mos = dp->dp_meta_objset;
12731d452cf5Sahrens 	dsl_dataset_t *ds_prev = NULL;
12741d452cf5Sahrens 	uint64_t obj;
12751d452cf5Sahrens 
127655434c77Sek 	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
12771d452cf5Sahrens 	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
12781d452cf5Sahrens 	ASSERT(ds->ds_prev == NULL ||
12791d452cf5Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
12801d452cf5Sahrens 	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
12811d452cf5Sahrens 
1282*a9799022Sck 	/* Remove our reservation */
1283*a9799022Sck 	if (ds->ds_reserved != 0) {
1284*a9799022Sck 		uint64_t val = 0;
1285*a9799022Sck 		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
1286*a9799022Sck 		ASSERT3U(ds->ds_reserved, ==, 0);
1287*a9799022Sck 	}
1288*a9799022Sck 
12891d452cf5Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
12901d452cf5Sahrens 
12911d452cf5Sahrens 	obj = ds->ds_object;
1292fa9e4066Sahrens 
1293fa9e4066Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
1294fa9e4066Sahrens 		if (ds->ds_prev) {
1295fa9e4066Sahrens 			ds_prev = ds->ds_prev;
1296fa9e4066Sahrens 		} else {
12971d452cf5Sahrens 			VERIFY(0 == dsl_dataset_open_obj(dp,
1298fa9e4066Sahrens 			    ds->ds_phys->ds_prev_snap_obj, NULL,
12991d452cf5Sahrens 			    DS_MODE_NONE, FTAG, &ds_prev));
1300fa9e4066Sahrens 		}
1301fa9e4066Sahrens 		after_branch_point =
1302fa9e4066Sahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
1303fa9e4066Sahrens 
1304fa9e4066Sahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1305fa9e4066Sahrens 		if (after_branch_point &&
1306fa9e4066Sahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
1307fa9e4066Sahrens 			/* This clone is toast. */
1308fa9e4066Sahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
1309fa9e4066Sahrens 			ds_prev->ds_phys->ds_num_children--;
1310fa9e4066Sahrens 		} else if (!after_branch_point) {
1311fa9e4066Sahrens 			ds_prev->ds_phys->ds_next_snap_obj =
1312fa9e4066Sahrens 			    ds->ds_phys->ds_next_snap_obj;
1313fa9e4066Sahrens 		}
1314fa9e4066Sahrens 	}
1315fa9e4066Sahrens 
1316fa9e4066Sahrens 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
1317fa9e4066Sahrens 
1318fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
13191d452cf5Sahrens 		blkptr_t bp;
1320fa9e4066Sahrens 		dsl_dataset_t *ds_next;
1321fa9e4066Sahrens 		uint64_t itor = 0;
1322*a9799022Sck 		uint64_t old_unique;
1323fa9e4066Sahrens 
1324fa9e4066Sahrens 		spa_scrub_restart(dp->dp_spa, tx->tx_txg);
1325fa9e4066Sahrens 
13261d452cf5Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
1327ea8dc4b6Seschrock 		    ds->ds_phys->ds_next_snap_obj, NULL,
1328ea8dc4b6Seschrock 		    DS_MODE_NONE, FTAG, &ds_next));
1329fa9e4066Sahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
1330fa9e4066Sahrens 
1331*a9799022Sck 		old_unique = dsl_dataset_unique(ds_next);
1332*a9799022Sck 
1333fa9e4066Sahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
1334fa9e4066Sahrens 		ds_next->ds_phys->ds_prev_snap_obj =
1335fa9e4066Sahrens 		    ds->ds_phys->ds_prev_snap_obj;
1336fa9e4066Sahrens 		ds_next->ds_phys->ds_prev_snap_txg =
1337fa9e4066Sahrens 		    ds->ds_phys->ds_prev_snap_txg;
1338fa9e4066Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1339fa9e4066Sahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
1340fa9e4066Sahrens 
1341fa9e4066Sahrens 		/*
1342fa9e4066Sahrens 		 * Transfer to our deadlist (which will become next's
1343fa9e4066Sahrens 		 * new deadlist) any entries from next's current
1344fa9e4066Sahrens 		 * deadlist which were born before prev, and free the
1345fa9e4066Sahrens 		 * other entries.
1346fa9e4066Sahrens 		 *
1347fa9e4066Sahrens 		 * XXX we're doing this long task with the config lock held
1348fa9e4066Sahrens 		 */
1349fa9e4066Sahrens 		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
1350fa9e4066Sahrens 		    &bp) == 0) {
1351fa9e4066Sahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
1352ea8dc4b6Seschrock 				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
1353ea8dc4b6Seschrock 				    &bp, tx));
1354fa9e4066Sahrens 				if (ds_prev && !after_branch_point &&
1355fa9e4066Sahrens 				    bp.blk_birth >
1356fa9e4066Sahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
1357fa9e4066Sahrens 					ds_prev->ds_phys->ds_unique_bytes +=
135899653d4eSeschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1359fa9e4066Sahrens 				}
1360fa9e4066Sahrens 			} else {
136199653d4eSeschrock 				used += bp_get_dasize(dp->dp_spa, &bp);
1362fa9e4066Sahrens 				compressed += BP_GET_PSIZE(&bp);
1363fa9e4066Sahrens 				uncompressed += BP_GET_UCSIZE(&bp);
1364fa9e4066Sahrens 				/* XXX check return value? */
1365fa9e4066Sahrens 				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
1366fa9e4066Sahrens 				    &bp, NULL, NULL, ARC_NOWAIT);
1367fa9e4066Sahrens 			}
1368fa9e4066Sahrens 		}
1369fa9e4066Sahrens 
1370fa9e4066Sahrens 		/* free next's deadlist */
1371fa9e4066Sahrens 		bplist_close(&ds_next->ds_deadlist);
1372fa9e4066Sahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
1373fa9e4066Sahrens 
1374fa9e4066Sahrens 		/* set next's deadlist to our deadlist */
1375fa9e4066Sahrens 		ds_next->ds_phys->ds_deadlist_obj =
1376fa9e4066Sahrens 		    ds->ds_phys->ds_deadlist_obj;
1377ea8dc4b6Seschrock 		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
1378ea8dc4b6Seschrock 		    ds_next->ds_phys->ds_deadlist_obj));
1379fa9e4066Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1380fa9e4066Sahrens 
1381fa9e4066Sahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
1382fa9e4066Sahrens 			/*
1383fa9e4066Sahrens 			 * Update next's unique to include blocks which
1384fa9e4066Sahrens 			 * were previously shared by only this snapshot
1385fa9e4066Sahrens 			 * and it.  Those blocks will be born after the
1386fa9e4066Sahrens 			 * prev snap and before this snap, and will have
1387fa9e4066Sahrens 			 * died after the next snap and before the one
1388fa9e4066Sahrens 			 * after that (ie. be on the snap after next's
1389fa9e4066Sahrens 			 * deadlist).
1390fa9e4066Sahrens 			 *
1391fa9e4066Sahrens 			 * XXX we're doing this long task with the
1392fa9e4066Sahrens 			 * config lock held
1393fa9e4066Sahrens 			 */
1394fa9e4066Sahrens 			dsl_dataset_t *ds_after_next;
1395fa9e4066Sahrens 
13961d452cf5Sahrens 			VERIFY(0 == dsl_dataset_open_obj(dp,
1397fa9e4066Sahrens 			    ds_next->ds_phys->ds_next_snap_obj, NULL,
1398ea8dc4b6Seschrock 			    DS_MODE_NONE, FTAG, &ds_after_next));
1399fa9e4066Sahrens 			itor = 0;
1400fa9e4066Sahrens 			while (bplist_iterate(&ds_after_next->ds_deadlist,
1401fa9e4066Sahrens 			    &itor, &bp) == 0) {
1402fa9e4066Sahrens 				if (bp.blk_birth >
1403fa9e4066Sahrens 				    ds->ds_phys->ds_prev_snap_txg &&
1404fa9e4066Sahrens 				    bp.blk_birth <=
1405fa9e4066Sahrens 				    ds->ds_phys->ds_creation_txg) {
1406fa9e4066Sahrens 					ds_next->ds_phys->ds_unique_bytes +=
140799653d4eSeschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1408fa9e4066Sahrens 				}
1409fa9e4066Sahrens 			}
1410fa9e4066Sahrens 
1411fa9e4066Sahrens 			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
1412fa9e4066Sahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
1413fa9e4066Sahrens 		} else {
1414fa9e4066Sahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
1415fa9e4066Sahrens 			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
1416fa9e4066Sahrens 			    ds_next);
1417fa9e4066Sahrens 			if (ds_prev) {
14181d452cf5Sahrens 				VERIFY(0 == dsl_dataset_open_obj(dp,
1419ea8dc4b6Seschrock 				    ds->ds_phys->ds_prev_snap_obj, NULL,
1420ea8dc4b6Seschrock 				    DS_MODE_NONE, ds_next, &ds_next->ds_prev));
1421fa9e4066Sahrens 			} else {
1422fa9e4066Sahrens 				ds_next->ds_prev = NULL;
1423fa9e4066Sahrens 			}
1424*a9799022Sck 
1425*a9799022Sck 			dsl_dataset_recalc_head_uniq(ds_next);
1426*a9799022Sck 
1427*a9799022Sck 			/*
1428*a9799022Sck 			 * Reduce the amount of our unconsumed refreservation
1429*a9799022Sck 			 * being charged to our parent by the amount of
1430*a9799022Sck 			 * new unique data we have gained.
1431*a9799022Sck 			 */
1432*a9799022Sck 			if (old_unique < ds_next->ds_reserved) {
1433*a9799022Sck 				int64_t mrsdelta;
1434*a9799022Sck 				uint64_t new_unique =
1435*a9799022Sck 				    ds_next->ds_phys->ds_unique_bytes;
1436*a9799022Sck 
1437*a9799022Sck 				ASSERT(old_unique <= new_unique);
1438*a9799022Sck 				mrsdelta = MIN(new_unique - old_unique,
1439*a9799022Sck 				    ds_next->ds_reserved - old_unique);
1440*a9799022Sck 				dsl_dir_diduse_space(ds->ds_dir, -mrsdelta,
1441*a9799022Sck 				    0, 0, tx);
1442*a9799022Sck 			}
1443fa9e4066Sahrens 		}
1444fa9e4066Sahrens 		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);
1445fa9e4066Sahrens 
1446fa9e4066Sahrens 		/*
1447*a9799022Sck 		 * NB: unique_bytes might not be accurate for the head objset.
1448*a9799022Sck 		 * Before SPA_VERSION 9, we didn't update its value when we
1449*a9799022Sck 		 * deleted the most recent snapshot.
1450fa9e4066Sahrens 		 */
1451fa9e4066Sahrens 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
1452fa9e4066Sahrens 	} else {
1453fa9e4066Sahrens 		/*
1454fa9e4066Sahrens 		 * There's no next snapshot, so this is a head dataset.
1455fa9e4066Sahrens 		 * Destroy the deadlist.  Unless it's a clone, the
1456fa9e4066Sahrens 		 * deadlist should be empty.  (If it's a clone, it's
1457fa9e4066Sahrens 		 * safe to ignore the deadlist contents.)
1458fa9e4066Sahrens 		 */
1459fa9e4066Sahrens 		struct killarg ka;
1460fa9e4066Sahrens 
1461fa9e4066Sahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
1462fa9e4066Sahrens 		bplist_close(&ds->ds_deadlist);
1463fa9e4066Sahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1464fa9e4066Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1465fa9e4066Sahrens 
1466fa9e4066Sahrens 		/*
1467fa9e4066Sahrens 		 * Free everything that we point to (that's born after
1468fa9e4066Sahrens 		 * the previous snapshot, if we are a clone)
1469fa9e4066Sahrens 		 *
1470fa9e4066Sahrens 		 * XXX we're doing this long task with the config lock held
1471fa9e4066Sahrens 		 */
1472fa9e4066Sahrens 		ka.usedp = &used;
1473fa9e4066Sahrens 		ka.compressedp = &compressed;
1474fa9e4066Sahrens 		ka.uncompressedp = &uncompressed;
1475fa9e4066Sahrens 		ka.zio = zio;
1476fa9e4066Sahrens 		ka.tx = tx;
1477fa9e4066Sahrens 		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1478fa9e4066Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1479fa9e4066Sahrens 		ASSERT3U(err, ==, 0);
1480*a9799022Sck 		ASSERT(spa_version(dp->dp_spa) <
1481*a9799022Sck 		    SPA_VERSION_UNIQUE_ACCURATE ||
1482*a9799022Sck 		    used == ds->ds_phys->ds_unique_bytes);
1483fa9e4066Sahrens 	}
1484fa9e4066Sahrens 
1485fa9e4066Sahrens 	err = zio_wait(zio);
1486fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1487fa9e4066Sahrens 
14881d452cf5Sahrens 	dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);
1489fa9e4066Sahrens 
1490fa9e4066Sahrens 	if (ds->ds_phys->ds_snapnames_zapobj) {
1491fa9e4066Sahrens 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1492fa9e4066Sahrens 		ASSERT(err == 0);
1493fa9e4066Sahrens 	}
1494fa9e4066Sahrens 
14951d452cf5Sahrens 	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1496fa9e4066Sahrens 		/* Erase the link in the dataset */
14971d452cf5Sahrens 		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
14981d452cf5Sahrens 		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
1499fa9e4066Sahrens 		/*
1500fa9e4066Sahrens 		 * dsl_dir_sync_destroy() called us, they'll destroy
1501fa9e4066Sahrens 		 * the dataset.
1502fa9e4066Sahrens 		 */
1503fa9e4066Sahrens 	} else {
1504fa9e4066Sahrens 		/* remove from snapshot namespace */
1505fa9e4066Sahrens 		dsl_dataset_t *ds_head;
15061d452cf5Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
15071d452cf5Sahrens 		    ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL,
1508ea8dc4b6Seschrock 		    DS_MODE_NONE, FTAG, &ds_head));
15098660574dSahrens 		VERIFY(0 == dsl_dataset_get_snapname(ds));
1510fa9e4066Sahrens #ifdef ZFS_DEBUG
1511fa9e4066Sahrens 		{
1512fa9e4066Sahrens 			uint64_t val;
1513fa9e4066Sahrens 			err = zap_lookup(mos,
1514fa9e4066Sahrens 			    ds_head->ds_phys->ds_snapnames_zapobj,
15151d452cf5Sahrens 			    ds->ds_snapname, 8, 1, &val);
1516fa9e4066Sahrens 			ASSERT3U(err, ==, 0);
1517fa9e4066Sahrens 			ASSERT3U(val, ==, obj);
1518fa9e4066Sahrens 		}
1519fa9e4066Sahrens #endif
1520fa9e4066Sahrens 		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
15211d452cf5Sahrens 		    ds->ds_snapname, tx);
1522fa9e4066Sahrens 		ASSERT(err == 0);
1523fa9e4066Sahrens 		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
1524fa9e4066Sahrens 	}
1525fa9e4066Sahrens 
1526fa9e4066Sahrens 	if (ds_prev && ds->ds_prev != ds_prev)
1527fa9e4066Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1528fa9e4066Sahrens 
1529990b4856Slling 	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
1530ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
1531ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
1532ecd6cf80Smarks 
15331d452cf5Sahrens 	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
15341d452cf5Sahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
1535b1b8ab34Slling 
1536fa9e4066Sahrens }
1537fa9e4066Sahrens 
1538*a9799022Sck static int
1539*a9799022Sck dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
1540*a9799022Sck {
1541*a9799022Sck 	uint64_t asize;
1542*a9799022Sck 
1543*a9799022Sck 	if (!dmu_tx_is_syncing(tx))
1544*a9799022Sck 		return (0);
1545*a9799022Sck 
1546*a9799022Sck 	/*
1547*a9799022Sck 	 * If there's an fs-only reservation, any blocks that might become
1548*a9799022Sck 	 * owned by the snapshot dataset must be accommodated by space
1549*a9799022Sck 	 * outside of the reservation.
1550*a9799022Sck 	 */
1551*a9799022Sck 	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
1552*a9799022Sck 	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
1553*a9799022Sck 		return (ENOSPC);
1554*a9799022Sck 
1555*a9799022Sck 	/*
1556*a9799022Sck 	 * Propogate any reserved space for this snapshot to other
1557*a9799022Sck 	 * snapshot checks in this sync group.
1558*a9799022Sck 	 */
1559*a9799022Sck 	if (asize > 0)
1560*a9799022Sck 		dsl_dir_willuse_space(ds->ds_dir, asize, tx);
1561*a9799022Sck 
1562*a9799022Sck 	return (0);
1563*a9799022Sck }
1564*a9799022Sck 
15651d452cf5Sahrens /* ARGSUSED */
1566fa9e4066Sahrens int
15671d452cf5Sahrens dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
1568fa9e4066Sahrens {
15693cb34c60Sahrens 	dsl_dataset_t *ds = arg1;
15701d452cf5Sahrens 	const char *snapname = arg2;
15711d452cf5Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1572fa9e4066Sahrens 	int err;
15731d452cf5Sahrens 	uint64_t value;
1574fa9e4066Sahrens 
15751d452cf5Sahrens 	/*
15761d452cf5Sahrens 	 * We don't allow multiple snapshots of the same txg.  If there
15771d452cf5Sahrens 	 * is already one, try again.
15781d452cf5Sahrens 	 */
15791d452cf5Sahrens 	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
15801d452cf5Sahrens 		return (EAGAIN);
1581fa9e4066Sahrens 
15821d452cf5Sahrens 	/*
15831d452cf5Sahrens 	 * Check for conflicting name snapshot name.
15841d452cf5Sahrens 	 */
1585fa9e4066Sahrens 	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
1586fa9e4066Sahrens 	    snapname, 8, 1, &value);
15871d452cf5Sahrens 	if (err == 0)
1588fa9e4066Sahrens 		return (EEXIST);
15891d452cf5Sahrens 	if (err != ENOENT)
15901d452cf5Sahrens 		return (err);
1591fa9e4066Sahrens 
1592b7661cccSmmusante 	/*
1593b7661cccSmmusante 	 * Check that the dataset's name is not too long.  Name consists
1594b7661cccSmmusante 	 * of the dataset's length + 1 for the @-sign + snapshot name's length
1595b7661cccSmmusante 	 */
1596b7661cccSmmusante 	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
1597b7661cccSmmusante 		return (ENAMETOOLONG);
1598b7661cccSmmusante 
1599*a9799022Sck 	err = dsl_dataset_snapshot_reserve_space(ds, tx);
1600*a9799022Sck 	if (err)
1601*a9799022Sck 		return (err);
1602*a9799022Sck 
16031d452cf5Sahrens 	ds->ds_trysnap_txg = tx->tx_txg;
16041d452cf5Sahrens 	return (0);
16051d452cf5Sahrens }
1606fa9e4066Sahrens 
16071d452cf5Sahrens void
1608ecd6cf80Smarks dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
16091d452cf5Sahrens {
16103cb34c60Sahrens 	dsl_dataset_t *ds = arg1;
16111d452cf5Sahrens 	const char *snapname = arg2;
16121d452cf5Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
16131d452cf5Sahrens 	dmu_buf_t *dbuf;
16141d452cf5Sahrens 	dsl_dataset_phys_t *dsphys;
16151d452cf5Sahrens 	uint64_t dsobj;
16161d452cf5Sahrens 	objset_t *mos = dp->dp_meta_objset;
16171d452cf5Sahrens 	int err;
1618fa9e4066Sahrens 
1619fa9e4066Sahrens 	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
16201d452cf5Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
1621fa9e4066Sahrens 
16221649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
16231649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
1624ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
1625fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
1626fa9e4066Sahrens 	dsphys = dbuf->db_data;
16271d452cf5Sahrens 	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
1628fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
1629fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
1630fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
1631fa9e4066Sahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
1632fa9e4066Sahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
1633fa9e4066Sahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
1634fa9e4066Sahrens 	dsphys->ds_num_children = 1;
1635fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
1636fa9e4066Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
1637fa9e4066Sahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
1638fa9e4066Sahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
1639fa9e4066Sahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
1640fa9e4066Sahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
164199653d4eSeschrock 	dsphys->ds_flags = ds->ds_phys->ds_flags;
1642fa9e4066Sahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
1643ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
1644fa9e4066Sahrens 
16451d452cf5Sahrens 	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
16461d452cf5Sahrens 	if (ds->ds_prev) {
16471d452cf5Sahrens 		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
1648fa9e4066Sahrens 		    ds->ds_object ||
16491d452cf5Sahrens 		    ds->ds_prev->ds_phys->ds_num_children > 1);
16501d452cf5Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
16511d452cf5Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1652fa9e4066Sahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
16531d452cf5Sahrens 			    ds->ds_prev->ds_phys->ds_creation_txg);
16541d452cf5Sahrens 			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
1655fa9e4066Sahrens 		}
1656fa9e4066Sahrens 	}
1657fa9e4066Sahrens 
1658*a9799022Sck 	/*
1659*a9799022Sck 	 * If we have a reference-reservation on this dataset, we will
1660*a9799022Sck 	 * need to increase the amount of refreservation being charged
1661*a9799022Sck 	 * since our unique space is going to zero.
1662*a9799022Sck 	 */
1663*a9799022Sck 	if (ds->ds_reserved) {
1664*a9799022Sck 		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
1665*a9799022Sck 		dsl_dir_diduse_space(ds->ds_dir, add, 0, 0, tx);
1666*a9799022Sck 	}
1667*a9799022Sck 
1668fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
1669fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1670fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
1671fa9e4066Sahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
1672fa9e4066Sahrens 	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
1673fa9e4066Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
1674*a9799022Sck 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
1675*a9799022Sck 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
1676fa9e4066Sahrens 	ds->ds_phys->ds_deadlist_obj =
1677fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
1678ea8dc4b6Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
1679ea8dc4b6Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1680fa9e4066Sahrens 
1681fa9e4066Sahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
1682fa9e4066Sahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
1683fa9e4066Sahrens 	    snapname, 8, 1, &dsobj, tx);
1684fa9e4066Sahrens 	ASSERT(err == 0);
1685fa9e4066Sahrens 
1686fa9e4066Sahrens 	if (ds->ds_prev)
1687fa9e4066Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
1688ea8dc4b6Seschrock 	VERIFY(0 == dsl_dataset_open_obj(dp,
1689ea8dc4b6Seschrock 	    ds->ds_phys->ds_prev_snap_obj, snapname,
1690ea8dc4b6Seschrock 	    DS_MODE_NONE, ds, &ds->ds_prev));
1691ecd6cf80Smarks 
1692ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
169340feaa91Sahrens 	    "dataset = %llu", dsobj);
1694fa9e4066Sahrens }
1695fa9e4066Sahrens 
1696fa9e4066Sahrens void
1697c717a561Smaybee dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
1698fa9e4066Sahrens {
1699fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1700fa9e4066Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1701fa9e4066Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1702fa9e4066Sahrens 
170391ebeef5Sahrens 	/*
170491ebeef5Sahrens 	 * in case we had to change ds_fsid_guid when we opened it,
170591ebeef5Sahrens 	 * sync it out now.
170691ebeef5Sahrens 	 */
170791ebeef5Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
170891ebeef5Sahrens 	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
170991ebeef5Sahrens 
1710fa9e4066Sahrens 	dsl_dir_dirty(ds->ds_dir, tx);
1711c717a561Smaybee 	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
1712fa9e4066Sahrens }
1713fa9e4066Sahrens 
1714fa9e4066Sahrens void
1715a2eea2e1Sahrens dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1716fa9e4066Sahrens {
1717*a9799022Sck 	uint64_t refd, avail, uobjs, aobjs;
1718*a9799022Sck 
1719a2eea2e1Sahrens 	dsl_dir_stats(ds->ds_dir, nv);
1720fa9e4066Sahrens 
1721*a9799022Sck 	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
1722*a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
1723*a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
1724*a9799022Sck 
1725a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
1726a2eea2e1Sahrens 	    ds->ds_phys->ds_creation_time);
1727a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
1728a2eea2e1Sahrens 	    ds->ds_phys->ds_creation_txg);
1729*a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
1730*a9799022Sck 	    ds->ds_quota);
1731*a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
1732*a9799022Sck 	    ds->ds_reserved);
1733fa9e4066Sahrens 
1734fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1735fa9e4066Sahrens 		/*
1736fa9e4066Sahrens 		 * This is a snapshot; override the dd's space used with
1737a2eea2e1Sahrens 		 * our unique space and compression ratio.
1738fa9e4066Sahrens 		 */
1739a2eea2e1Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
1740a2eea2e1Sahrens 		    ds->ds_phys->ds_unique_bytes);
1741a2eea2e1Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
1742a2eea2e1Sahrens 		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
1743a2eea2e1Sahrens 		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
1744a2eea2e1Sahrens 		    ds->ds_phys->ds_compressed_bytes));
1745fa9e4066Sahrens 	}
1746fa9e4066Sahrens }
1747fa9e4066Sahrens 
1748a2eea2e1Sahrens void
1749a2eea2e1Sahrens dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1750a2eea2e1Sahrens {
1751a2eea2e1Sahrens 	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
1752a2eea2e1Sahrens 	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
17533cb34c60Sahrens 	stat->dds_guid = ds->ds_phys->ds_guid;
1754a2eea2e1Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1755a2eea2e1Sahrens 		stat->dds_is_snapshot = B_TRUE;
1756a2eea2e1Sahrens 		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
1757a2eea2e1Sahrens 	}
1758a2eea2e1Sahrens 
1759a2eea2e1Sahrens 	/* clone origin is really a dsl_dir thing... */
17603cb34c60Sahrens 	if (ds->ds_dir->dd_phys->dd_origin_obj) {
1761a2eea2e1Sahrens 		dsl_dataset_t *ods;
1762a2eea2e1Sahrens 
1763a2eea2e1Sahrens 		rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
1764a2eea2e1Sahrens 		VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
17653cb34c60Sahrens 		    ds->ds_dir->dd_phys->dd_origin_obj,
1766a2eea2e1Sahrens 		    NULL, DS_MODE_NONE, FTAG, &ods));
17673cb34c60Sahrens 		dsl_dataset_name(ods, stat->dds_origin);
1768a2eea2e1Sahrens 		dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
1769a2eea2e1Sahrens 		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
1770a2eea2e1Sahrens 	}
1771a2eea2e1Sahrens }
1772a2eea2e1Sahrens 
1773a2eea2e1Sahrens uint64_t
1774a2eea2e1Sahrens dsl_dataset_fsid_guid(dsl_dataset_t *ds)
1775a2eea2e1Sahrens {
177691ebeef5Sahrens 	return (ds->ds_fsid_guid);
1777a2eea2e1Sahrens }
1778a2eea2e1Sahrens 
1779a2eea2e1Sahrens void
1780a2eea2e1Sahrens dsl_dataset_space(dsl_dataset_t *ds,
1781a2eea2e1Sahrens     uint64_t *refdbytesp, uint64_t *availbytesp,
1782a2eea2e1Sahrens     uint64_t *usedobjsp, uint64_t *availobjsp)
1783fa9e4066Sahrens {
1784a2eea2e1Sahrens 	*refdbytesp = ds->ds_phys->ds_used_bytes;
1785a2eea2e1Sahrens 	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
1786*a9799022Sck 	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
1787*a9799022Sck 		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
1788*a9799022Sck 	if (ds->ds_quota != 0) {
1789*a9799022Sck 		/*
1790*a9799022Sck 		 * Adjust available bytes according to refquota
1791*a9799022Sck 		 */
1792*a9799022Sck 		if (*refdbytesp < ds->ds_quota)
1793*a9799022Sck 			*availbytesp = MIN(*availbytesp,
1794*a9799022Sck 			    ds->ds_quota - *refdbytesp);
1795*a9799022Sck 		else
1796*a9799022Sck 			*availbytesp = 0;
1797*a9799022Sck 	}
1798a2eea2e1Sahrens 	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
1799a2eea2e1Sahrens 	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
1800fa9e4066Sahrens }
1801fa9e4066Sahrens 
1802f18faf3fSek boolean_t
1803f18faf3fSek dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
1804f18faf3fSek {
1805f18faf3fSek 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1806f18faf3fSek 
1807f18faf3fSek 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
1808f18faf3fSek 	    dsl_pool_sync_context(dp));
1809f18faf3fSek 	if (ds->ds_prev == NULL)
1810f18faf3fSek 		return (B_FALSE);
1811f18faf3fSek 	if (ds->ds_phys->ds_bp.blk_birth >
1812f18faf3fSek 	    ds->ds_prev->ds_phys->ds_creation_txg)
1813f18faf3fSek 		return (B_TRUE);
1814f18faf3fSek 	return (B_FALSE);
1815f18faf3fSek }
1816f18faf3fSek 
18171d452cf5Sahrens /* ARGSUSED */
1818fa9e4066Sahrens static int
18191d452cf5Sahrens dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1820fa9e4066Sahrens {
18211d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
18221d452cf5Sahrens 	char *newsnapname = arg2;
18231d452cf5Sahrens 	dsl_dir_t *dd = ds->ds_dir;
1824fa9e4066Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
18251d452cf5Sahrens 	dsl_dataset_t *hds;
1826fa9e4066Sahrens 	uint64_t val;
18271d452cf5Sahrens 	int err;
1828fa9e4066Sahrens 
18291d452cf5Sahrens 	err = dsl_dataset_open_obj(dd->dd_pool,
18301d452cf5Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds);
1831fa9e4066Sahrens 	if (err)
1832fa9e4066Sahrens 		return (err);
1833fa9e4066Sahrens 
18341d452cf5Sahrens 	/* new name better not be in use */
18351d452cf5Sahrens 	err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj,
18361d452cf5Sahrens 	    newsnapname, 8, 1, &val);
18371d452cf5Sahrens 	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
18381d452cf5Sahrens 
18391d452cf5Sahrens 	if (err == 0)
18401d452cf5Sahrens 		err = EEXIST;
18411d452cf5Sahrens 	else if (err == ENOENT)
18421d452cf5Sahrens 		err = 0;
1843cdf5b4caSmmusante 
1844cdf5b4caSmmusante 	/* dataset name + 1 for the "@" + the new snapshot name must fit */
1845cdf5b4caSmmusante 	if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
1846cdf5b4caSmmusante 		err = ENAMETOOLONG;
1847cdf5b4caSmmusante 
18481d452cf5Sahrens 	return (err);
18491d452cf5Sahrens }
1850fa9e4066Sahrens 
18511d452cf5Sahrens static void
1852ecd6cf80Smarks dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2,
1853ecd6cf80Smarks     cred_t *cr, dmu_tx_t *tx)
18541d452cf5Sahrens {
18551d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1856ecd6cf80Smarks 	const char *newsnapname = arg2;
18571d452cf5Sahrens 	dsl_dir_t *dd = ds->ds_dir;
18581d452cf5Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
18591d452cf5Sahrens 	dsl_dataset_t *hds;
18601d452cf5Sahrens 	int err;
1861fa9e4066Sahrens 
18621d452cf5Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
1863fa9e4066Sahrens 
18641d452cf5Sahrens 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
18651d452cf5Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds));
1866fa9e4066Sahrens 
18671d452cf5Sahrens 	VERIFY(0 == dsl_dataset_get_snapname(ds));
18681d452cf5Sahrens 	err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj,
18691d452cf5Sahrens 	    ds->ds_snapname, tx);
1870fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
18711d452cf5Sahrens 	mutex_enter(&ds->ds_lock);
18721d452cf5Sahrens 	(void) strcpy(ds->ds_snapname, newsnapname);
18731d452cf5Sahrens 	mutex_exit(&ds->ds_lock);
18741d452cf5Sahrens 	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
18751d452cf5Sahrens 	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
1876fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1877fa9e4066Sahrens 
1878ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
1879ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
18801d452cf5Sahrens 	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
1881fa9e4066Sahrens }
1882fa9e4066Sahrens 
1883f18faf3fSek struct renamesnaparg {
1884cdf5b4caSmmusante 	dsl_sync_task_group_t *dstg;
1885cdf5b4caSmmusante 	char failed[MAXPATHLEN];
1886cdf5b4caSmmusante 	char *oldsnap;
1887cdf5b4caSmmusante 	char *newsnap;
1888cdf5b4caSmmusante };
1889cdf5b4caSmmusante 
1890cdf5b4caSmmusante static int
1891cdf5b4caSmmusante dsl_snapshot_rename_one(char *name, void *arg)
1892cdf5b4caSmmusante {
1893f18faf3fSek 	struct renamesnaparg *ra = arg;
1894cdf5b4caSmmusante 	dsl_dataset_t *ds = NULL;
1895cdf5b4caSmmusante 	char *cp;
1896cdf5b4caSmmusante 	int err;
1897cdf5b4caSmmusante 
1898cdf5b4caSmmusante 	cp = name + strlen(name);
1899cdf5b4caSmmusante 	*cp = '@';
1900cdf5b4caSmmusante 	(void) strcpy(cp + 1, ra->oldsnap);
1901ecd6cf80Smarks 
1902ecd6cf80Smarks 	/*
1903ecd6cf80Smarks 	 * For recursive snapshot renames the parent won't be changing
1904ecd6cf80Smarks 	 * so we just pass name for both the to/from argument.
1905ecd6cf80Smarks 	 */
1906ecd6cf80Smarks 	if (err = zfs_secpolicy_rename_perms(name, name, CRED())) {
1907ecd6cf80Smarks 		(void) strcpy(ra->failed, name);
1908ecd6cf80Smarks 		return (err);
1909ecd6cf80Smarks 	}
1910ecd6cf80Smarks 
1911cdf5b4caSmmusante 	err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD,
1912cdf5b4caSmmusante 	    ra->dstg, &ds);
1913cdf5b4caSmmusante 	if (err == ENOENT) {
1914cdf5b4caSmmusante 		*cp = '\0';
1915cdf5b4caSmmusante 		return (0);
1916cdf5b4caSmmusante 	}
1917cdf5b4caSmmusante 	if (err) {
1918cdf5b4caSmmusante 		(void) strcpy(ra->failed, name);
1919cdf5b4caSmmusante 		*cp = '\0';
1920cdf5b4caSmmusante 		dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
1921cdf5b4caSmmusante 		return (err);
1922cdf5b4caSmmusante 	}
1923cdf5b4caSmmusante 
1924cdf5b4caSmmusante #ifdef _KERNEL
1925cdf5b4caSmmusante 	/* for all filesystems undergoing rename, we'll need to unmount it */
1926cdf5b4caSmmusante 	(void) zfs_unmount_snap(name, NULL);
1927cdf5b4caSmmusante #endif
1928cdf5b4caSmmusante 
1929cdf5b4caSmmusante 	*cp = '\0';
1930cdf5b4caSmmusante 
1931cdf5b4caSmmusante 	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
1932cdf5b4caSmmusante 	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
1933cdf5b4caSmmusante 
1934cdf5b4caSmmusante 	return (0);
1935cdf5b4caSmmusante }
1936cdf5b4caSmmusante 
1937cdf5b4caSmmusante static int
1938cdf5b4caSmmusante dsl_recursive_rename(char *oldname, const char *newname)
1939cdf5b4caSmmusante {
1940cdf5b4caSmmusante 	int err;
1941f18faf3fSek 	struct renamesnaparg *ra;
1942cdf5b4caSmmusante 	dsl_sync_task_t *dst;
1943cdf5b4caSmmusante 	spa_t *spa;
1944cdf5b4caSmmusante 	char *cp, *fsname = spa_strdup(oldname);
1945cdf5b4caSmmusante 	int len = strlen(oldname);
1946cdf5b4caSmmusante 
1947cdf5b4caSmmusante 	/* truncate the snapshot name to get the fsname */
1948cdf5b4caSmmusante 	cp = strchr(fsname, '@');
1949cdf5b4caSmmusante 	*cp = '\0';
1950cdf5b4caSmmusante 
195140feaa91Sahrens 	err = spa_open(fsname, &spa, FTAG);
1952cdf5b4caSmmusante 	if (err) {
1953cdf5b4caSmmusante 		kmem_free(fsname, len + 1);
1954cdf5b4caSmmusante 		return (err);
1955cdf5b4caSmmusante 	}
1956f18faf3fSek 	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
1957cdf5b4caSmmusante 	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
1958cdf5b4caSmmusante 
1959cdf5b4caSmmusante 	ra->oldsnap = strchr(oldname, '@') + 1;
1960cdf5b4caSmmusante 	ra->newsnap = strchr(newname, '@') + 1;
1961cdf5b4caSmmusante 	*ra->failed = '\0';
1962cdf5b4caSmmusante 
1963cdf5b4caSmmusante 	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
1964cdf5b4caSmmusante 	    DS_FIND_CHILDREN);
1965cdf5b4caSmmusante 	kmem_free(fsname, len + 1);
1966cdf5b4caSmmusante 
1967cdf5b4caSmmusante 	if (err == 0) {
1968cdf5b4caSmmusante 		err = dsl_sync_task_group_wait(ra->dstg);
1969cdf5b4caSmmusante 	}
1970cdf5b4caSmmusante 
1971cdf5b4caSmmusante 	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
1972cdf5b4caSmmusante 	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
1973cdf5b4caSmmusante 		dsl_dataset_t *ds = dst->dst_arg1;
1974cdf5b4caSmmusante 		if (dst->dst_err) {
1975cdf5b4caSmmusante 			dsl_dir_name(ds->ds_dir, ra->failed);
19762572aa4eSmmusante 			(void) strcat(ra->failed, "@");
19772572aa4eSmmusante 			(void) strcat(ra->failed, ra->newsnap);
1978cdf5b4caSmmusante 		}
1979cdf5b4caSmmusante 		dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
1980cdf5b4caSmmusante 	}
1981cdf5b4caSmmusante 
1982ecd6cf80Smarks 	if (err)
1983ecd6cf80Smarks 		(void) strcpy(oldname, ra->failed);
1984cdf5b4caSmmusante 
1985cdf5b4caSmmusante 	dsl_sync_task_group_destroy(ra->dstg);
1986f18faf3fSek 	kmem_free(ra, sizeof (struct renamesnaparg));
1987cdf5b4caSmmusante 	spa_close(spa, FTAG);
1988cdf5b4caSmmusante 	return (err);
1989cdf5b4caSmmusante }
1990cdf5b4caSmmusante 
19913a5a36beSmmusante static int
19923a5a36beSmmusante dsl_valid_rename(char *oldname, void *arg)
19933a5a36beSmmusante {
19943a5a36beSmmusante 	int delta = *(int *)arg;
19953a5a36beSmmusante 
19963a5a36beSmmusante 	if (strlen(oldname) + delta >= MAXNAMELEN)
19973a5a36beSmmusante 		return (ENAMETOOLONG);
19983a5a36beSmmusante 
19993a5a36beSmmusante 	return (0);
20003a5a36beSmmusante }
20013a5a36beSmmusante 
2002fa9e4066Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
2003fa9e4066Sahrens int
2004cdf5b4caSmmusante dsl_dataset_rename(char *oldname, const char *newname,
2005cdf5b4caSmmusante     boolean_t recursive)
2006fa9e4066Sahrens {
2007fa9e4066Sahrens 	dsl_dir_t *dd;
20081d452cf5Sahrens 	dsl_dataset_t *ds;
2009fa9e4066Sahrens 	const char *tail;
2010fa9e4066Sahrens 	int err;
2011fa9e4066Sahrens 
20121d452cf5Sahrens 	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
2013ea8dc4b6Seschrock 	if (err)
2014ea8dc4b6Seschrock 		return (err);
2015fa9e4066Sahrens 	if (tail == NULL) {
20163a5a36beSmmusante 		int delta = strlen(newname) - strlen(oldname);
20173a5a36beSmmusante 
20183a5a36beSmmusante 		/* if we're growing, validate child size lengths */
20193a5a36beSmmusante 		if (delta > 0)
20203a5a36beSmmusante 			err = dmu_objset_find(oldname, dsl_valid_rename,
20213a5a36beSmmusante 			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
20223a5a36beSmmusante 
20233a5a36beSmmusante 		if (!err)
20243a5a36beSmmusante 			err = dsl_dir_rename(dd, newname);
2025fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
2026fa9e4066Sahrens 		return (err);
2027fa9e4066Sahrens 	}
2028fa9e4066Sahrens 	if (tail[0] != '@') {
2029fa9e4066Sahrens 		/* the name ended in a nonexistant component */
2030fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
2031fa9e4066Sahrens 		return (ENOENT);
2032fa9e4066Sahrens 	}
2033fa9e4066Sahrens 
2034fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
20351d452cf5Sahrens 
20361d452cf5Sahrens 	/* new name must be snapshot in same filesystem */
20371d452cf5Sahrens 	tail = strchr(newname, '@');
20381d452cf5Sahrens 	if (tail == NULL)
20391d452cf5Sahrens 		return (EINVAL);
20401d452cf5Sahrens 	tail++;
20411d452cf5Sahrens 	if (strncmp(oldname, newname, tail - newname) != 0)
20421d452cf5Sahrens 		return (EXDEV);
20431d452cf5Sahrens 
2044cdf5b4caSmmusante 	if (recursive) {
2045cdf5b4caSmmusante 		err = dsl_recursive_rename(oldname, newname);
2046cdf5b4caSmmusante 	} else {
2047cdf5b4caSmmusante 		err = dsl_dataset_open(oldname,
2048cdf5b4caSmmusante 		    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
2049cdf5b4caSmmusante 		if (err)
2050cdf5b4caSmmusante 			return (err);
20511d452cf5Sahrens 
2052cdf5b4caSmmusante 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2053cdf5b4caSmmusante 		    dsl_dataset_snapshot_rename_check,
2054cdf5b4caSmmusante 		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
20551d452cf5Sahrens 
2056cdf5b4caSmmusante 		dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
2057cdf5b4caSmmusante 	}
20581d452cf5Sahrens 
2059fa9e4066Sahrens 	return (err);
2060fa9e4066Sahrens }
206199653d4eSeschrock 
20621d452cf5Sahrens struct promotearg {
20631d452cf5Sahrens 	uint64_t used, comp, uncomp, unique;
20641d452cf5Sahrens 	uint64_t newnext_obj, snapnames_obj;
20651d452cf5Sahrens };
20661d452cf5Sahrens 
2067ecd6cf80Smarks /* ARGSUSED */
206899653d4eSeschrock static int
20691d452cf5Sahrens dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
207099653d4eSeschrock {
20711d452cf5Sahrens 	dsl_dataset_t *hds = arg1;
20721d452cf5Sahrens 	struct promotearg *pa = arg2;
20731d452cf5Sahrens 	dsl_dir_t *dd = hds->ds_dir;
20741d452cf5Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
20753cb34c60Sahrens 	dsl_dir_t *odd = NULL;
207699653d4eSeschrock 	dsl_dataset_t *ds = NULL;
20773cb34c60Sahrens 	dsl_dataset_t *origin_ds = NULL;
207899653d4eSeschrock 	dsl_dataset_t *newnext_ds = NULL;
207999653d4eSeschrock 	int err;
208099653d4eSeschrock 	char *name = NULL;
20811d452cf5Sahrens 	uint64_t itor = 0;
208299653d4eSeschrock 	blkptr_t bp;
208399653d4eSeschrock 
20841d452cf5Sahrens 	bzero(pa, sizeof (*pa));
20851d452cf5Sahrens 
208699653d4eSeschrock 	/* Check that it is a clone */
20873cb34c60Sahrens 	if (dd->dd_phys->dd_origin_obj == 0)
208899653d4eSeschrock 		return (EINVAL);
208999653d4eSeschrock 
20901d452cf5Sahrens 	/* Since this is so expensive, don't do the preliminary check */
20911d452cf5Sahrens 	if (!dmu_tx_is_syncing(tx))
20921d452cf5Sahrens 		return (0);
20931d452cf5Sahrens 
20943cb34c60Sahrens 	if (err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj,
20953cb34c60Sahrens 	    NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds))
209699653d4eSeschrock 		goto out;
20973cb34c60Sahrens 	odd = origin_ds->ds_dir;
20981d452cf5Sahrens 
20991d452cf5Sahrens 	{
21001d452cf5Sahrens 		dsl_dataset_t *phds;
21011d452cf5Sahrens 		if (err = dsl_dataset_open_obj(dd->dd_pool,
21023cb34c60Sahrens 		    odd->dd_phys->dd_head_dataset_obj,
21031d452cf5Sahrens 		    NULL, DS_MODE_NONE, FTAG, &phds))
21041d452cf5Sahrens 			goto out;
21051d452cf5Sahrens 		pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
21061d452cf5Sahrens 		dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
21071d452cf5Sahrens 	}
210899653d4eSeschrock 
210999653d4eSeschrock 	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
211099653d4eSeschrock 		err = EXDEV;
211199653d4eSeschrock 		goto out;
211299653d4eSeschrock 	}
211399653d4eSeschrock 
21143cb34c60Sahrens 	/* find origin's new next ds */
211599653d4eSeschrock 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
211699653d4eSeschrock 	    NULL, DS_MODE_NONE, FTAG, &newnext_ds));
21173cb34c60Sahrens 	while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) {
211899653d4eSeschrock 		dsl_dataset_t *prev;
211999653d4eSeschrock 
212099653d4eSeschrock 		if (err = dsl_dataset_open_obj(dd->dd_pool,
21211d452cf5Sahrens 		    newnext_ds->ds_phys->ds_prev_snap_obj,
21221d452cf5Sahrens 		    NULL, DS_MODE_NONE, FTAG, &prev))
212399653d4eSeschrock 			goto out;
212499653d4eSeschrock 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
212599653d4eSeschrock 		newnext_ds = prev;
212699653d4eSeschrock 	}
21271d452cf5Sahrens 	pa->newnext_obj = newnext_ds->ds_object;
212899653d4eSeschrock 
21293cb34c60Sahrens 	/* compute origin's new unique space */
213099653d4eSeschrock 	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
213199653d4eSeschrock 	    &itor, &bp)) == 0) {
21323cb34c60Sahrens 		if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg)
21331d452cf5Sahrens 			pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
213499653d4eSeschrock 	}
213599653d4eSeschrock 	if (err != ENOENT)
213699653d4eSeschrock 		goto out;
213799653d4eSeschrock 
213899653d4eSeschrock 	/* Walk the snapshots that we are moving */
213999653d4eSeschrock 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
21403cb34c60Sahrens 	ds = origin_ds;
214199653d4eSeschrock 	/* CONSTCOND */
214299653d4eSeschrock 	while (TRUE) {
214399653d4eSeschrock 		uint64_t val, dlused, dlcomp, dluncomp;
214499653d4eSeschrock 		dsl_dataset_t *prev;
214599653d4eSeschrock 
214699653d4eSeschrock 		/* Check that the snapshot name does not conflict */
214799653d4eSeschrock 		dsl_dataset_name(ds, name);
214899653d4eSeschrock 		err = zap_lookup(dd->dd_pool->dp_meta_objset,
214999653d4eSeschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
215099653d4eSeschrock 		    8, 1, &val);
215199653d4eSeschrock 		if (err != ENOENT) {
215299653d4eSeschrock 			if (err == 0)
215399653d4eSeschrock 				err = EEXIST;
215499653d4eSeschrock 			goto out;
215599653d4eSeschrock 		}
215699653d4eSeschrock 
215799653d4eSeschrock 		/*
215899653d4eSeschrock 		 * compute space to transfer.  Each snapshot gave birth to:
215999653d4eSeschrock 		 * (my used) - (prev's used) + (deadlist's used)
216099653d4eSeschrock 		 */
21611d452cf5Sahrens 		pa->used += ds->ds_phys->ds_used_bytes;
21621d452cf5Sahrens 		pa->comp += ds->ds_phys->ds_compressed_bytes;
21631d452cf5Sahrens 		pa->uncomp += ds->ds_phys->ds_uncompressed_bytes;
216499653d4eSeschrock 
216599653d4eSeschrock 		/* If we reach the first snapshot, we're done. */
216699653d4eSeschrock 		if (ds->ds_phys->ds_prev_snap_obj == 0)
216799653d4eSeschrock 			break;
216899653d4eSeschrock 
216999653d4eSeschrock 		if (err = bplist_space(&ds->ds_deadlist,
217099653d4eSeschrock 		    &dlused, &dlcomp, &dluncomp))
217199653d4eSeschrock 			goto out;
217299653d4eSeschrock 		if (err = dsl_dataset_open_obj(dd->dd_pool,
217399653d4eSeschrock 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
217499653d4eSeschrock 		    FTAG, &prev))
217599653d4eSeschrock 			goto out;
21761d452cf5Sahrens 		pa->used += dlused - prev->ds_phys->ds_used_bytes;
21771d452cf5Sahrens 		pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
21781d452cf5Sahrens 		pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;
217999653d4eSeschrock 
218099653d4eSeschrock 		/*
218199653d4eSeschrock 		 * We could be a clone of a clone.  If we reach our
218299653d4eSeschrock 		 * parent's branch point, we're done.
218399653d4eSeschrock 		 */
218499653d4eSeschrock 		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
218599653d4eSeschrock 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
218699653d4eSeschrock 			break;
218799653d4eSeschrock 		}
21883cb34c60Sahrens 		if (ds != origin_ds)
218999653d4eSeschrock 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
219099653d4eSeschrock 		ds = prev;
219199653d4eSeschrock 	}
219299653d4eSeschrock 
219399653d4eSeschrock 	/* Check that there is enough space here */
21943cb34c60Sahrens 	err = dsl_dir_transfer_possible(odd, dd, pa->used);
21951d452cf5Sahrens 
21961d452cf5Sahrens out:
21973cb34c60Sahrens 	if (ds && ds != origin_ds)
21981d452cf5Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
21993cb34c60Sahrens 	if (origin_ds)
22003cb34c60Sahrens 		dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG);
22011d452cf5Sahrens 	if (newnext_ds)
22021d452cf5Sahrens 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
22031d452cf5Sahrens 	if (name)
22041d452cf5Sahrens 		kmem_free(name, MAXPATHLEN);
22051d452cf5Sahrens 	return (err);
22061d452cf5Sahrens }
220799653d4eSeschrock 
22081d452cf5Sahrens static void
2209ecd6cf80Smarks dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
22101d452cf5Sahrens {
22111d452cf5Sahrens 	dsl_dataset_t *hds = arg1;
22121d452cf5Sahrens 	struct promotearg *pa = arg2;
22131d452cf5Sahrens 	dsl_dir_t *dd = hds->ds_dir;
22141d452cf5Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
22153cb34c60Sahrens 	dsl_dir_t *odd = NULL;
22163cb34c60Sahrens 	dsl_dataset_t *ds, *origin_ds;
22171d452cf5Sahrens 	char *name;
22181d452cf5Sahrens 
22193cb34c60Sahrens 	ASSERT(dd->dd_phys->dd_origin_obj != 0);
22201d452cf5Sahrens 	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
22211d452cf5Sahrens 
22223cb34c60Sahrens 	VERIFY(0 == dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj,
22233cb34c60Sahrens 	    NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds));
22240b69c2f0Sahrens 	/*
22253cb34c60Sahrens 	 * We need to explicitly open odd, since origin_ds's dd will be
22260b69c2f0Sahrens 	 * changing.
22270b69c2f0Sahrens 	 */
22283cb34c60Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
22293cb34c60Sahrens 	    NULL, FTAG, &odd));
223099653d4eSeschrock 
223199653d4eSeschrock 	/* move snapshots to this dir */
22321d452cf5Sahrens 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
22333cb34c60Sahrens 	ds = origin_ds;
223499653d4eSeschrock 	/* CONSTCOND */
223599653d4eSeschrock 	while (TRUE) {
223699653d4eSeschrock 		dsl_dataset_t *prev;
223799653d4eSeschrock 
223899653d4eSeschrock 		/* move snap name entry */
223999653d4eSeschrock 		dsl_dataset_name(ds, name);
22401d452cf5Sahrens 		VERIFY(0 == zap_remove(dp->dp_meta_objset,
22411d452cf5Sahrens 		    pa->snapnames_obj, ds->ds_snapname, tx));
22421d452cf5Sahrens 		VERIFY(0 == zap_add(dp->dp_meta_objset,
224399653d4eSeschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
224499653d4eSeschrock 		    8, 1, &ds->ds_object, tx));
224599653d4eSeschrock 
224699653d4eSeschrock 		/* change containing dsl_dir */
224799653d4eSeschrock 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
22483cb34c60Sahrens 		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
224999653d4eSeschrock 		ds->ds_phys->ds_dir_obj = dd->dd_object;
22503cb34c60Sahrens 		ASSERT3P(ds->ds_dir, ==, odd);
225199653d4eSeschrock 		dsl_dir_close(ds->ds_dir, ds);
22521d452cf5Sahrens 		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
225399653d4eSeschrock 		    NULL, ds, &ds->ds_dir));
225499653d4eSeschrock 
225599653d4eSeschrock 		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
225699653d4eSeschrock 
225799653d4eSeschrock 		if (ds->ds_phys->ds_prev_snap_obj == 0)
225899653d4eSeschrock 			break;
225999653d4eSeschrock 
22601d452cf5Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
226199653d4eSeschrock 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
226299653d4eSeschrock 		    FTAG, &prev));
226399653d4eSeschrock 
226499653d4eSeschrock 		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
226599653d4eSeschrock 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
226699653d4eSeschrock 			break;
226799653d4eSeschrock 		}
22683cb34c60Sahrens 		if (ds != origin_ds)
226999653d4eSeschrock 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
227099653d4eSeschrock 		ds = prev;
227199653d4eSeschrock 	}
22723cb34c60Sahrens 	if (ds != origin_ds)
22731d452cf5Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
227499653d4eSeschrock 
22753cb34c60Sahrens 	/* change origin's next snap */
22763cb34c60Sahrens 	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
22773cb34c60Sahrens 	origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;
227899653d4eSeschrock 
22793cb34c60Sahrens 	/* change origin */
228099653d4eSeschrock 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
22813cb34c60Sahrens 	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
22823cb34c60Sahrens 	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
22833cb34c60Sahrens 	dmu_buf_will_dirty(odd->dd_dbuf, tx);
22843cb34c60Sahrens 	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
228599653d4eSeschrock 
228699653d4eSeschrock 	/* change space accounting */
22873cb34c60Sahrens 	dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx);
22881d452cf5Sahrens 	dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
22893cb34c60Sahrens 	origin_ds->ds_phys->ds_unique_bytes = pa->unique;
229099653d4eSeschrock 
2291ecd6cf80Smarks 	/* log history record */
2292ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
2293ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
2294ecd6cf80Smarks 
22953cb34c60Sahrens 	dsl_dir_close(odd, FTAG);
22963cb34c60Sahrens 	dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG);
22971d452cf5Sahrens 	kmem_free(name, MAXPATHLEN);
229899653d4eSeschrock }
229999653d4eSeschrock 
230099653d4eSeschrock int
230199653d4eSeschrock dsl_dataset_promote(const char *name)
230299653d4eSeschrock {
230399653d4eSeschrock 	dsl_dataset_t *ds;
230499653d4eSeschrock 	int err;
230599653d4eSeschrock 	dmu_object_info_t doi;
23061d452cf5Sahrens 	struct promotearg pa;
230799653d4eSeschrock 
230899653d4eSeschrock 	err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
230999653d4eSeschrock 	if (err)
231099653d4eSeschrock 		return (err);
231199653d4eSeschrock 
231299653d4eSeschrock 	err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
231399653d4eSeschrock 	    ds->ds_phys->ds_snapnames_zapobj, &doi);
231499653d4eSeschrock 	if (err) {
231599653d4eSeschrock 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
231699653d4eSeschrock 		return (err);
231799653d4eSeschrock 	}
231899653d4eSeschrock 
231999653d4eSeschrock 	/*
232099653d4eSeschrock 	 * Add in 128x the snapnames zapobj size, since we will be moving
232199653d4eSeschrock 	 * a bunch of snapnames to the promoted ds, and dirtying their
232299653d4eSeschrock 	 * bonus buffers.
232399653d4eSeschrock 	 */
23241d452cf5Sahrens 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
23251d452cf5Sahrens 	    dsl_dataset_promote_check,
23261d452cf5Sahrens 	    dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
232799653d4eSeschrock 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
232899653d4eSeschrock 	return (err);
232999653d4eSeschrock }
2330b1b8ab34Slling 
23313cb34c60Sahrens struct cloneswaparg {
23323cb34c60Sahrens 	dsl_dataset_t *cds; /* clone dataset */
23333cb34c60Sahrens 	dsl_dataset_t *ohds; /* origin's head dataset */
23343cb34c60Sahrens 	boolean_t force;
23353cb34c60Sahrens };
2336f18faf3fSek 
2337f18faf3fSek /* ARGSUSED */
2338f18faf3fSek static int
2339f18faf3fSek dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
2340f18faf3fSek {
23413cb34c60Sahrens 	struct cloneswaparg *csa = arg1;
2342f18faf3fSek 
23433cb34c60Sahrens 	/* they should both be heads */
23443cb34c60Sahrens 	if (dsl_dataset_is_snapshot(csa->cds) ||
23453cb34c60Sahrens 	    dsl_dataset_is_snapshot(csa->ohds))
2346f18faf3fSek 		return (EINVAL);
2347f18faf3fSek 
23483cb34c60Sahrens 	/* the branch point should be just before them */
23493cb34c60Sahrens 	if (csa->cds->ds_prev != csa->ohds->ds_prev)
2350f18faf3fSek 		return (EINVAL);
2351f18faf3fSek 
23523cb34c60Sahrens 	/* cds should be the clone */
23533cb34c60Sahrens 	if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj !=
23543cb34c60Sahrens 	    csa->ohds->ds_object)
23553cb34c60Sahrens 		return (EINVAL);
2356f18faf3fSek 
23573cb34c60Sahrens 	/* the clone should be a child of the origin */
23583cb34c60Sahrens 	if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
23593cb34c60Sahrens 		return (EINVAL);
2360f18faf3fSek 
23613cb34c60Sahrens 	/* ohds shouldn't be modified unless 'force' */
23623cb34c60Sahrens 	if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
23633cb34c60Sahrens 		return (ETXTBSY);
23643cb34c60Sahrens 	return (0);
2365f18faf3fSek }
2366f18faf3fSek 
2367f18faf3fSek /* ARGSUSED */
2368f18faf3fSek static void
2369f18faf3fSek dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
2370f18faf3fSek {
23713cb34c60Sahrens 	struct cloneswaparg *csa = arg1;
23723cb34c60Sahrens 	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
2373f18faf3fSek 	uint64_t itor = 0;
2374f18faf3fSek 	blkptr_t bp;
2375f18faf3fSek 	uint64_t unique = 0;
2376f18faf3fSek 	int err;
2377f18faf3fSek 
2378*a9799022Sck 	if (csa->ohds->ds_reserved)
2379*a9799022Sck 		panic("refreservation and clone swap are incompatible");
2380*a9799022Sck 
23813cb34c60Sahrens 	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
23823cb34c60Sahrens 	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
23833cb34c60Sahrens 	dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx);
2384f18faf3fSek 
23853cb34c60Sahrens 	if (csa->cds->ds_user_ptr != NULL) {
23863cb34c60Sahrens 		csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr);
23873cb34c60Sahrens 		csa->cds->ds_user_ptr = NULL;
23883cb34c60Sahrens 	}
2389f18faf3fSek 
23903cb34c60Sahrens 	if (csa->ohds->ds_user_ptr != NULL) {
23913cb34c60Sahrens 		csa->ohds->ds_user_evict_func(csa->ohds,
23923cb34c60Sahrens 		    csa->ohds->ds_user_ptr);
23933cb34c60Sahrens 		csa->ohds->ds_user_ptr = NULL;
23943cb34c60Sahrens 	}
2395f18faf3fSek 
2396f18faf3fSek 	/* compute unique space */
23973cb34c60Sahrens 	while ((err = bplist_iterate(&csa->cds->ds_deadlist,
23983cb34c60Sahrens 	    &itor, &bp)) == 0) {
23993cb34c60Sahrens 		if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg)
24003cb34c60Sahrens 			unique += bp_get_dasize(dp->dp_spa, &bp);
2401f18faf3fSek 	}
2402f18faf3fSek 	VERIFY(err == ENOENT);
2403f18faf3fSek 
2404*a9799022Sck 	/* undo any accounting due to a refreservation */
2405*a9799022Sck 	if (csa->ohds->ds_reserved > csa->ohds->ds_phys->ds_unique_bytes) {
2406*a9799022Sck 		dsl_dir_diduse_space(csa->ohds->ds_dir,
2407*a9799022Sck 		    csa->ohds->ds_phys->ds_unique_bytes -
2408*a9799022Sck 		    csa->ohds->ds_reserved, 0, 0, tx);
2409*a9799022Sck 	}
2410*a9799022Sck 
2411f18faf3fSek 	/* reset origin's unique bytes */
24123cb34c60Sahrens 	csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique;
2413f18faf3fSek 
2414f18faf3fSek 	/* swap blkptrs */
2415f18faf3fSek 	{
2416f18faf3fSek 		blkptr_t tmp;
24173cb34c60Sahrens 		tmp = csa->ohds->ds_phys->ds_bp;
24183cb34c60Sahrens 		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
24193cb34c60Sahrens 		csa->cds->ds_phys->ds_bp = tmp;
2420f18faf3fSek 	}
2421f18faf3fSek 
2422f18faf3fSek 	/* set dd_*_bytes */
2423f18faf3fSek 	{
2424f18faf3fSek 		int64_t dused, dcomp, duncomp;
2425f18faf3fSek 		uint64_t cdl_used, cdl_comp, cdl_uncomp;
2426f18faf3fSek 		uint64_t odl_used, odl_comp, odl_uncomp;
2427f18faf3fSek 
24283cb34c60Sahrens 		VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used,
2429f18faf3fSek 		    &cdl_comp, &cdl_uncomp));
24303cb34c60Sahrens 		VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used,
2431f18faf3fSek 		    &odl_comp, &odl_uncomp));
24323cb34c60Sahrens 		dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
24333cb34c60Sahrens 		    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
24343cb34c60Sahrens 		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
24353cb34c60Sahrens 		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
24363cb34c60Sahrens 		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
24373cb34c60Sahrens 		    cdl_uncomp -
24383cb34c60Sahrens 		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
24393cb34c60Sahrens 
24403cb34c60Sahrens 		dsl_dir_diduse_space(csa->ohds->ds_dir,
24413cb34c60Sahrens 		    dused, dcomp, duncomp, tx);
24423cb34c60Sahrens 		dsl_dir_diduse_space(csa->cds->ds_dir,
24433cb34c60Sahrens 		    -dused, -dcomp, -duncomp, tx);
24443cb34c60Sahrens 	}
24453cb34c60Sahrens 
24463cb34c60Sahrens #define	SWITCH64(x, y) \
24473cb34c60Sahrens 	{ \
24483cb34c60Sahrens 		uint64_t __tmp = (x); \
24493cb34c60Sahrens 		(x) = (y); \
24503cb34c60Sahrens 		(y) = __tmp; \
2451f18faf3fSek 	}
2452f18faf3fSek 
2453*a9799022Sck 	/* redo any accounting due to a refreservation */
2454*a9799022Sck 	if (csa->ohds->ds_reserved > csa->ohds->ds_phys->ds_unique_bytes) {
2455*a9799022Sck 		dsl_dir_diduse_space(csa->ohds->ds_dir,
2456*a9799022Sck 		    csa->ohds->ds_reserved -
2457*a9799022Sck 		    csa->ohds->ds_phys->ds_unique_bytes, 0, 0, tx);
2458*a9799022Sck 	}
2459*a9799022Sck 
2460f18faf3fSek 	/* swap ds_*_bytes */
24613cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
24623cb34c60Sahrens 	    csa->cds->ds_phys->ds_used_bytes);
24633cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
24643cb34c60Sahrens 	    csa->cds->ds_phys->ds_compressed_bytes);
24653cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
24663cb34c60Sahrens 	    csa->cds->ds_phys->ds_uncompressed_bytes);
2467f18faf3fSek 
2468f18faf3fSek 	/* swap deadlists */
24693cb34c60Sahrens 	bplist_close(&csa->cds->ds_deadlist);
24703cb34c60Sahrens 	bplist_close(&csa->ohds->ds_deadlist);
24713cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
24723cb34c60Sahrens 	    csa->cds->ds_phys->ds_deadlist_obj);
24733cb34c60Sahrens 	VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
24743cb34c60Sahrens 	    csa->cds->ds_phys->ds_deadlist_obj));
24753cb34c60Sahrens 	VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
24763cb34c60Sahrens 	    csa->ohds->ds_phys->ds_deadlist_obj));
2477*a9799022Sck 	/* fix up clone's unique */
2478*a9799022Sck 	dsl_dataset_recalc_head_uniq(csa->cds);
2479*a9799022Sck 
2480f18faf3fSek }
2481f18faf3fSek 
2482f18faf3fSek /*
2483f18faf3fSek  * Swap the clone "cosname" with its origin head file system.
2484f18faf3fSek  */
2485f18faf3fSek int
24863cb34c60Sahrens dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
24873cb34c60Sahrens     boolean_t force)
2488f18faf3fSek {
24893cb34c60Sahrens 	struct cloneswaparg csa;
2490f18faf3fSek 
24913cb34c60Sahrens 	ASSERT(clone->ds_open_refcount == DS_REF_MAX);
24923cb34c60Sahrens 	ASSERT(origin_head->ds_open_refcount == DS_REF_MAX);
2493f18faf3fSek 
24943cb34c60Sahrens 	csa.cds = clone;
24953cb34c60Sahrens 	csa.ohds = origin_head;
24963cb34c60Sahrens 	csa.force = force;
24973cb34c60Sahrens 	return (dsl_sync_task_do(clone->ds_dir->dd_pool,
2498f18faf3fSek 	    dsl_dataset_clone_swap_check,
24993cb34c60Sahrens 	    dsl_dataset_clone_swap_sync, &csa, NULL, 9));
2500f18faf3fSek }
2501f18faf3fSek 
2502b1b8ab34Slling /*
2503b1b8ab34Slling  * Given a pool name and a dataset object number in that pool,
2504b1b8ab34Slling  * return the name of that dataset.
2505b1b8ab34Slling  */
2506b1b8ab34Slling int
2507b1b8ab34Slling dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
2508b1b8ab34Slling {
2509b1b8ab34Slling 	spa_t *spa;
2510b1b8ab34Slling 	dsl_pool_t *dp;
2511b1b8ab34Slling 	dsl_dataset_t *ds = NULL;
2512b1b8ab34Slling 	int error;
2513b1b8ab34Slling 
2514b1b8ab34Slling 	if ((error = spa_open(pname, &spa, FTAG)) != 0)
2515b1b8ab34Slling 		return (error);
2516b1b8ab34Slling 	dp = spa_get_dsl(spa);
2517b1b8ab34Slling 	rw_enter(&dp->dp_config_rwlock, RW_READER);
2518b1b8ab34Slling 	if ((error = dsl_dataset_open_obj(dp, obj,
2519b1b8ab34Slling 	    NULL, DS_MODE_NONE, FTAG, &ds)) != 0) {
2520b1b8ab34Slling 		rw_exit(&dp->dp_config_rwlock);
2521b1b8ab34Slling 		spa_close(spa, FTAG);
2522b1b8ab34Slling 		return (error);
2523b1b8ab34Slling 	}
2524b1b8ab34Slling 	dsl_dataset_name(ds, buf);
2525b1b8ab34Slling 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
2526b1b8ab34Slling 	rw_exit(&dp->dp_config_rwlock);
2527b1b8ab34Slling 	spa_close(spa, FTAG);
2528b1b8ab34Slling 
2529b1b8ab34Slling 	return (0);
2530b1b8ab34Slling }
2531*a9799022Sck 
2532*a9799022Sck int
2533*a9799022Sck dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
2534*a9799022Sck     uint64_t asize, uint64_t inflight, uint64_t *used)
2535*a9799022Sck {
2536*a9799022Sck 	int error = 0;
2537*a9799022Sck 
2538*a9799022Sck 	ASSERT3S(asize, >, 0);
2539*a9799022Sck 
2540*a9799022Sck 	mutex_enter(&ds->ds_lock);
2541*a9799022Sck 	/*
2542*a9799022Sck 	 * Make a space adjustment for reserved bytes.
2543*a9799022Sck 	 */
2544*a9799022Sck 	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
2545*a9799022Sck 		ASSERT3U(*used, >=,
2546*a9799022Sck 		    ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
2547*a9799022Sck 		*used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
2548*a9799022Sck 	}
2549*a9799022Sck 
2550*a9799022Sck 	if (!check_quota || ds->ds_quota == 0) {
2551*a9799022Sck 		mutex_exit(&ds->ds_lock);
2552*a9799022Sck 		return (0);
2553*a9799022Sck 	}
2554*a9799022Sck 	/*
2555*a9799022Sck 	 * If they are requesting more space, and our current estimate
2556*a9799022Sck 	 * is over quota, they get to try again unless the actual
2557*a9799022Sck 	 * on-disk is over quota and there are no pending changes (which
2558*a9799022Sck 	 * may free up space for us).
2559*a9799022Sck 	 */
2560*a9799022Sck 	if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
2561*a9799022Sck 		if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
2562*a9799022Sck 			error = ERESTART;
2563*a9799022Sck 		else
2564*a9799022Sck 			error = EDQUOT;
2565*a9799022Sck 	}
2566*a9799022Sck 	mutex_exit(&ds->ds_lock);
2567*a9799022Sck 
2568*a9799022Sck 	return (error);
2569*a9799022Sck }
2570*a9799022Sck 
2571*a9799022Sck /* ARGSUSED */
2572*a9799022Sck static int
2573*a9799022Sck dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
2574*a9799022Sck {
2575*a9799022Sck 	dsl_dataset_t *ds = arg1;
2576*a9799022Sck 	uint64_t *quotap = arg2;
2577*a9799022Sck 	uint64_t new_quota = *quotap;
2578*a9799022Sck 
2579*a9799022Sck 	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
2580*a9799022Sck 		return (ENOTSUP);
2581*a9799022Sck 
2582*a9799022Sck 	if (new_quota == 0)
2583*a9799022Sck 		return (0);
2584*a9799022Sck 
2585*a9799022Sck 	if (new_quota < ds->ds_phys->ds_used_bytes ||
2586*a9799022Sck 	    new_quota < ds->ds_reserved)
2587*a9799022Sck 		return (ENOSPC);
2588*a9799022Sck 
2589*a9799022Sck 	return (0);
2590*a9799022Sck }
2591*a9799022Sck 
2592*a9799022Sck /* ARGSUSED */
2593*a9799022Sck void
2594*a9799022Sck dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
2595*a9799022Sck {
2596*a9799022Sck 	dsl_dataset_t *ds = arg1;
2597*a9799022Sck 	uint64_t *quotap = arg2;
2598*a9799022Sck 	uint64_t new_quota = *quotap;
2599*a9799022Sck 
2600*a9799022Sck 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
2601*a9799022Sck 
2602*a9799022Sck 	mutex_enter(&ds->ds_lock);
2603*a9799022Sck 	ds->ds_quota = new_quota;
2604*a9799022Sck 	mutex_exit(&ds->ds_lock);
2605*a9799022Sck 
2606*a9799022Sck 	dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);
2607*a9799022Sck 
2608*a9799022Sck 	spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa,
2609*a9799022Sck 	    tx, cr, "%lld dataset = %llu ",
2610*a9799022Sck 	    (longlong_t)new_quota, ds->ds_dir->dd_phys->dd_head_dataset_obj);
2611*a9799022Sck }
2612*a9799022Sck 
2613*a9799022Sck int
2614*a9799022Sck dsl_dataset_set_quota(const char *dsname, uint64_t quota)
2615*a9799022Sck {
2616*a9799022Sck 	dsl_dataset_t *ds;
2617*a9799022Sck 	int err;
2618*a9799022Sck 
2619*a9799022Sck 	err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds);
2620*a9799022Sck 	if (err)
2621*a9799022Sck 		return (err);
2622*a9799022Sck 
2623*a9799022Sck 	/*
2624*a9799022Sck 	 * If someone removes a file, then tries to set the quota, we
2625*a9799022Sck 	 * want to make sure the file freeing takes effect.
2626*a9799022Sck 	 */
2627*a9799022Sck 	txg_wait_open(ds->ds_dir->dd_pool, 0);
2628*a9799022Sck 
2629*a9799022Sck 	err = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_set_quota_check,
2630*a9799022Sck 	    dsl_dataset_set_quota_sync, ds, &quota, 0);
2631*a9799022Sck 	dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
2632*a9799022Sck 	return (err);
2633*a9799022Sck }
2634*a9799022Sck 
2635*a9799022Sck static int
2636*a9799022Sck dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
2637*a9799022Sck {
2638*a9799022Sck 	dsl_dataset_t *ds = arg1;
2639*a9799022Sck 	uint64_t *reservationp = arg2;
2640*a9799022Sck 	uint64_t new_reservation = *reservationp;
2641*a9799022Sck 	int64_t delta;
2642*a9799022Sck 	uint64_t unique;
2643*a9799022Sck 
2644*a9799022Sck 	if (new_reservation > INT64_MAX)
2645*a9799022Sck 		return (EOVERFLOW);
2646*a9799022Sck 
2647*a9799022Sck 	if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
2648*a9799022Sck 	    SPA_VERSION_REFRESERVATION)
2649*a9799022Sck 		return (ENOTSUP);
2650*a9799022Sck 
2651*a9799022Sck 	if (dsl_dataset_is_snapshot(ds))
2652*a9799022Sck 		return (EINVAL);
2653*a9799022Sck 
2654*a9799022Sck 	/*
2655*a9799022Sck 	 * If we are doing the preliminary check in open context, the
2656*a9799022Sck 	 * space estimates may be inaccurate.
2657*a9799022Sck 	 */
2658*a9799022Sck 	if (!dmu_tx_is_syncing(tx))
2659*a9799022Sck 		return (0);
2660*a9799022Sck 
2661*a9799022Sck 	mutex_enter(&ds->ds_lock);
2662*a9799022Sck 	unique = dsl_dataset_unique(ds);
2663*a9799022Sck 	delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved);
2664*a9799022Sck 	mutex_exit(&ds->ds_lock);
2665*a9799022Sck 
2666*a9799022Sck 	if (delta > 0 &&
2667*a9799022Sck 	    delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
2668*a9799022Sck 		return (ENOSPC);
2669*a9799022Sck 	if (delta > 0 && ds->ds_quota > 0 &&
2670*a9799022Sck 	    new_reservation > ds->ds_quota)
2671*a9799022Sck 		return (ENOSPC);
2672*a9799022Sck 
2673*a9799022Sck 	return (0);
2674*a9799022Sck }
2675*a9799022Sck 
2676*a9799022Sck /* ARGSUSED */
2677*a9799022Sck static void
2678*a9799022Sck dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr,
2679*a9799022Sck     dmu_tx_t *tx)
2680*a9799022Sck {
2681*a9799022Sck 	dsl_dataset_t *ds = arg1;
2682*a9799022Sck 	uint64_t *reservationp = arg2;
2683*a9799022Sck 	uint64_t new_reservation = *reservationp;
2684*a9799022Sck 	uint64_t unique;
2685*a9799022Sck 	int64_t delta;
2686*a9799022Sck 
2687*a9799022Sck 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
2688*a9799022Sck 
2689*a9799022Sck 	mutex_enter(&ds->ds_lock);
2690*a9799022Sck 	unique = dsl_dataset_unique(ds);
2691*a9799022Sck 	delta = MAX(0, (int64_t)(new_reservation - unique)) -
2692*a9799022Sck 	    MAX(0, (int64_t)(ds->ds_reserved - unique));
2693*a9799022Sck 	ds->ds_reserved = new_reservation;
2694*a9799022Sck 	mutex_exit(&ds->ds_lock);
2695*a9799022Sck 
2696*a9799022Sck 	dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation",
2697*a9799022Sck 	    new_reservation, cr, tx);
2698*a9799022Sck 
2699*a9799022Sck 	dsl_dir_diduse_space(ds->ds_dir, delta, 0, 0, tx);
2700*a9799022Sck 
2701*a9799022Sck 	spa_history_internal_log(LOG_DS_REFRESERV,
2702*a9799022Sck 	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu",
2703*a9799022Sck 	    (longlong_t)new_reservation,
2704*a9799022Sck 	    ds->ds_dir->dd_phys->dd_head_dataset_obj);
2705*a9799022Sck }
2706*a9799022Sck 
2707*a9799022Sck int
2708*a9799022Sck dsl_dataset_set_reservation(const char *dsname, uint64_t reservation)
2709*a9799022Sck {
2710*a9799022Sck 	dsl_dataset_t *ds;
2711*a9799022Sck 	int err;
2712*a9799022Sck 
2713*a9799022Sck 	err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds);
2714*a9799022Sck 	if (err)
2715*a9799022Sck 		return (err);
2716*a9799022Sck 
2717*a9799022Sck 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2718*a9799022Sck 	    dsl_dataset_set_reservation_check,
2719*a9799022Sck 	    dsl_dataset_set_reservation_sync, ds, &reservation, 0);
2720*a9799022Sck 	dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
2721*a9799022Sck 	return (err);
2722*a9799022Sck }
2723