1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
221db42183SEric Taylor  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23fa9e4066Sahrens  * Use is subject to license terms.
24fa9e4066Sahrens  */
25fa9e4066Sahrens 
26fa9e4066Sahrens #include <sys/dmu_objset.h>
27fa9e4066Sahrens #include <sys/dsl_dataset.h>
28fa9e4066Sahrens #include <sys/dsl_dir.h>
2999653d4eSeschrock #include <sys/dsl_prop.h>
301d452cf5Sahrens #include <sys/dsl_synctask.h>
31fa9e4066Sahrens #include <sys/dmu_traverse.h>
32fa9e4066Sahrens #include <sys/dmu_tx.h>
33fa9e4066Sahrens #include <sys/arc.h>
34fa9e4066Sahrens #include <sys/zio.h>
35fa9e4066Sahrens #include <sys/zap.h>
36fa9e4066Sahrens #include <sys/unique.h>
37fa9e4066Sahrens #include <sys/zfs_context.h>
38cdf5b4caSmmusante #include <sys/zfs_ioctl.h>
39ecd6cf80Smarks #include <sys/spa.h>
40088f3894Sahrens #include <sys/zfs_znode.h>
41ecd6cf80Smarks #include <sys/sunddi.h>
42fa9e4066Sahrens 
43745cd3c5Smaybee static char *dsl_reaper = "the grim reaper";
44745cd3c5Smaybee 
451d452cf5Sahrens static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
461d452cf5Sahrens static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
471d452cf5Sahrens static dsl_checkfunc_t dsl_dataset_rollback_check;
481d452cf5Sahrens static dsl_syncfunc_t dsl_dataset_rollback_sync;
49a9799022Sck static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
50e1930233Sbonwick 
5155434c77Sek #define	DS_REF_MAX	(1ULL << 62)
52fa9e4066Sahrens 
53fa9e4066Sahrens #define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
54fa9e4066Sahrens 
55745cd3c5Smaybee #define	DSL_DATASET_IS_DESTROYED(ds)	((ds)->ds_owner == dsl_reaper)
56745cd3c5Smaybee 
57fa9e4066Sahrens 
58a9799022Sck /*
59a9799022Sck  * Figure out how much of this delta should be propogated to the dsl_dir
60a9799022Sck  * layer.  If there's a refreservation, that space has already been
61a9799022Sck  * partially accounted for in our ancestors.
62a9799022Sck  */
63a9799022Sck static int64_t
64a9799022Sck parent_delta(dsl_dataset_t *ds, int64_t delta)
65a9799022Sck {
66a9799022Sck 	uint64_t old_bytes, new_bytes;
67a9799022Sck 
68a9799022Sck 	if (ds->ds_reserved == 0)
69a9799022Sck 		return (delta);
70a9799022Sck 
71a9799022Sck 	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
72a9799022Sck 	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
73a9799022Sck 
74a9799022Sck 	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
75a9799022Sck 	return (new_bytes - old_bytes);
76a9799022Sck }
77fa9e4066Sahrens 
78fa9e4066Sahrens void
79fa9e4066Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
80fa9e4066Sahrens {
8199653d4eSeschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
82fa9e4066Sahrens 	int compressed = BP_GET_PSIZE(bp);
83fa9e4066Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
84a9799022Sck 	int64_t delta;
85fa9e4066Sahrens 
86fa9e4066Sahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
87fa9e4066Sahrens 
88fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
89fa9e4066Sahrens 	/* It could have been compressed away to nothing */
90fa9e4066Sahrens 	if (BP_IS_HOLE(bp))
91fa9e4066Sahrens 		return;
92fa9e4066Sahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
93fa9e4066Sahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
94fa9e4066Sahrens 	if (ds == NULL) {
95fa9e4066Sahrens 		/*
96fa9e4066Sahrens 		 * Account for the meta-objset space in its placeholder
97fa9e4066Sahrens 		 * dsl_dir.
98fa9e4066Sahrens 		 */
99fa9e4066Sahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
10074e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
101fa9e4066Sahrens 		    used, compressed, uncompressed, tx);
102fa9e4066Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
103fa9e4066Sahrens 		return;
104fa9e4066Sahrens 	}
105fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
10602c8f3f0SMatthew Ahrens 	mutex_enter(&ds->ds_dir->dd_lock);
107fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
108a9799022Sck 	delta = parent_delta(ds, used);
109fa9e4066Sahrens 	ds->ds_phys->ds_used_bytes += used;
110fa9e4066Sahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
111fa9e4066Sahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
112fa9e4066Sahrens 	ds->ds_phys->ds_unique_bytes += used;
113fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
11474e7dc98SMatthew Ahrens 	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
11574e7dc98SMatthew Ahrens 	    compressed, uncompressed, tx);
11674e7dc98SMatthew Ahrens 	dsl_dir_transfer_space(ds->ds_dir, used - delta,
11774e7dc98SMatthew Ahrens 	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
11802c8f3f0SMatthew Ahrens 	mutex_exit(&ds->ds_dir->dd_lock);
119fa9e4066Sahrens }
120fa9e4066Sahrens 
121cdb0ab79Smaybee int
122c717a561Smaybee dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
123c717a561Smaybee     dmu_tx_t *tx)
124fa9e4066Sahrens {
12599653d4eSeschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
126fa9e4066Sahrens 	int compressed = BP_GET_PSIZE(bp);
127fa9e4066Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
128fa9e4066Sahrens 
129e14bb325SJeff Bonwick 	ASSERT(pio != NULL);
130fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
131c717a561Smaybee 	/* No block pointer => nothing to free */
132fa9e4066Sahrens 	if (BP_IS_HOLE(bp))
133cdb0ab79Smaybee 		return (0);
134fa9e4066Sahrens 
135fa9e4066Sahrens 	ASSERT(used > 0);
136fa9e4066Sahrens 	if (ds == NULL) {
137c717a561Smaybee 		int err;
138fa9e4066Sahrens 		/*
139fa9e4066Sahrens 		 * Account for the meta-objset space in its placeholder
140fa9e4066Sahrens 		 * dataset.
141fa9e4066Sahrens 		 */
142088f3894Sahrens 		err = dsl_free(pio, tx->tx_pool,
143e14bb325SJeff Bonwick 		    tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
144c717a561Smaybee 		ASSERT(err == 0);
145fa9e4066Sahrens 
14674e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
147fa9e4066Sahrens 		    -used, -compressed, -uncompressed, tx);
148fa9e4066Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
149cdb0ab79Smaybee 		return (used);
150fa9e4066Sahrens 	}
151fa9e4066Sahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
152fa9e4066Sahrens 
15374e7dc98SMatthew Ahrens 	ASSERT(!dsl_dataset_is_snapshot(ds));
154fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
155fa9e4066Sahrens 
156fa9e4066Sahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
157c717a561Smaybee 		int err;
158a9799022Sck 		int64_t delta;
159c717a561Smaybee 
160fa9e4066Sahrens 		dprintf_bp(bp, "freeing: %s", "");
161088f3894Sahrens 		err = dsl_free(pio, tx->tx_pool,
162e14bb325SJeff Bonwick 		    tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
163c717a561Smaybee 		ASSERT(err == 0);
164fa9e4066Sahrens 
16502c8f3f0SMatthew Ahrens 		mutex_enter(&ds->ds_dir->dd_lock);
166fa9e4066Sahrens 		mutex_enter(&ds->ds_lock);
167a9799022Sck 		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
168a9799022Sck 		    !DS_UNIQUE_IS_ACCURATE(ds));
169a9799022Sck 		delta = parent_delta(ds, -used);
170fa9e4066Sahrens 		ds->ds_phys->ds_unique_bytes -= used;
171fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
17274e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
173a9799022Sck 		    delta, -compressed, -uncompressed, tx);
17474e7dc98SMatthew Ahrens 		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
17574e7dc98SMatthew Ahrens 		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
17602c8f3f0SMatthew Ahrens 		mutex_exit(&ds->ds_dir->dd_lock);
177fa9e4066Sahrens 	} else {
178fa9e4066Sahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
179ea8dc4b6Seschrock 		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
180a4611edeSahrens 		ASSERT3U(ds->ds_prev->ds_object, ==,
181a4611edeSahrens 		    ds->ds_phys->ds_prev_snap_obj);
182a4611edeSahrens 		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
183fa9e4066Sahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
184a4611edeSahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
185a4611edeSahrens 		    ds->ds_object && bp->blk_birth >
186a4611edeSahrens 		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
187a4611edeSahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
188a4611edeSahrens 			mutex_enter(&ds->ds_prev->ds_lock);
189a4611edeSahrens 			ds->ds_prev->ds_phys->ds_unique_bytes += used;
190a4611edeSahrens 			mutex_exit(&ds->ds_prev->ds_lock);
191fa9e4066Sahrens 		}
19274e7dc98SMatthew Ahrens 		if (bp->blk_birth > ds->ds_origin_txg) {
19374e7dc98SMatthew Ahrens 			dsl_dir_transfer_space(ds->ds_dir, used,
19474e7dc98SMatthew Ahrens 			    DD_USED_HEAD, DD_USED_SNAP, tx);
19574e7dc98SMatthew Ahrens 		}
196fa9e4066Sahrens 	}
197fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
198fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
199fa9e4066Sahrens 	ds->ds_phys->ds_used_bytes -= used;
200fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
201fa9e4066Sahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
202fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
203fa9e4066Sahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
204fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
205cdb0ab79Smaybee 
206cdb0ab79Smaybee 	return (used);
207fa9e4066Sahrens }
208fa9e4066Sahrens 
209ea8dc4b6Seschrock uint64_t
210ea8dc4b6Seschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
211fa9e4066Sahrens {
212a2eea2e1Sahrens 	uint64_t trysnap = 0;
213a2eea2e1Sahrens 
214fa9e4066Sahrens 	if (ds == NULL)
215ea8dc4b6Seschrock 		return (0);
216fa9e4066Sahrens 	/*
217fa9e4066Sahrens 	 * The snapshot creation could fail, but that would cause an
218fa9e4066Sahrens 	 * incorrect FALSE return, which would only result in an
219fa9e4066Sahrens 	 * overestimation of the amount of space that an operation would
220fa9e4066Sahrens 	 * consume, which is OK.
221fa9e4066Sahrens 	 *
222fa9e4066Sahrens 	 * There's also a small window where we could miss a pending
223fa9e4066Sahrens 	 * snapshot, because we could set the sync task in the quiescing
224fa9e4066Sahrens 	 * phase.  So this should only be used as a guess.
225fa9e4066Sahrens 	 */
226a2eea2e1Sahrens 	if (ds->ds_trysnap_txg >
227a2eea2e1Sahrens 	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
228a2eea2e1Sahrens 		trysnap = ds->ds_trysnap_txg;
229a2eea2e1Sahrens 	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
230ea8dc4b6Seschrock }
231ea8dc4b6Seschrock 
232ea8dc4b6Seschrock int
233ea8dc4b6Seschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
234ea8dc4b6Seschrock {
235ea8dc4b6Seschrock 	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
236fa9e4066Sahrens }
237fa9e4066Sahrens 
238fa9e4066Sahrens /* ARGSUSED */
239fa9e4066Sahrens static void
240fa9e4066Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
241fa9e4066Sahrens {
242fa9e4066Sahrens 	dsl_dataset_t *ds = dsv;
243fa9e4066Sahrens 
244745cd3c5Smaybee 	ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));
245fa9e4066Sahrens 
246fa9e4066Sahrens 	dprintf_ds(ds, "evicting %s\n", "");
247fa9e4066Sahrens 
24891ebeef5Sahrens 	unique_remove(ds->ds_fsid_guid);
249fa9e4066Sahrens 
250fa9e4066Sahrens 	if (ds->ds_user_ptr != NULL)
251fa9e4066Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
252fa9e4066Sahrens 
253fa9e4066Sahrens 	if (ds->ds_prev) {
254745cd3c5Smaybee 		dsl_dataset_drop_ref(ds->ds_prev, ds);
255fa9e4066Sahrens 		ds->ds_prev = NULL;
256fa9e4066Sahrens 	}
257fa9e4066Sahrens 
258fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
259745cd3c5Smaybee 	if (ds->ds_dir)
260745cd3c5Smaybee 		dsl_dir_close(ds->ds_dir, ds);
261fa9e4066Sahrens 
26291ebeef5Sahrens 	ASSERT(!list_link_active(&ds->ds_synced_link));
263fa9e4066Sahrens 
2645ad82045Snd 	mutex_destroy(&ds->ds_lock);
26591ebeef5Sahrens 	mutex_destroy(&ds->ds_opening_lock);
2665ad82045Snd 	mutex_destroy(&ds->ds_deadlist.bpl_lock);
267745cd3c5Smaybee 	rw_destroy(&ds->ds_rwlock);
268745cd3c5Smaybee 	cv_destroy(&ds->ds_exclusive_cv);
2695ad82045Snd 
270fa9e4066Sahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
271fa9e4066Sahrens }
272fa9e4066Sahrens 
273ea8dc4b6Seschrock static int
274fa9e4066Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
275fa9e4066Sahrens {
276fa9e4066Sahrens 	dsl_dataset_phys_t *headphys;
277fa9e4066Sahrens 	int err;
278fa9e4066Sahrens 	dmu_buf_t *headdbuf;
279fa9e4066Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
280fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
281fa9e4066Sahrens 
282fa9e4066Sahrens 	if (ds->ds_snapname[0])
283ea8dc4b6Seschrock 		return (0);
284fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
285ea8dc4b6Seschrock 		return (0);
286fa9e4066Sahrens 
287ea8dc4b6Seschrock 	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
288ea8dc4b6Seschrock 	    FTAG, &headdbuf);
289ea8dc4b6Seschrock 	if (err)
290ea8dc4b6Seschrock 		return (err);
291fa9e4066Sahrens 	headphys = headdbuf->db_data;
292fa9e4066Sahrens 	err = zap_value_search(dp->dp_meta_objset,
293e7437265Sahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
294ea8dc4b6Seschrock 	dmu_buf_rele(headdbuf, FTAG);
295ea8dc4b6Seschrock 	return (err);
296fa9e4066Sahrens }
297fa9e4066Sahrens 
298ab04eb8eStimh static int
299745cd3c5Smaybee dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
300ab04eb8eStimh {
301745cd3c5Smaybee 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
302745cd3c5Smaybee 	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
303ab04eb8eStimh 	matchtype_t mt;
304ab04eb8eStimh 	int err;
305ab04eb8eStimh 
306745cd3c5Smaybee 	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
307ab04eb8eStimh 		mt = MT_FIRST;
308ab04eb8eStimh 	else
309ab04eb8eStimh 		mt = MT_EXACT;
310ab04eb8eStimh 
311745cd3c5Smaybee 	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
312ab04eb8eStimh 	    value, mt, NULL, 0, NULL);
313ab04eb8eStimh 	if (err == ENOTSUP && mt == MT_FIRST)
314745cd3c5Smaybee 		err = zap_lookup(mos, snapobj, name, 8, 1, value);
315ab04eb8eStimh 	return (err);
316ab04eb8eStimh }
317ab04eb8eStimh 
318ab04eb8eStimh static int
319745cd3c5Smaybee dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
320ab04eb8eStimh {
321745cd3c5Smaybee 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
322745cd3c5Smaybee 	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
323ab04eb8eStimh 	matchtype_t mt;
324ab04eb8eStimh 	int err;
325ab04eb8eStimh 
326745cd3c5Smaybee 	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
327ab04eb8eStimh 		mt = MT_FIRST;
328ab04eb8eStimh 	else
329ab04eb8eStimh 		mt = MT_EXACT;
330ab04eb8eStimh 
331745cd3c5Smaybee 	err = zap_remove_norm(mos, snapobj, name, mt, tx);
332ab04eb8eStimh 	if (err == ENOTSUP && mt == MT_FIRST)
333745cd3c5Smaybee 		err = zap_remove(mos, snapobj, name, tx);
334ab04eb8eStimh 	return (err);
335ab04eb8eStimh }
336ab04eb8eStimh 
337745cd3c5Smaybee static int
338745cd3c5Smaybee dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
339745cd3c5Smaybee     dsl_dataset_t **dsp)
340fa9e4066Sahrens {
341fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
342fa9e4066Sahrens 	dmu_buf_t *dbuf;
343fa9e4066Sahrens 	dsl_dataset_t *ds;
344ea8dc4b6Seschrock 	int err;
345fa9e4066Sahrens 
346fa9e4066Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
347fa9e4066Sahrens 	    dsl_pool_sync_context(dp));
348fa9e4066Sahrens 
349ea8dc4b6Seschrock 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
350ea8dc4b6Seschrock 	if (err)
351ea8dc4b6Seschrock 		return (err);
352fa9e4066Sahrens 	ds = dmu_buf_get_user(dbuf);
353fa9e4066Sahrens 	if (ds == NULL) {
354fa9e4066Sahrens 		dsl_dataset_t *winner;
355fa9e4066Sahrens 
356fa9e4066Sahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
357fa9e4066Sahrens 		ds->ds_dbuf = dbuf;
358fa9e4066Sahrens 		ds->ds_object = dsobj;
359fa9e4066Sahrens 		ds->ds_phys = dbuf->db_data;
360fa9e4066Sahrens 
3615ad82045Snd 		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
36291ebeef5Sahrens 		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
3635ad82045Snd 		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
3645ad82045Snd 		    NULL);
365745cd3c5Smaybee 		rw_init(&ds->ds_rwlock, 0, 0, 0);
366745cd3c5Smaybee 		cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
3675ad82045Snd 
368ea8dc4b6Seschrock 		err = bplist_open(&ds->ds_deadlist,
369fa9e4066Sahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
370ea8dc4b6Seschrock 		if (err == 0) {
371ea8dc4b6Seschrock 			err = dsl_dir_open_obj(dp,
372ea8dc4b6Seschrock 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
373ea8dc4b6Seschrock 		}
374ea8dc4b6Seschrock 		if (err) {
375ea8dc4b6Seschrock 			/*
376ea8dc4b6Seschrock 			 * we don't really need to close the blist if we
377ea8dc4b6Seschrock 			 * just opened it.
378ea8dc4b6Seschrock 			 */
3795ad82045Snd 			mutex_destroy(&ds->ds_lock);
38091ebeef5Sahrens 			mutex_destroy(&ds->ds_opening_lock);
3815ad82045Snd 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
382745cd3c5Smaybee 			rw_destroy(&ds->ds_rwlock);
383745cd3c5Smaybee 			cv_destroy(&ds->ds_exclusive_cv);
384ea8dc4b6Seschrock 			kmem_free(ds, sizeof (dsl_dataset_t));
385ea8dc4b6Seschrock 			dmu_buf_rele(dbuf, tag);
386ea8dc4b6Seschrock 			return (err);
387ea8dc4b6Seschrock 		}
388fa9e4066Sahrens 
38974e7dc98SMatthew Ahrens 		if (!dsl_dataset_is_snapshot(ds)) {
390fa9e4066Sahrens 			ds->ds_snapname[0] = '\0';
391fa9e4066Sahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
392745cd3c5Smaybee 				err = dsl_dataset_get_ref(dp,
393745cd3c5Smaybee 				    ds->ds_phys->ds_prev_snap_obj,
394745cd3c5Smaybee 				    ds, &ds->ds_prev);
395fa9e4066Sahrens 			}
39674e7dc98SMatthew Ahrens 
39774e7dc98SMatthew Ahrens 			if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) {
39874e7dc98SMatthew Ahrens 				dsl_dataset_t *origin;
39974e7dc98SMatthew Ahrens 
40074e7dc98SMatthew Ahrens 				err = dsl_dataset_hold_obj(dp,
40174e7dc98SMatthew Ahrens 				    ds->ds_dir->dd_phys->dd_origin_obj,
40274e7dc98SMatthew Ahrens 				    FTAG, &origin);
40374e7dc98SMatthew Ahrens 				if (err == 0) {
40474e7dc98SMatthew Ahrens 					ds->ds_origin_txg =
40574e7dc98SMatthew Ahrens 					    origin->ds_phys->ds_creation_txg;
40674e7dc98SMatthew Ahrens 					dsl_dataset_rele(origin, FTAG);
40774e7dc98SMatthew Ahrens 				}
40874e7dc98SMatthew Ahrens 			}
409745cd3c5Smaybee 		} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
410745cd3c5Smaybee 			err = dsl_dataset_get_snapname(ds);
411fa9e4066Sahrens 		}
412fa9e4066Sahrens 
41374e7dc98SMatthew Ahrens 		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
41427345066Sck 			/*
41527345066Sck 			 * In sync context, we're called with either no lock
41627345066Sck 			 * or with the write lock.  If we're not syncing,
41727345066Sck 			 * we're always called with the read lock held.
41827345066Sck 			 */
419cb625fb5Sck 			boolean_t need_lock =
42027345066Sck 			    !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
42127345066Sck 			    dsl_pool_sync_context(dp);
422cb625fb5Sck 
423cb625fb5Sck 			if (need_lock)
424cb625fb5Sck 				rw_enter(&dp->dp_config_rwlock, RW_READER);
425cb625fb5Sck 
426bb0ade09Sahrens 			err = dsl_prop_get_ds(ds,
427cb625fb5Sck 			    "refreservation", sizeof (uint64_t), 1,
428cb625fb5Sck 			    &ds->ds_reserved, NULL);
429cb625fb5Sck 			if (err == 0) {
430bb0ade09Sahrens 				err = dsl_prop_get_ds(ds,
431cb625fb5Sck 				    "refquota", sizeof (uint64_t), 1,
432cb625fb5Sck 				    &ds->ds_quota, NULL);
433cb625fb5Sck 			}
434cb625fb5Sck 
435cb625fb5Sck 			if (need_lock)
436cb625fb5Sck 				rw_exit(&dp->dp_config_rwlock);
437cb625fb5Sck 		} else {
438cb625fb5Sck 			ds->ds_reserved = ds->ds_quota = 0;
439cb625fb5Sck 		}
440cb625fb5Sck 
441ea8dc4b6Seschrock 		if (err == 0) {
442ea8dc4b6Seschrock 			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
443ea8dc4b6Seschrock 			    dsl_dataset_evict);
444ea8dc4b6Seschrock 		}
445ea8dc4b6Seschrock 		if (err || winner) {
446fa9e4066Sahrens 			bplist_close(&ds->ds_deadlist);
447745cd3c5Smaybee 			if (ds->ds_prev)
448745cd3c5Smaybee 				dsl_dataset_drop_ref(ds->ds_prev, ds);
449fa9e4066Sahrens 			dsl_dir_close(ds->ds_dir, ds);
4505ad82045Snd 			mutex_destroy(&ds->ds_lock);
45191ebeef5Sahrens 			mutex_destroy(&ds->ds_opening_lock);
4525ad82045Snd 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
453745cd3c5Smaybee 			rw_destroy(&ds->ds_rwlock);
454745cd3c5Smaybee 			cv_destroy(&ds->ds_exclusive_cv);
455fa9e4066Sahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
456ea8dc4b6Seschrock 			if (err) {
457ea8dc4b6Seschrock 				dmu_buf_rele(dbuf, tag);
458ea8dc4b6Seschrock 				return (err);
459ea8dc4b6Seschrock 			}
460fa9e4066Sahrens 			ds = winner;
461fa9e4066Sahrens 		} else {
46291ebeef5Sahrens 			ds->ds_fsid_guid =
463fa9e4066Sahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
464fa9e4066Sahrens 		}
465fa9e4066Sahrens 	}
466fa9e4066Sahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
467fa9e4066Sahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
468088f3894Sahrens 	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
469afc6333aSahrens 	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
47084db2a68Sahrens 	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
471fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
472745cd3c5Smaybee 	if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
473fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
474745cd3c5Smaybee 		dmu_buf_rele(ds->ds_dbuf, tag);
475745cd3c5Smaybee 		return (ENOENT);
476fa9e4066Sahrens 	}
477fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
478ea8dc4b6Seschrock 	*dsp = ds;
479ea8dc4b6Seschrock 	return (0);
480fa9e4066Sahrens }
481fa9e4066Sahrens 
482745cd3c5Smaybee static int
483745cd3c5Smaybee dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
484745cd3c5Smaybee {
485745cd3c5Smaybee 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
486745cd3c5Smaybee 
487745cd3c5Smaybee 	/*
488745cd3c5Smaybee 	 * In syncing context we don't want the rwlock lock: there
489745cd3c5Smaybee 	 * may be an existing writer waiting for sync phase to
490745cd3c5Smaybee 	 * finish.  We don't need to worry about such writers, since
491745cd3c5Smaybee 	 * sync phase is single-threaded, so the writer can't be
492745cd3c5Smaybee 	 * doing anything while we are active.
493745cd3c5Smaybee 	 */
494745cd3c5Smaybee 	if (dsl_pool_sync_context(dp)) {
495745cd3c5Smaybee 		ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
496745cd3c5Smaybee 		return (0);
497745cd3c5Smaybee 	}
498745cd3c5Smaybee 
499745cd3c5Smaybee 	/*
500745cd3c5Smaybee 	 * Normal users will hold the ds_rwlock as a READER until they
501745cd3c5Smaybee 	 * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
502745cd3c5Smaybee 	 * drop their READER lock after they set the ds_owner field.
503745cd3c5Smaybee 	 *
504745cd3c5Smaybee 	 * If the dataset is being destroyed, the destroy thread will
505745cd3c5Smaybee 	 * obtain a WRITER lock for exclusive access after it's done its
506745cd3c5Smaybee 	 * open-context work and then change the ds_owner to
507745cd3c5Smaybee 	 * dsl_reaper once destruction is assured.  So threads
508745cd3c5Smaybee 	 * may block here temporarily, until the "destructability" of
509745cd3c5Smaybee 	 * the dataset is determined.
510745cd3c5Smaybee 	 */
511745cd3c5Smaybee 	ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
512745cd3c5Smaybee 	mutex_enter(&ds->ds_lock);
513745cd3c5Smaybee 	while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
514745cd3c5Smaybee 		rw_exit(&dp->dp_config_rwlock);
515745cd3c5Smaybee 		cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
516745cd3c5Smaybee 		if (DSL_DATASET_IS_DESTROYED(ds)) {
517745cd3c5Smaybee 			mutex_exit(&ds->ds_lock);
518745cd3c5Smaybee 			dsl_dataset_drop_ref(ds, tag);
519745cd3c5Smaybee 			rw_enter(&dp->dp_config_rwlock, RW_READER);
520745cd3c5Smaybee 			return (ENOENT);
521745cd3c5Smaybee 		}
522745cd3c5Smaybee 		rw_enter(&dp->dp_config_rwlock, RW_READER);
523745cd3c5Smaybee 	}
524745cd3c5Smaybee 	mutex_exit(&ds->ds_lock);
525745cd3c5Smaybee 	return (0);
526745cd3c5Smaybee }
527745cd3c5Smaybee 
528745cd3c5Smaybee int
529745cd3c5Smaybee dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
530745cd3c5Smaybee     dsl_dataset_t **dsp)
531745cd3c5Smaybee {
532745cd3c5Smaybee 	int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);
533745cd3c5Smaybee 
534745cd3c5Smaybee 	if (err)
535745cd3c5Smaybee 		return (err);
536745cd3c5Smaybee 	return (dsl_dataset_hold_ref(*dsp, tag));
537745cd3c5Smaybee }
538745cd3c5Smaybee 
539745cd3c5Smaybee int
540745cd3c5Smaybee dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner,
541745cd3c5Smaybee     dsl_dataset_t **dsp)
542745cd3c5Smaybee {
543745cd3c5Smaybee 	int err = dsl_dataset_hold_obj(dp, dsobj, owner, dsp);
544745cd3c5Smaybee 
545745cd3c5Smaybee 	ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER);
546745cd3c5Smaybee 
547745cd3c5Smaybee 	if (err)
548745cd3c5Smaybee 		return (err);
549745cd3c5Smaybee 	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
550745cd3c5Smaybee 		dsl_dataset_rele(*dsp, owner);
551745cd3c5Smaybee 		return (EBUSY);
552745cd3c5Smaybee 	}
553745cd3c5Smaybee 	return (0);
554745cd3c5Smaybee }
555745cd3c5Smaybee 
556fa9e4066Sahrens int
557745cd3c5Smaybee dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
558fa9e4066Sahrens {
559fa9e4066Sahrens 	dsl_dir_t *dd;
560fa9e4066Sahrens 	dsl_pool_t *dp;
561745cd3c5Smaybee 	const char *snapname;
562fa9e4066Sahrens 	uint64_t obj;
563fa9e4066Sahrens 	int err = 0;
564fa9e4066Sahrens 
565745cd3c5Smaybee 	err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
566ea8dc4b6Seschrock 	if (err)
567ea8dc4b6Seschrock 		return (err);
568fa9e4066Sahrens 
569fa9e4066Sahrens 	dp = dd->dd_pool;
570fa9e4066Sahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
571fa9e4066Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
572745cd3c5Smaybee 	if (obj)
573745cd3c5Smaybee 		err = dsl_dataset_get_ref(dp, obj, tag, dsp);
574745cd3c5Smaybee 	else
575fa9e4066Sahrens 		err = ENOENT;
576745cd3c5Smaybee 	if (err)
577fa9e4066Sahrens 		goto out;
578fa9e4066Sahrens 
579745cd3c5Smaybee 	err = dsl_dataset_hold_ref(*dsp, tag);
580fa9e4066Sahrens 
581745cd3c5Smaybee 	/* we may be looking for a snapshot */
582745cd3c5Smaybee 	if (err == 0 && snapname != NULL) {
583745cd3c5Smaybee 		dsl_dataset_t *ds = NULL;
584fa9e4066Sahrens 
585745cd3c5Smaybee 		if (*snapname++ != '@') {
586745cd3c5Smaybee 			dsl_dataset_rele(*dsp, tag);
587fa9e4066Sahrens 			err = ENOENT;
588fa9e4066Sahrens 			goto out;
589fa9e4066Sahrens 		}
590fa9e4066Sahrens 
591745cd3c5Smaybee 		dprintf("looking for snapshot '%s'\n", snapname);
592745cd3c5Smaybee 		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
593745cd3c5Smaybee 		if (err == 0)
594745cd3c5Smaybee 			err = dsl_dataset_get_ref(dp, obj, tag, &ds);
595745cd3c5Smaybee 		dsl_dataset_rele(*dsp, tag);
596745cd3c5Smaybee 
597745cd3c5Smaybee 		ASSERT3U((err == 0), ==, (ds != NULL));
598745cd3c5Smaybee 
599745cd3c5Smaybee 		if (ds) {
600745cd3c5Smaybee 			mutex_enter(&ds->ds_lock);
601745cd3c5Smaybee 			if (ds->ds_snapname[0] == 0)
602745cd3c5Smaybee 				(void) strlcpy(ds->ds_snapname, snapname,
603745cd3c5Smaybee 				    sizeof (ds->ds_snapname));
604745cd3c5Smaybee 			mutex_exit(&ds->ds_lock);
605745cd3c5Smaybee 			err = dsl_dataset_hold_ref(ds, tag);
606745cd3c5Smaybee 			*dsp = err ? NULL : ds;
607fa9e4066Sahrens 		}
608fa9e4066Sahrens 	}
609fa9e4066Sahrens out:
610fa9e4066Sahrens 	rw_exit(&dp->dp_config_rwlock);
611fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
612fa9e4066Sahrens 	return (err);
613fa9e4066Sahrens }
614fa9e4066Sahrens 
615fa9e4066Sahrens int
616745cd3c5Smaybee dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp)
617fa9e4066Sahrens {
618745cd3c5Smaybee 	int err = dsl_dataset_hold(name, owner, dsp);
619745cd3c5Smaybee 	if (err)
620745cd3c5Smaybee 		return (err);
621745cd3c5Smaybee 	if ((*dsp)->ds_phys->ds_num_children > 0 &&
622745cd3c5Smaybee 	    !DS_MODE_IS_READONLY(flags)) {
623745cd3c5Smaybee 		dsl_dataset_rele(*dsp, owner);
624745cd3c5Smaybee 		return (EROFS);
625745cd3c5Smaybee 	}
626745cd3c5Smaybee 	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
627745cd3c5Smaybee 		dsl_dataset_rele(*dsp, owner);
628745cd3c5Smaybee 		return (EBUSY);
629745cd3c5Smaybee 	}
630745cd3c5Smaybee 	return (0);
631fa9e4066Sahrens }
632fa9e4066Sahrens 
633fa9e4066Sahrens void
634fa9e4066Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
635fa9e4066Sahrens {
636fa9e4066Sahrens 	if (ds == NULL) {
637fa9e4066Sahrens 		(void) strcpy(name, "mos");
638fa9e4066Sahrens 	} else {
639fa9e4066Sahrens 		dsl_dir_name(ds->ds_dir, name);
640ea8dc4b6Seschrock 		VERIFY(0 == dsl_dataset_get_snapname(ds));
641fa9e4066Sahrens 		if (ds->ds_snapname[0]) {
642fa9e4066Sahrens 			(void) strcat(name, "@");
643745cd3c5Smaybee 			/*
644745cd3c5Smaybee 			 * We use a "recursive" mutex so that we
645745cd3c5Smaybee 			 * can call dprintf_ds() with ds_lock held.
646745cd3c5Smaybee 			 */
647fa9e4066Sahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
648fa9e4066Sahrens 				mutex_enter(&ds->ds_lock);
649fa9e4066Sahrens 				(void) strcat(name, ds->ds_snapname);
650fa9e4066Sahrens 				mutex_exit(&ds->ds_lock);
651fa9e4066Sahrens 			} else {
652fa9e4066Sahrens 				(void) strcat(name, ds->ds_snapname);
653fa9e4066Sahrens 			}
654fa9e4066Sahrens 		}
655fa9e4066Sahrens 	}
656fa9e4066Sahrens }
657fa9e4066Sahrens 
658b7661cccSmmusante static int
659b7661cccSmmusante dsl_dataset_namelen(dsl_dataset_t *ds)
660b7661cccSmmusante {
661b7661cccSmmusante 	int result;
662b7661cccSmmusante 
663b7661cccSmmusante 	if (ds == NULL) {
664b7661cccSmmusante 		result = 3;	/* "mos" */
665b7661cccSmmusante 	} else {
666b7661cccSmmusante 		result = dsl_dir_namelen(ds->ds_dir);
667b7661cccSmmusante 		VERIFY(0 == dsl_dataset_get_snapname(ds));
668b7661cccSmmusante 		if (ds->ds_snapname[0]) {
669b7661cccSmmusante 			++result;	/* adding one for the @-sign */
670b7661cccSmmusante 			if (!MUTEX_HELD(&ds->ds_lock)) {
671b7661cccSmmusante 				mutex_enter(&ds->ds_lock);
672b7661cccSmmusante 				result += strlen(ds->ds_snapname);
673b7661cccSmmusante 				mutex_exit(&ds->ds_lock);
674b7661cccSmmusante 			} else {
675b7661cccSmmusante 				result += strlen(ds->ds_snapname);
676b7661cccSmmusante 			}
677b7661cccSmmusante 		}
678b7661cccSmmusante 	}
679b7661cccSmmusante 
680b7661cccSmmusante 	return (result);
681b7661cccSmmusante }
682b7661cccSmmusante 
683088f3894Sahrens void
684745cd3c5Smaybee dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
685fa9e4066Sahrens {
686ea8dc4b6Seschrock 	dmu_buf_rele(ds->ds_dbuf, tag);
687fa9e4066Sahrens }
688fa9e4066Sahrens 
6893cb34c60Sahrens void
690745cd3c5Smaybee dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
6913cb34c60Sahrens {
692745cd3c5Smaybee 	if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
693745cd3c5Smaybee 		rw_exit(&ds->ds_rwlock);
694745cd3c5Smaybee 	}
695745cd3c5Smaybee 	dsl_dataset_drop_ref(ds, tag);
696745cd3c5Smaybee }
697745cd3c5Smaybee 
698745cd3c5Smaybee void
699745cd3c5Smaybee dsl_dataset_disown(dsl_dataset_t *ds, void *owner)
700745cd3c5Smaybee {
701745cd3c5Smaybee 	ASSERT((ds->ds_owner == owner && ds->ds_dbuf) ||
702745cd3c5Smaybee 	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));
703745cd3c5Smaybee 
7043cb34c60Sahrens 	mutex_enter(&ds->ds_lock);
705745cd3c5Smaybee 	ds->ds_owner = NULL;
706745cd3c5Smaybee 	if (RW_WRITE_HELD(&ds->ds_rwlock)) {
707745cd3c5Smaybee 		rw_exit(&ds->ds_rwlock);
708745cd3c5Smaybee 		cv_broadcast(&ds->ds_exclusive_cv);
709745cd3c5Smaybee 	}
7103cb34c60Sahrens 	mutex_exit(&ds->ds_lock);
711745cd3c5Smaybee 	if (ds->ds_dbuf)
712745cd3c5Smaybee 		dsl_dataset_drop_ref(ds, owner);
713745cd3c5Smaybee 	else
714745cd3c5Smaybee 		dsl_dataset_evict(ds->ds_dbuf, ds);
7153cb34c60Sahrens }
7163cb34c60Sahrens 
7173cb34c60Sahrens boolean_t
718745cd3c5Smaybee dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner)
7193cb34c60Sahrens {
720745cd3c5Smaybee 	boolean_t gotit = FALSE;
721745cd3c5Smaybee 
7223cb34c60Sahrens 	mutex_enter(&ds->ds_lock);
723745cd3c5Smaybee 	if (ds->ds_owner == NULL &&
724745cd3c5Smaybee 	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
725745cd3c5Smaybee 		ds->ds_owner = owner;
726745cd3c5Smaybee 		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
727745cd3c5Smaybee 			rw_exit(&ds->ds_rwlock);
728745cd3c5Smaybee 		gotit = TRUE;
7293cb34c60Sahrens 	}
7303cb34c60Sahrens 	mutex_exit(&ds->ds_lock);
731745cd3c5Smaybee 	return (gotit);
732745cd3c5Smaybee }
733745cd3c5Smaybee 
734745cd3c5Smaybee void
735745cd3c5Smaybee dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
736745cd3c5Smaybee {
737745cd3c5Smaybee 	ASSERT3P(owner, ==, ds->ds_owner);
738745cd3c5Smaybee 	if (!RW_WRITE_HELD(&ds->ds_rwlock))
739745cd3c5Smaybee 		rw_enter(&ds->ds_rwlock, RW_WRITER);
7403cb34c60Sahrens }
7413cb34c60Sahrens 
7421d452cf5Sahrens uint64_t
743088f3894Sahrens dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
744ab04eb8eStimh     uint64_t flags, dmu_tx_t *tx)
745fa9e4066Sahrens {
7463cb34c60Sahrens 	dsl_pool_t *dp = dd->dd_pool;
747fa9e4066Sahrens 	dmu_buf_t *dbuf;
748fa9e4066Sahrens 	dsl_dataset_phys_t *dsphys;
7493cb34c60Sahrens 	uint64_t dsobj;
750fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
751fa9e4066Sahrens 
752088f3894Sahrens 	if (origin == NULL)
753088f3894Sahrens 		origin = dp->dp_origin_snap;
754088f3894Sahrens 
7553cb34c60Sahrens 	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
7563cb34c60Sahrens 	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
757fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
7583cb34c60Sahrens 	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
759fa9e4066Sahrens 
7601649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
7611649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
762ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
763fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
764fa9e4066Sahrens 	dsphys = dbuf->db_data;
765745cd3c5Smaybee 	bzero(dsphys, sizeof (dsl_dataset_phys_t));
766fa9e4066Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
767ab04eb8eStimh 	dsphys->ds_flags = flags;
768fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
769fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
770fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
771fa9e4066Sahrens 	dsphys->ds_snapnames_zapobj =
772ab04eb8eStimh 	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
773ab04eb8eStimh 	    DMU_OT_NONE, 0, tx);
774fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
775088f3894Sahrens 	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
776fa9e4066Sahrens 	dsphys->ds_deadlist_obj =
777fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
778a9799022Sck 
7793cb34c60Sahrens 	if (origin) {
7803cb34c60Sahrens 		dsphys->ds_prev_snap_obj = origin->ds_object;
781fa9e4066Sahrens 		dsphys->ds_prev_snap_txg =
7823cb34c60Sahrens 		    origin->ds_phys->ds_creation_txg;
783fa9e4066Sahrens 		dsphys->ds_used_bytes =
7843cb34c60Sahrens 		    origin->ds_phys->ds_used_bytes;
785fa9e4066Sahrens 		dsphys->ds_compressed_bytes =
7863cb34c60Sahrens 		    origin->ds_phys->ds_compressed_bytes;
787fa9e4066Sahrens 		dsphys->ds_uncompressed_bytes =
7883cb34c60Sahrens 		    origin->ds_phys->ds_uncompressed_bytes;
7893cb34c60Sahrens 		dsphys->ds_bp = origin->ds_phys->ds_bp;
790579ae4d5Stimh 		dsphys->ds_flags |= origin->ds_phys->ds_flags;
791fa9e4066Sahrens 
7923cb34c60Sahrens 		dmu_buf_will_dirty(origin->ds_dbuf, tx);
7933cb34c60Sahrens 		origin->ds_phys->ds_num_children++;
794fa9e4066Sahrens 
795088f3894Sahrens 		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
796088f3894Sahrens 			if (origin->ds_phys->ds_next_clones_obj == 0) {
797088f3894Sahrens 				origin->ds_phys->ds_next_clones_obj =
798088f3894Sahrens 				    zap_create(mos,
799088f3894Sahrens 				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
800088f3894Sahrens 			}
801088f3894Sahrens 			VERIFY(0 == zap_add_int(mos,
802088f3894Sahrens 			    origin->ds_phys->ds_next_clones_obj,
803088f3894Sahrens 			    dsobj, tx));
804088f3894Sahrens 		}
805088f3894Sahrens 
806fa9e4066Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
8073cb34c60Sahrens 		dd->dd_phys->dd_origin_obj = origin->ds_object;
808fa9e4066Sahrens 	}
809ab04eb8eStimh 
810ab04eb8eStimh 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
811ab04eb8eStimh 		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
812ab04eb8eStimh 
813ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
814fa9e4066Sahrens 
815fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
816fa9e4066Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
8173cb34c60Sahrens 
8183cb34c60Sahrens 	return (dsobj);
8193cb34c60Sahrens }
8203cb34c60Sahrens 
8213cb34c60Sahrens uint64_t
822ab04eb8eStimh dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
823ab04eb8eStimh     dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
8243cb34c60Sahrens {
8253cb34c60Sahrens 	dsl_pool_t *dp = pdd->dd_pool;
8263cb34c60Sahrens 	uint64_t dsobj, ddobj;
8273cb34c60Sahrens 	dsl_dir_t *dd;
8283cb34c60Sahrens 
8293cb34c60Sahrens 	ASSERT(lastname[0] != '@');
8303cb34c60Sahrens 
831088f3894Sahrens 	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
8323cb34c60Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
8333cb34c60Sahrens 
834088f3894Sahrens 	dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);
8353cb34c60Sahrens 
8363cb34c60Sahrens 	dsl_deleg_set_create_perms(dd, tx, cr);
8373cb34c60Sahrens 
838fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
839fa9e4066Sahrens 
8401d452cf5Sahrens 	return (dsobj);
841fa9e4066Sahrens }
842fa9e4066Sahrens 
8431d452cf5Sahrens struct destroyarg {
8441d452cf5Sahrens 	dsl_sync_task_group_t *dstg;
8451d452cf5Sahrens 	char *snapname;
8461d452cf5Sahrens 	char *failed;
8471d452cf5Sahrens };
8481d452cf5Sahrens 
8491d452cf5Sahrens static int
8501d452cf5Sahrens dsl_snapshot_destroy_one(char *name, void *arg)
851fa9e4066Sahrens {
8521d452cf5Sahrens 	struct destroyarg *da = arg;
8531d452cf5Sahrens 	dsl_dataset_t *ds;
8541d452cf5Sahrens 	char *cp;
855fa9e4066Sahrens 	int err;
856fa9e4066Sahrens 
8571d452cf5Sahrens 	(void) strcat(name, "@");
8581d452cf5Sahrens 	(void) strcat(name, da->snapname);
859745cd3c5Smaybee 	err = dsl_dataset_own(name, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
860cdf5b4caSmmusante 	    da->dstg, &ds);
8611d452cf5Sahrens 	cp = strchr(name, '@');
8621d452cf5Sahrens 	*cp = '\0';
863745cd3c5Smaybee 	if (err == 0) {
864745cd3c5Smaybee 		dsl_dataset_make_exclusive(ds, da->dstg);
8653baa08fcSek 		if (ds->ds_user_ptr) {
8663baa08fcSek 			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
8673baa08fcSek 			ds->ds_user_ptr = NULL;
8683baa08fcSek 		}
869745cd3c5Smaybee 		dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
870745cd3c5Smaybee 		    dsl_dataset_destroy_sync, ds, da->dstg, 0);
871745cd3c5Smaybee 	} else if (err == ENOENT) {
872745cd3c5Smaybee 		err = 0;
873745cd3c5Smaybee 	} else {
8741d452cf5Sahrens 		(void) strcpy(da->failed, name);
8751d452cf5Sahrens 	}
876745cd3c5Smaybee 	return (err);
8771d452cf5Sahrens }
87831fd60d3Sahrens 
8791d452cf5Sahrens /*
8801d452cf5Sahrens  * Destroy 'snapname' in all descendants of 'fsname'.
8811d452cf5Sahrens  */
8821d452cf5Sahrens #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
8831d452cf5Sahrens int
8841d452cf5Sahrens dsl_snapshots_destroy(char *fsname, char *snapname)
8851d452cf5Sahrens {
8861d452cf5Sahrens 	int err;
8871d452cf5Sahrens 	struct destroyarg da;
8881d452cf5Sahrens 	dsl_sync_task_t *dst;
8891d452cf5Sahrens 	spa_t *spa;
8901d452cf5Sahrens 
89140feaa91Sahrens 	err = spa_open(fsname, &spa, FTAG);
8921d452cf5Sahrens 	if (err)
8931d452cf5Sahrens 		return (err);
8941d452cf5Sahrens 	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
8951d452cf5Sahrens 	da.snapname = snapname;
8961d452cf5Sahrens 	da.failed = fsname;
8971d452cf5Sahrens 
8981d452cf5Sahrens 	err = dmu_objset_find(fsname,
8990b69c2f0Sahrens 	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
9001d452cf5Sahrens 
9011d452cf5Sahrens 	if (err == 0)
9021d452cf5Sahrens 		err = dsl_sync_task_group_wait(da.dstg);
9031d452cf5Sahrens 
9041d452cf5Sahrens 	for (dst = list_head(&da.dstg->dstg_tasks); dst;
9051d452cf5Sahrens 	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
9061d452cf5Sahrens 		dsl_dataset_t *ds = dst->dst_arg1;
907745cd3c5Smaybee 		/*
908745cd3c5Smaybee 		 * Return the file system name that triggered the error
909745cd3c5Smaybee 		 */
9101d452cf5Sahrens 		if (dst->dst_err) {
9111d452cf5Sahrens 			dsl_dataset_name(ds, fsname);
91240feaa91Sahrens 			*strchr(fsname, '@') = '\0';
913e1930233Sbonwick 		}
914745cd3c5Smaybee 		dsl_dataset_disown(ds, da.dstg);
915fa9e4066Sahrens 	}
916fa9e4066Sahrens 
9171d452cf5Sahrens 	dsl_sync_task_group_destroy(da.dstg);
9181d452cf5Sahrens 	spa_close(spa, FTAG);
919fa9e4066Sahrens 	return (err);
920fa9e4066Sahrens }
921fa9e4066Sahrens 
9223cb34c60Sahrens /*
923745cd3c5Smaybee  * ds must be opened as OWNER.  On return (whether successful or not),
924745cd3c5Smaybee  * ds will be closed and caller can no longer dereference it.
9253cb34c60Sahrens  */
926fa9e4066Sahrens int
9273cb34c60Sahrens dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
928fa9e4066Sahrens {
929fa9e4066Sahrens 	int err;
9301d452cf5Sahrens 	dsl_sync_task_group_t *dstg;
9311d452cf5Sahrens 	objset_t *os;
932fa9e4066Sahrens 	dsl_dir_t *dd;
9331d452cf5Sahrens 	uint64_t obj;
9341d452cf5Sahrens 
9353cb34c60Sahrens 	if (dsl_dataset_is_snapshot(ds)) {
9361d452cf5Sahrens 		/* Destroying a snapshot is simpler */
937745cd3c5Smaybee 		dsl_dataset_make_exclusive(ds, tag);
9383baa08fcSek 
9393baa08fcSek 		if (ds->ds_user_ptr) {
9403baa08fcSek 			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
9413baa08fcSek 			ds->ds_user_ptr = NULL;
9423baa08fcSek 		}
9431d452cf5Sahrens 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
9441d452cf5Sahrens 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
9453cb34c60Sahrens 		    ds, tag, 0);
9463cb34c60Sahrens 		goto out;
9471d452cf5Sahrens 	}
948fa9e4066Sahrens 
9491d452cf5Sahrens 	dd = ds->ds_dir;
950fa9e4066Sahrens 
9511d452cf5Sahrens 	/*
9521d452cf5Sahrens 	 * Check for errors and mark this ds as inconsistent, in
9531d452cf5Sahrens 	 * case we crash while freeing the objects.
9541d452cf5Sahrens 	 */
9551d452cf5Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
9561d452cf5Sahrens 	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
9573cb34c60Sahrens 	if (err)
9583cb34c60Sahrens 		goto out;
9593cb34c60Sahrens 
9603cb34c60Sahrens 	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
9613cb34c60Sahrens 	if (err)
9623cb34c60Sahrens 		goto out;
963fa9e4066Sahrens 
9641d452cf5Sahrens 	/*
9651d452cf5Sahrens 	 * remove the objects in open context, so that we won't
9661d452cf5Sahrens 	 * have too much to do in syncing context.
9671d452cf5Sahrens 	 */
9686754306eSahrens 	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
9696754306eSahrens 	    ds->ds_phys->ds_prev_snap_txg)) {
970cdb0ab79Smaybee 		/*
971cdb0ab79Smaybee 		 * Ignore errors, if there is not enough disk space
972cdb0ab79Smaybee 		 * we will deal with it in dsl_dataset_destroy_sync().
973cdb0ab79Smaybee 		 */
974cdb0ab79Smaybee 		(void) dmu_free_object(os, obj);
9751d452cf5Sahrens 	}
9761d452cf5Sahrens 
9771d452cf5Sahrens 	dmu_objset_close(os);
9781d452cf5Sahrens 	if (err != ESRCH)
9793cb34c60Sahrens 		goto out;
9801d452cf5Sahrens 
98168038c2cSmaybee 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
98268038c2cSmaybee 	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
98368038c2cSmaybee 	rw_exit(&dd->dd_pool->dp_config_rwlock);
98468038c2cSmaybee 
98568038c2cSmaybee 	if (err)
98668038c2cSmaybee 		goto out;
98768038c2cSmaybee 
9883cb34c60Sahrens 	if (ds->ds_user_ptr) {
989745cd3c5Smaybee 		/*
990745cd3c5Smaybee 		 * We need to sync out all in-flight IO before we try
991745cd3c5Smaybee 		 * to evict (the dataset evict func is trying to clear
992745cd3c5Smaybee 		 * the cached entries for this dataset in the ARC).
993745cd3c5Smaybee 		 */
994745cd3c5Smaybee 		txg_wait_synced(dd->dd_pool, 0);
9951d452cf5Sahrens 	}
9961d452cf5Sahrens 
9971d452cf5Sahrens 	/*
9981d452cf5Sahrens 	 * Blow away the dsl_dir + head dataset.
9991d452cf5Sahrens 	 */
1000745cd3c5Smaybee 	dsl_dataset_make_exclusive(ds, tag);
100168038c2cSmaybee 	if (ds->ds_user_ptr) {
100268038c2cSmaybee 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
100368038c2cSmaybee 		ds->ds_user_ptr = NULL;
100468038c2cSmaybee 	}
10051d452cf5Sahrens 	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
10061d452cf5Sahrens 	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
10073cb34c60Sahrens 	    dsl_dataset_destroy_sync, ds, tag, 0);
10081d452cf5Sahrens 	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
10091d452cf5Sahrens 	    dsl_dir_destroy_sync, dd, FTAG, 0);
10101d452cf5Sahrens 	err = dsl_sync_task_group_wait(dstg);
10111d452cf5Sahrens 	dsl_sync_task_group_destroy(dstg);
1012745cd3c5Smaybee 	/* if it is successful, dsl_dir_destroy_sync will close the dd */
10133cb34c60Sahrens 	if (err)
10141d452cf5Sahrens 		dsl_dir_close(dd, FTAG);
10153cb34c60Sahrens out:
1016745cd3c5Smaybee 	dsl_dataset_disown(ds, tag);
1017fa9e4066Sahrens 	return (err);
1018fa9e4066Sahrens }
1019fa9e4066Sahrens 
10201d452cf5Sahrens int
10213cb34c60Sahrens dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
10221d452cf5Sahrens {
10231c8564a7SMark Maybee 	int err;
10241c8564a7SMark Maybee 
1025745cd3c5Smaybee 	ASSERT(ds->ds_owner);
10263cb34c60Sahrens 
10271c8564a7SMark Maybee 	dsl_dataset_make_exclusive(ds, ds->ds_owner);
10281c8564a7SMark Maybee 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
10291d452cf5Sahrens 	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
10301c8564a7SMark Maybee 	    ds, &ost, 0);
10311c8564a7SMark Maybee 	/* drop exclusive access */
10321c8564a7SMark Maybee 	mutex_enter(&ds->ds_lock);
10331c8564a7SMark Maybee 	rw_exit(&ds->ds_rwlock);
10341c8564a7SMark Maybee 	cv_broadcast(&ds->ds_exclusive_cv);
10351c8564a7SMark Maybee 	mutex_exit(&ds->ds_lock);
10361c8564a7SMark Maybee 	return (err);
10371d452cf5Sahrens }
10381d452cf5Sahrens 
1039fa9e4066Sahrens void *
1040fa9e4066Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
1041fa9e4066Sahrens     void *p, dsl_dataset_evict_func_t func)
1042fa9e4066Sahrens {
1043fa9e4066Sahrens 	void *old;
1044fa9e4066Sahrens 
1045fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
1046fa9e4066Sahrens 	old = ds->ds_user_ptr;
1047fa9e4066Sahrens 	if (old == NULL) {
1048fa9e4066Sahrens 		ds->ds_user_ptr = p;
1049fa9e4066Sahrens 		ds->ds_user_evict_func = func;
1050fa9e4066Sahrens 	}
1051fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
1052fa9e4066Sahrens 	return (old);
1053fa9e4066Sahrens }
1054fa9e4066Sahrens 
1055fa9e4066Sahrens void *
1056fa9e4066Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
1057fa9e4066Sahrens {
1058fa9e4066Sahrens 	return (ds->ds_user_ptr);
1059fa9e4066Sahrens }
1060fa9e4066Sahrens 
1061fa9e4066Sahrens 
1062c717a561Smaybee blkptr_t *
1063c717a561Smaybee dsl_dataset_get_blkptr(dsl_dataset_t *ds)
1064fa9e4066Sahrens {
1065c717a561Smaybee 	return (&ds->ds_phys->ds_bp);
1066fa9e4066Sahrens }
1067fa9e4066Sahrens 
1068fa9e4066Sahrens void
1069fa9e4066Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
1070fa9e4066Sahrens {
1071fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1072fa9e4066Sahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
1073fa9e4066Sahrens 	if (ds == NULL) {
1074fa9e4066Sahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
1075fa9e4066Sahrens 	} else {
1076fa9e4066Sahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
1077fa9e4066Sahrens 		ds->ds_phys->ds_bp = *bp;
1078fa9e4066Sahrens 	}
1079fa9e4066Sahrens }
1080fa9e4066Sahrens 
1081fa9e4066Sahrens spa_t *
1082fa9e4066Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
1083fa9e4066Sahrens {
1084fa9e4066Sahrens 	return (ds->ds_dir->dd_pool->dp_spa);
1085fa9e4066Sahrens }
1086fa9e4066Sahrens 
1087fa9e4066Sahrens void
1088fa9e4066Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
1089fa9e4066Sahrens {
1090fa9e4066Sahrens 	dsl_pool_t *dp;
1091fa9e4066Sahrens 
1092fa9e4066Sahrens 	if (ds == NULL) /* this is the meta-objset */
1093fa9e4066Sahrens 		return;
1094fa9e4066Sahrens 
1095fa9e4066Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1096a2eea2e1Sahrens 
1097a2eea2e1Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
1098a2eea2e1Sahrens 		panic("dirtying snapshot!");
1099fa9e4066Sahrens 
1100fa9e4066Sahrens 	dp = ds->ds_dir->dd_pool;
1101fa9e4066Sahrens 
1102fa9e4066Sahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
1103fa9e4066Sahrens 		/* up the hold count until we can be written out */
1104fa9e4066Sahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
1105fa9e4066Sahrens 	}
1106fa9e4066Sahrens }
1107fa9e4066Sahrens 
1108a9799022Sck /*
1109a9799022Sck  * The unique space in the head dataset can be calculated by subtracting
1110a9799022Sck  * the space used in the most recent snapshot, that is still being used
1111a9799022Sck  * in this file system, from the space currently in use.  To figure out
1112a9799022Sck  * the space in the most recent snapshot still in use, we need to take
1113a9799022Sck  * the total space used in the snapshot and subtract out the space that
1114a9799022Sck  * has been freed up since the snapshot was taken.
1115a9799022Sck  */
1116a9799022Sck static void
1117a9799022Sck dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
1118a9799022Sck {
1119a9799022Sck 	uint64_t mrs_used;
1120a9799022Sck 	uint64_t dlused, dlcomp, dluncomp;
1121a9799022Sck 
1122a9799022Sck 	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);
1123a9799022Sck 
1124a9799022Sck 	if (ds->ds_phys->ds_prev_snap_obj != 0)
1125a9799022Sck 		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
1126a9799022Sck 	else
1127a9799022Sck 		mrs_used = 0;
1128a9799022Sck 
1129a9799022Sck 	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
1130a9799022Sck 	    &dluncomp));
1131a9799022Sck 
1132a9799022Sck 	ASSERT3U(dlused, <=, mrs_used);
1133a9799022Sck 	ds->ds_phys->ds_unique_bytes =
1134a9799022Sck 	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
1135a9799022Sck 
1136a9799022Sck 	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
1137a9799022Sck 	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
1138a9799022Sck 	    SPA_VERSION_UNIQUE_ACCURATE)
1139a9799022Sck 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
1140a9799022Sck }
1141a9799022Sck 
1142a9799022Sck static uint64_t
1143a9799022Sck dsl_dataset_unique(dsl_dataset_t *ds)
1144a9799022Sck {
1145a9799022Sck 	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
1146a9799022Sck 		dsl_dataset_recalc_head_uniq(ds);
1147a9799022Sck 
1148a9799022Sck 	return (ds->ds_phys->ds_unique_bytes);
1149a9799022Sck }
1150a9799022Sck 
1151fa9e4066Sahrens struct killarg {
115274e7dc98SMatthew Ahrens 	dsl_dataset_t *ds;
1153fa9e4066Sahrens 	zio_t *zio;
1154fa9e4066Sahrens 	dmu_tx_t *tx;
1155fa9e4066Sahrens };
1156fa9e4066Sahrens 
115774e7dc98SMatthew Ahrens /* ARGSUSED */
1158fa9e4066Sahrens static int
115988b7b0f2SMatthew Ahrens kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
116088b7b0f2SMatthew Ahrens     const dnode_phys_t *dnp, void *arg)
1161fa9e4066Sahrens {
1162fa9e4066Sahrens 	struct killarg *ka = arg;
1163fa9e4066Sahrens 
116488b7b0f2SMatthew Ahrens 	if (bp == NULL)
116588b7b0f2SMatthew Ahrens 		return (0);
1166fa9e4066Sahrens 
116774e7dc98SMatthew Ahrens 	ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
116874e7dc98SMatthew Ahrens 	(void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx);
116974e7dc98SMatthew Ahrens 
1170fa9e4066Sahrens 	return (0);
1171fa9e4066Sahrens }
1172fa9e4066Sahrens 
1173fa9e4066Sahrens /* ARGSUSED */
11741d452cf5Sahrens static int
11751d452cf5Sahrens dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
1176fa9e4066Sahrens {
11771d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
11783cb34c60Sahrens 	dmu_objset_type_t *ost = arg2;
1179fa9e4066Sahrens 
11801d452cf5Sahrens 	/*
11813cb34c60Sahrens 	 * We can only roll back to emptyness if it is a ZPL objset.
11821d452cf5Sahrens 	 */
11833cb34c60Sahrens 	if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0)
1184fa9e4066Sahrens 		return (EINVAL);
1185fa9e4066Sahrens 
11861d452cf5Sahrens 	/*
11871d452cf5Sahrens 	 * This must not be a snapshot.
11881d452cf5Sahrens 	 */
11891d452cf5Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
1190fa9e4066Sahrens 		return (EINVAL);
1191fa9e4066Sahrens 
1192fa9e4066Sahrens 	/*
119388b7b0f2SMatthew Ahrens 	 * If we made changes this txg, traverse_dataset won't find
1194fa9e4066Sahrens 	 * them.  Try again.
1195fa9e4066Sahrens 	 */
11961d452cf5Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1197fa9e4066Sahrens 		return (EAGAIN);
1198fa9e4066Sahrens 
11991d452cf5Sahrens 	return (0);
12001d452cf5Sahrens }
12011d452cf5Sahrens 
12021d452cf5Sahrens /* ARGSUSED */
12031d452cf5Sahrens static void
1204ecd6cf80Smarks dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
12051d452cf5Sahrens {
12061d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
12073cb34c60Sahrens 	dmu_objset_type_t *ost = arg2;
12081d452cf5Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1209fa9e4066Sahrens 
1210fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1211fa9e4066Sahrens 
121286ccc033Sperrin 	/*
121386ccc033Sperrin 	 * Before the roll back destroy the zil.
121486ccc033Sperrin 	 */
121586ccc033Sperrin 	if (ds->ds_user_ptr != NULL) {
121686ccc033Sperrin 		zil_rollback_destroy(
121786ccc033Sperrin 		    ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx);
12183cb34c60Sahrens 
12193cb34c60Sahrens 		/*
12203cb34c60Sahrens 		 * We need to make sure that the objset_impl_t is reopened after
12213cb34c60Sahrens 		 * we do the rollback, otherwise it will have the wrong
12223cb34c60Sahrens 		 * objset_phys_t.  Normally this would happen when this
1223745cd3c5Smaybee 		 * dataset-open is closed, thus causing the
12243cb34c60Sahrens 		 * dataset to be immediately evicted.  But when doing "zfs recv
12253cb34c60Sahrens 		 * -F", we reopen the objset before that, so that there is no
12263cb34c60Sahrens 		 * window where the dataset is closed and inconsistent.
12273cb34c60Sahrens 		 */
12283cb34c60Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
12293cb34c60Sahrens 		ds->ds_user_ptr = NULL;
123086ccc033Sperrin 	}
12313a8a1de4Sperrin 
123274e7dc98SMatthew Ahrens 	/* Transfer space that was freed since last snap back to the head. */
123374e7dc98SMatthew Ahrens 	{
123474e7dc98SMatthew Ahrens 		uint64_t used;
123574e7dc98SMatthew Ahrens 
123674e7dc98SMatthew Ahrens 		VERIFY(0 == bplist_space_birthrange(&ds->ds_deadlist,
123774e7dc98SMatthew Ahrens 		    ds->ds_origin_txg, UINT64_MAX, &used));
123874e7dc98SMatthew Ahrens 		dsl_dir_transfer_space(ds->ds_dir, used,
123974e7dc98SMatthew Ahrens 		    DD_USED_SNAP, DD_USED_HEAD, tx);
124074e7dc98SMatthew Ahrens 	}
124174e7dc98SMatthew Ahrens 
1242fa9e4066Sahrens 	/* Zero out the deadlist. */
1243fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
1244fa9e4066Sahrens 	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1245fa9e4066Sahrens 	ds->ds_phys->ds_deadlist_obj =
1246fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
1247ea8dc4b6Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
1248ea8dc4b6Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1249fa9e4066Sahrens 
1250fa9e4066Sahrens 	{
1251fa9e4066Sahrens 		/* Free blkptrs that we gave birth to */
1252fa9e4066Sahrens 		zio_t *zio;
1253fa9e4066Sahrens 		struct killarg ka;
1254fa9e4066Sahrens 
1255fa9e4066Sahrens 		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
1256fa9e4066Sahrens 		    ZIO_FLAG_MUSTSUCCEED);
125774e7dc98SMatthew Ahrens 		ka.ds = ds;
1258fa9e4066Sahrens 		ka.zio = zio;
1259fa9e4066Sahrens 		ka.tx = tx;
126088b7b0f2SMatthew Ahrens 		(void) traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
126188b7b0f2SMatthew Ahrens 		    TRAVERSE_POST, kill_blkptr, &ka);
1262fa9e4066Sahrens 		(void) zio_wait(zio);
1263fa9e4066Sahrens 	}
1264fa9e4066Sahrens 
126574e7dc98SMatthew Ahrens 	ASSERT(!(ds->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) ||
126674e7dc98SMatthew Ahrens 	    ds->ds_phys->ds_unique_bytes == 0);
126774e7dc98SMatthew Ahrens 
1268088f3894Sahrens 	if (ds->ds_prev && ds->ds_prev != ds->ds_dir->dd_pool->dp_origin_snap) {
12693cb34c60Sahrens 		/* Change our contents to that of the prev snapshot */
127074e7dc98SMatthew Ahrens 
12713cb34c60Sahrens 		ASSERT3U(ds->ds_prev->ds_object, ==,
12723cb34c60Sahrens 		    ds->ds_phys->ds_prev_snap_obj);
127374e7dc98SMatthew Ahrens 		ASSERT3U(ds->ds_phys->ds_used_bytes, <=,
127474e7dc98SMatthew Ahrens 		    ds->ds_prev->ds_phys->ds_used_bytes);
127574e7dc98SMatthew Ahrens 
12763cb34c60Sahrens 		ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
12773cb34c60Sahrens 		ds->ds_phys->ds_used_bytes =
12783cb34c60Sahrens 		    ds->ds_prev->ds_phys->ds_used_bytes;
12793cb34c60Sahrens 		ds->ds_phys->ds_compressed_bytes =
12803cb34c60Sahrens 		    ds->ds_prev->ds_phys->ds_compressed_bytes;
12813cb34c60Sahrens 		ds->ds_phys->ds_uncompressed_bytes =
12823cb34c60Sahrens 		    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
12833cb34c60Sahrens 		ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
1284fa9e4066Sahrens 
12853cb34c60Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
12863cb34c60Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
12873cb34c60Sahrens 			ds->ds_prev->ds_phys->ds_unique_bytes = 0;
12883cb34c60Sahrens 		}
12893cb34c60Sahrens 	} else {
1290088f3894Sahrens 		objset_impl_t *osi;
1291088f3894Sahrens 
129274e7dc98SMatthew Ahrens 		ASSERT3U(ds->ds_phys->ds_used_bytes, ==, 0);
129374e7dc98SMatthew Ahrens 		ASSERT3U(ds->ds_phys->ds_compressed_bytes, ==, 0);
129474e7dc98SMatthew Ahrens 		ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, ==, 0);
129574e7dc98SMatthew Ahrens 
12963cb34c60Sahrens 		bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
12973cb34c60Sahrens 		ds->ds_phys->ds_flags = 0;
12983cb34c60Sahrens 		ds->ds_phys->ds_unique_bytes = 0;
129974e7dc98SMatthew Ahrens 		if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
130074e7dc98SMatthew Ahrens 		    SPA_VERSION_UNIQUE_ACCURATE)
130174e7dc98SMatthew Ahrens 			ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
130274e7dc98SMatthew Ahrens 
1303088f3894Sahrens 		osi = dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
13043cb34c60Sahrens 		    &ds->ds_phys->ds_bp, *ost, tx);
1305088f3894Sahrens #ifdef _KERNEL
1306088f3894Sahrens 		zfs_create_fs(&osi->os, kcred, NULL, tx);
1307088f3894Sahrens #endif
130885edac42Sahrens 	}
1309ecd6cf80Smarks 
1310ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
1311ecd6cf80Smarks 	    tx, cr, "dataset = %llu", ds->ds_object);
1312fa9e4066Sahrens }
1313fa9e4066Sahrens 
1314e1930233Sbonwick /* ARGSUSED */
1315e1930233Sbonwick static int
13161d452cf5Sahrens dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
1317e1930233Sbonwick {
13181d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
13193cb34c60Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
13203cb34c60Sahrens 	uint64_t count;
13213cb34c60Sahrens 	int err;
1322e1930233Sbonwick 
1323e1930233Sbonwick 	/*
1324e1930233Sbonwick 	 * Can't delete a head dataset if there are snapshots of it.
1325e1930233Sbonwick 	 * (Except if the only snapshots are from the branch we cloned
1326e1930233Sbonwick 	 * from.)
1327e1930233Sbonwick 	 */
1328e1930233Sbonwick 	if (ds->ds_prev != NULL &&
1329e1930233Sbonwick 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1330e1930233Sbonwick 		return (EINVAL);
1331e1930233Sbonwick 
13323cb34c60Sahrens 	/*
13333cb34c60Sahrens 	 * This is really a dsl_dir thing, but check it here so that
13343cb34c60Sahrens 	 * we'll be less likely to leave this dataset inconsistent &
13353cb34c60Sahrens 	 * nearly destroyed.
13363cb34c60Sahrens 	 */
13373cb34c60Sahrens 	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
13383cb34c60Sahrens 	if (err)
13393cb34c60Sahrens 		return (err);
13403cb34c60Sahrens 	if (count != 0)
13413cb34c60Sahrens 		return (EEXIST);
13423cb34c60Sahrens 
1343e1930233Sbonwick 	return (0);
1344e1930233Sbonwick }
1345e1930233Sbonwick 
13461d452cf5Sahrens /* ARGSUSED */
13471d452cf5Sahrens static void
1348ecd6cf80Smarks dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
1349fa9e4066Sahrens {
13501d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1351ecd6cf80Smarks 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1352fa9e4066Sahrens 
13531d452cf5Sahrens 	/* Mark it as inconsistent on-disk, in case we crash */
13541d452cf5Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
13551d452cf5Sahrens 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
1356ecd6cf80Smarks 
1357ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
1358ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
13591d452cf5Sahrens }
1360fa9e4066Sahrens 
13611d452cf5Sahrens /* ARGSUSED */
13623cb34c60Sahrens int
13631d452cf5Sahrens dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
13641d452cf5Sahrens {
13651d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1366fa9e4066Sahrens 
1367745cd3c5Smaybee 	/* we have an owner hold, so noone else can destroy us */
1368745cd3c5Smaybee 	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
1369745cd3c5Smaybee 
1370fa9e4066Sahrens 	/* Can't delete a branch point. */
13711d452cf5Sahrens 	if (ds->ds_phys->ds_num_children > 1)
13721d452cf5Sahrens 		return (EEXIST);
1373fa9e4066Sahrens 
1374fa9e4066Sahrens 	/*
1375fa9e4066Sahrens 	 * Can't delete a head dataset if there are snapshots of it.
1376fa9e4066Sahrens 	 * (Except if the only snapshots are from the branch we cloned
1377fa9e4066Sahrens 	 * from.)
1378fa9e4066Sahrens 	 */
1379fa9e4066Sahrens 	if (ds->ds_prev != NULL &&
13801d452cf5Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1381fa9e4066Sahrens 		return (EINVAL);
1382fa9e4066Sahrens 
1383fa9e4066Sahrens 	/*
1384fa9e4066Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
1385fa9e4066Sahrens 	 * them.  Try again.
1386fa9e4066Sahrens 	 */
13871d452cf5Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1388fa9e4066Sahrens 		return (EAGAIN);
13891d452cf5Sahrens 
13901d452cf5Sahrens 	/* XXX we should do some i/o error checking... */
13911d452cf5Sahrens 	return (0);
13921d452cf5Sahrens }
13931d452cf5Sahrens 
1394745cd3c5Smaybee struct refsarg {
1395745cd3c5Smaybee 	kmutex_t lock;
1396745cd3c5Smaybee 	boolean_t gone;
1397745cd3c5Smaybee 	kcondvar_t cv;
1398745cd3c5Smaybee };
1399745cd3c5Smaybee 
1400745cd3c5Smaybee /* ARGSUSED */
1401745cd3c5Smaybee static void
1402745cd3c5Smaybee dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
1403745cd3c5Smaybee {
1404745cd3c5Smaybee 	struct refsarg *arg = argv;
1405745cd3c5Smaybee 
1406745cd3c5Smaybee 	mutex_enter(&arg->lock);
1407745cd3c5Smaybee 	arg->gone = TRUE;
1408745cd3c5Smaybee 	cv_signal(&arg->cv);
1409745cd3c5Smaybee 	mutex_exit(&arg->lock);
1410745cd3c5Smaybee }
1411745cd3c5Smaybee 
1412745cd3c5Smaybee static void
1413745cd3c5Smaybee dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
1414745cd3c5Smaybee {
1415745cd3c5Smaybee 	struct refsarg arg;
1416745cd3c5Smaybee 
1417745cd3c5Smaybee 	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
1418745cd3c5Smaybee 	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
1419745cd3c5Smaybee 	arg.gone = FALSE;
1420745cd3c5Smaybee 	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
1421745cd3c5Smaybee 	    dsl_dataset_refs_gone);
1422745cd3c5Smaybee 	dmu_buf_rele(ds->ds_dbuf, tag);
1423745cd3c5Smaybee 	mutex_enter(&arg.lock);
1424745cd3c5Smaybee 	while (!arg.gone)
1425745cd3c5Smaybee 		cv_wait(&arg.cv, &arg.lock);
1426745cd3c5Smaybee 	ASSERT(arg.gone);
1427745cd3c5Smaybee 	mutex_exit(&arg.lock);
1428745cd3c5Smaybee 	ds->ds_dbuf = NULL;
1429745cd3c5Smaybee 	ds->ds_phys = NULL;
1430745cd3c5Smaybee 	mutex_destroy(&arg.lock);
1431745cd3c5Smaybee 	cv_destroy(&arg.cv);
1432745cd3c5Smaybee }
1433745cd3c5Smaybee 
14343cb34c60Sahrens void
1435ecd6cf80Smarks dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
14361d452cf5Sahrens {
14371d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
14381d452cf5Sahrens 	zio_t *zio;
14391d452cf5Sahrens 	int err;
14401d452cf5Sahrens 	int after_branch_point = FALSE;
14411d452cf5Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
14421d452cf5Sahrens 	objset_t *mos = dp->dp_meta_objset;
14431d452cf5Sahrens 	dsl_dataset_t *ds_prev = NULL;
14441d452cf5Sahrens 	uint64_t obj;
14451d452cf5Sahrens 
1446745cd3c5Smaybee 	ASSERT(ds->ds_owner);
14471d452cf5Sahrens 	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
14481d452cf5Sahrens 	ASSERT(ds->ds_prev == NULL ||
14491d452cf5Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
14501d452cf5Sahrens 	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
14511d452cf5Sahrens 
1452745cd3c5Smaybee 	/* signal any waiters that this dataset is going away */
1453745cd3c5Smaybee 	mutex_enter(&ds->ds_lock);
1454745cd3c5Smaybee 	ds->ds_owner = dsl_reaper;
1455745cd3c5Smaybee 	cv_broadcast(&ds->ds_exclusive_cv);
1456745cd3c5Smaybee 	mutex_exit(&ds->ds_lock);
1457745cd3c5Smaybee 
1458a9799022Sck 	/* Remove our reservation */
1459a9799022Sck 	if (ds->ds_reserved != 0) {
1460a9799022Sck 		uint64_t val = 0;
1461a9799022Sck 		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
1462a9799022Sck 		ASSERT3U(ds->ds_reserved, ==, 0);
1463a9799022Sck 	}
1464a9799022Sck 
14651d452cf5Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
14661d452cf5Sahrens 
1467088f3894Sahrens 	dsl_pool_ds_destroyed(ds, tx);
1468088f3894Sahrens 
14691d452cf5Sahrens 	obj = ds->ds_object;
1470fa9e4066Sahrens 
1471fa9e4066Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
1472fa9e4066Sahrens 		if (ds->ds_prev) {
1473fa9e4066Sahrens 			ds_prev = ds->ds_prev;
1474fa9e4066Sahrens 		} else {
1475745cd3c5Smaybee 			VERIFY(0 == dsl_dataset_hold_obj(dp,
1476745cd3c5Smaybee 			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
1477fa9e4066Sahrens 		}
1478fa9e4066Sahrens 		after_branch_point =
1479fa9e4066Sahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
1480fa9e4066Sahrens 
1481fa9e4066Sahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1482088f3894Sahrens 		if (after_branch_point &&
1483088f3894Sahrens 		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
1484088f3894Sahrens 			VERIFY(0 == zap_remove_int(mos,
1485088f3894Sahrens 			    ds_prev->ds_phys->ds_next_clones_obj, obj, tx));
1486088f3894Sahrens 			if (ds->ds_phys->ds_next_snap_obj != 0) {
1487088f3894Sahrens 				VERIFY(0 == zap_add_int(mos,
1488088f3894Sahrens 				    ds_prev->ds_phys->ds_next_clones_obj,
1489088f3894Sahrens 				    ds->ds_phys->ds_next_snap_obj, tx));
1490088f3894Sahrens 			}
1491088f3894Sahrens 		}
1492fa9e4066Sahrens 		if (after_branch_point &&
1493fa9e4066Sahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
1494fa9e4066Sahrens 			/* This clone is toast. */
1495fa9e4066Sahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
1496fa9e4066Sahrens 			ds_prev->ds_phys->ds_num_children--;
1497fa9e4066Sahrens 		} else if (!after_branch_point) {
1498fa9e4066Sahrens 			ds_prev->ds_phys->ds_next_snap_obj =
1499fa9e4066Sahrens 			    ds->ds_phys->ds_next_snap_obj;
1500fa9e4066Sahrens 		}
1501fa9e4066Sahrens 	}
1502fa9e4066Sahrens 
1503fa9e4066Sahrens 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
1504fa9e4066Sahrens 
1505fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
15061d452cf5Sahrens 		blkptr_t bp;
1507fa9e4066Sahrens 		dsl_dataset_t *ds_next;
1508fa9e4066Sahrens 		uint64_t itor = 0;
1509a9799022Sck 		uint64_t old_unique;
151074e7dc98SMatthew Ahrens 		int64_t used = 0, compressed = 0, uncompressed = 0;
1511fa9e4066Sahrens 
1512745cd3c5Smaybee 		VERIFY(0 == dsl_dataset_hold_obj(dp,
1513745cd3c5Smaybee 		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
1514fa9e4066Sahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
1515fa9e4066Sahrens 
1516a9799022Sck 		old_unique = dsl_dataset_unique(ds_next);
1517a9799022Sck 
1518fa9e4066Sahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
1519fa9e4066Sahrens 		ds_next->ds_phys->ds_prev_snap_obj =
1520fa9e4066Sahrens 		    ds->ds_phys->ds_prev_snap_obj;
1521fa9e4066Sahrens 		ds_next->ds_phys->ds_prev_snap_txg =
1522fa9e4066Sahrens 		    ds->ds_phys->ds_prev_snap_txg;
1523fa9e4066Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1524fa9e4066Sahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
1525fa9e4066Sahrens 
1526fa9e4066Sahrens 		/*
1527fa9e4066Sahrens 		 * Transfer to our deadlist (which will become next's
1528fa9e4066Sahrens 		 * new deadlist) any entries from next's current
1529fa9e4066Sahrens 		 * deadlist which were born before prev, and free the
1530fa9e4066Sahrens 		 * other entries.
1531fa9e4066Sahrens 		 *
1532fa9e4066Sahrens 		 * XXX we're doing this long task with the config lock held
1533fa9e4066Sahrens 		 */
1534745cd3c5Smaybee 		while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) {
1535fa9e4066Sahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
1536ea8dc4b6Seschrock 				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
1537ea8dc4b6Seschrock 				    &bp, tx));
1538fa9e4066Sahrens 				if (ds_prev && !after_branch_point &&
1539fa9e4066Sahrens 				    bp.blk_birth >
1540fa9e4066Sahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
1541fa9e4066Sahrens 					ds_prev->ds_phys->ds_unique_bytes +=
154299653d4eSeschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1543fa9e4066Sahrens 				}
1544fa9e4066Sahrens 			} else {
154599653d4eSeschrock 				used += bp_get_dasize(dp->dp_spa, &bp);
1546fa9e4066Sahrens 				compressed += BP_GET_PSIZE(&bp);
1547fa9e4066Sahrens 				uncompressed += BP_GET_UCSIZE(&bp);
1548fa9e4066Sahrens 				/* XXX check return value? */
1549088f3894Sahrens 				(void) dsl_free(zio, dp, tx->tx_txg,
1550fa9e4066Sahrens 				    &bp, NULL, NULL, ARC_NOWAIT);
1551fa9e4066Sahrens 			}
1552fa9e4066Sahrens 		}
1553fa9e4066Sahrens 
155474e7dc98SMatthew Ahrens 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
155574e7dc98SMatthew Ahrens 
155674e7dc98SMatthew Ahrens 		/* change snapused */
155774e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
155874e7dc98SMatthew Ahrens 		    -used, -compressed, -uncompressed, tx);
155974e7dc98SMatthew Ahrens 
1560fa9e4066Sahrens 		/* free next's deadlist */
1561fa9e4066Sahrens 		bplist_close(&ds_next->ds_deadlist);
1562fa9e4066Sahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
1563fa9e4066Sahrens 
1564fa9e4066Sahrens 		/* set next's deadlist to our deadlist */
1565745cd3c5Smaybee 		bplist_close(&ds->ds_deadlist);
1566fa9e4066Sahrens 		ds_next->ds_phys->ds_deadlist_obj =
1567fa9e4066Sahrens 		    ds->ds_phys->ds_deadlist_obj;
1568ea8dc4b6Seschrock 		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
1569ea8dc4b6Seschrock 		    ds_next->ds_phys->ds_deadlist_obj));
1570fa9e4066Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1571fa9e4066Sahrens 
1572fa9e4066Sahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
1573fa9e4066Sahrens 			/*
1574fa9e4066Sahrens 			 * Update next's unique to include blocks which
1575fa9e4066Sahrens 			 * were previously shared by only this snapshot
1576fa9e4066Sahrens 			 * and it.  Those blocks will be born after the
1577fa9e4066Sahrens 			 * prev snap and before this snap, and will have
1578fa9e4066Sahrens 			 * died after the next snap and before the one
1579fa9e4066Sahrens 			 * after that (ie. be on the snap after next's
1580fa9e4066Sahrens 			 * deadlist).
1581fa9e4066Sahrens 			 *
1582fa9e4066Sahrens 			 * XXX we're doing this long task with the
1583fa9e4066Sahrens 			 * config lock held
1584fa9e4066Sahrens 			 */
1585fa9e4066Sahrens 			dsl_dataset_t *ds_after_next;
158674e7dc98SMatthew Ahrens 			uint64_t space;
1587fa9e4066Sahrens 
1588745cd3c5Smaybee 			VERIFY(0 == dsl_dataset_hold_obj(dp,
1589745cd3c5Smaybee 			    ds_next->ds_phys->ds_next_snap_obj,
1590745cd3c5Smaybee 			    FTAG, &ds_after_next));
159174e7dc98SMatthew Ahrens 
159274e7dc98SMatthew Ahrens 			VERIFY(0 ==
159374e7dc98SMatthew Ahrens 			    bplist_space_birthrange(&ds_after_next->ds_deadlist,
159474e7dc98SMatthew Ahrens 			    ds->ds_phys->ds_prev_snap_txg,
159574e7dc98SMatthew Ahrens 			    ds->ds_phys->ds_creation_txg, &space));
159674e7dc98SMatthew Ahrens 			ds_next->ds_phys->ds_unique_bytes += space;
1597fa9e4066Sahrens 
1598745cd3c5Smaybee 			dsl_dataset_rele(ds_after_next, FTAG);
1599fa9e4066Sahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
1600fa9e4066Sahrens 		} else {
1601fa9e4066Sahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
1602745cd3c5Smaybee 			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
1603745cd3c5Smaybee 			ds_next->ds_prev = NULL;
1604fa9e4066Sahrens 			if (ds_prev) {
1605745cd3c5Smaybee 				VERIFY(0 == dsl_dataset_get_ref(dp,
1606745cd3c5Smaybee 				    ds->ds_phys->ds_prev_snap_obj,
1607745cd3c5Smaybee 				    ds_next, &ds_next->ds_prev));
1608fa9e4066Sahrens 			}
1609a9799022Sck 
1610a9799022Sck 			dsl_dataset_recalc_head_uniq(ds_next);
1611a9799022Sck 
1612a9799022Sck 			/*
1613a9799022Sck 			 * Reduce the amount of our unconsmed refreservation
1614a9799022Sck 			 * being charged to our parent by the amount of
1615a9799022Sck 			 * new unique data we have gained.
1616a9799022Sck 			 */
1617a9799022Sck 			if (old_unique < ds_next->ds_reserved) {
1618a9799022Sck 				int64_t mrsdelta;
1619a9799022Sck 				uint64_t new_unique =
1620a9799022Sck 				    ds_next->ds_phys->ds_unique_bytes;
1621a9799022Sck 
1622a9799022Sck 				ASSERT(old_unique <= new_unique);
1623a9799022Sck 				mrsdelta = MIN(new_unique - old_unique,
1624a9799022Sck 				    ds_next->ds_reserved - old_unique);
162574e7dc98SMatthew Ahrens 				dsl_dir_diduse_space(ds->ds_dir,
162674e7dc98SMatthew Ahrens 				    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
1627a9799022Sck 			}
1628fa9e4066Sahrens 		}
1629745cd3c5Smaybee 		dsl_dataset_rele(ds_next, FTAG);
1630fa9e4066Sahrens 	} else {
1631fa9e4066Sahrens 		/*
1632fa9e4066Sahrens 		 * There's no next snapshot, so this is a head dataset.
1633fa9e4066Sahrens 		 * Destroy the deadlist.  Unless it's a clone, the
1634fa9e4066Sahrens 		 * deadlist should be empty.  (If it's a clone, it's
1635fa9e4066Sahrens 		 * safe to ignore the deadlist contents.)
1636fa9e4066Sahrens 		 */
1637fa9e4066Sahrens 		struct killarg ka;
1638fa9e4066Sahrens 
1639fa9e4066Sahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
1640fa9e4066Sahrens 		bplist_close(&ds->ds_deadlist);
1641fa9e4066Sahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1642fa9e4066Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1643fa9e4066Sahrens 
1644fa9e4066Sahrens 		/*
1645fa9e4066Sahrens 		 * Free everything that we point to (that's born after
1646fa9e4066Sahrens 		 * the previous snapshot, if we are a clone)
1647fa9e4066Sahrens 		 *
164874e7dc98SMatthew Ahrens 		 * NB: this should be very quick, because we already
164974e7dc98SMatthew Ahrens 		 * freed all the objects in open context.
1650fa9e4066Sahrens 		 */
165174e7dc98SMatthew Ahrens 		ka.ds = ds;
1652fa9e4066Sahrens 		ka.zio = zio;
1653fa9e4066Sahrens 		ka.tx = tx;
165488b7b0f2SMatthew Ahrens 		err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
165588b7b0f2SMatthew Ahrens 		    TRAVERSE_POST, kill_blkptr, &ka);
1656fa9e4066Sahrens 		ASSERT3U(err, ==, 0);
165774e7dc98SMatthew Ahrens 		ASSERT(spa_version(dp->dp_spa) < SPA_VERSION_UNIQUE_ACCURATE ||
165874e7dc98SMatthew Ahrens 		    ds->ds_phys->ds_unique_bytes == 0);
1659fa9e4066Sahrens 	}
1660fa9e4066Sahrens 
1661fa9e4066Sahrens 	err = zio_wait(zio);
1662fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1663fa9e4066Sahrens 
16641d452cf5Sahrens 	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1665745cd3c5Smaybee 		/* Erase the link in the dir */
16661d452cf5Sahrens 		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
16671d452cf5Sahrens 		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
1668745cd3c5Smaybee 		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
1669745cd3c5Smaybee 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1670745cd3c5Smaybee 		ASSERT(err == 0);
1671fa9e4066Sahrens 	} else {
1672fa9e4066Sahrens 		/* remove from snapshot namespace */
1673fa9e4066Sahrens 		dsl_dataset_t *ds_head;
1674745cd3c5Smaybee 		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
1675745cd3c5Smaybee 		VERIFY(0 == dsl_dataset_hold_obj(dp,
1676745cd3c5Smaybee 		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
16778660574dSahrens 		VERIFY(0 == dsl_dataset_get_snapname(ds));
1678fa9e4066Sahrens #ifdef ZFS_DEBUG
1679fa9e4066Sahrens 		{
1680fa9e4066Sahrens 			uint64_t val;
1681ab04eb8eStimh 
1682745cd3c5Smaybee 			err = dsl_dataset_snap_lookup(ds_head,
1683ab04eb8eStimh 			    ds->ds_snapname, &val);
1684fa9e4066Sahrens 			ASSERT3U(err, ==, 0);
1685fa9e4066Sahrens 			ASSERT3U(val, ==, obj);
1686fa9e4066Sahrens 		}
1687fa9e4066Sahrens #endif
1688745cd3c5Smaybee 		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
1689fa9e4066Sahrens 		ASSERT(err == 0);
1690745cd3c5Smaybee 		dsl_dataset_rele(ds_head, FTAG);
1691fa9e4066Sahrens 	}
1692fa9e4066Sahrens 
1693fa9e4066Sahrens 	if (ds_prev && ds->ds_prev != ds_prev)
1694745cd3c5Smaybee 		dsl_dataset_rele(ds_prev, FTAG);
1695fa9e4066Sahrens 
1696990b4856Slling 	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
1697ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
1698ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
1699ecd6cf80Smarks 
1700088f3894Sahrens 	if (ds->ds_phys->ds_next_clones_obj != 0) {
1701088f3894Sahrens 		uint64_t count;
1702088f3894Sahrens 		ASSERT(0 == zap_count(mos,
1703088f3894Sahrens 		    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
1704088f3894Sahrens 		VERIFY(0 == dmu_object_free(mos,
1705088f3894Sahrens 		    ds->ds_phys->ds_next_clones_obj, tx));
1706088f3894Sahrens 	}
170774e7dc98SMatthew Ahrens 	if (ds->ds_phys->ds_props_obj != 0)
170874e7dc98SMatthew Ahrens 		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
1709745cd3c5Smaybee 	dsl_dir_close(ds->ds_dir, ds);
1710745cd3c5Smaybee 	ds->ds_dir = NULL;
1711745cd3c5Smaybee 	dsl_dataset_drain_refs(ds, tag);
17121d452cf5Sahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
1713fa9e4066Sahrens }
1714fa9e4066Sahrens 
1715a9799022Sck static int
1716a9799022Sck dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
1717a9799022Sck {
1718a9799022Sck 	uint64_t asize;
1719a9799022Sck 
1720a9799022Sck 	if (!dmu_tx_is_syncing(tx))
1721a9799022Sck 		return (0);
1722a9799022Sck 
1723a9799022Sck 	/*
1724a9799022Sck 	 * If there's an fs-only reservation, any blocks that might become
1725a9799022Sck 	 * owned by the snapshot dataset must be accommodated by space
1726a9799022Sck 	 * outside of the reservation.
1727a9799022Sck 	 */
1728a9799022Sck 	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
1729a9799022Sck 	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
1730a9799022Sck 		return (ENOSPC);
1731a9799022Sck 
1732a9799022Sck 	/*
1733a9799022Sck 	 * Propogate any reserved space for this snapshot to other
1734a9799022Sck 	 * snapshot checks in this sync group.
1735a9799022Sck 	 */
1736a9799022Sck 	if (asize > 0)
1737a9799022Sck 		dsl_dir_willuse_space(ds->ds_dir, asize, tx);
1738a9799022Sck 
1739a9799022Sck 	return (0);
1740a9799022Sck }
1741a9799022Sck 
17421d452cf5Sahrens /* ARGSUSED */
1743fa9e4066Sahrens int
17441d452cf5Sahrens dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
1745fa9e4066Sahrens {
17463cb34c60Sahrens 	dsl_dataset_t *ds = arg1;
17471d452cf5Sahrens 	const char *snapname = arg2;
1748fa9e4066Sahrens 	int err;
17491d452cf5Sahrens 	uint64_t value;
1750fa9e4066Sahrens 
17511d452cf5Sahrens 	/*
17521d452cf5Sahrens 	 * We don't allow multiple snapshots of the same txg.  If there
17531d452cf5Sahrens 	 * is already one, try again.
17541d452cf5Sahrens 	 */
17551d452cf5Sahrens 	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
17561d452cf5Sahrens 		return (EAGAIN);
1757fa9e4066Sahrens 
17581d452cf5Sahrens 	/*
17591d452cf5Sahrens 	 * Check for conflicting name snapshot name.
17601d452cf5Sahrens 	 */
1761745cd3c5Smaybee 	err = dsl_dataset_snap_lookup(ds, snapname, &value);
17621d452cf5Sahrens 	if (err == 0)
1763fa9e4066Sahrens 		return (EEXIST);
17641d452cf5Sahrens 	if (err != ENOENT)
17651d452cf5Sahrens 		return (err);
1766fa9e4066Sahrens 
1767b7661cccSmmusante 	/*
1768b7661cccSmmusante 	 * Check that the dataset's name is not too long.  Name consists
1769b7661cccSmmusante 	 * of the dataset's length + 1 for the @-sign + snapshot name's length
1770b7661cccSmmusante 	 */
1771b7661cccSmmusante 	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
1772b7661cccSmmusante 		return (ENAMETOOLONG);
1773b7661cccSmmusante 
1774a9799022Sck 	err = dsl_dataset_snapshot_reserve_space(ds, tx);
1775a9799022Sck 	if (err)
1776a9799022Sck 		return (err);
1777a9799022Sck 
17781d452cf5Sahrens 	ds->ds_trysnap_txg = tx->tx_txg;
17791d452cf5Sahrens 	return (0);
17801d452cf5Sahrens }
1781fa9e4066Sahrens 
17821d452cf5Sahrens void
1783ecd6cf80Smarks dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
17841d452cf5Sahrens {
17853cb34c60Sahrens 	dsl_dataset_t *ds = arg1;
17861d452cf5Sahrens 	const char *snapname = arg2;
17871d452cf5Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
17881d452cf5Sahrens 	dmu_buf_t *dbuf;
17891d452cf5Sahrens 	dsl_dataset_phys_t *dsphys;
1790088f3894Sahrens 	uint64_t dsobj, crtxg;
17911d452cf5Sahrens 	objset_t *mos = dp->dp_meta_objset;
17921d452cf5Sahrens 	int err;
1793fa9e4066Sahrens 
17941d452cf5Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
1795fa9e4066Sahrens 
1796088f3894Sahrens 	/*
1797088f3894Sahrens 	 * The origin's ds_creation_txg has to be < TXG_INITIAL
1798088f3894Sahrens 	 */
1799088f3894Sahrens 	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
1800088f3894Sahrens 		crtxg = 1;
1801088f3894Sahrens 	else
1802088f3894Sahrens 		crtxg = tx->tx_txg;
1803088f3894Sahrens 
18041649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
18051649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
1806ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
1807fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
1808fa9e4066Sahrens 	dsphys = dbuf->db_data;
1809745cd3c5Smaybee 	bzero(dsphys, sizeof (dsl_dataset_phys_t));
18101d452cf5Sahrens 	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
1811fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
1812fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
1813fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
1814fa9e4066Sahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
1815fa9e4066Sahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
1816fa9e4066Sahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
1817fa9e4066Sahrens 	dsphys->ds_num_children = 1;
1818fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
1819088f3894Sahrens 	dsphys->ds_creation_txg = crtxg;
1820fa9e4066Sahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
1821fa9e4066Sahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
1822fa9e4066Sahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
1823fa9e4066Sahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
182499653d4eSeschrock 	dsphys->ds_flags = ds->ds_phys->ds_flags;
1825fa9e4066Sahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
1826ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
1827fa9e4066Sahrens 
18281d452cf5Sahrens 	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
18291d452cf5Sahrens 	if (ds->ds_prev) {
1830088f3894Sahrens 		uint64_t next_clones_obj =
1831088f3894Sahrens 		    ds->ds_prev->ds_phys->ds_next_clones_obj;
18321d452cf5Sahrens 		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
1833fa9e4066Sahrens 		    ds->ds_object ||
18341d452cf5Sahrens 		    ds->ds_prev->ds_phys->ds_num_children > 1);
18351d452cf5Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
18361d452cf5Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1837fa9e4066Sahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
18381d452cf5Sahrens 			    ds->ds_prev->ds_phys->ds_creation_txg);
18391d452cf5Sahrens 			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
1840088f3894Sahrens 		} else if (next_clones_obj != 0) {
1841088f3894Sahrens 			VERIFY3U(0, ==, zap_remove_int(mos,
1842088f3894Sahrens 			    next_clones_obj, dsphys->ds_next_snap_obj, tx));
1843088f3894Sahrens 			VERIFY3U(0, ==, zap_add_int(mos,
1844088f3894Sahrens 			    next_clones_obj, dsobj, tx));
1845fa9e4066Sahrens 		}
1846fa9e4066Sahrens 	}
1847fa9e4066Sahrens 
1848a9799022Sck 	/*
1849a9799022Sck 	 * If we have a reference-reservation on this dataset, we will
1850a9799022Sck 	 * need to increase the amount of refreservation being charged
1851a9799022Sck 	 * since our unique space is going to zero.
1852a9799022Sck 	 */
1853a9799022Sck 	if (ds->ds_reserved) {
1854a9799022Sck 		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
185574e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
185674e7dc98SMatthew Ahrens 		    add, 0, 0, tx);
1857a9799022Sck 	}
1858a9799022Sck 
1859fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
1860fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1861a4611edeSahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
1862fa9e4066Sahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
1863088f3894Sahrens 	ds->ds_phys->ds_prev_snap_txg = crtxg;
1864fa9e4066Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
1865a9799022Sck 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
1866a9799022Sck 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
1867fa9e4066Sahrens 	ds->ds_phys->ds_deadlist_obj =
1868fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
1869ea8dc4b6Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
1870ea8dc4b6Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1871fa9e4066Sahrens 
1872fa9e4066Sahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
1873fa9e4066Sahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
1874fa9e4066Sahrens 	    snapname, 8, 1, &dsobj, tx);
1875fa9e4066Sahrens 	ASSERT(err == 0);
1876fa9e4066Sahrens 
1877fa9e4066Sahrens 	if (ds->ds_prev)
1878745cd3c5Smaybee 		dsl_dataset_drop_ref(ds->ds_prev, ds);
1879745cd3c5Smaybee 	VERIFY(0 == dsl_dataset_get_ref(dp,
1880745cd3c5Smaybee 	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
1881ecd6cf80Smarks 
1882088f3894Sahrens 	dsl_pool_ds_snapshotted(ds, tx);
1883088f3894Sahrens 
1884ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
188540feaa91Sahrens 	    "dataset = %llu", dsobj);
1886fa9e4066Sahrens }
1887fa9e4066Sahrens 
1888fa9e4066Sahrens void
1889c717a561Smaybee dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
1890fa9e4066Sahrens {
1891fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1892fa9e4066Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1893fa9e4066Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1894fa9e4066Sahrens 
189591ebeef5Sahrens 	/*
189691ebeef5Sahrens 	 * in case we had to change ds_fsid_guid when we opened it,
189791ebeef5Sahrens 	 * sync it out now.
189891ebeef5Sahrens 	 */
189991ebeef5Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
190091ebeef5Sahrens 	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
190191ebeef5Sahrens 
1902fa9e4066Sahrens 	dsl_dir_dirty(ds->ds_dir, tx);
1903c717a561Smaybee 	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
1904fa9e4066Sahrens }
1905fa9e4066Sahrens 
1906fa9e4066Sahrens void
1907a2eea2e1Sahrens dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1908fa9e4066Sahrens {
1909a9799022Sck 	uint64_t refd, avail, uobjs, aobjs;
1910a9799022Sck 
1911a2eea2e1Sahrens 	dsl_dir_stats(ds->ds_dir, nv);
1912fa9e4066Sahrens 
1913a9799022Sck 	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
1914a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
1915a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
1916a9799022Sck 
1917a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
1918a2eea2e1Sahrens 	    ds->ds_phys->ds_creation_time);
1919a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
1920a2eea2e1Sahrens 	    ds->ds_phys->ds_creation_txg);
1921a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
1922a9799022Sck 	    ds->ds_quota);
1923a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
1924a9799022Sck 	    ds->ds_reserved);
1925c5904d13Seschrock 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
1926c5904d13Seschrock 	    ds->ds_phys->ds_guid);
1927fa9e4066Sahrens 
1928fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1929fa9e4066Sahrens 		/*
1930fa9e4066Sahrens 		 * This is a snapshot; override the dd's space used with
1931a2eea2e1Sahrens 		 * our unique space and compression ratio.
1932fa9e4066Sahrens 		 */
1933a2eea2e1Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
1934a2eea2e1Sahrens 		    ds->ds_phys->ds_unique_bytes);
1935a2eea2e1Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
1936a2eea2e1Sahrens 		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
1937a2eea2e1Sahrens 		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
1938a2eea2e1Sahrens 		    ds->ds_phys->ds_compressed_bytes));
1939fa9e4066Sahrens 	}
1940fa9e4066Sahrens }
1941fa9e4066Sahrens 
1942a2eea2e1Sahrens void
1943a2eea2e1Sahrens dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1944a2eea2e1Sahrens {
1945a2eea2e1Sahrens 	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
1946a2eea2e1Sahrens 	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
19473cb34c60Sahrens 	stat->dds_guid = ds->ds_phys->ds_guid;
1948a2eea2e1Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1949a2eea2e1Sahrens 		stat->dds_is_snapshot = B_TRUE;
1950a2eea2e1Sahrens 		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
1951ebedde84SEric Taylor 	} else {
1952ebedde84SEric Taylor 		stat->dds_is_snapshot = B_FALSE;
1953ebedde84SEric Taylor 		stat->dds_num_clones = 0;
1954a2eea2e1Sahrens 	}
1955a2eea2e1Sahrens 
1956a2eea2e1Sahrens 	/* clone origin is really a dsl_dir thing... */
19574ccbb6e7Sahrens 	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
1958088f3894Sahrens 	if (dsl_dir_is_clone(ds->ds_dir)) {
1959a2eea2e1Sahrens 		dsl_dataset_t *ods;
1960a2eea2e1Sahrens 
1961745cd3c5Smaybee 		VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
1962745cd3c5Smaybee 		    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
19633cb34c60Sahrens 		dsl_dataset_name(ods, stat->dds_origin);
1964745cd3c5Smaybee 		dsl_dataset_drop_ref(ods, FTAG);
1965ebedde84SEric Taylor 	} else {
1966ebedde84SEric Taylor 		stat->dds_origin[0] = '\0';
1967a2eea2e1Sahrens 	}
19684ccbb6e7Sahrens 	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
1969a2eea2e1Sahrens }
1970a2eea2e1Sahrens 
1971a2eea2e1Sahrens uint64_t
1972a2eea2e1Sahrens dsl_dataset_fsid_guid(dsl_dataset_t *ds)
1973a2eea2e1Sahrens {
197491ebeef5Sahrens 	return (ds->ds_fsid_guid);
1975a2eea2e1Sahrens }
1976a2eea2e1Sahrens 
1977a2eea2e1Sahrens void
1978a2eea2e1Sahrens dsl_dataset_space(dsl_dataset_t *ds,
1979a2eea2e1Sahrens     uint64_t *refdbytesp, uint64_t *availbytesp,
1980a2eea2e1Sahrens     uint64_t *usedobjsp, uint64_t *availobjsp)
1981fa9e4066Sahrens {
1982a2eea2e1Sahrens 	*refdbytesp = ds->ds_phys->ds_used_bytes;
1983a2eea2e1Sahrens 	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
1984a9799022Sck 	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
1985a9799022Sck 		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
1986a9799022Sck 	if (ds->ds_quota != 0) {
1987a9799022Sck 		/*
1988a9799022Sck 		 * Adjust available bytes according to refquota
1989a9799022Sck 		 */
1990a9799022Sck 		if (*refdbytesp < ds->ds_quota)
1991a9799022Sck 			*availbytesp = MIN(*availbytesp,
1992a9799022Sck 			    ds->ds_quota - *refdbytesp);
1993a9799022Sck 		else
1994a9799022Sck 			*availbytesp = 0;
1995a9799022Sck 	}
1996a2eea2e1Sahrens 	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
1997a2eea2e1Sahrens 	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
1998fa9e4066Sahrens }
1999fa9e4066Sahrens 
2000f18faf3fSek boolean_t
2001f18faf3fSek dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
2002f18faf3fSek {
2003f18faf3fSek 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
2004f18faf3fSek 
2005f18faf3fSek 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
2006f18faf3fSek 	    dsl_pool_sync_context(dp));
2007f18faf3fSek 	if (ds->ds_prev == NULL)
2008f18faf3fSek 		return (B_FALSE);
2009f18faf3fSek 	if (ds->ds_phys->ds_bp.blk_birth >
2010f18faf3fSek 	    ds->ds_prev->ds_phys->ds_creation_txg)
2011f18faf3fSek 		return (B_TRUE);
2012f18faf3fSek 	return (B_FALSE);
2013f18faf3fSek }
2014f18faf3fSek 
20151d452cf5Sahrens /* ARGSUSED */
2016fa9e4066Sahrens static int
20171d452cf5Sahrens dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
2018fa9e4066Sahrens {
20191d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
20201d452cf5Sahrens 	char *newsnapname = arg2;
20211d452cf5Sahrens 	dsl_dir_t *dd = ds->ds_dir;
20221d452cf5Sahrens 	dsl_dataset_t *hds;
2023fa9e4066Sahrens 	uint64_t val;
20241d452cf5Sahrens 	int err;
2025fa9e4066Sahrens 
2026745cd3c5Smaybee 	err = dsl_dataset_hold_obj(dd->dd_pool,
2027745cd3c5Smaybee 	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds);
2028fa9e4066Sahrens 	if (err)
2029fa9e4066Sahrens 		return (err);
2030fa9e4066Sahrens 
20311d452cf5Sahrens 	/* new name better not be in use */
2032745cd3c5Smaybee 	err = dsl_dataset_snap_lookup(hds, newsnapname, &val);
2033745cd3c5Smaybee 	dsl_dataset_rele(hds, FTAG);
20341d452cf5Sahrens 
20351d452cf5Sahrens 	if (err == 0)
20361d452cf5Sahrens 		err = EEXIST;
20371d452cf5Sahrens 	else if (err == ENOENT)
20381d452cf5Sahrens 		err = 0;
2039cdf5b4caSmmusante 
2040cdf5b4caSmmusante 	/* dataset name + 1 for the "@" + the new snapshot name must fit */
2041cdf5b4caSmmusante 	if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
2042cdf5b4caSmmusante 		err = ENAMETOOLONG;
2043cdf5b4caSmmusante 
20441d452cf5Sahrens 	return (err);
20451d452cf5Sahrens }
2046fa9e4066Sahrens 
20471d452cf5Sahrens static void
2048ecd6cf80Smarks dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2,
2049ecd6cf80Smarks     cred_t *cr, dmu_tx_t *tx)
20501d452cf5Sahrens {
20511d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
2052ecd6cf80Smarks 	const char *newsnapname = arg2;
20531d452cf5Sahrens 	dsl_dir_t *dd = ds->ds_dir;
20541d452cf5Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
20551d452cf5Sahrens 	dsl_dataset_t *hds;
20561d452cf5Sahrens 	int err;
2057fa9e4066Sahrens 
20581d452cf5Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
2059fa9e4066Sahrens 
2060745cd3c5Smaybee 	VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
2061745cd3c5Smaybee 	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
2062fa9e4066Sahrens 
20631d452cf5Sahrens 	VERIFY(0 == dsl_dataset_get_snapname(ds));
2064745cd3c5Smaybee 	err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
2065fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
20661d452cf5Sahrens 	mutex_enter(&ds->ds_lock);
20671d452cf5Sahrens 	(void) strcpy(ds->ds_snapname, newsnapname);
20681d452cf5Sahrens 	mutex_exit(&ds->ds_lock);
20691d452cf5Sahrens 	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
20701d452cf5Sahrens 	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
2071fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
2072fa9e4066Sahrens 
2073ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
2074ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
2075745cd3c5Smaybee 	dsl_dataset_rele(hds, FTAG);
2076fa9e4066Sahrens }
2077fa9e4066Sahrens 
2078f18faf3fSek struct renamesnaparg {
2079cdf5b4caSmmusante 	dsl_sync_task_group_t *dstg;
2080cdf5b4caSmmusante 	char failed[MAXPATHLEN];
2081cdf5b4caSmmusante 	char *oldsnap;
2082cdf5b4caSmmusante 	char *newsnap;
2083cdf5b4caSmmusante };
2084cdf5b4caSmmusante 
2085cdf5b4caSmmusante static int
2086cdf5b4caSmmusante dsl_snapshot_rename_one(char *name, void *arg)
2087cdf5b4caSmmusante {
2088f18faf3fSek 	struct renamesnaparg *ra = arg;
2089cdf5b4caSmmusante 	dsl_dataset_t *ds = NULL;
2090cdf5b4caSmmusante 	char *cp;
2091cdf5b4caSmmusante 	int err;
2092cdf5b4caSmmusante 
2093cdf5b4caSmmusante 	cp = name + strlen(name);
2094cdf5b4caSmmusante 	*cp = '@';
2095cdf5b4caSmmusante 	(void) strcpy(cp + 1, ra->oldsnap);
2096ecd6cf80Smarks 
2097ecd6cf80Smarks 	/*
2098ecd6cf80Smarks 	 * For recursive snapshot renames the parent won't be changing
2099ecd6cf80Smarks 	 * so we just pass name for both the to/from argument.
2100ecd6cf80Smarks 	 */
2101a0dc2951SMatthew Ahrens 	err = zfs_secpolicy_rename_perms(name, name, CRED());
2102a0dc2951SMatthew Ahrens 	if (err == ENOENT) {
2103a0dc2951SMatthew Ahrens 		return (0);
2104a0dc2951SMatthew Ahrens 	} else if (err) {
2105ecd6cf80Smarks 		(void) strcpy(ra->failed, name);
2106ecd6cf80Smarks 		return (err);
2107ecd6cf80Smarks 	}
2108ecd6cf80Smarks 
2109745cd3c5Smaybee #ifdef _KERNEL
2110745cd3c5Smaybee 	/*
2111745cd3c5Smaybee 	 * For all filesystems undergoing rename, we'll need to unmount it.
2112745cd3c5Smaybee 	 */
2113745cd3c5Smaybee 	(void) zfs_unmount_snap(name, NULL);
2114745cd3c5Smaybee #endif
2115745cd3c5Smaybee 	err = dsl_dataset_hold(name, ra->dstg, &ds);
2116745cd3c5Smaybee 	*cp = '\0';
2117cdf5b4caSmmusante 	if (err == ENOENT) {
2118cdf5b4caSmmusante 		return (0);
2119745cd3c5Smaybee 	} else if (err) {
2120cdf5b4caSmmusante 		(void) strcpy(ra->failed, name);
2121cdf5b4caSmmusante 		return (err);
2122cdf5b4caSmmusante 	}
2123cdf5b4caSmmusante 
2124cdf5b4caSmmusante 	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
2125cdf5b4caSmmusante 	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
2126cdf5b4caSmmusante 
2127cdf5b4caSmmusante 	return (0);
2128cdf5b4caSmmusante }
2129cdf5b4caSmmusante 
2130cdf5b4caSmmusante static int
2131cdf5b4caSmmusante dsl_recursive_rename(char *oldname, const char *newname)
2132cdf5b4caSmmusante {
2133cdf5b4caSmmusante 	int err;
2134f18faf3fSek 	struct renamesnaparg *ra;
2135cdf5b4caSmmusante 	dsl_sync_task_t *dst;
2136cdf5b4caSmmusante 	spa_t *spa;
2137cdf5b4caSmmusante 	char *cp, *fsname = spa_strdup(oldname);
2138cdf5b4caSmmusante 	int len = strlen(oldname);
2139cdf5b4caSmmusante 
2140cdf5b4caSmmusante 	/* truncate the snapshot name to get the fsname */
2141cdf5b4caSmmusante 	cp = strchr(fsname, '@');
2142cdf5b4caSmmusante 	*cp = '\0';
2143cdf5b4caSmmusante 
214440feaa91Sahrens 	err = spa_open(fsname, &spa, FTAG);
2145cdf5b4caSmmusante 	if (err) {
2146cdf5b4caSmmusante 		kmem_free(fsname, len + 1);
2147cdf5b4caSmmusante 		return (err);
2148cdf5b4caSmmusante 	}
2149f18faf3fSek 	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
2150cdf5b4caSmmusante 	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
2151cdf5b4caSmmusante 
2152cdf5b4caSmmusante 	ra->oldsnap = strchr(oldname, '@') + 1;
2153cdf5b4caSmmusante 	ra->newsnap = strchr(newname, '@') + 1;
2154cdf5b4caSmmusante 	*ra->failed = '\0';
2155cdf5b4caSmmusante 
2156cdf5b4caSmmusante 	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
2157cdf5b4caSmmusante 	    DS_FIND_CHILDREN);
2158cdf5b4caSmmusante 	kmem_free(fsname, len + 1);
2159cdf5b4caSmmusante 
2160cdf5b4caSmmusante 	if (err == 0) {
2161cdf5b4caSmmusante 		err = dsl_sync_task_group_wait(ra->dstg);
2162cdf5b4caSmmusante 	}
2163cdf5b4caSmmusante 
2164cdf5b4caSmmusante 	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
2165cdf5b4caSmmusante 	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
2166cdf5b4caSmmusante 		dsl_dataset_t *ds = dst->dst_arg1;
2167cdf5b4caSmmusante 		if (dst->dst_err) {
2168cdf5b4caSmmusante 			dsl_dir_name(ds->ds_dir, ra->failed);
21692572aa4eSmmusante 			(void) strcat(ra->failed, "@");
21702572aa4eSmmusante 			(void) strcat(ra->failed, ra->newsnap);
2171cdf5b4caSmmusante 		}
2172745cd3c5Smaybee 		dsl_dataset_rele(ds, ra->dstg);
2173cdf5b4caSmmusante 	}
2174cdf5b4caSmmusante 
2175ecd6cf80Smarks 	if (err)
2176ecd6cf80Smarks 		(void) strcpy(oldname, ra->failed);
2177cdf5b4caSmmusante 
2178cdf5b4caSmmusante 	dsl_sync_task_group_destroy(ra->dstg);
2179f18faf3fSek 	kmem_free(ra, sizeof (struct renamesnaparg));
2180cdf5b4caSmmusante 	spa_close(spa, FTAG);
2181cdf5b4caSmmusante 	return (err);
2182cdf5b4caSmmusante }
2183cdf5b4caSmmusante 
21843a5a36beSmmusante static int
21853a5a36beSmmusante dsl_valid_rename(char *oldname, void *arg)
21863a5a36beSmmusante {
21873a5a36beSmmusante 	int delta = *(int *)arg;
21883a5a36beSmmusante 
21893a5a36beSmmusante 	if (strlen(oldname) + delta >= MAXNAMELEN)
21903a5a36beSmmusante 		return (ENAMETOOLONG);
21913a5a36beSmmusante 
21923a5a36beSmmusante 	return (0);
21933a5a36beSmmusante }
21943a5a36beSmmusante 
2195fa9e4066Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
2196fa9e4066Sahrens int
2197745cd3c5Smaybee dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive)
2198fa9e4066Sahrens {
2199fa9e4066Sahrens 	dsl_dir_t *dd;
22001d452cf5Sahrens 	dsl_dataset_t *ds;
2201fa9e4066Sahrens 	const char *tail;
2202fa9e4066Sahrens 	int err;
2203fa9e4066Sahrens 
22041d452cf5Sahrens 	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
2205ea8dc4b6Seschrock 	if (err)
2206ea8dc4b6Seschrock 		return (err);
22071db42183SEric Taylor 	/*
22081db42183SEric Taylor 	 * If there are more than 2 references there may be holds
22091db42183SEric Taylor 	 * hanging around that haven't been cleared out yet.
22101db42183SEric Taylor 	 */
22111db42183SEric Taylor 	if (dmu_buf_refcount(dd->dd_dbuf) > 2)
22121db42183SEric Taylor 		txg_wait_synced(dd->dd_pool, 0);
2213fa9e4066Sahrens 	if (tail == NULL) {
22143a5a36beSmmusante 		int delta = strlen(newname) - strlen(oldname);
22153a5a36beSmmusante 
2216088f3894Sahrens 		/* if we're growing, validate child name lengths */
22173a5a36beSmmusante 		if (delta > 0)
22183a5a36beSmmusante 			err = dmu_objset_find(oldname, dsl_valid_rename,
22193a5a36beSmmusante 			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
22203a5a36beSmmusante 
22213a5a36beSmmusante 		if (!err)
22223a5a36beSmmusante 			err = dsl_dir_rename(dd, newname);
2223fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
2224fa9e4066Sahrens 		return (err);
2225fa9e4066Sahrens 	}
2226fa9e4066Sahrens 	if (tail[0] != '@') {
2227fa9e4066Sahrens 		/* the name ended in a nonexistant component */
2228fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
2229fa9e4066Sahrens 		return (ENOENT);
2230fa9e4066Sahrens 	}
2231fa9e4066Sahrens 
2232fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
22331d452cf5Sahrens 
22341d452cf5Sahrens 	/* new name must be snapshot in same filesystem */
22351d452cf5Sahrens 	tail = strchr(newname, '@');
22361d452cf5Sahrens 	if (tail == NULL)
22371d452cf5Sahrens 		return (EINVAL);
22381d452cf5Sahrens 	tail++;
22391d452cf5Sahrens 	if (strncmp(oldname, newname, tail - newname) != 0)
22401d452cf5Sahrens 		return (EXDEV);
22411d452cf5Sahrens 
2242cdf5b4caSmmusante 	if (recursive) {
2243cdf5b4caSmmusante 		err = dsl_recursive_rename(oldname, newname);
2244cdf5b4caSmmusante 	} else {
2245745cd3c5Smaybee 		err = dsl_dataset_hold(oldname, FTAG, &ds);
2246cdf5b4caSmmusante 		if (err)
2247cdf5b4caSmmusante 			return (err);
22481d452cf5Sahrens 
2249cdf5b4caSmmusante 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2250cdf5b4caSmmusante 		    dsl_dataset_snapshot_rename_check,
2251cdf5b4caSmmusante 		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
22521d452cf5Sahrens 
2253745cd3c5Smaybee 		dsl_dataset_rele(ds, FTAG);
2254cdf5b4caSmmusante 	}
22551d452cf5Sahrens 
2256fa9e4066Sahrens 	return (err);
2257fa9e4066Sahrens }
225899653d4eSeschrock 
2259088f3894Sahrens struct promotenode {
2260745cd3c5Smaybee 	list_node_t link;
2261745cd3c5Smaybee 	dsl_dataset_t *ds;
2262745cd3c5Smaybee };
2263745cd3c5Smaybee 
22641d452cf5Sahrens struct promotearg {
226574e7dc98SMatthew Ahrens 	list_t shared_snaps, origin_snaps, clone_snaps;
226674e7dc98SMatthew Ahrens 	dsl_dataset_t *origin_origin, *origin_head;
226774e7dc98SMatthew Ahrens 	uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
22681d452cf5Sahrens };
22691d452cf5Sahrens 
227074e7dc98SMatthew Ahrens static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
227174e7dc98SMatthew Ahrens 
2272ecd6cf80Smarks /* ARGSUSED */
227399653d4eSeschrock static int
22741d452cf5Sahrens dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
227599653d4eSeschrock {
22761d452cf5Sahrens 	dsl_dataset_t *hds = arg1;
22771d452cf5Sahrens 	struct promotearg *pa = arg2;
227874e7dc98SMatthew Ahrens 	struct promotenode *snap = list_head(&pa->shared_snaps);
2279745cd3c5Smaybee 	dsl_dataset_t *origin_ds = snap->ds;
2280745cd3c5Smaybee 	int err;
22811d452cf5Sahrens 
2282088f3894Sahrens 	/* Check that it is a real clone */
2283088f3894Sahrens 	if (!dsl_dir_is_clone(hds->ds_dir))
228499653d4eSeschrock 		return (EINVAL);
228599653d4eSeschrock 
22861d452cf5Sahrens 	/* Since this is so expensive, don't do the preliminary check */
22871d452cf5Sahrens 	if (!dmu_tx_is_syncing(tx))
22881d452cf5Sahrens 		return (0);
22891d452cf5Sahrens 
2290745cd3c5Smaybee 	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)
2291745cd3c5Smaybee 		return (EXDEV);
229299653d4eSeschrock 
22933cb34c60Sahrens 	/* compute origin's new unique space */
229474e7dc98SMatthew Ahrens 	snap = list_tail(&pa->clone_snaps);
229574e7dc98SMatthew Ahrens 	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
229674e7dc98SMatthew Ahrens 	err = bplist_space_birthrange(&snap->ds->ds_deadlist,
229774e7dc98SMatthew Ahrens 	    origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique);
229874e7dc98SMatthew Ahrens 	if (err)
2299745cd3c5Smaybee 		return (err);
230099653d4eSeschrock 
2301745cd3c5Smaybee 	/*
2302745cd3c5Smaybee 	 * Walk the snapshots that we are moving
2303745cd3c5Smaybee 	 *
230474e7dc98SMatthew Ahrens 	 * Compute space to transfer.  Consider the incremental changes
230574e7dc98SMatthew Ahrens 	 * to used for each snapshot:
230674e7dc98SMatthew Ahrens 	 * (my used) = (prev's used) + (blocks born) - (blocks killed)
230774e7dc98SMatthew Ahrens 	 * So each snapshot gave birth to:
230874e7dc98SMatthew Ahrens 	 * (blocks born) = (my used) - (prev's used) + (blocks killed)
2309745cd3c5Smaybee 	 * So a sequence would look like:
231074e7dc98SMatthew Ahrens 	 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
2311745cd3c5Smaybee 	 * Which simplifies to:
231274e7dc98SMatthew Ahrens 	 * uN + kN + kN-1 + ... + k1 + k0
2313745cd3c5Smaybee 	 * Note however, if we stop before we reach the ORIGIN we get:
231474e7dc98SMatthew Ahrens 	 * uN + kN + kN-1 + ... + kM - uM-1
2315745cd3c5Smaybee 	 */
2316745cd3c5Smaybee 	pa->used = origin_ds->ds_phys->ds_used_bytes;
2317745cd3c5Smaybee 	pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
2318745cd3c5Smaybee 	pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
231974e7dc98SMatthew Ahrens 	for (snap = list_head(&pa->shared_snaps); snap;
232074e7dc98SMatthew Ahrens 	    snap = list_next(&pa->shared_snaps, snap)) {
232199653d4eSeschrock 		uint64_t val, dlused, dlcomp, dluncomp;
2322745cd3c5Smaybee 		dsl_dataset_t *ds = snap->ds;
232399653d4eSeschrock 
232499653d4eSeschrock 		/* Check that the snapshot name does not conflict */
232574e7dc98SMatthew Ahrens 		VERIFY(0 == dsl_dataset_get_snapname(ds));
2326745cd3c5Smaybee 		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
2327745cd3c5Smaybee 		if (err == 0)
232874e7dc98SMatthew Ahrens 			return (EEXIST);
2329745cd3c5Smaybee 		if (err != ENOENT)
233074e7dc98SMatthew Ahrens 			return (err);
233199653d4eSeschrock 
2332745cd3c5Smaybee 		/* The very first snapshot does not have a deadlist */
233374e7dc98SMatthew Ahrens 		if (ds->ds_phys->ds_prev_snap_obj == 0)
233474e7dc98SMatthew Ahrens 			continue;
233574e7dc98SMatthew Ahrens 
233674e7dc98SMatthew Ahrens 		if (err = bplist_space(&ds->ds_deadlist,
233774e7dc98SMatthew Ahrens 		    &dlused, &dlcomp, &dluncomp))
233874e7dc98SMatthew Ahrens 			return (err);
233974e7dc98SMatthew Ahrens 		pa->used += dlused;
234074e7dc98SMatthew Ahrens 		pa->comp += dlcomp;
234174e7dc98SMatthew Ahrens 		pa->uncomp += dluncomp;
234274e7dc98SMatthew Ahrens 	}
2343745cd3c5Smaybee 
2344745cd3c5Smaybee 	/*
2345745cd3c5Smaybee 	 * If we are a clone of a clone then we never reached ORIGIN,
2346745cd3c5Smaybee 	 * so we need to subtract out the clone origin's used space.
2347745cd3c5Smaybee 	 */
234874e7dc98SMatthew Ahrens 	if (pa->origin_origin) {
234974e7dc98SMatthew Ahrens 		pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
235074e7dc98SMatthew Ahrens 		pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
235174e7dc98SMatthew Ahrens 		pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
235299653d4eSeschrock 	}
235399653d4eSeschrock 
235499653d4eSeschrock 	/* Check that there is enough space here */
235574e7dc98SMatthew Ahrens 	err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
235674e7dc98SMatthew Ahrens 	    pa->used);
235774e7dc98SMatthew Ahrens 	if (err)
235874e7dc98SMatthew Ahrens 		return (err);
235974e7dc98SMatthew Ahrens 
236074e7dc98SMatthew Ahrens 	/*
236174e7dc98SMatthew Ahrens 	 * Compute the amounts of space that will be used by snapshots
236274e7dc98SMatthew Ahrens 	 * after the promotion (for both origin and clone).  For each,
236374e7dc98SMatthew Ahrens 	 * it is the amount of space that will be on all of their
236474e7dc98SMatthew Ahrens 	 * deadlists (that was not born before their new origin).
236574e7dc98SMatthew Ahrens 	 */
236674e7dc98SMatthew Ahrens 	if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
236774e7dc98SMatthew Ahrens 		uint64_t space;
236874e7dc98SMatthew Ahrens 
236974e7dc98SMatthew Ahrens 		/*
237074e7dc98SMatthew Ahrens 		 * Note, typically this will not be a clone of a clone,
237174e7dc98SMatthew Ahrens 		 * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so
237274e7dc98SMatthew Ahrens 		 * these snaplist_space() -> bplist_space_birthrange()
237374e7dc98SMatthew Ahrens 		 * calls will be fast because they do not have to
237474e7dc98SMatthew Ahrens 		 * iterate over all bps.
237574e7dc98SMatthew Ahrens 		 */
237674e7dc98SMatthew Ahrens 		snap = list_head(&pa->origin_snaps);
237774e7dc98SMatthew Ahrens 		err = snaplist_space(&pa->shared_snaps,
237874e7dc98SMatthew Ahrens 		    snap->ds->ds_origin_txg, &pa->cloneusedsnap);
237974e7dc98SMatthew Ahrens 		if (err)
238074e7dc98SMatthew Ahrens 			return (err);
238174e7dc98SMatthew Ahrens 
238274e7dc98SMatthew Ahrens 		err = snaplist_space(&pa->clone_snaps,
238374e7dc98SMatthew Ahrens 		    snap->ds->ds_origin_txg, &space);
238474e7dc98SMatthew Ahrens 		if (err)
238574e7dc98SMatthew Ahrens 			return (err);
238674e7dc98SMatthew Ahrens 		pa->cloneusedsnap += space;
238774e7dc98SMatthew Ahrens 	}
238874e7dc98SMatthew Ahrens 	if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
238974e7dc98SMatthew Ahrens 		err = snaplist_space(&pa->origin_snaps,
239074e7dc98SMatthew Ahrens 		    origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap);
239174e7dc98SMatthew Ahrens 		if (err)
239274e7dc98SMatthew Ahrens 			return (err);
2393745cd3c5Smaybee 	}
23941d452cf5Sahrens 
239574e7dc98SMatthew Ahrens 	return (0);
23961d452cf5Sahrens }
239799653d4eSeschrock 
23981d452cf5Sahrens static void
2399ecd6cf80Smarks dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
24001d452cf5Sahrens {
24011d452cf5Sahrens 	dsl_dataset_t *hds = arg1;
24021d452cf5Sahrens 	struct promotearg *pa = arg2;
240374e7dc98SMatthew Ahrens 	struct promotenode *snap = list_head(&pa->shared_snaps);
2404745cd3c5Smaybee 	dsl_dataset_t *origin_ds = snap->ds;
240574e7dc98SMatthew Ahrens 	dsl_dataset_t *origin_head;
24061d452cf5Sahrens 	dsl_dir_t *dd = hds->ds_dir;
24071d452cf5Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
24083cb34c60Sahrens 	dsl_dir_t *odd = NULL;
2409088f3894Sahrens 	uint64_t oldnext_obj;
241074e7dc98SMatthew Ahrens 	int64_t delta;
24111d452cf5Sahrens 
24121d452cf5Sahrens 	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
24131d452cf5Sahrens 
241474e7dc98SMatthew Ahrens 	snap = list_head(&pa->origin_snaps);
241574e7dc98SMatthew Ahrens 	origin_head = snap->ds;
241674e7dc98SMatthew Ahrens 
24170b69c2f0Sahrens 	/*
24183cb34c60Sahrens 	 * We need to explicitly open odd, since origin_ds's dd will be
24190b69c2f0Sahrens 	 * changing.
24200b69c2f0Sahrens 	 */
24213cb34c60Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
24223cb34c60Sahrens 	    NULL, FTAG, &odd));
242399653d4eSeschrock 
2424745cd3c5Smaybee 	/* change origin's next snap */
2425745cd3c5Smaybee 	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
2426088f3894Sahrens 	oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj;
242774e7dc98SMatthew Ahrens 	snap = list_tail(&pa->clone_snaps);
242874e7dc98SMatthew Ahrens 	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
242974e7dc98SMatthew Ahrens 	origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object;
2430745cd3c5Smaybee 
2431088f3894Sahrens 	/* change the origin's next clone */
2432088f3894Sahrens 	if (origin_ds->ds_phys->ds_next_clones_obj) {
2433088f3894Sahrens 		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
2434088f3894Sahrens 		    origin_ds->ds_phys->ds_next_clones_obj,
243574e7dc98SMatthew Ahrens 		    origin_ds->ds_phys->ds_next_snap_obj, tx));
2436088f3894Sahrens 		VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
2437088f3894Sahrens 		    origin_ds->ds_phys->ds_next_clones_obj,
2438088f3894Sahrens 		    oldnext_obj, tx));
2439088f3894Sahrens 	}
2440088f3894Sahrens 
2441745cd3c5Smaybee 	/* change origin */
2442745cd3c5Smaybee 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
2443745cd3c5Smaybee 	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
2444745cd3c5Smaybee 	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
244574e7dc98SMatthew Ahrens 	hds->ds_origin_txg = origin_head->ds_origin_txg;
2446745cd3c5Smaybee 	dmu_buf_will_dirty(odd->dd_dbuf, tx);
2447745cd3c5Smaybee 	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
244874e7dc98SMatthew Ahrens 	origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg;
2449745cd3c5Smaybee 
245099653d4eSeschrock 	/* move snapshots to this dir */
245174e7dc98SMatthew Ahrens 	for (snap = list_head(&pa->shared_snaps); snap;
245274e7dc98SMatthew Ahrens 	    snap = list_next(&pa->shared_snaps, snap)) {
2453745cd3c5Smaybee 		dsl_dataset_t *ds = snap->ds;
245499653d4eSeschrock 
24553baa08fcSek 		/* unregister props as dsl_dir is changing */
24563baa08fcSek 		if (ds->ds_user_ptr) {
24573baa08fcSek 			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
24583baa08fcSek 			ds->ds_user_ptr = NULL;
24593baa08fcSek 		}
246099653d4eSeschrock 		/* move snap name entry */
246174e7dc98SMatthew Ahrens 		VERIFY(0 == dsl_dataset_get_snapname(ds));
246274e7dc98SMatthew Ahrens 		VERIFY(0 == dsl_dataset_snap_remove(origin_head,
2463745cd3c5Smaybee 		    ds->ds_snapname, tx));
24641d452cf5Sahrens 		VERIFY(0 == zap_add(dp->dp_meta_objset,
246599653d4eSeschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
246699653d4eSeschrock 		    8, 1, &ds->ds_object, tx));
246799653d4eSeschrock 		/* change containing dsl_dir */
246899653d4eSeschrock 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
24693cb34c60Sahrens 		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
247099653d4eSeschrock 		ds->ds_phys->ds_dir_obj = dd->dd_object;
24713cb34c60Sahrens 		ASSERT3P(ds->ds_dir, ==, odd);
247299653d4eSeschrock 		dsl_dir_close(ds->ds_dir, ds);
24731d452cf5Sahrens 		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
247499653d4eSeschrock 		    NULL, ds, &ds->ds_dir));
247599653d4eSeschrock 
247699653d4eSeschrock 		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
247774e7dc98SMatthew Ahrens 	}
247874e7dc98SMatthew Ahrens 
247974e7dc98SMatthew Ahrens 	/*
248074e7dc98SMatthew Ahrens 	 * Change space accounting.
248174e7dc98SMatthew Ahrens 	 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either
248274e7dc98SMatthew Ahrens 	 * both be valid, or both be 0 (resulting in delta == 0).  This
248374e7dc98SMatthew Ahrens 	 * is true for each of {clone,origin} independently.
248474e7dc98SMatthew Ahrens 	 */
248574e7dc98SMatthew Ahrens 
248674e7dc98SMatthew Ahrens 	delta = pa->cloneusedsnap -
248774e7dc98SMatthew Ahrens 	    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
248874e7dc98SMatthew Ahrens 	ASSERT3S(delta, >=, 0);
248974e7dc98SMatthew Ahrens 	ASSERT3U(pa->used, >=, delta);
249074e7dc98SMatthew Ahrens 	dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
249174e7dc98SMatthew Ahrens 	dsl_dir_diduse_space(dd, DD_USED_HEAD,
249274e7dc98SMatthew Ahrens 	    pa->used - delta, pa->comp, pa->uncomp, tx);
249374e7dc98SMatthew Ahrens 
249474e7dc98SMatthew Ahrens 	delta = pa->originusedsnap -
249574e7dc98SMatthew Ahrens 	    odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
249674e7dc98SMatthew Ahrens 	ASSERT3S(delta, <=, 0);
249774e7dc98SMatthew Ahrens 	ASSERT3U(pa->used, >=, -delta);
249874e7dc98SMatthew Ahrens 	dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
249974e7dc98SMatthew Ahrens 	dsl_dir_diduse_space(odd, DD_USED_HEAD,
250074e7dc98SMatthew Ahrens 	    -pa->used - delta, -pa->comp, -pa->uncomp, tx);
250199653d4eSeschrock 
25023cb34c60Sahrens 	origin_ds->ds_phys->ds_unique_bytes = pa->unique;
250399653d4eSeschrock 
2504ecd6cf80Smarks 	/* log history record */
2505ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
2506745cd3c5Smaybee 	    cr, "dataset = %llu", hds->ds_object);
2507ecd6cf80Smarks 
25083cb34c60Sahrens 	dsl_dir_close(odd, FTAG);
250999653d4eSeschrock }
251099653d4eSeschrock 
251174e7dc98SMatthew Ahrens static char *snaplist_tag = "snaplist";
251274e7dc98SMatthew Ahrens /*
251374e7dc98SMatthew Ahrens  * Make a list of dsl_dataset_t's for the snapshots between first_obj
251474e7dc98SMatthew Ahrens  * (exclusive) and last_obj (inclusive).  The list will be in reverse
251574e7dc98SMatthew Ahrens  * order (last_obj will be the list_head()).  If first_obj == 0, do all
251674e7dc98SMatthew Ahrens  * snapshots back to this dataset's origin.
251774e7dc98SMatthew Ahrens  */
251874e7dc98SMatthew Ahrens static int
251974e7dc98SMatthew Ahrens snaplist_make(dsl_pool_t *dp, boolean_t own,
252074e7dc98SMatthew Ahrens     uint64_t first_obj, uint64_t last_obj, list_t *l)
252174e7dc98SMatthew Ahrens {
252274e7dc98SMatthew Ahrens 	uint64_t obj = last_obj;
252374e7dc98SMatthew Ahrens 
252474e7dc98SMatthew Ahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock));
252574e7dc98SMatthew Ahrens 
252674e7dc98SMatthew Ahrens 	list_create(l, sizeof (struct promotenode),
252774e7dc98SMatthew Ahrens 	    offsetof(struct promotenode, link));
252874e7dc98SMatthew Ahrens 
252974e7dc98SMatthew Ahrens 	while (obj != first_obj) {
253074e7dc98SMatthew Ahrens 		dsl_dataset_t *ds;
253174e7dc98SMatthew Ahrens 		struct promotenode *snap;
253274e7dc98SMatthew Ahrens 		int err;
253374e7dc98SMatthew Ahrens 
253474e7dc98SMatthew Ahrens 		if (own) {
253574e7dc98SMatthew Ahrens 			err = dsl_dataset_own_obj(dp, obj,
253674e7dc98SMatthew Ahrens 			    0, snaplist_tag, &ds);
253774e7dc98SMatthew Ahrens 			if (err == 0)
253874e7dc98SMatthew Ahrens 				dsl_dataset_make_exclusive(ds, snaplist_tag);
253974e7dc98SMatthew Ahrens 		} else {
254074e7dc98SMatthew Ahrens 			err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds);
254174e7dc98SMatthew Ahrens 		}
254274e7dc98SMatthew Ahrens 		if (err == ENOENT) {
254374e7dc98SMatthew Ahrens 			/* lost race with snapshot destroy */
254474e7dc98SMatthew Ahrens 			struct promotenode *last = list_tail(l);
254574e7dc98SMatthew Ahrens 			ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj);
254674e7dc98SMatthew Ahrens 			obj = last->ds->ds_phys->ds_prev_snap_obj;
254774e7dc98SMatthew Ahrens 			continue;
254874e7dc98SMatthew Ahrens 		} else if (err) {
254974e7dc98SMatthew Ahrens 			return (err);
255074e7dc98SMatthew Ahrens 		}
255174e7dc98SMatthew Ahrens 
255274e7dc98SMatthew Ahrens 		if (first_obj == 0)
255374e7dc98SMatthew Ahrens 			first_obj = ds->ds_dir->dd_phys->dd_origin_obj;
255474e7dc98SMatthew Ahrens 
255574e7dc98SMatthew Ahrens 		snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP);
255674e7dc98SMatthew Ahrens 		snap->ds = ds;
255774e7dc98SMatthew Ahrens 		list_insert_tail(l, snap);
255874e7dc98SMatthew Ahrens 		obj = ds->ds_phys->ds_prev_snap_obj;
255974e7dc98SMatthew Ahrens 	}
256074e7dc98SMatthew Ahrens 
256174e7dc98SMatthew Ahrens 	return (0);
256274e7dc98SMatthew Ahrens }
256374e7dc98SMatthew Ahrens 
256474e7dc98SMatthew Ahrens static int
256574e7dc98SMatthew Ahrens snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep)
256674e7dc98SMatthew Ahrens {
256774e7dc98SMatthew Ahrens 	struct promotenode *snap;
256874e7dc98SMatthew Ahrens 
256974e7dc98SMatthew Ahrens 	*spacep = 0;
257074e7dc98SMatthew Ahrens 	for (snap = list_head(l); snap; snap = list_next(l, snap)) {
257174e7dc98SMatthew Ahrens 		uint64_t used;
257274e7dc98SMatthew Ahrens 		int err = bplist_space_birthrange(&snap->ds->ds_deadlist,
257374e7dc98SMatthew Ahrens 		    mintxg, UINT64_MAX, &used);
257474e7dc98SMatthew Ahrens 		if (err)
257574e7dc98SMatthew Ahrens 			return (err);
257674e7dc98SMatthew Ahrens 		*spacep += used;
257774e7dc98SMatthew Ahrens 	}
257874e7dc98SMatthew Ahrens 	return (0);
257974e7dc98SMatthew Ahrens }
258074e7dc98SMatthew Ahrens 
258174e7dc98SMatthew Ahrens static void
258274e7dc98SMatthew Ahrens snaplist_destroy(list_t *l, boolean_t own)
258374e7dc98SMatthew Ahrens {
258474e7dc98SMatthew Ahrens 	struct promotenode *snap;
258574e7dc98SMatthew Ahrens 
258674e7dc98SMatthew Ahrens 	if (!list_link_active(&l->list_head))
258774e7dc98SMatthew Ahrens 		return;
258874e7dc98SMatthew Ahrens 
258974e7dc98SMatthew Ahrens 	while ((snap = list_tail(l)) != NULL) {
259074e7dc98SMatthew Ahrens 		list_remove(l, snap);
259174e7dc98SMatthew Ahrens 		if (own)
259274e7dc98SMatthew Ahrens 			dsl_dataset_disown(snap->ds, snaplist_tag);
259374e7dc98SMatthew Ahrens 		else
259474e7dc98SMatthew Ahrens 			dsl_dataset_rele(snap->ds, snaplist_tag);
259574e7dc98SMatthew Ahrens 		kmem_free(snap, sizeof (struct promotenode));
259674e7dc98SMatthew Ahrens 	}
259774e7dc98SMatthew Ahrens 	list_destroy(l);
259874e7dc98SMatthew Ahrens }
259974e7dc98SMatthew Ahrens 
260074e7dc98SMatthew Ahrens /*
260174e7dc98SMatthew Ahrens  * Promote a clone.  Nomenclature note:
260274e7dc98SMatthew Ahrens  * "clone" or "cds": the original clone which is being promoted
260374e7dc98SMatthew Ahrens  * "origin" or "ods": the snapshot which is originally clone's origin
260474e7dc98SMatthew Ahrens  * "origin head" or "ohds": the dataset which is the head
260574e7dc98SMatthew Ahrens  * (filesystem/volume) for the origin
260674e7dc98SMatthew Ahrens  * "origin origin": the origin of the origin's filesystem (typically
260774e7dc98SMatthew Ahrens  * NULL, indicating that the clone is not a clone of a clone).
260874e7dc98SMatthew Ahrens  */
260999653d4eSeschrock int
261099653d4eSeschrock dsl_dataset_promote(const char *name)
261199653d4eSeschrock {
261299653d4eSeschrock 	dsl_dataset_t *ds;
2613745cd3c5Smaybee 	dsl_dir_t *dd;
2614745cd3c5Smaybee 	dsl_pool_t *dp;
261599653d4eSeschrock 	dmu_object_info_t doi;
261674e7dc98SMatthew Ahrens 	struct promotearg pa = { 0 };
2617088f3894Sahrens 	struct promotenode *snap;
2618745cd3c5Smaybee 	int err;
261999653d4eSeschrock 
2620745cd3c5Smaybee 	err = dsl_dataset_hold(name, FTAG, &ds);
262199653d4eSeschrock 	if (err)
262299653d4eSeschrock 		return (err);
2623745cd3c5Smaybee 	dd = ds->ds_dir;
2624745cd3c5Smaybee 	dp = dd->dd_pool;
262599653d4eSeschrock 
2626745cd3c5Smaybee 	err = dmu_object_info(dp->dp_meta_objset,
262799653d4eSeschrock 	    ds->ds_phys->ds_snapnames_zapobj, &doi);
262899653d4eSeschrock 	if (err) {
2629745cd3c5Smaybee 		dsl_dataset_rele(ds, FTAG);
263099653d4eSeschrock 		return (err);
263199653d4eSeschrock 	}
263299653d4eSeschrock 
263374e7dc98SMatthew Ahrens 	if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) {
263474e7dc98SMatthew Ahrens 		dsl_dataset_rele(ds, FTAG);
263574e7dc98SMatthew Ahrens 		return (EINVAL);
263674e7dc98SMatthew Ahrens 	}
263774e7dc98SMatthew Ahrens 
2638745cd3c5Smaybee 	/*
2639745cd3c5Smaybee 	 * We are going to inherit all the snapshots taken before our
2640745cd3c5Smaybee 	 * origin (i.e., our new origin will be our parent's origin).
2641745cd3c5Smaybee 	 * Take ownership of them so that we can rename them into our
2642745cd3c5Smaybee 	 * namespace.
2643745cd3c5Smaybee 	 */
2644745cd3c5Smaybee 	rw_enter(&dp->dp_config_rwlock, RW_READER);
2645088f3894Sahrens 
264674e7dc98SMatthew Ahrens 	err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj,
264774e7dc98SMatthew Ahrens 	    &pa.shared_snaps);
264874e7dc98SMatthew Ahrens 	if (err != 0)
264974e7dc98SMatthew Ahrens 		goto out;
2650088f3894Sahrens 
265174e7dc98SMatthew Ahrens 	err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps);
265274e7dc98SMatthew Ahrens 	if (err != 0)
265374e7dc98SMatthew Ahrens 		goto out;
2654088f3894Sahrens 
265574e7dc98SMatthew Ahrens 	snap = list_head(&pa.shared_snaps);
265674e7dc98SMatthew Ahrens 	ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
265774e7dc98SMatthew Ahrens 	err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
265874e7dc98SMatthew Ahrens 	    snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
265974e7dc98SMatthew Ahrens 	if (err != 0)
266074e7dc98SMatthew Ahrens 		goto out;
2661088f3894Sahrens 
266274e7dc98SMatthew Ahrens 	if (dsl_dir_is_clone(snap->ds->ds_dir)) {
266374e7dc98SMatthew Ahrens 		err = dsl_dataset_own_obj(dp,
266474e7dc98SMatthew Ahrens 		    snap->ds->ds_dir->dd_phys->dd_origin_obj,
266574e7dc98SMatthew Ahrens 		    0, FTAG, &pa.origin_origin);
266674e7dc98SMatthew Ahrens 		if (err != 0)
266774e7dc98SMatthew Ahrens 			goto out;
266874e7dc98SMatthew Ahrens 	}
2669745cd3c5Smaybee 
267074e7dc98SMatthew Ahrens out:
267174e7dc98SMatthew Ahrens 	rw_exit(&dp->dp_config_rwlock);
2672745cd3c5Smaybee 
267399653d4eSeschrock 	/*
267499653d4eSeschrock 	 * Add in 128x the snapnames zapobj size, since we will be moving
267599653d4eSeschrock 	 * a bunch of snapnames to the promoted ds, and dirtying their
267699653d4eSeschrock 	 * bonus buffers.
267799653d4eSeschrock 	 */
267874e7dc98SMatthew Ahrens 	if (err == 0) {
267974e7dc98SMatthew Ahrens 		err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
268074e7dc98SMatthew Ahrens 		    dsl_dataset_promote_sync, ds, &pa,
268174e7dc98SMatthew Ahrens 		    2 + 2 * doi.doi_physical_blks);
2682745cd3c5Smaybee 	}
268374e7dc98SMatthew Ahrens 
268474e7dc98SMatthew Ahrens 	snaplist_destroy(&pa.shared_snaps, B_TRUE);
268574e7dc98SMatthew Ahrens 	snaplist_destroy(&pa.clone_snaps, B_FALSE);
268674e7dc98SMatthew Ahrens 	snaplist_destroy(&pa.origin_snaps, B_FALSE);
268774e7dc98SMatthew Ahrens 	if (pa.origin_origin)
268874e7dc98SMatthew Ahrens 		dsl_dataset_disown(pa.origin_origin, FTAG);
2689745cd3c5Smaybee 	dsl_dataset_rele(ds, FTAG);
269099653d4eSeschrock 	return (err);
269199653d4eSeschrock }
2692b1b8ab34Slling 
26933cb34c60Sahrens struct cloneswaparg {
26943cb34c60Sahrens 	dsl_dataset_t *cds; /* clone dataset */
26953cb34c60Sahrens 	dsl_dataset_t *ohds; /* origin's head dataset */
26963cb34c60Sahrens 	boolean_t force;
2697a9b821a0Sck 	int64_t unused_refres_delta; /* change in unconsumed refreservation */
26983cb34c60Sahrens };
2699f18faf3fSek 
2700f18faf3fSek /* ARGSUSED */
2701f18faf3fSek static int
2702f18faf3fSek dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
2703f18faf3fSek {
27043cb34c60Sahrens 	struct cloneswaparg *csa = arg1;
2705f18faf3fSek 
27063cb34c60Sahrens 	/* they should both be heads */
27073cb34c60Sahrens 	if (dsl_dataset_is_snapshot(csa->cds) ||
27083cb34c60Sahrens 	    dsl_dataset_is_snapshot(csa->ohds))
2709f18faf3fSek 		return (EINVAL);
2710f18faf3fSek 
27113cb34c60Sahrens 	/* the branch point should be just before them */
27123cb34c60Sahrens 	if (csa->cds->ds_prev != csa->ohds->ds_prev)
2713f18faf3fSek 		return (EINVAL);
2714f18faf3fSek 
27153cb34c60Sahrens 	/* cds should be the clone */
27163cb34c60Sahrens 	if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj !=
27173cb34c60Sahrens 	    csa->ohds->ds_object)
27183cb34c60Sahrens 		return (EINVAL);
2719f18faf3fSek 
27203cb34c60Sahrens 	/* the clone should be a child of the origin */
27213cb34c60Sahrens 	if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
27223cb34c60Sahrens 		return (EINVAL);
2723f18faf3fSek 
27243cb34c60Sahrens 	/* ohds shouldn't be modified unless 'force' */
27253cb34c60Sahrens 	if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
27263cb34c60Sahrens 		return (ETXTBSY);
2727a9b821a0Sck 
2728a9b821a0Sck 	/* adjust amount of any unconsumed refreservation */
2729a9b821a0Sck 	csa->unused_refres_delta =
2730a9b821a0Sck 	    (int64_t)MIN(csa->ohds->ds_reserved,
2731a9b821a0Sck 	    csa->ohds->ds_phys->ds_unique_bytes) -
2732a9b821a0Sck 	    (int64_t)MIN(csa->ohds->ds_reserved,
2733a9b821a0Sck 	    csa->cds->ds_phys->ds_unique_bytes);
2734a9b821a0Sck 
2735a9b821a0Sck 	if (csa->unused_refres_delta > 0 &&
2736a9b821a0Sck 	    csa->unused_refres_delta >
2737a9b821a0Sck 	    dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE))
2738a9b821a0Sck 		return (ENOSPC);
2739a9b821a0Sck 
27403cb34c60Sahrens 	return (0);
2741f18faf3fSek }
2742f18faf3fSek 
2743f18faf3fSek /* ARGSUSED */
2744f18faf3fSek static void
2745f18faf3fSek dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
2746f18faf3fSek {
27473cb34c60Sahrens 	struct cloneswaparg *csa = arg1;
27483cb34c60Sahrens 	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
2749f18faf3fSek 
2750a9b821a0Sck 	ASSERT(csa->cds->ds_reserved == 0);
2751a9b821a0Sck 	ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota);
2752a9b821a0Sck 
27533cb34c60Sahrens 	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
27543cb34c60Sahrens 	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
27553cb34c60Sahrens 	dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx);
2756f18faf3fSek 
27573cb34c60Sahrens 	if (csa->cds->ds_user_ptr != NULL) {
27583cb34c60Sahrens 		csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr);
27593cb34c60Sahrens 		csa->cds->ds_user_ptr = NULL;
27603cb34c60Sahrens 	}
2761f18faf3fSek 
27623cb34c60Sahrens 	if (csa->ohds->ds_user_ptr != NULL) {
27633cb34c60Sahrens 		csa->ohds->ds_user_evict_func(csa->ohds,
27643cb34c60Sahrens 		    csa->ohds->ds_user_ptr);
27653cb34c60Sahrens 		csa->ohds->ds_user_ptr = NULL;
27663cb34c60Sahrens 	}
2767f18faf3fSek 
2768f18faf3fSek 	/* reset origin's unique bytes */
276974e7dc98SMatthew Ahrens 	VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist,
277074e7dc98SMatthew Ahrens 	    csa->cds->ds_prev->ds_phys->ds_prev_snap_txg, UINT64_MAX,
277174e7dc98SMatthew Ahrens 	    &csa->cds->ds_prev->ds_phys->ds_unique_bytes));
2772f18faf3fSek 
2773f18faf3fSek 	/* swap blkptrs */
2774f18faf3fSek 	{
2775f18faf3fSek 		blkptr_t tmp;
27763cb34c60Sahrens 		tmp = csa->ohds->ds_phys->ds_bp;
27773cb34c60Sahrens 		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
27783cb34c60Sahrens 		csa->cds->ds_phys->ds_bp = tmp;
2779f18faf3fSek 	}
2780f18faf3fSek 
2781f18faf3fSek 	/* set dd_*_bytes */
2782f18faf3fSek 	{
2783f18faf3fSek 		int64_t dused, dcomp, duncomp;
2784f18faf3fSek 		uint64_t cdl_used, cdl_comp, cdl_uncomp;
2785f18faf3fSek 		uint64_t odl_used, odl_comp, odl_uncomp;
2786f18faf3fSek 
278774e7dc98SMatthew Ahrens 		ASSERT3U(csa->cds->ds_dir->dd_phys->
278874e7dc98SMatthew Ahrens 		    dd_used_breakdown[DD_USED_SNAP], ==, 0);
278974e7dc98SMatthew Ahrens 
27903cb34c60Sahrens 		VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used,
2791f18faf3fSek 		    &cdl_comp, &cdl_uncomp));
27923cb34c60Sahrens 		VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used,
2793f18faf3fSek 		    &odl_comp, &odl_uncomp));
279474e7dc98SMatthew Ahrens 
27953cb34c60Sahrens 		dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
27963cb34c60Sahrens 		    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
27973cb34c60Sahrens 		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
27983cb34c60Sahrens 		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
27993cb34c60Sahrens 		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
28003cb34c60Sahrens 		    cdl_uncomp -
28013cb34c60Sahrens 		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
28023cb34c60Sahrens 
280374e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
28043cb34c60Sahrens 		    dused, dcomp, duncomp, tx);
280574e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
28063cb34c60Sahrens 		    -dused, -dcomp, -duncomp, tx);
280774e7dc98SMatthew Ahrens 
280874e7dc98SMatthew Ahrens 		/*
280974e7dc98SMatthew Ahrens 		 * The difference in the space used by snapshots is the
281074e7dc98SMatthew Ahrens 		 * difference in snapshot space due to the head's
281174e7dc98SMatthew Ahrens 		 * deadlist (since that's the only thing that's
281274e7dc98SMatthew Ahrens 		 * changing that affects the snapused).
281374e7dc98SMatthew Ahrens 		 */
281474e7dc98SMatthew Ahrens 		VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist,
281574e7dc98SMatthew Ahrens 		    csa->ohds->ds_origin_txg, UINT64_MAX, &cdl_used));
281674e7dc98SMatthew Ahrens 		VERIFY(0 == bplist_space_birthrange(&csa->ohds->ds_deadlist,
281774e7dc98SMatthew Ahrens 		    csa->ohds->ds_origin_txg, UINT64_MAX, &odl_used));
281874e7dc98SMatthew Ahrens 		dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
281974e7dc98SMatthew Ahrens 		    DD_USED_HEAD, DD_USED_SNAP, tx);
28203cb34c60Sahrens 	}
28213cb34c60Sahrens 
28223cb34c60Sahrens #define	SWITCH64(x, y) \
28233cb34c60Sahrens 	{ \
28243cb34c60Sahrens 		uint64_t __tmp = (x); \
28253cb34c60Sahrens 		(x) = (y); \
28263cb34c60Sahrens 		(y) = __tmp; \
2827f18faf3fSek 	}
2828f18faf3fSek 
2829f18faf3fSek 	/* swap ds_*_bytes */
28303cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
28313cb34c60Sahrens 	    csa->cds->ds_phys->ds_used_bytes);
28323cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
28333cb34c60Sahrens 	    csa->cds->ds_phys->ds_compressed_bytes);
28343cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
28353cb34c60Sahrens 	    csa->cds->ds_phys->ds_uncompressed_bytes);
2836a9b821a0Sck 	SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
2837a9b821a0Sck 	    csa->cds->ds_phys->ds_unique_bytes);
2838a9b821a0Sck 
2839a9b821a0Sck 	/* apply any parent delta for change in unconsumed refreservation */
284074e7dc98SMatthew Ahrens 	dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
284174e7dc98SMatthew Ahrens 	    csa->unused_refres_delta, 0, 0, tx);
2842f18faf3fSek 
2843f18faf3fSek 	/* swap deadlists */
28443cb34c60Sahrens 	bplist_close(&csa->cds->ds_deadlist);
28453cb34c60Sahrens 	bplist_close(&csa->ohds->ds_deadlist);
28463cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
28473cb34c60Sahrens 	    csa->cds->ds_phys->ds_deadlist_obj);
28483cb34c60Sahrens 	VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
28493cb34c60Sahrens 	    csa->cds->ds_phys->ds_deadlist_obj));
28503cb34c60Sahrens 	VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
28513cb34c60Sahrens 	    csa->ohds->ds_phys->ds_deadlist_obj));
285288b7b0f2SMatthew Ahrens 
285388b7b0f2SMatthew Ahrens 	dsl_pool_ds_clone_swapped(csa->ohds, csa->cds, tx);
2854f18faf3fSek }
2855f18faf3fSek 
2856f18faf3fSek /*
2857745cd3c5Smaybee  * Swap 'clone' with its origin head file system.  Used at the end
2858745cd3c5Smaybee  * of "online recv" to swizzle the file system to the new version.
2859f18faf3fSek  */
2860f18faf3fSek int
28613cb34c60Sahrens dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
28623cb34c60Sahrens     boolean_t force)
2863f18faf3fSek {
28643cb34c60Sahrens 	struct cloneswaparg csa;
2865745cd3c5Smaybee 	int error;
2866f18faf3fSek 
2867745cd3c5Smaybee 	ASSERT(clone->ds_owner);
2868745cd3c5Smaybee 	ASSERT(origin_head->ds_owner);
2869745cd3c5Smaybee retry:
2870745cd3c5Smaybee 	/* Need exclusive access for the swap */
2871745cd3c5Smaybee 	rw_enter(&clone->ds_rwlock, RW_WRITER);
2872745cd3c5Smaybee 	if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
2873745cd3c5Smaybee 		rw_exit(&clone->ds_rwlock);
2874745cd3c5Smaybee 		rw_enter(&origin_head->ds_rwlock, RW_WRITER);
2875745cd3c5Smaybee 		if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
2876745cd3c5Smaybee 			rw_exit(&origin_head->ds_rwlock);
2877745cd3c5Smaybee 			goto retry;
2878745cd3c5Smaybee 		}
2879745cd3c5Smaybee 	}
28803cb34c60Sahrens 	csa.cds = clone;
28813cb34c60Sahrens 	csa.ohds = origin_head;
28823cb34c60Sahrens 	csa.force = force;
2883745cd3c5Smaybee 	error = dsl_sync_task_do(clone->ds_dir->dd_pool,
2884f18faf3fSek 	    dsl_dataset_clone_swap_check,
2885745cd3c5Smaybee 	    dsl_dataset_clone_swap_sync, &csa, NULL, 9);
2886745cd3c5Smaybee 	return (error);
2887f18faf3fSek }
2888f18faf3fSek 
2889b1b8ab34Slling /*
2890b1b8ab34Slling  * Given a pool name and a dataset object number in that pool,
2891b1b8ab34Slling  * return the name of that dataset.
2892b1b8ab34Slling  */
2893b1b8ab34Slling int
2894b1b8ab34Slling dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
2895b1b8ab34Slling {
2896b1b8ab34Slling 	spa_t *spa;
2897b1b8ab34Slling 	dsl_pool_t *dp;
2898745cd3c5Smaybee 	dsl_dataset_t *ds;
2899b1b8ab34Slling 	int error;
2900b1b8ab34Slling 
2901b1b8ab34Slling 	if ((error = spa_open(pname, &spa, FTAG)) != 0)
2902b1b8ab34Slling 		return (error);
2903b1b8ab34Slling 	dp = spa_get_dsl(spa);
2904b1b8ab34Slling 	rw_enter(&dp->dp_config_rwlock, RW_READER);
2905745cd3c5Smaybee 	if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) {
2906745cd3c5Smaybee 		dsl_dataset_name(ds, buf);
2907745cd3c5Smaybee 		dsl_dataset_rele(ds, FTAG);
2908b1b8ab34Slling 	}
2909b1b8ab34Slling 	rw_exit(&dp->dp_config_rwlock);
2910b1b8ab34Slling 	spa_close(spa, FTAG);
2911b1b8ab34Slling 
2912745cd3c5Smaybee 	return (error);
2913b1b8ab34Slling }
2914a9799022Sck 
2915a9799022Sck int
2916a9799022Sck dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
2917745cd3c5Smaybee     uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv)
2918a9799022Sck {
2919a9799022Sck 	int error = 0;
2920a9799022Sck 
2921a9799022Sck 	ASSERT3S(asize, >, 0);
2922a9799022Sck 
29239082849eSck 	/*
29249082849eSck 	 * *ref_rsrv is the portion of asize that will come from any
29259082849eSck 	 * unconsumed refreservation space.
29269082849eSck 	 */
29279082849eSck 	*ref_rsrv = 0;
29289082849eSck 
2929a9799022Sck 	mutex_enter(&ds->ds_lock);
2930a9799022Sck 	/*
2931a9799022Sck 	 * Make a space adjustment for reserved bytes.
2932a9799022Sck 	 */
2933a9799022Sck 	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
2934a9799022Sck 		ASSERT3U(*used, >=,
2935a9799022Sck 		    ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
2936a9799022Sck 		*used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
29379082849eSck 		*ref_rsrv =
29389082849eSck 		    asize - MIN(asize, parent_delta(ds, asize + inflight));
2939a9799022Sck 	}
2940a9799022Sck 
2941a9799022Sck 	if (!check_quota || ds->ds_quota == 0) {
2942a9799022Sck 		mutex_exit(&ds->ds_lock);
2943a9799022Sck 		return (0);
2944a9799022Sck 	}
2945a9799022Sck 	/*
2946a9799022Sck 	 * If they are requesting more space, and our current estimate
2947a9799022Sck 	 * is over quota, they get to try again unless the actual
2948a9799022Sck 	 * on-disk is over quota and there are no pending changes (which
2949a9799022Sck 	 * may free up space for us).
2950a9799022Sck 	 */
2951a9799022Sck 	if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
2952a9799022Sck 		if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
2953a9799022Sck 			error = ERESTART;
2954a9799022Sck 		else
2955a9799022Sck 			error = EDQUOT;
2956a9799022Sck 	}
2957a9799022Sck 	mutex_exit(&ds->ds_lock);
2958a9799022Sck 
2959a9799022Sck 	return (error);
2960a9799022Sck }
2961a9799022Sck 
2962a9799022Sck /* ARGSUSED */
2963a9799022Sck static int
2964a9799022Sck dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
2965a9799022Sck {
2966a9799022Sck 	dsl_dataset_t *ds = arg1;
2967a9799022Sck 	uint64_t *quotap = arg2;
2968a9799022Sck 	uint64_t new_quota = *quotap;
2969a9799022Sck 
2970a9799022Sck 	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
2971a9799022Sck 		return (ENOTSUP);
2972a9799022Sck 
2973a9799022Sck 	if (new_quota == 0)
2974a9799022Sck 		return (0);
2975a9799022Sck 
2976a9799022Sck 	if (new_quota < ds->ds_phys->ds_used_bytes ||
2977a9799022Sck 	    new_quota < ds->ds_reserved)
2978a9799022Sck 		return (ENOSPC);
2979a9799022Sck 
2980a9799022Sck 	return (0);
2981a9799022Sck }
2982a9799022Sck 
2983a9799022Sck /* ARGSUSED */
2984a9799022Sck void
2985a9799022Sck dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
2986a9799022Sck {
2987a9799022Sck 	dsl_dataset_t *ds = arg1;
2988a9799022Sck 	uint64_t *quotap = arg2;
2989a9799022Sck 	uint64_t new_quota = *quotap;
2990a9799022Sck 
2991a9799022Sck 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
2992a9799022Sck 
2993a9799022Sck 	ds->ds_quota = new_quota;
2994a9799022Sck 
2995a9799022Sck 	dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);
2996a9799022Sck 
2997a9799022Sck 	spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa,
2998a9799022Sck 	    tx, cr, "%lld dataset = %llu ",
2999745cd3c5Smaybee 	    (longlong_t)new_quota, ds->ds_object);
3000a9799022Sck }
3001a9799022Sck 
3002a9799022Sck int
3003a9799022Sck dsl_dataset_set_quota(const char *dsname, uint64_t quota)
3004a9799022Sck {
3005a9799022Sck 	dsl_dataset_t *ds;
3006a9799022Sck 	int err;
3007a9799022Sck 
3008745cd3c5Smaybee 	err = dsl_dataset_hold(dsname, FTAG, &ds);
3009a9799022Sck 	if (err)
3010a9799022Sck 		return (err);
3011a9799022Sck 
3012a9b821a0Sck 	if (quota != ds->ds_quota) {
3013a9b821a0Sck 		/*
3014a9b821a0Sck 		 * If someone removes a file, then tries to set the quota, we
3015a9b821a0Sck 		 * want to make sure the file freeing takes effect.
3016a9b821a0Sck 		 */
3017a9b821a0Sck 		txg_wait_open(ds->ds_dir->dd_pool, 0);
3018a9799022Sck 
3019a9b821a0Sck 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
3020a9b821a0Sck 		    dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync,
3021a9b821a0Sck 		    ds, &quota, 0);
3022a9b821a0Sck 	}
3023745cd3c5Smaybee 	dsl_dataset_rele(ds, FTAG);
3024a9799022Sck 	return (err);
3025a9799022Sck }
3026a9799022Sck 
3027a9799022Sck static int
3028a9799022Sck dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
3029a9799022Sck {
3030a9799022Sck 	dsl_dataset_t *ds = arg1;
3031a9799022Sck 	uint64_t *reservationp = arg2;
3032a9799022Sck 	uint64_t new_reservation = *reservationp;
3033a9799022Sck 	uint64_t unique;
3034a9799022Sck 
3035a9799022Sck 	if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
3036a9799022Sck 	    SPA_VERSION_REFRESERVATION)
3037a9799022Sck 		return (ENOTSUP);
3038a9799022Sck 
3039a9799022Sck 	if (dsl_dataset_is_snapshot(ds))
3040a9799022Sck 		return (EINVAL);
3041a9799022Sck 
3042a9799022Sck 	/*
3043a9799022Sck 	 * If we are doing the preliminary check in open context, the
3044a9799022Sck 	 * space estimates may be inaccurate.
3045a9799022Sck 	 */
3046a9799022Sck 	if (!dmu_tx_is_syncing(tx))
3047a9799022Sck 		return (0);
3048a9799022Sck 
3049a9799022Sck 	mutex_enter(&ds->ds_lock);
3050a9799022Sck 	unique = dsl_dataset_unique(ds);
3051a9799022Sck 	mutex_exit(&ds->ds_lock);
3052a9799022Sck 
3053*379c004dSEric Schrock 	if (MAX(unique, new_reservation) > MAX(unique, ds->ds_reserved)) {
3054*379c004dSEric Schrock 		uint64_t delta = MAX(unique, new_reservation) -
3055*379c004dSEric Schrock 		    MAX(unique, ds->ds_reserved);
3056*379c004dSEric Schrock 
3057*379c004dSEric Schrock 		if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
3058*379c004dSEric Schrock 			return (ENOSPC);
3059*379c004dSEric Schrock 		if (ds->ds_quota > 0 &&
3060*379c004dSEric Schrock 		    new_reservation > ds->ds_quota)
3061*379c004dSEric Schrock 			return (ENOSPC);
3062*379c004dSEric Schrock 	}
3063a9799022Sck 
3064a9799022Sck 	return (0);
3065a9799022Sck }
3066a9799022Sck 
3067a9799022Sck /* ARGSUSED */
3068a9799022Sck static void
3069a9799022Sck dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr,
3070a9799022Sck     dmu_tx_t *tx)
3071a9799022Sck {
3072a9799022Sck 	dsl_dataset_t *ds = arg1;
3073a9799022Sck 	uint64_t *reservationp = arg2;
3074a9799022Sck 	uint64_t new_reservation = *reservationp;
307502c8f3f0SMatthew Ahrens 	uint64_t unique;
307602c8f3f0SMatthew Ahrens 	int64_t delta;
307702c8f3f0SMatthew Ahrens 
307802c8f3f0SMatthew Ahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
307902c8f3f0SMatthew Ahrens 
308002c8f3f0SMatthew Ahrens 	mutex_enter(&ds->ds_dir->dd_lock);
308102c8f3f0SMatthew Ahrens 	mutex_enter(&ds->ds_lock);
308202c8f3f0SMatthew Ahrens 	unique = dsl_dataset_unique(ds);
308302c8f3f0SMatthew Ahrens 	delta = MAX(0, (int64_t)(new_reservation - unique)) -
308402c8f3f0SMatthew Ahrens 	    MAX(0, (int64_t)(ds->ds_reserved - unique));
308502c8f3f0SMatthew Ahrens 	ds->ds_reserved = new_reservation;
308602c8f3f0SMatthew Ahrens 	mutex_exit(&ds->ds_lock);
308702c8f3f0SMatthew Ahrens 
308802c8f3f0SMatthew Ahrens 	dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
308902c8f3f0SMatthew Ahrens 	mutex_exit(&ds->ds_dir->dd_lock);
309002c8f3f0SMatthew Ahrens 	dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation",
309102c8f3f0SMatthew Ahrens 	    new_reservation, cr, tx);
3092a9799022Sck 
309302c8f3f0SMatthew Ahrens 	spa_history_internal_log(LOG_DS_REFRESERV,
309402c8f3f0SMatthew Ahrens 	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu",
309502c8f3f0SMatthew Ahrens 	    (longlong_t)new_reservation, ds->ds_object);
3096a9799022Sck }
3097a9799022Sck 
3098a9799022Sck int
3099a9799022Sck dsl_dataset_set_reservation(const char *dsname, uint64_t reservation)
3100a9799022Sck {
3101a9799022Sck 	dsl_dataset_t *ds;
3102a9799022Sck 	int err;
3103a9799022Sck 
3104745cd3c5Smaybee 	err = dsl_dataset_hold(dsname, FTAG, &ds);
3105a9799022Sck 	if (err)
3106a9799022Sck 		return (err);
3107a9799022Sck 
3108a9799022Sck 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
3109a9799022Sck 	    dsl_dataset_set_reservation_check,
3110a9799022Sck 	    dsl_dataset_set_reservation_sync, ds, &reservation, 0);
3111745cd3c5Smaybee 	dsl_dataset_rele(ds, FTAG);
3112a9799022Sck 	return (err);
3113a9799022Sck }
3114