xref: /illumos-gate/usr/src/uts/common/fs/zfs/dsl_dataset.c (revision 745cd3c5371d020efae7a911c58c526aa1fd0dba)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
229082849eSck  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23fa9e4066Sahrens  * Use is subject to license terms.
24fa9e4066Sahrens  */
25fa9e4066Sahrens 
26fa9e4066Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27fa9e4066Sahrens 
28fa9e4066Sahrens #include <sys/dmu_objset.h>
29fa9e4066Sahrens #include <sys/dsl_dataset.h>
30fa9e4066Sahrens #include <sys/dsl_dir.h>
3199653d4eSeschrock #include <sys/dsl_prop.h>
321d452cf5Sahrens #include <sys/dsl_synctask.h>
33fa9e4066Sahrens #include <sys/dmu_traverse.h>
34fa9e4066Sahrens #include <sys/dmu_tx.h>
35fa9e4066Sahrens #include <sys/arc.h>
36fa9e4066Sahrens #include <sys/zio.h>
37fa9e4066Sahrens #include <sys/zap.h>
38fa9e4066Sahrens #include <sys/unique.h>
39fa9e4066Sahrens #include <sys/zfs_context.h>
40cdf5b4caSmmusante #include <sys/zfs_ioctl.h>
41ecd6cf80Smarks #include <sys/spa.h>
42ecd6cf80Smarks #include <sys/sunddi.h>
43fa9e4066Sahrens 
44*745cd3c5Smaybee static char *dsl_reaper = "the grim reaper";
45*745cd3c5Smaybee 
461d452cf5Sahrens static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
471d452cf5Sahrens static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
481d452cf5Sahrens static dsl_checkfunc_t dsl_dataset_rollback_check;
491d452cf5Sahrens static dsl_syncfunc_t dsl_dataset_rollback_sync;
50a9799022Sck static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
51e1930233Sbonwick 
5255434c77Sek #define	DS_REF_MAX	(1ULL << 62)
53fa9e4066Sahrens 
54fa9e4066Sahrens #define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
55fa9e4066Sahrens 
56*745cd3c5Smaybee #define	DSL_DATASET_IS_DESTROYED(ds)	((ds)->ds_owner == dsl_reaper)
57*745cd3c5Smaybee 
58*745cd3c5Smaybee static void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
59fa9e4066Sahrens 
60a9799022Sck /*
61a9799022Sck  * Figure out how much of this delta should be propogated to the dsl_dir
62a9799022Sck  * layer.  If there's a refreservation, that space has already been
63a9799022Sck  * partially accounted for in our ancestors.
64a9799022Sck  */
65a9799022Sck static int64_t
66a9799022Sck parent_delta(dsl_dataset_t *ds, int64_t delta)
67a9799022Sck {
68a9799022Sck 	uint64_t old_bytes, new_bytes;
69a9799022Sck 
70a9799022Sck 	if (ds->ds_reserved == 0)
71a9799022Sck 		return (delta);
72a9799022Sck 
73a9799022Sck 	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
74a9799022Sck 	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
75a9799022Sck 
76a9799022Sck 	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
77a9799022Sck 	return (new_bytes - old_bytes);
78a9799022Sck }
79fa9e4066Sahrens 
80fa9e4066Sahrens void
81fa9e4066Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
82fa9e4066Sahrens {
8399653d4eSeschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
84fa9e4066Sahrens 	int compressed = BP_GET_PSIZE(bp);
85fa9e4066Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
86a9799022Sck 	int64_t delta;
87fa9e4066Sahrens 
88fa9e4066Sahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
89fa9e4066Sahrens 
90fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
91fa9e4066Sahrens 	/* It could have been compressed away to nothing */
92fa9e4066Sahrens 	if (BP_IS_HOLE(bp))
93fa9e4066Sahrens 		return;
94fa9e4066Sahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
95fa9e4066Sahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
96fa9e4066Sahrens 	if (ds == NULL) {
97fa9e4066Sahrens 		/*
98fa9e4066Sahrens 		 * Account for the meta-objset space in its placeholder
99fa9e4066Sahrens 		 * dsl_dir.
100fa9e4066Sahrens 		 */
101fa9e4066Sahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
102fa9e4066Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
103fa9e4066Sahrens 		    used, compressed, uncompressed, tx);
104fa9e4066Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
105fa9e4066Sahrens 		return;
106fa9e4066Sahrens 	}
107fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
108fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
109a9799022Sck 	delta = parent_delta(ds, used);
110fa9e4066Sahrens 	ds->ds_phys->ds_used_bytes += used;
111fa9e4066Sahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
112fa9e4066Sahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
113fa9e4066Sahrens 	ds->ds_phys->ds_unique_bytes += used;
114fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
115a9799022Sck 	dsl_dir_diduse_space(ds->ds_dir, delta, compressed, uncompressed, tx);
116fa9e4066Sahrens }
117fa9e4066Sahrens 
118fa9e4066Sahrens void
119c717a561Smaybee dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
120c717a561Smaybee     dmu_tx_t *tx)
121fa9e4066Sahrens {
12299653d4eSeschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
123fa9e4066Sahrens 	int compressed = BP_GET_PSIZE(bp);
124fa9e4066Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
125fa9e4066Sahrens 
126fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
127c717a561Smaybee 	/* No block pointer => nothing to free */
128fa9e4066Sahrens 	if (BP_IS_HOLE(bp))
129fa9e4066Sahrens 		return;
130fa9e4066Sahrens 
131fa9e4066Sahrens 	ASSERT(used > 0);
132fa9e4066Sahrens 	if (ds == NULL) {
133c717a561Smaybee 		int err;
134fa9e4066Sahrens 		/*
135fa9e4066Sahrens 		 * Account for the meta-objset space in its placeholder
136fa9e4066Sahrens 		 * dataset.
137fa9e4066Sahrens 		 */
138c717a561Smaybee 		err = arc_free(pio, tx->tx_pool->dp_spa,
139c717a561Smaybee 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
140c717a561Smaybee 		ASSERT(err == 0);
141fa9e4066Sahrens 
142fa9e4066Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
143fa9e4066Sahrens 		    -used, -compressed, -uncompressed, tx);
144fa9e4066Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
145fa9e4066Sahrens 		return;
146fa9e4066Sahrens 	}
147fa9e4066Sahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
148fa9e4066Sahrens 
149fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
150fa9e4066Sahrens 
151fa9e4066Sahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
152c717a561Smaybee 		int err;
153a9799022Sck 		int64_t delta;
154c717a561Smaybee 
155fa9e4066Sahrens 		dprintf_bp(bp, "freeing: %s", "");
156c717a561Smaybee 		err = arc_free(pio, tx->tx_pool->dp_spa,
157c717a561Smaybee 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
158c717a561Smaybee 		ASSERT(err == 0);
159fa9e4066Sahrens 
160fa9e4066Sahrens 		mutex_enter(&ds->ds_lock);
161a9799022Sck 		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
162a9799022Sck 		    !DS_UNIQUE_IS_ACCURATE(ds));
163a9799022Sck 		delta = parent_delta(ds, -used);
164fa9e4066Sahrens 		ds->ds_phys->ds_unique_bytes -= used;
165fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
166fa9e4066Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
167a9799022Sck 		    delta, -compressed, -uncompressed, tx);
168fa9e4066Sahrens 	} else {
169fa9e4066Sahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
170ea8dc4b6Seschrock 		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
171a4611edeSahrens 		ASSERT3U(ds->ds_prev->ds_object, ==,
172a4611edeSahrens 		    ds->ds_phys->ds_prev_snap_obj);
173a4611edeSahrens 		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
174fa9e4066Sahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
175a4611edeSahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
176a4611edeSahrens 		    ds->ds_object && bp->blk_birth >
177a4611edeSahrens 		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
178a4611edeSahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
179a4611edeSahrens 			mutex_enter(&ds->ds_prev->ds_lock);
180a4611edeSahrens 			ds->ds_prev->ds_phys->ds_unique_bytes += used;
181a4611edeSahrens 			mutex_exit(&ds->ds_prev->ds_lock);
182fa9e4066Sahrens 		}
183fa9e4066Sahrens 	}
184fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
185fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
186fa9e4066Sahrens 	ds->ds_phys->ds_used_bytes -= used;
187fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
188fa9e4066Sahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
189fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
190fa9e4066Sahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
191fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
192fa9e4066Sahrens }
193fa9e4066Sahrens 
194ea8dc4b6Seschrock uint64_t
195ea8dc4b6Seschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
196fa9e4066Sahrens {
197a2eea2e1Sahrens 	uint64_t trysnap = 0;
198a2eea2e1Sahrens 
199fa9e4066Sahrens 	if (ds == NULL)
200ea8dc4b6Seschrock 		return (0);
201fa9e4066Sahrens 	/*
202fa9e4066Sahrens 	 * The snapshot creation could fail, but that would cause an
203fa9e4066Sahrens 	 * incorrect FALSE return, which would only result in an
204fa9e4066Sahrens 	 * overestimation of the amount of space that an operation would
205fa9e4066Sahrens 	 * consume, which is OK.
206fa9e4066Sahrens 	 *
207fa9e4066Sahrens 	 * There's also a small window where we could miss a pending
208fa9e4066Sahrens 	 * snapshot, because we could set the sync task in the quiescing
209fa9e4066Sahrens 	 * phase.  So this should only be used as a guess.
210fa9e4066Sahrens 	 */
211a2eea2e1Sahrens 	if (ds->ds_trysnap_txg >
212a2eea2e1Sahrens 	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
213a2eea2e1Sahrens 		trysnap = ds->ds_trysnap_txg;
214a2eea2e1Sahrens 	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
215ea8dc4b6Seschrock }
216ea8dc4b6Seschrock 
217ea8dc4b6Seschrock int
218ea8dc4b6Seschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
219ea8dc4b6Seschrock {
220ea8dc4b6Seschrock 	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
221fa9e4066Sahrens }
222fa9e4066Sahrens 
223fa9e4066Sahrens /* ARGSUSED */
224fa9e4066Sahrens static void
225fa9e4066Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
226fa9e4066Sahrens {
227fa9e4066Sahrens 	dsl_dataset_t *ds = dsv;
228fa9e4066Sahrens 
229*745cd3c5Smaybee 	ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));
230fa9e4066Sahrens 
231fa9e4066Sahrens 	dprintf_ds(ds, "evicting %s\n", "");
232fa9e4066Sahrens 
23391ebeef5Sahrens 	unique_remove(ds->ds_fsid_guid);
234fa9e4066Sahrens 
235fa9e4066Sahrens 	if (ds->ds_user_ptr != NULL)
236fa9e4066Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
237fa9e4066Sahrens 
238fa9e4066Sahrens 	if (ds->ds_prev) {
239*745cd3c5Smaybee 		dsl_dataset_drop_ref(ds->ds_prev, ds);
240fa9e4066Sahrens 		ds->ds_prev = NULL;
241fa9e4066Sahrens 	}
242fa9e4066Sahrens 
243fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
244*745cd3c5Smaybee 	if (ds->ds_dir)
245*745cd3c5Smaybee 		dsl_dir_close(ds->ds_dir, ds);
246fa9e4066Sahrens 
24791ebeef5Sahrens 	ASSERT(!list_link_active(&ds->ds_synced_link));
248fa9e4066Sahrens 
2495ad82045Snd 	mutex_destroy(&ds->ds_lock);
25091ebeef5Sahrens 	mutex_destroy(&ds->ds_opening_lock);
2515ad82045Snd 	mutex_destroy(&ds->ds_deadlist.bpl_lock);
252*745cd3c5Smaybee 	rw_destroy(&ds->ds_rwlock);
253*745cd3c5Smaybee 	cv_destroy(&ds->ds_exclusive_cv);
2545ad82045Snd 
255fa9e4066Sahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
256fa9e4066Sahrens }
257fa9e4066Sahrens 
258ea8dc4b6Seschrock static int
259fa9e4066Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
260fa9e4066Sahrens {
261fa9e4066Sahrens 	dsl_dataset_phys_t *headphys;
262fa9e4066Sahrens 	int err;
263fa9e4066Sahrens 	dmu_buf_t *headdbuf;
264fa9e4066Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
265fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
266fa9e4066Sahrens 
267fa9e4066Sahrens 	if (ds->ds_snapname[0])
268ea8dc4b6Seschrock 		return (0);
269fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
270ea8dc4b6Seschrock 		return (0);
271fa9e4066Sahrens 
272ea8dc4b6Seschrock 	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
273ea8dc4b6Seschrock 	    FTAG, &headdbuf);
274ea8dc4b6Seschrock 	if (err)
275ea8dc4b6Seschrock 		return (err);
276fa9e4066Sahrens 	headphys = headdbuf->db_data;
277fa9e4066Sahrens 	err = zap_value_search(dp->dp_meta_objset,
278e7437265Sahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
279ea8dc4b6Seschrock 	dmu_buf_rele(headdbuf, FTAG);
280ea8dc4b6Seschrock 	return (err);
281fa9e4066Sahrens }
282fa9e4066Sahrens 
283ab04eb8eStimh static int
284*745cd3c5Smaybee dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
285ab04eb8eStimh {
286*745cd3c5Smaybee 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
287*745cd3c5Smaybee 	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
288ab04eb8eStimh 	matchtype_t mt;
289ab04eb8eStimh 	int err;
290ab04eb8eStimh 
291*745cd3c5Smaybee 	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
292ab04eb8eStimh 		mt = MT_FIRST;
293ab04eb8eStimh 	else
294ab04eb8eStimh 		mt = MT_EXACT;
295ab04eb8eStimh 
296*745cd3c5Smaybee 	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
297ab04eb8eStimh 	    value, mt, NULL, 0, NULL);
298ab04eb8eStimh 	if (err == ENOTSUP && mt == MT_FIRST)
299*745cd3c5Smaybee 		err = zap_lookup(mos, snapobj, name, 8, 1, value);
300ab04eb8eStimh 	return (err);
301ab04eb8eStimh }
302ab04eb8eStimh 
303ab04eb8eStimh static int
304*745cd3c5Smaybee dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
305ab04eb8eStimh {
306*745cd3c5Smaybee 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
307*745cd3c5Smaybee 	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
308ab04eb8eStimh 	matchtype_t mt;
309ab04eb8eStimh 	int err;
310ab04eb8eStimh 
311*745cd3c5Smaybee 	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
312ab04eb8eStimh 		mt = MT_FIRST;
313ab04eb8eStimh 	else
314ab04eb8eStimh 		mt = MT_EXACT;
315ab04eb8eStimh 
316*745cd3c5Smaybee 	err = zap_remove_norm(mos, snapobj, name, mt, tx);
317ab04eb8eStimh 	if (err == ENOTSUP && mt == MT_FIRST)
318*745cd3c5Smaybee 		err = zap_remove(mos, snapobj, name, tx);
319ab04eb8eStimh 	return (err);
320ab04eb8eStimh }
321ab04eb8eStimh 
322*745cd3c5Smaybee static int
323*745cd3c5Smaybee dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
324*745cd3c5Smaybee     dsl_dataset_t **dsp)
325fa9e4066Sahrens {
326fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
327fa9e4066Sahrens 	dmu_buf_t *dbuf;
328fa9e4066Sahrens 	dsl_dataset_t *ds;
329ea8dc4b6Seschrock 	int err;
330fa9e4066Sahrens 
331fa9e4066Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
332fa9e4066Sahrens 	    dsl_pool_sync_context(dp));
333fa9e4066Sahrens 
334ea8dc4b6Seschrock 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
335ea8dc4b6Seschrock 	if (err)
336ea8dc4b6Seschrock 		return (err);
337fa9e4066Sahrens 	ds = dmu_buf_get_user(dbuf);
338fa9e4066Sahrens 	if (ds == NULL) {
339fa9e4066Sahrens 		dsl_dataset_t *winner;
340fa9e4066Sahrens 
341fa9e4066Sahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
342fa9e4066Sahrens 		ds->ds_dbuf = dbuf;
343fa9e4066Sahrens 		ds->ds_object = dsobj;
344fa9e4066Sahrens 		ds->ds_phys = dbuf->db_data;
345fa9e4066Sahrens 
3465ad82045Snd 		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
34791ebeef5Sahrens 		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
3485ad82045Snd 		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
3495ad82045Snd 		    NULL);
350*745cd3c5Smaybee 		rw_init(&ds->ds_rwlock, 0, 0, 0);
351*745cd3c5Smaybee 		cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
3525ad82045Snd 
353ea8dc4b6Seschrock 		err = bplist_open(&ds->ds_deadlist,
354fa9e4066Sahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
355ea8dc4b6Seschrock 		if (err == 0) {
356ea8dc4b6Seschrock 			err = dsl_dir_open_obj(dp,
357ea8dc4b6Seschrock 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
358ea8dc4b6Seschrock 		}
359ea8dc4b6Seschrock 		if (err) {
360ea8dc4b6Seschrock 			/*
361ea8dc4b6Seschrock 			 * we don't really need to close the blist if we
362ea8dc4b6Seschrock 			 * just opened it.
363ea8dc4b6Seschrock 			 */
3645ad82045Snd 			mutex_destroy(&ds->ds_lock);
36591ebeef5Sahrens 			mutex_destroy(&ds->ds_opening_lock);
3665ad82045Snd 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
367*745cd3c5Smaybee 			rw_destroy(&ds->ds_rwlock);
368*745cd3c5Smaybee 			cv_destroy(&ds->ds_exclusive_cv);
369ea8dc4b6Seschrock 			kmem_free(ds, sizeof (dsl_dataset_t));
370ea8dc4b6Seschrock 			dmu_buf_rele(dbuf, tag);
371ea8dc4b6Seschrock 			return (err);
372ea8dc4b6Seschrock 		}
373fa9e4066Sahrens 
374fa9e4066Sahrens 		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
375fa9e4066Sahrens 			ds->ds_snapname[0] = '\0';
376fa9e4066Sahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
377*745cd3c5Smaybee 				err = dsl_dataset_get_ref(dp,
378*745cd3c5Smaybee 				    ds->ds_phys->ds_prev_snap_obj,
379*745cd3c5Smaybee 				    ds, &ds->ds_prev);
380fa9e4066Sahrens 			}
381*745cd3c5Smaybee 		} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
382*745cd3c5Smaybee 			err = dsl_dataset_get_snapname(ds);
383fa9e4066Sahrens 		}
384fa9e4066Sahrens 
385cb625fb5Sck 		if (!dsl_dataset_is_snapshot(ds)) {
38627345066Sck 			/*
38727345066Sck 			 * In sync context, we're called with either no lock
38827345066Sck 			 * or with the write lock.  If we're not syncing,
38927345066Sck 			 * we're always called with the read lock held.
39027345066Sck 			 */
391cb625fb5Sck 			boolean_t need_lock =
39227345066Sck 			    !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
39327345066Sck 			    dsl_pool_sync_context(dp);
394cb625fb5Sck 
395cb625fb5Sck 			if (need_lock)
396cb625fb5Sck 				rw_enter(&dp->dp_config_rwlock, RW_READER);
397cb625fb5Sck 
398cb625fb5Sck 			err = dsl_prop_get_ds_locked(ds->ds_dir,
399cb625fb5Sck 			    "refreservation", sizeof (uint64_t), 1,
400cb625fb5Sck 			    &ds->ds_reserved, NULL);
401cb625fb5Sck 			if (err == 0) {
402cb625fb5Sck 				err = dsl_prop_get_ds_locked(ds->ds_dir,
403cb625fb5Sck 				    "refquota", sizeof (uint64_t), 1,
404cb625fb5Sck 				    &ds->ds_quota, NULL);
405cb625fb5Sck 			}
406cb625fb5Sck 
407cb625fb5Sck 			if (need_lock)
408cb625fb5Sck 				rw_exit(&dp->dp_config_rwlock);
409cb625fb5Sck 		} else {
410cb625fb5Sck 			ds->ds_reserved = ds->ds_quota = 0;
411cb625fb5Sck 		}
412cb625fb5Sck 
413ea8dc4b6Seschrock 		if (err == 0) {
414ea8dc4b6Seschrock 			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
415ea8dc4b6Seschrock 			    dsl_dataset_evict);
416ea8dc4b6Seschrock 		}
417ea8dc4b6Seschrock 		if (err || winner) {
418fa9e4066Sahrens 			bplist_close(&ds->ds_deadlist);
419*745cd3c5Smaybee 			if (ds->ds_prev)
420*745cd3c5Smaybee 				dsl_dataset_drop_ref(ds->ds_prev, ds);
421fa9e4066Sahrens 			dsl_dir_close(ds->ds_dir, ds);
4225ad82045Snd 			mutex_destroy(&ds->ds_lock);
42391ebeef5Sahrens 			mutex_destroy(&ds->ds_opening_lock);
4245ad82045Snd 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
425*745cd3c5Smaybee 			rw_destroy(&ds->ds_rwlock);
426*745cd3c5Smaybee 			cv_destroy(&ds->ds_exclusive_cv);
427fa9e4066Sahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
428ea8dc4b6Seschrock 			if (err) {
429ea8dc4b6Seschrock 				dmu_buf_rele(dbuf, tag);
430ea8dc4b6Seschrock 				return (err);
431ea8dc4b6Seschrock 			}
432fa9e4066Sahrens 			ds = winner;
433fa9e4066Sahrens 		} else {
43491ebeef5Sahrens 			ds->ds_fsid_guid =
435fa9e4066Sahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
436fa9e4066Sahrens 		}
437fa9e4066Sahrens 	}
438fa9e4066Sahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
439fa9e4066Sahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
440fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
441*745cd3c5Smaybee 	if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
442fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
443*745cd3c5Smaybee 		dmu_buf_rele(ds->ds_dbuf, tag);
444*745cd3c5Smaybee 		return (ENOENT);
445fa9e4066Sahrens 	}
446fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
447ea8dc4b6Seschrock 	*dsp = ds;
448ea8dc4b6Seschrock 	return (0);
449fa9e4066Sahrens }
450fa9e4066Sahrens 
451*745cd3c5Smaybee static int
452*745cd3c5Smaybee dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
453*745cd3c5Smaybee {
454*745cd3c5Smaybee 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
455*745cd3c5Smaybee 
456*745cd3c5Smaybee 	/*
457*745cd3c5Smaybee 	 * In syncing context we don't want the rwlock lock: there
458*745cd3c5Smaybee 	 * may be an existing writer waiting for sync phase to
459*745cd3c5Smaybee 	 * finish.  We don't need to worry about such writers, since
460*745cd3c5Smaybee 	 * sync phase is single-threaded, so the writer can't be
461*745cd3c5Smaybee 	 * doing anything while we are active.
462*745cd3c5Smaybee 	 */
463*745cd3c5Smaybee 	if (dsl_pool_sync_context(dp)) {
464*745cd3c5Smaybee 		ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
465*745cd3c5Smaybee 		return (0);
466*745cd3c5Smaybee 	}
467*745cd3c5Smaybee 
468*745cd3c5Smaybee 	/*
469*745cd3c5Smaybee 	 * Normal users will hold the ds_rwlock as a READER until they
470*745cd3c5Smaybee 	 * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
471*745cd3c5Smaybee 	 * drop their READER lock after they set the ds_owner field.
472*745cd3c5Smaybee 	 *
473*745cd3c5Smaybee 	 * If the dataset is being destroyed, the destroy thread will
474*745cd3c5Smaybee 	 * obtain a WRITER lock for exclusive access after it's done its
475*745cd3c5Smaybee 	 * open-context work and then change the ds_owner to
476*745cd3c5Smaybee 	 * dsl_reaper once destruction is assured.  So threads
477*745cd3c5Smaybee 	 * may block here temporarily, until the "destructability" of
478*745cd3c5Smaybee 	 * the dataset is determined.
479*745cd3c5Smaybee 	 */
480*745cd3c5Smaybee 	ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
481*745cd3c5Smaybee 	mutex_enter(&ds->ds_lock);
482*745cd3c5Smaybee 	while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
483*745cd3c5Smaybee 		rw_exit(&dp->dp_config_rwlock);
484*745cd3c5Smaybee 		cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
485*745cd3c5Smaybee 		if (DSL_DATASET_IS_DESTROYED(ds)) {
486*745cd3c5Smaybee 			mutex_exit(&ds->ds_lock);
487*745cd3c5Smaybee 			dsl_dataset_drop_ref(ds, tag);
488*745cd3c5Smaybee 			rw_enter(&dp->dp_config_rwlock, RW_READER);
489*745cd3c5Smaybee 			return (ENOENT);
490*745cd3c5Smaybee 		}
491*745cd3c5Smaybee 		rw_enter(&dp->dp_config_rwlock, RW_READER);
492*745cd3c5Smaybee 	}
493*745cd3c5Smaybee 	mutex_exit(&ds->ds_lock);
494*745cd3c5Smaybee 	return (0);
495*745cd3c5Smaybee }
496*745cd3c5Smaybee 
497*745cd3c5Smaybee int
498*745cd3c5Smaybee dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
499*745cd3c5Smaybee     dsl_dataset_t **dsp)
500*745cd3c5Smaybee {
501*745cd3c5Smaybee 	int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);
502*745cd3c5Smaybee 
503*745cd3c5Smaybee 	if (err)
504*745cd3c5Smaybee 		return (err);
505*745cd3c5Smaybee 	return (dsl_dataset_hold_ref(*dsp, tag));
506*745cd3c5Smaybee }
507*745cd3c5Smaybee 
508*745cd3c5Smaybee int
509*745cd3c5Smaybee dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner,
510*745cd3c5Smaybee     dsl_dataset_t **dsp)
511*745cd3c5Smaybee {
512*745cd3c5Smaybee 	int err = dsl_dataset_hold_obj(dp, dsobj, owner, dsp);
513*745cd3c5Smaybee 
514*745cd3c5Smaybee 	ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER);
515*745cd3c5Smaybee 
516*745cd3c5Smaybee 	if (err)
517*745cd3c5Smaybee 		return (err);
518*745cd3c5Smaybee 	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
519*745cd3c5Smaybee 		dsl_dataset_rele(*dsp, owner);
520*745cd3c5Smaybee 		return (EBUSY);
521*745cd3c5Smaybee 	}
522*745cd3c5Smaybee 	return (0);
523*745cd3c5Smaybee }
524*745cd3c5Smaybee 
525fa9e4066Sahrens int
526*745cd3c5Smaybee dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
527fa9e4066Sahrens {
528fa9e4066Sahrens 	dsl_dir_t *dd;
529fa9e4066Sahrens 	dsl_pool_t *dp;
530*745cd3c5Smaybee 	const char *snapname;
531fa9e4066Sahrens 	uint64_t obj;
532fa9e4066Sahrens 	int err = 0;
533fa9e4066Sahrens 
534*745cd3c5Smaybee 	err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
535ea8dc4b6Seschrock 	if (err)
536ea8dc4b6Seschrock 		return (err);
537fa9e4066Sahrens 
538fa9e4066Sahrens 	dp = dd->dd_pool;
539fa9e4066Sahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
540fa9e4066Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
541*745cd3c5Smaybee 	if (obj)
542*745cd3c5Smaybee 		err = dsl_dataset_get_ref(dp, obj, tag, dsp);
543*745cd3c5Smaybee 	else
544fa9e4066Sahrens 		err = ENOENT;
545*745cd3c5Smaybee 	if (err)
546fa9e4066Sahrens 		goto out;
547fa9e4066Sahrens 
548*745cd3c5Smaybee 	err = dsl_dataset_hold_ref(*dsp, tag);
549fa9e4066Sahrens 
550*745cd3c5Smaybee 	/* we may be looking for a snapshot */
551*745cd3c5Smaybee 	if (err == 0 && snapname != NULL) {
552*745cd3c5Smaybee 		dsl_dataset_t *ds = NULL;
553fa9e4066Sahrens 
554*745cd3c5Smaybee 		if (*snapname++ != '@') {
555*745cd3c5Smaybee 			dsl_dataset_rele(*dsp, tag);
556fa9e4066Sahrens 			err = ENOENT;
557fa9e4066Sahrens 			goto out;
558fa9e4066Sahrens 		}
559fa9e4066Sahrens 
560*745cd3c5Smaybee 		dprintf("looking for snapshot '%s'\n", snapname);
561*745cd3c5Smaybee 		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
562*745cd3c5Smaybee 		if (err == 0)
563*745cd3c5Smaybee 			err = dsl_dataset_get_ref(dp, obj, tag, &ds);
564*745cd3c5Smaybee 		dsl_dataset_rele(*dsp, tag);
565*745cd3c5Smaybee 
566*745cd3c5Smaybee 		ASSERT3U((err == 0), ==, (ds != NULL));
567*745cd3c5Smaybee 
568*745cd3c5Smaybee 		if (ds) {
569*745cd3c5Smaybee 			mutex_enter(&ds->ds_lock);
570*745cd3c5Smaybee 			if (ds->ds_snapname[0] == 0)
571*745cd3c5Smaybee 				(void) strlcpy(ds->ds_snapname, snapname,
572*745cd3c5Smaybee 				    sizeof (ds->ds_snapname));
573*745cd3c5Smaybee 			mutex_exit(&ds->ds_lock);
574*745cd3c5Smaybee 			err = dsl_dataset_hold_ref(ds, tag);
575*745cd3c5Smaybee 			*dsp = err ? NULL : ds;
576fa9e4066Sahrens 		}
577fa9e4066Sahrens 	}
578fa9e4066Sahrens out:
579fa9e4066Sahrens 	rw_exit(&dp->dp_config_rwlock);
580fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
581fa9e4066Sahrens 	return (err);
582fa9e4066Sahrens }
583fa9e4066Sahrens 
584fa9e4066Sahrens int
585*745cd3c5Smaybee dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp)
586fa9e4066Sahrens {
587*745cd3c5Smaybee 	int err = dsl_dataset_hold(name, owner, dsp);
588*745cd3c5Smaybee 	if (err)
589*745cd3c5Smaybee 		return (err);
590*745cd3c5Smaybee 	if ((*dsp)->ds_phys->ds_num_children > 0 &&
591*745cd3c5Smaybee 	    !DS_MODE_IS_READONLY(flags)) {
592*745cd3c5Smaybee 		dsl_dataset_rele(*dsp, owner);
593*745cd3c5Smaybee 		return (EROFS);
594*745cd3c5Smaybee 	}
595*745cd3c5Smaybee 	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
596*745cd3c5Smaybee 		dsl_dataset_rele(*dsp, owner);
597*745cd3c5Smaybee 		return (EBUSY);
598*745cd3c5Smaybee 	}
599*745cd3c5Smaybee 	return (0);
600fa9e4066Sahrens }
601fa9e4066Sahrens 
602fa9e4066Sahrens void
603fa9e4066Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
604fa9e4066Sahrens {
605fa9e4066Sahrens 	if (ds == NULL) {
606fa9e4066Sahrens 		(void) strcpy(name, "mos");
607fa9e4066Sahrens 	} else {
608fa9e4066Sahrens 		dsl_dir_name(ds->ds_dir, name);
609ea8dc4b6Seschrock 		VERIFY(0 == dsl_dataset_get_snapname(ds));
610fa9e4066Sahrens 		if (ds->ds_snapname[0]) {
611fa9e4066Sahrens 			(void) strcat(name, "@");
612*745cd3c5Smaybee 			/*
613*745cd3c5Smaybee 			 * We use a "recursive" mutex so that we
614*745cd3c5Smaybee 			 * can call dprintf_ds() with ds_lock held.
615*745cd3c5Smaybee 			 */
616fa9e4066Sahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
617fa9e4066Sahrens 				mutex_enter(&ds->ds_lock);
618fa9e4066Sahrens 				(void) strcat(name, ds->ds_snapname);
619fa9e4066Sahrens 				mutex_exit(&ds->ds_lock);
620fa9e4066Sahrens 			} else {
621fa9e4066Sahrens 				(void) strcat(name, ds->ds_snapname);
622fa9e4066Sahrens 			}
623fa9e4066Sahrens 		}
624fa9e4066Sahrens 	}
625fa9e4066Sahrens }
626fa9e4066Sahrens 
627b7661cccSmmusante static int
628b7661cccSmmusante dsl_dataset_namelen(dsl_dataset_t *ds)
629b7661cccSmmusante {
630b7661cccSmmusante 	int result;
631b7661cccSmmusante 
632b7661cccSmmusante 	if (ds == NULL) {
633b7661cccSmmusante 		result = 3;	/* "mos" */
634b7661cccSmmusante 	} else {
635b7661cccSmmusante 		result = dsl_dir_namelen(ds->ds_dir);
636b7661cccSmmusante 		VERIFY(0 == dsl_dataset_get_snapname(ds));
637b7661cccSmmusante 		if (ds->ds_snapname[0]) {
638b7661cccSmmusante 			++result;	/* adding one for the @-sign */
639b7661cccSmmusante 			if (!MUTEX_HELD(&ds->ds_lock)) {
640b7661cccSmmusante 				mutex_enter(&ds->ds_lock);
641b7661cccSmmusante 				result += strlen(ds->ds_snapname);
642b7661cccSmmusante 				mutex_exit(&ds->ds_lock);
643b7661cccSmmusante 			} else {
644b7661cccSmmusante 				result += strlen(ds->ds_snapname);
645b7661cccSmmusante 			}
646b7661cccSmmusante 		}
647b7661cccSmmusante 	}
648b7661cccSmmusante 
649b7661cccSmmusante 	return (result);
650b7661cccSmmusante }
651b7661cccSmmusante 
652*745cd3c5Smaybee static void
653*745cd3c5Smaybee dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
654fa9e4066Sahrens {
655ea8dc4b6Seschrock 	dmu_buf_rele(ds->ds_dbuf, tag);
656fa9e4066Sahrens }
657fa9e4066Sahrens 
6583cb34c60Sahrens void
659*745cd3c5Smaybee dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
6603cb34c60Sahrens {
661*745cd3c5Smaybee 	ASSERT(ds->ds_owner != tag);
662*745cd3c5Smaybee 	if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
663*745cd3c5Smaybee 		rw_exit(&ds->ds_rwlock);
664*745cd3c5Smaybee 	}
665*745cd3c5Smaybee 	dsl_dataset_drop_ref(ds, tag);
666*745cd3c5Smaybee }
667*745cd3c5Smaybee 
668*745cd3c5Smaybee void
669*745cd3c5Smaybee dsl_dataset_disown(dsl_dataset_t *ds, void *owner)
670*745cd3c5Smaybee {
671*745cd3c5Smaybee 	ASSERT((ds->ds_owner == owner && ds->ds_dbuf) ||
672*745cd3c5Smaybee 	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));
673*745cd3c5Smaybee 
6743cb34c60Sahrens 	mutex_enter(&ds->ds_lock);
675*745cd3c5Smaybee 	ds->ds_owner = NULL;
676*745cd3c5Smaybee 	if (RW_WRITE_HELD(&ds->ds_rwlock)) {
677*745cd3c5Smaybee 		rw_exit(&ds->ds_rwlock);
678*745cd3c5Smaybee 		cv_broadcast(&ds->ds_exclusive_cv);
679*745cd3c5Smaybee 	}
6803cb34c60Sahrens 	mutex_exit(&ds->ds_lock);
681*745cd3c5Smaybee 	if (ds->ds_dbuf)
682*745cd3c5Smaybee 		dsl_dataset_drop_ref(ds, owner);
683*745cd3c5Smaybee 	else
684*745cd3c5Smaybee 		dsl_dataset_evict(ds->ds_dbuf, ds);
6853cb34c60Sahrens }
6863cb34c60Sahrens 
6873cb34c60Sahrens boolean_t
688*745cd3c5Smaybee dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner)
6893cb34c60Sahrens {
690*745cd3c5Smaybee 	boolean_t gotit = FALSE;
691*745cd3c5Smaybee 
6923cb34c60Sahrens 	mutex_enter(&ds->ds_lock);
693*745cd3c5Smaybee 	if (ds->ds_owner == NULL &&
694*745cd3c5Smaybee 	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
695*745cd3c5Smaybee 		ds->ds_owner = owner;
696*745cd3c5Smaybee 		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
697*745cd3c5Smaybee 			rw_exit(&ds->ds_rwlock);
698*745cd3c5Smaybee 		gotit = TRUE;
6993cb34c60Sahrens 	}
7003cb34c60Sahrens 	mutex_exit(&ds->ds_lock);
701*745cd3c5Smaybee 	return (gotit);
702*745cd3c5Smaybee }
703*745cd3c5Smaybee 
704*745cd3c5Smaybee void
705*745cd3c5Smaybee dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
706*745cd3c5Smaybee {
707*745cd3c5Smaybee 	ASSERT3P(owner, ==, ds->ds_owner);
708*745cd3c5Smaybee 	if (!RW_WRITE_HELD(&ds->ds_rwlock))
709*745cd3c5Smaybee 		rw_enter(&ds->ds_rwlock, RW_WRITER);
7103cb34c60Sahrens }
7113cb34c60Sahrens 
712fa9e4066Sahrens void
713fa9e4066Sahrens dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
714fa9e4066Sahrens {
715fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
716fa9e4066Sahrens 	dmu_buf_t *dbuf;
717fa9e4066Sahrens 	dsl_dataset_phys_t *dsphys;
718fa9e4066Sahrens 	dsl_dataset_t *ds;
719fa9e4066Sahrens 	uint64_t dsobj;
720fa9e4066Sahrens 	dsl_dir_t *dd;
721fa9e4066Sahrens 
722fa9e4066Sahrens 	dsl_dir_create_root(mos, ddobjp, tx);
723ea8dc4b6Seschrock 	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));
724fa9e4066Sahrens 
7251649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
7261649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
727ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
728fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
729fa9e4066Sahrens 	dsphys = dbuf->db_data;
730fa9e4066Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
731fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
732fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
733fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
734fa9e4066Sahrens 	dsphys->ds_snapnames_zapobj =
735ab04eb8eStimh 	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
736ab04eb8eStimh 	    DMU_OT_NONE, 0, tx);
737fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
738fa9e4066Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
739fa9e4066Sahrens 	dsphys->ds_deadlist_obj =
740fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
741a9799022Sck 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
742a9799022Sck 		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
743ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
744fa9e4066Sahrens 
745fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
746fa9e4066Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
747fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
748fa9e4066Sahrens 
749*745cd3c5Smaybee 	VERIFY(0 == dsl_dataset_get_ref(dp, dsobj, FTAG, &ds));
750c717a561Smaybee 	(void) dmu_objset_create_impl(dp->dp_spa, ds,
751c717a561Smaybee 	    &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx);
752*745cd3c5Smaybee 	dsl_dataset_drop_ref(ds, FTAG);
753fa9e4066Sahrens }
754fa9e4066Sahrens 
7551d452cf5Sahrens uint64_t
756ab04eb8eStimh dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin,
757ab04eb8eStimh     uint64_t flags, dmu_tx_t *tx)
758fa9e4066Sahrens {
7593cb34c60Sahrens 	dsl_pool_t *dp = dd->dd_pool;
760fa9e4066Sahrens 	dmu_buf_t *dbuf;
761fa9e4066Sahrens 	dsl_dataset_phys_t *dsphys;
7623cb34c60Sahrens 	uint64_t dsobj;
763fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
764fa9e4066Sahrens 
7653cb34c60Sahrens 	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
7663cb34c60Sahrens 	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
767fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
7683cb34c60Sahrens 	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
769fa9e4066Sahrens 
7701649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
7711649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
772ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
773fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
774fa9e4066Sahrens 	dsphys = dbuf->db_data;
775*745cd3c5Smaybee 	bzero(dsphys, sizeof (dsl_dataset_phys_t));
776fa9e4066Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
777ab04eb8eStimh 	dsphys->ds_flags = flags;
778fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
779fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
780fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
781fa9e4066Sahrens 	dsphys->ds_snapnames_zapobj =
782ab04eb8eStimh 	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
783ab04eb8eStimh 	    DMU_OT_NONE, 0, tx);
784fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
785fa9e4066Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
786fa9e4066Sahrens 	dsphys->ds_deadlist_obj =
787fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
788a9799022Sck 
7893cb34c60Sahrens 	if (origin) {
7903cb34c60Sahrens 		dsphys->ds_prev_snap_obj = origin->ds_object;
791fa9e4066Sahrens 		dsphys->ds_prev_snap_txg =
7923cb34c60Sahrens 		    origin->ds_phys->ds_creation_txg;
793fa9e4066Sahrens 		dsphys->ds_used_bytes =
7943cb34c60Sahrens 		    origin->ds_phys->ds_used_bytes;
795fa9e4066Sahrens 		dsphys->ds_compressed_bytes =
7963cb34c60Sahrens 		    origin->ds_phys->ds_compressed_bytes;
797fa9e4066Sahrens 		dsphys->ds_uncompressed_bytes =
7983cb34c60Sahrens 		    origin->ds_phys->ds_uncompressed_bytes;
7993cb34c60Sahrens 		dsphys->ds_bp = origin->ds_phys->ds_bp;
800579ae4d5Stimh 		dsphys->ds_flags |= origin->ds_phys->ds_flags;
801fa9e4066Sahrens 
8023cb34c60Sahrens 		dmu_buf_will_dirty(origin->ds_dbuf, tx);
8033cb34c60Sahrens 		origin->ds_phys->ds_num_children++;
804fa9e4066Sahrens 
805fa9e4066Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
8063cb34c60Sahrens 		dd->dd_phys->dd_origin_obj = origin->ds_object;
807fa9e4066Sahrens 	}
808ab04eb8eStimh 
809ab04eb8eStimh 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
810ab04eb8eStimh 		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
811ab04eb8eStimh 
812ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
813fa9e4066Sahrens 
814fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
815fa9e4066Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
8163cb34c60Sahrens 
8173cb34c60Sahrens 	return (dsobj);
8183cb34c60Sahrens }
8193cb34c60Sahrens 
8203cb34c60Sahrens uint64_t
821ab04eb8eStimh dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
822ab04eb8eStimh     dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
8233cb34c60Sahrens {
8243cb34c60Sahrens 	dsl_pool_t *dp = pdd->dd_pool;
8253cb34c60Sahrens 	uint64_t dsobj, ddobj;
8263cb34c60Sahrens 	dsl_dir_t *dd;
8273cb34c60Sahrens 
8283cb34c60Sahrens 	ASSERT(lastname[0] != '@');
8293cb34c60Sahrens 
8303cb34c60Sahrens 	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
8313cb34c60Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
8323cb34c60Sahrens 
833ab04eb8eStimh 	dsobj = dsl_dataset_create_sync_impl(dd, origin, flags, tx);
8343cb34c60Sahrens 
8353cb34c60Sahrens 	dsl_deleg_set_create_perms(dd, tx, cr);
8363cb34c60Sahrens 
837fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
838fa9e4066Sahrens 
8391d452cf5Sahrens 	return (dsobj);
840fa9e4066Sahrens }
841fa9e4066Sahrens 
8421d452cf5Sahrens struct destroyarg {
8431d452cf5Sahrens 	dsl_sync_task_group_t *dstg;
8441d452cf5Sahrens 	char *snapname;
8451d452cf5Sahrens 	char *failed;
8461d452cf5Sahrens };
8471d452cf5Sahrens 
8481d452cf5Sahrens static int
8491d452cf5Sahrens dsl_snapshot_destroy_one(char *name, void *arg)
850fa9e4066Sahrens {
8511d452cf5Sahrens 	struct destroyarg *da = arg;
8521d452cf5Sahrens 	dsl_dataset_t *ds;
8531d452cf5Sahrens 	char *cp;
854fa9e4066Sahrens 	int err;
855fa9e4066Sahrens 
8561d452cf5Sahrens 	(void) strcat(name, "@");
8571d452cf5Sahrens 	(void) strcat(name, da->snapname);
858*745cd3c5Smaybee 	err = dsl_dataset_own(name, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
859cdf5b4caSmmusante 	    da->dstg, &ds);
8601d452cf5Sahrens 	cp = strchr(name, '@');
8611d452cf5Sahrens 	*cp = '\0';
862*745cd3c5Smaybee 	if (err == 0) {
863*745cd3c5Smaybee 		dsl_dataset_make_exclusive(ds, da->dstg);
864*745cd3c5Smaybee 		dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
865*745cd3c5Smaybee 		    dsl_dataset_destroy_sync, ds, da->dstg, 0);
866*745cd3c5Smaybee 	} else if (err == ENOENT) {
867*745cd3c5Smaybee 		err = 0;
868*745cd3c5Smaybee 	} else {
8691d452cf5Sahrens 		(void) strcpy(da->failed, name);
8701d452cf5Sahrens 	}
871*745cd3c5Smaybee 	return (err);
8721d452cf5Sahrens }
87331fd60d3Sahrens 
8741d452cf5Sahrens /*
8751d452cf5Sahrens  * Destroy 'snapname' in all descendants of 'fsname'.
8761d452cf5Sahrens  */
8771d452cf5Sahrens #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
8781d452cf5Sahrens int
8791d452cf5Sahrens dsl_snapshots_destroy(char *fsname, char *snapname)
8801d452cf5Sahrens {
8811d452cf5Sahrens 	int err;
8821d452cf5Sahrens 	struct destroyarg da;
8831d452cf5Sahrens 	dsl_sync_task_t *dst;
8841d452cf5Sahrens 	spa_t *spa;
8851d452cf5Sahrens 
88640feaa91Sahrens 	err = spa_open(fsname, &spa, FTAG);
8871d452cf5Sahrens 	if (err)
8881d452cf5Sahrens 		return (err);
8891d452cf5Sahrens 	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
8901d452cf5Sahrens 	da.snapname = snapname;
8911d452cf5Sahrens 	da.failed = fsname;
8921d452cf5Sahrens 
8931d452cf5Sahrens 	err = dmu_objset_find(fsname,
8940b69c2f0Sahrens 	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
8951d452cf5Sahrens 
8961d452cf5Sahrens 	if (err == 0)
8971d452cf5Sahrens 		err = dsl_sync_task_group_wait(da.dstg);
8981d452cf5Sahrens 
8991d452cf5Sahrens 	for (dst = list_head(&da.dstg->dstg_tasks); dst;
9001d452cf5Sahrens 	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
9011d452cf5Sahrens 		dsl_dataset_t *ds = dst->dst_arg1;
902*745cd3c5Smaybee 		/*
903*745cd3c5Smaybee 		 * Return the file system name that triggered the error
904*745cd3c5Smaybee 		 */
9051d452cf5Sahrens 		if (dst->dst_err) {
9061d452cf5Sahrens 			dsl_dataset_name(ds, fsname);
90740feaa91Sahrens 			*strchr(fsname, '@') = '\0';
908e1930233Sbonwick 		}
909*745cd3c5Smaybee 		dsl_dataset_disown(ds, da.dstg);
910fa9e4066Sahrens 	}
911fa9e4066Sahrens 
9121d452cf5Sahrens 	dsl_sync_task_group_destroy(da.dstg);
9131d452cf5Sahrens 	spa_close(spa, FTAG);
914fa9e4066Sahrens 	return (err);
915fa9e4066Sahrens }
916fa9e4066Sahrens 
9173cb34c60Sahrens /*
918*745cd3c5Smaybee  * ds must be opened as OWNER.  On return (whether successful or not),
919*745cd3c5Smaybee  * ds will be closed and caller can no longer dereference it.
9203cb34c60Sahrens  */
921fa9e4066Sahrens int
9223cb34c60Sahrens dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
923fa9e4066Sahrens {
924fa9e4066Sahrens 	int err;
9251d452cf5Sahrens 	dsl_sync_task_group_t *dstg;
9261d452cf5Sahrens 	objset_t *os;
927fa9e4066Sahrens 	dsl_dir_t *dd;
9281d452cf5Sahrens 	uint64_t obj;
9291d452cf5Sahrens 
9303cb34c60Sahrens 	if (dsl_dataset_is_snapshot(ds)) {
9311d452cf5Sahrens 		/* Destroying a snapshot is simpler */
932*745cd3c5Smaybee 		dsl_dataset_make_exclusive(ds, tag);
9331d452cf5Sahrens 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
9341d452cf5Sahrens 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
9353cb34c60Sahrens 		    ds, tag, 0);
9363cb34c60Sahrens 		goto out;
9371d452cf5Sahrens 	}
938fa9e4066Sahrens 
9391d452cf5Sahrens 	dd = ds->ds_dir;
940fa9e4066Sahrens 
9411d452cf5Sahrens 	/*
9421d452cf5Sahrens 	 * Check for errors and mark this ds as inconsistent, in
9431d452cf5Sahrens 	 * case we crash while freeing the objects.
9441d452cf5Sahrens 	 */
9451d452cf5Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
9461d452cf5Sahrens 	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
9473cb34c60Sahrens 	if (err)
9483cb34c60Sahrens 		goto out;
9493cb34c60Sahrens 
9503cb34c60Sahrens 	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
9513cb34c60Sahrens 	if (err)
9523cb34c60Sahrens 		goto out;
953fa9e4066Sahrens 
9541d452cf5Sahrens 	/*
9551d452cf5Sahrens 	 * remove the objects in open context, so that we won't
9561d452cf5Sahrens 	 * have too much to do in syncing context.
9571d452cf5Sahrens 	 */
9586754306eSahrens 	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
9596754306eSahrens 	    ds->ds_phys->ds_prev_snap_txg)) {
9601d452cf5Sahrens 		dmu_tx_t *tx = dmu_tx_create(os);
9611d452cf5Sahrens 		dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
9621d452cf5Sahrens 		dmu_tx_hold_bonus(tx, obj);
9631d452cf5Sahrens 		err = dmu_tx_assign(tx, TXG_WAIT);
9641d452cf5Sahrens 		if (err) {
9651d452cf5Sahrens 			/*
9661d452cf5Sahrens 			 * Perhaps there is not enough disk
9671d452cf5Sahrens 			 * space.  Just deal with it from
9681d452cf5Sahrens 			 * dsl_dataset_destroy_sync().
9691d452cf5Sahrens 			 */
9701d452cf5Sahrens 			dmu_tx_abort(tx);
9711d452cf5Sahrens 			continue;
9721d452cf5Sahrens 		}
9731d452cf5Sahrens 		VERIFY(0 == dmu_object_free(os, obj, tx));
9741d452cf5Sahrens 		dmu_tx_commit(tx);
9751d452cf5Sahrens 	}
9761d452cf5Sahrens 
9771d452cf5Sahrens 	dmu_objset_close(os);
9781d452cf5Sahrens 	if (err != ESRCH)
9793cb34c60Sahrens 		goto out;
9801d452cf5Sahrens 
9813cb34c60Sahrens 	if (ds->ds_user_ptr) {
982*745cd3c5Smaybee 		/*
983*745cd3c5Smaybee 		 * We need to sync out all in-flight IO before we try
984*745cd3c5Smaybee 		 * to evict (the dataset evict func is trying to clear
985*745cd3c5Smaybee 		 * the cached entries for this dataset in the ARC).
986*745cd3c5Smaybee 		 */
987*745cd3c5Smaybee 		txg_wait_synced(dd->dd_pool, 0);
9883cb34c60Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
9893cb34c60Sahrens 		ds->ds_user_ptr = NULL;
9901d452cf5Sahrens 	}
9911d452cf5Sahrens 
9923cb34c60Sahrens 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
9933cb34c60Sahrens 	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
9943cb34c60Sahrens 	rw_exit(&dd->dd_pool->dp_config_rwlock);
9953cb34c60Sahrens 
9963cb34c60Sahrens 	if (err)
9973cb34c60Sahrens 		goto out;
9983cb34c60Sahrens 
9991d452cf5Sahrens 	/*
10001d452cf5Sahrens 	 * Blow away the dsl_dir + head dataset.
10011d452cf5Sahrens 	 */
1002*745cd3c5Smaybee 	dsl_dataset_make_exclusive(ds, tag);
10031d452cf5Sahrens 	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
10041d452cf5Sahrens 	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
10053cb34c60Sahrens 	    dsl_dataset_destroy_sync, ds, tag, 0);
10061d452cf5Sahrens 	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
10071d452cf5Sahrens 	    dsl_dir_destroy_sync, dd, FTAG, 0);
10081d452cf5Sahrens 	err = dsl_sync_task_group_wait(dstg);
10091d452cf5Sahrens 	dsl_sync_task_group_destroy(dstg);
1010*745cd3c5Smaybee 	/* if it is successful, dsl_dir_destroy_sync will close the dd */
10113cb34c60Sahrens 	if (err)
10121d452cf5Sahrens 		dsl_dir_close(dd, FTAG);
10133cb34c60Sahrens out:
1014*745cd3c5Smaybee 	dsl_dataset_disown(ds, tag);
1015fa9e4066Sahrens 	return (err);
1016fa9e4066Sahrens }
1017fa9e4066Sahrens 
10181d452cf5Sahrens int
10193cb34c60Sahrens dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
10201d452cf5Sahrens {
1021*745cd3c5Smaybee 	ASSERT(ds->ds_owner);
10223cb34c60Sahrens 
10231d452cf5Sahrens 	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
10241d452cf5Sahrens 	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
10253cb34c60Sahrens 	    ds, &ost, 0));
10261d452cf5Sahrens }
10271d452cf5Sahrens 
1028fa9e4066Sahrens void *
1029fa9e4066Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
1030fa9e4066Sahrens     void *p, dsl_dataset_evict_func_t func)
1031fa9e4066Sahrens {
1032fa9e4066Sahrens 	void *old;
1033fa9e4066Sahrens 
1034fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
1035fa9e4066Sahrens 	old = ds->ds_user_ptr;
1036fa9e4066Sahrens 	if (old == NULL) {
1037fa9e4066Sahrens 		ds->ds_user_ptr = p;
1038fa9e4066Sahrens 		ds->ds_user_evict_func = func;
1039fa9e4066Sahrens 	}
1040fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
1041fa9e4066Sahrens 	return (old);
1042fa9e4066Sahrens }
1043fa9e4066Sahrens 
1044fa9e4066Sahrens void *
1045fa9e4066Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
1046fa9e4066Sahrens {
1047fa9e4066Sahrens 	return (ds->ds_user_ptr);
1048fa9e4066Sahrens }
1049fa9e4066Sahrens 
1050fa9e4066Sahrens 
1051c717a561Smaybee blkptr_t *
1052c717a561Smaybee dsl_dataset_get_blkptr(dsl_dataset_t *ds)
1053fa9e4066Sahrens {
1054c717a561Smaybee 	return (&ds->ds_phys->ds_bp);
1055fa9e4066Sahrens }
1056fa9e4066Sahrens 
1057fa9e4066Sahrens void
1058fa9e4066Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
1059fa9e4066Sahrens {
1060fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1061fa9e4066Sahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
1062fa9e4066Sahrens 	if (ds == NULL) {
1063fa9e4066Sahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
1064fa9e4066Sahrens 	} else {
1065fa9e4066Sahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
1066fa9e4066Sahrens 		ds->ds_phys->ds_bp = *bp;
1067fa9e4066Sahrens 	}
1068fa9e4066Sahrens }
1069fa9e4066Sahrens 
1070fa9e4066Sahrens spa_t *
1071fa9e4066Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
1072fa9e4066Sahrens {
1073fa9e4066Sahrens 	return (ds->ds_dir->dd_pool->dp_spa);
1074fa9e4066Sahrens }
1075fa9e4066Sahrens 
1076fa9e4066Sahrens void
1077fa9e4066Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
1078fa9e4066Sahrens {
1079fa9e4066Sahrens 	dsl_pool_t *dp;
1080fa9e4066Sahrens 
1081fa9e4066Sahrens 	if (ds == NULL) /* this is the meta-objset */
1082fa9e4066Sahrens 		return;
1083fa9e4066Sahrens 
1084fa9e4066Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1085a2eea2e1Sahrens 
1086a2eea2e1Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
1087a2eea2e1Sahrens 		panic("dirtying snapshot!");
1088fa9e4066Sahrens 
1089fa9e4066Sahrens 	dp = ds->ds_dir->dd_pool;
1090fa9e4066Sahrens 
1091fa9e4066Sahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
1092fa9e4066Sahrens 		/* up the hold count until we can be written out */
1093fa9e4066Sahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
1094fa9e4066Sahrens 	}
1095fa9e4066Sahrens }
1096fa9e4066Sahrens 
1097a9799022Sck /*
1098a9799022Sck  * The unique space in the head dataset can be calculated by subtracting
1099a9799022Sck  * the space used in the most recent snapshot, that is still being used
1100a9799022Sck  * in this file system, from the space currently in use.  To figure out
1101a9799022Sck  * the space in the most recent snapshot still in use, we need to take
1102a9799022Sck  * the total space used in the snapshot and subtract out the space that
1103a9799022Sck  * has been freed up since the snapshot was taken.
1104a9799022Sck  */
1105a9799022Sck static void
1106a9799022Sck dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
1107a9799022Sck {
1108a9799022Sck 	uint64_t mrs_used;
1109a9799022Sck 	uint64_t dlused, dlcomp, dluncomp;
1110a9799022Sck 
1111a9799022Sck 	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);
1112a9799022Sck 
1113a9799022Sck 	if (ds->ds_phys->ds_prev_snap_obj != 0)
1114a9799022Sck 		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
1115a9799022Sck 	else
1116a9799022Sck 		mrs_used = 0;
1117a9799022Sck 
1118a9799022Sck 	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
1119a9799022Sck 	    &dluncomp));
1120a9799022Sck 
1121a9799022Sck 	ASSERT3U(dlused, <=, mrs_used);
1122a9799022Sck 	ds->ds_phys->ds_unique_bytes =
1123a9799022Sck 	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
1124a9799022Sck 
1125a9799022Sck 	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
1126a9799022Sck 	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
1127a9799022Sck 	    SPA_VERSION_UNIQUE_ACCURATE)
1128a9799022Sck 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
1129a9799022Sck }
1130a9799022Sck 
1131a9799022Sck static uint64_t
1132a9799022Sck dsl_dataset_unique(dsl_dataset_t *ds)
1133a9799022Sck {
1134a9799022Sck 	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
1135a9799022Sck 		dsl_dataset_recalc_head_uniq(ds);
1136a9799022Sck 
1137a9799022Sck 	return (ds->ds_phys->ds_unique_bytes);
1138a9799022Sck }
1139a9799022Sck 
1140fa9e4066Sahrens struct killarg {
1141a9799022Sck 	int64_t *usedp;
1142a9799022Sck 	int64_t *compressedp;
1143a9799022Sck 	int64_t *uncompressedp;
1144fa9e4066Sahrens 	zio_t *zio;
1145fa9e4066Sahrens 	dmu_tx_t *tx;
1146fa9e4066Sahrens };
1147fa9e4066Sahrens 
1148fa9e4066Sahrens static int
1149fa9e4066Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
1150fa9e4066Sahrens {
1151fa9e4066Sahrens 	struct killarg *ka = arg;
1152fa9e4066Sahrens 	blkptr_t *bp = &bc->bc_blkptr;
1153fa9e4066Sahrens 
1154fa9e4066Sahrens 	ASSERT3U(bc->bc_errno, ==, 0);
1155fa9e4066Sahrens 
1156fa9e4066Sahrens 	/*
1157fa9e4066Sahrens 	 * Since this callback is not called concurrently, no lock is
1158fa9e4066Sahrens 	 * needed on the accounting values.
1159fa9e4066Sahrens 	 */
116099653d4eSeschrock 	*ka->usedp += bp_get_dasize(spa, bp);
1161fa9e4066Sahrens 	*ka->compressedp += BP_GET_PSIZE(bp);
1162fa9e4066Sahrens 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
1163fa9e4066Sahrens 	/* XXX check for EIO? */
1164fa9e4066Sahrens 	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
1165fa9e4066Sahrens 	    ARC_NOWAIT);
1166fa9e4066Sahrens 	return (0);
1167fa9e4066Sahrens }
1168fa9e4066Sahrens 
1169fa9e4066Sahrens /* ARGSUSED */
11701d452cf5Sahrens static int
11711d452cf5Sahrens dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
1172fa9e4066Sahrens {
11731d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
11743cb34c60Sahrens 	dmu_objset_type_t *ost = arg2;
1175fa9e4066Sahrens 
11761d452cf5Sahrens 	/*
11773cb34c60Sahrens 	 * We can only roll back to emptyness if it is a ZPL objset.
11781d452cf5Sahrens 	 */
11793cb34c60Sahrens 	if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0)
1180fa9e4066Sahrens 		return (EINVAL);
1181fa9e4066Sahrens 
11821d452cf5Sahrens 	/*
11831d452cf5Sahrens 	 * This must not be a snapshot.
11841d452cf5Sahrens 	 */
11851d452cf5Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
1186fa9e4066Sahrens 		return (EINVAL);
1187fa9e4066Sahrens 
1188fa9e4066Sahrens 	/*
1189fa9e4066Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
1190fa9e4066Sahrens 	 * them.  Try again.
1191fa9e4066Sahrens 	 */
11921d452cf5Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1193fa9e4066Sahrens 		return (EAGAIN);
1194fa9e4066Sahrens 
11951d452cf5Sahrens 	return (0);
11961d452cf5Sahrens }
11971d452cf5Sahrens 
11981d452cf5Sahrens /* ARGSUSED */
11991d452cf5Sahrens static void
1200ecd6cf80Smarks dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
12011d452cf5Sahrens {
12021d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
12033cb34c60Sahrens 	dmu_objset_type_t *ost = arg2;
12041d452cf5Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1205fa9e4066Sahrens 
1206fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1207fa9e4066Sahrens 
120886ccc033Sperrin 	/*
120986ccc033Sperrin 	 * Before the roll back destroy the zil.
121086ccc033Sperrin 	 */
121186ccc033Sperrin 	if (ds->ds_user_ptr != NULL) {
121286ccc033Sperrin 		zil_rollback_destroy(
121386ccc033Sperrin 		    ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx);
12143cb34c60Sahrens 
12153cb34c60Sahrens 		/*
12163cb34c60Sahrens 		 * We need to make sure that the objset_impl_t is reopened after
12173cb34c60Sahrens 		 * we do the rollback, otherwise it will have the wrong
12183cb34c60Sahrens 		 * objset_phys_t.  Normally this would happen when this
1219*745cd3c5Smaybee 		 * dataset-open is closed, thus causing the
12203cb34c60Sahrens 		 * dataset to be immediately evicted.  But when doing "zfs recv
12213cb34c60Sahrens 		 * -F", we reopen the objset before that, so that there is no
12223cb34c60Sahrens 		 * window where the dataset is closed and inconsistent.
12233cb34c60Sahrens 		 */
12243cb34c60Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
12253cb34c60Sahrens 		ds->ds_user_ptr = NULL;
122686ccc033Sperrin 	}
12273a8a1de4Sperrin 
1228fa9e4066Sahrens 	/* Zero out the deadlist. */
1229fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
1230fa9e4066Sahrens 	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1231fa9e4066Sahrens 	ds->ds_phys->ds_deadlist_obj =
1232fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
1233ea8dc4b6Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
1234ea8dc4b6Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1235fa9e4066Sahrens 
1236fa9e4066Sahrens 	{
1237fa9e4066Sahrens 		/* Free blkptrs that we gave birth to */
1238fa9e4066Sahrens 		zio_t *zio;
1239a9799022Sck 		int64_t used = 0, compressed = 0, uncompressed = 0;
1240fa9e4066Sahrens 		struct killarg ka;
1241d9b87188Sck 		int64_t delta;
1242fa9e4066Sahrens 
1243fa9e4066Sahrens 		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
1244fa9e4066Sahrens 		    ZIO_FLAG_MUSTSUCCEED);
1245fa9e4066Sahrens 		ka.usedp = &used;
1246fa9e4066Sahrens 		ka.compressedp = &compressed;
1247fa9e4066Sahrens 		ka.uncompressedp = &uncompressed;
1248fa9e4066Sahrens 		ka.zio = zio;
1249fa9e4066Sahrens 		ka.tx = tx;
1250fa9e4066Sahrens 		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1251fa9e4066Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1252fa9e4066Sahrens 		(void) zio_wait(zio);
1253fa9e4066Sahrens 
1254d9b87188Sck 		/* only deduct space beyond any refreservation */
1255d9b87188Sck 		delta = parent_delta(ds, -used);
12561d452cf5Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
1257d9b87188Sck 		    delta, -compressed, -uncompressed, tx);
1258fa9e4066Sahrens 	}
1259fa9e4066Sahrens 
12603cb34c60Sahrens 	if (ds->ds_prev) {
12613cb34c60Sahrens 		/* Change our contents to that of the prev snapshot */
12623cb34c60Sahrens 		ASSERT3U(ds->ds_prev->ds_object, ==,
12633cb34c60Sahrens 		    ds->ds_phys->ds_prev_snap_obj);
12643cb34c60Sahrens 		ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
12653cb34c60Sahrens 		ds->ds_phys->ds_used_bytes =
12663cb34c60Sahrens 		    ds->ds_prev->ds_phys->ds_used_bytes;
12673cb34c60Sahrens 		ds->ds_phys->ds_compressed_bytes =
12683cb34c60Sahrens 		    ds->ds_prev->ds_phys->ds_compressed_bytes;
12693cb34c60Sahrens 		ds->ds_phys->ds_uncompressed_bytes =
12703cb34c60Sahrens 		    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
12713cb34c60Sahrens 		ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
12723cb34c60Sahrens 		ds->ds_phys->ds_unique_bytes = 0;
1273fa9e4066Sahrens 
12743cb34c60Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
12753cb34c60Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
12763cb34c60Sahrens 			ds->ds_prev->ds_phys->ds_unique_bytes = 0;
12773cb34c60Sahrens 		}
12783cb34c60Sahrens 	} else {
12793cb34c60Sahrens 		/* Zero out our contents, recreate objset */
12803cb34c60Sahrens 		bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
12813cb34c60Sahrens 		ds->ds_phys->ds_used_bytes = 0;
12823cb34c60Sahrens 		ds->ds_phys->ds_compressed_bytes = 0;
12833cb34c60Sahrens 		ds->ds_phys->ds_uncompressed_bytes = 0;
12843cb34c60Sahrens 		ds->ds_phys->ds_flags = 0;
12853cb34c60Sahrens 		ds->ds_phys->ds_unique_bytes = 0;
12863cb34c60Sahrens 		(void) dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
12873cb34c60Sahrens 		    &ds->ds_phys->ds_bp, *ost, tx);
128885edac42Sahrens 	}
1289ecd6cf80Smarks 
1290ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
1291ecd6cf80Smarks 	    tx, cr, "dataset = %llu", ds->ds_object);
1292fa9e4066Sahrens }
1293fa9e4066Sahrens 
1294e1930233Sbonwick /* ARGSUSED */
1295e1930233Sbonwick static int
12961d452cf5Sahrens dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
1297e1930233Sbonwick {
12981d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
12993cb34c60Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
13003cb34c60Sahrens 	uint64_t count;
13013cb34c60Sahrens 	int err;
1302e1930233Sbonwick 
1303e1930233Sbonwick 	/*
1304e1930233Sbonwick 	 * Can't delete a head dataset if there are snapshots of it.
1305e1930233Sbonwick 	 * (Except if the only snapshots are from the branch we cloned
1306e1930233Sbonwick 	 * from.)
1307e1930233Sbonwick 	 */
1308e1930233Sbonwick 	if (ds->ds_prev != NULL &&
1309e1930233Sbonwick 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1310e1930233Sbonwick 		return (EINVAL);
1311e1930233Sbonwick 
13123cb34c60Sahrens 	/*
13133cb34c60Sahrens 	 * This is really a dsl_dir thing, but check it here so that
13143cb34c60Sahrens 	 * we'll be less likely to leave this dataset inconsistent &
13153cb34c60Sahrens 	 * nearly destroyed.
13163cb34c60Sahrens 	 */
13173cb34c60Sahrens 	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
13183cb34c60Sahrens 	if (err)
13193cb34c60Sahrens 		return (err);
13203cb34c60Sahrens 	if (count != 0)
13213cb34c60Sahrens 		return (EEXIST);
13223cb34c60Sahrens 
1323e1930233Sbonwick 	return (0);
1324e1930233Sbonwick }
1325e1930233Sbonwick 
13261d452cf5Sahrens /* ARGSUSED */
13271d452cf5Sahrens static void
1328ecd6cf80Smarks dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
1329fa9e4066Sahrens {
13301d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1331ecd6cf80Smarks 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1332fa9e4066Sahrens 
13331d452cf5Sahrens 	/* Mark it as inconsistent on-disk, in case we crash */
13341d452cf5Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
13351d452cf5Sahrens 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
1336ecd6cf80Smarks 
1337ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
1338ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
13391d452cf5Sahrens }
1340fa9e4066Sahrens 
13411d452cf5Sahrens /* ARGSUSED */
13423cb34c60Sahrens int
13431d452cf5Sahrens dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
13441d452cf5Sahrens {
13451d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1346fa9e4066Sahrens 
1347*745cd3c5Smaybee 	/* we have an owner hold, so noone else can destroy us */
1348*745cd3c5Smaybee 	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
1349*745cd3c5Smaybee 
1350fa9e4066Sahrens 	/* Can't delete a branch point. */
13511d452cf5Sahrens 	if (ds->ds_phys->ds_num_children > 1)
13521d452cf5Sahrens 		return (EEXIST);
1353fa9e4066Sahrens 
1354fa9e4066Sahrens 	/*
1355fa9e4066Sahrens 	 * Can't delete a head dataset if there are snapshots of it.
1356fa9e4066Sahrens 	 * (Except if the only snapshots are from the branch we cloned
1357fa9e4066Sahrens 	 * from.)
1358fa9e4066Sahrens 	 */
1359fa9e4066Sahrens 	if (ds->ds_prev != NULL &&
13601d452cf5Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1361fa9e4066Sahrens 		return (EINVAL);
1362fa9e4066Sahrens 
1363fa9e4066Sahrens 	/*
1364fa9e4066Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
1365fa9e4066Sahrens 	 * them.  Try again.
1366fa9e4066Sahrens 	 */
13671d452cf5Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1368fa9e4066Sahrens 		return (EAGAIN);
13691d452cf5Sahrens 
13701d452cf5Sahrens 	/* XXX we should do some i/o error checking... */
13711d452cf5Sahrens 	return (0);
13721d452cf5Sahrens }
13731d452cf5Sahrens 
1374*745cd3c5Smaybee struct refsarg {
1375*745cd3c5Smaybee 	kmutex_t lock;
1376*745cd3c5Smaybee 	boolean_t gone;
1377*745cd3c5Smaybee 	kcondvar_t cv;
1378*745cd3c5Smaybee };
1379*745cd3c5Smaybee 
1380*745cd3c5Smaybee /* ARGSUSED */
1381*745cd3c5Smaybee static void
1382*745cd3c5Smaybee dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
1383*745cd3c5Smaybee {
1384*745cd3c5Smaybee 	struct refsarg *arg = argv;
1385*745cd3c5Smaybee 
1386*745cd3c5Smaybee 	mutex_enter(&arg->lock);
1387*745cd3c5Smaybee 	arg->gone = TRUE;
1388*745cd3c5Smaybee 	cv_signal(&arg->cv);
1389*745cd3c5Smaybee 	mutex_exit(&arg->lock);
1390*745cd3c5Smaybee }
1391*745cd3c5Smaybee 
1392*745cd3c5Smaybee static void
1393*745cd3c5Smaybee dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
1394*745cd3c5Smaybee {
1395*745cd3c5Smaybee 	struct refsarg arg;
1396*745cd3c5Smaybee 
1397*745cd3c5Smaybee 	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
1398*745cd3c5Smaybee 	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
1399*745cd3c5Smaybee 	arg.gone = FALSE;
1400*745cd3c5Smaybee 	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
1401*745cd3c5Smaybee 	    dsl_dataset_refs_gone);
1402*745cd3c5Smaybee 	dmu_buf_rele(ds->ds_dbuf, tag);
1403*745cd3c5Smaybee 	mutex_enter(&arg.lock);
1404*745cd3c5Smaybee 	while (!arg.gone)
1405*745cd3c5Smaybee 		cv_wait(&arg.cv, &arg.lock);
1406*745cd3c5Smaybee 	ASSERT(arg.gone);
1407*745cd3c5Smaybee 	mutex_exit(&arg.lock);
1408*745cd3c5Smaybee 	ds->ds_dbuf = NULL;
1409*745cd3c5Smaybee 	ds->ds_phys = NULL;
1410*745cd3c5Smaybee 	mutex_destroy(&arg.lock);
1411*745cd3c5Smaybee 	cv_destroy(&arg.cv);
1412*745cd3c5Smaybee }
1413*745cd3c5Smaybee 
/*
 * Sync-task callback that destroys the dataset passed as arg1.
 *
 *	arg1	the dsl_dataset_t to destroy; it must have an owner on entry
 *	tag	handed to dsl_dataset_drain_refs(), which uses it to release
 *		the hold on ds_dbuf
 *	cr	credentials passed to the reservation update and history log
 *	tx	the transaction in which all changes are made
 *
 * The pool's dp_config_rwlock must be held as writer (asserted below).
 *
 * In outline: mark the dataset as owned by dsl_reaper and wake waiters,
 * drop any reservation, unlink the dataset from its previous snapshot,
 * then either merge deadlists with the next snapshot (when there is one)
 * or free the dataset's blocks (when there is not), fix up the space
 * accounting, remove the dataset from the namespace, and finally drain
 * references and free the on-disk object.
 */
14143cb34c60Sahrens void
1415ecd6cf80Smarks dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
14161d452cf5Sahrens {
14171d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
1418a9799022Sck 	int64_t used = 0, compressed = 0, uncompressed = 0;
14191d452cf5Sahrens 	zio_t *zio;
14201d452cf5Sahrens 	int err;
14211d452cf5Sahrens 	int after_branch_point = FALSE;
14221d452cf5Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
14231d452cf5Sahrens 	objset_t *mos = dp->dp_meta_objset;
14241d452cf5Sahrens 	dsl_dataset_t *ds_prev = NULL;
14251d452cf5Sahrens 	uint64_t obj;
14261d452cf5Sahrens 
1427*745cd3c5Smaybee 	ASSERT(ds->ds_owner);
14281d452cf5Sahrens 	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
14291d452cf5Sahrens 	ASSERT(ds->ds_prev == NULL ||
14301d452cf5Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
14311d452cf5Sahrens 	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
14321d452cf5Sahrens 
1433*745cd3c5Smaybee 	/* signal any waiters that this dataset is going away */
1434*745cd3c5Smaybee 	mutex_enter(&ds->ds_lock);
1435*745cd3c5Smaybee 	ds->ds_owner = dsl_reaper;
1436*745cd3c5Smaybee 	cv_broadcast(&ds->ds_exclusive_cv);
1437*745cd3c5Smaybee 	mutex_exit(&ds->ds_lock);
1438*745cd3c5Smaybee 
1439a9799022Sck 	/* Remove our reservation */
1440a9799022Sck 	if (ds->ds_reserved != 0) {
1441a9799022Sck 		uint64_t val = 0;
1442a9799022Sck 		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
1443a9799022Sck 		ASSERT3U(ds->ds_reserved, ==, 0);
1444a9799022Sck 	}
1445a9799022Sck 
14461d452cf5Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
14471d452cf5Sahrens 
14481d452cf5Sahrens 	obj = ds->ds_object;
1449fa9e4066Sahrens 
	/*
	 * Unlink ourselves from the previous snapshot.  Use ds->ds_prev
	 * when it is set; otherwise take a temporary hold, which is
	 * released near the end of this function.
	 */
1450fa9e4066Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
1451fa9e4066Sahrens 		if (ds->ds_prev) {
1452fa9e4066Sahrens 			ds_prev = ds->ds_prev;
1453fa9e4066Sahrens 		} else {
1454*745cd3c5Smaybee 			VERIFY(0 == dsl_dataset_hold_obj(dp,
1455*745cd3c5Smaybee 			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
1456fa9e4066Sahrens 		}
1457fa9e4066Sahrens 		after_branch_point =
1458fa9e4066Sahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
1459fa9e4066Sahrens 
1460fa9e4066Sahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1461fa9e4066Sahrens 		if (after_branch_point &&
1462fa9e4066Sahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
1463fa9e4066Sahrens 			/* This clone is toast. */
1464fa9e4066Sahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
1465fa9e4066Sahrens 			ds_prev->ds_phys->ds_num_children--;
1466fa9e4066Sahrens 		} else if (!after_branch_point) {
1467fa9e4066Sahrens 			ds_prev->ds_phys->ds_next_snap_obj =
1468fa9e4066Sahrens 			    ds->ds_phys->ds_next_snap_obj;
1469fa9e4066Sahrens 		}
1470fa9e4066Sahrens 	}
1471fa9e4066Sahrens 
	/*
	 * Root zio under which the block frees below are issued; it is
	 * waited on once, after both branches.
	 */
1472fa9e4066Sahrens 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
1473fa9e4066Sahrens 
1474fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
14751d452cf5Sahrens 		blkptr_t bp;
1476fa9e4066Sahrens 		dsl_dataset_t *ds_next;
1477fa9e4066Sahrens 		uint64_t itor = 0;
1478a9799022Sck 		uint64_t old_unique;
1479fa9e4066Sahrens 
1480fa9e4066Sahrens 		spa_scrub_restart(dp->dp_spa, tx->tx_txg);
1481fa9e4066Sahrens 
1482*745cd3c5Smaybee 		VERIFY(0 == dsl_dataset_hold_obj(dp,
1483*745cd3c5Smaybee 		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
1484fa9e4066Sahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
1485fa9e4066Sahrens 
1486a9799022Sck 		old_unique = dsl_dataset_unique(ds_next);
1487a9799022Sck 
		/* Make next point back past us, at our previous snapshot. */
1488fa9e4066Sahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
1489fa9e4066Sahrens 		ds_next->ds_phys->ds_prev_snap_obj =
1490fa9e4066Sahrens 		    ds->ds_phys->ds_prev_snap_obj;
1491fa9e4066Sahrens 		ds_next->ds_phys->ds_prev_snap_txg =
1492fa9e4066Sahrens 		    ds->ds_phys->ds_prev_snap_txg;
1493fa9e4066Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1494fa9e4066Sahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
1495fa9e4066Sahrens 
1496fa9e4066Sahrens 		/*
1497fa9e4066Sahrens 		 * Transfer to our deadlist (which will become next's
1498fa9e4066Sahrens 		 * new deadlist) any entries from next's current
1499fa9e4066Sahrens 		 * deadlist which were born before prev, and free the
1500fa9e4066Sahrens 		 * other entries.
1501fa9e4066Sahrens 		 *
1502fa9e4066Sahrens 		 * XXX we're doing this long task with the config lock held
1503fa9e4066Sahrens 		 */
1504*745cd3c5Smaybee 		while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) {
1505fa9e4066Sahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
1506ea8dc4b6Seschrock 				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
1507ea8dc4b6Seschrock 				    &bp, tx));
1508fa9e4066Sahrens 				if (ds_prev && !after_branch_point &&
1509fa9e4066Sahrens 				    bp.blk_birth >
1510fa9e4066Sahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
1511fa9e4066Sahrens 					ds_prev->ds_phys->ds_unique_bytes +=
151299653d4eSeschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1513fa9e4066Sahrens 				}
1514fa9e4066Sahrens 			} else {
151599653d4eSeschrock 				used += bp_get_dasize(dp->dp_spa, &bp);
1516fa9e4066Sahrens 				compressed += BP_GET_PSIZE(&bp);
1517fa9e4066Sahrens 				uncompressed += BP_GET_UCSIZE(&bp);
1518fa9e4066Sahrens 				/* XXX check return value? */
1519fa9e4066Sahrens 				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
1520fa9e4066Sahrens 				    &bp, NULL, NULL, ARC_NOWAIT);
1521fa9e4066Sahrens 			}
1522fa9e4066Sahrens 		}
1523fa9e4066Sahrens 
1524fa9e4066Sahrens 		/* free next's deadlist */
1525fa9e4066Sahrens 		bplist_close(&ds_next->ds_deadlist);
1526fa9e4066Sahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
1527fa9e4066Sahrens 
1528fa9e4066Sahrens 		/* set next's deadlist to our deadlist */
1529*745cd3c5Smaybee 		bplist_close(&ds->ds_deadlist);
1530fa9e4066Sahrens 		ds_next->ds_phys->ds_deadlist_obj =
1531fa9e4066Sahrens 		    ds->ds_phys->ds_deadlist_obj;
1532ea8dc4b6Seschrock 		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
1533ea8dc4b6Seschrock 		    ds_next->ds_phys->ds_deadlist_obj));
1534fa9e4066Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1535fa9e4066Sahrens 
1536fa9e4066Sahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
1537fa9e4066Sahrens 			/*
1538fa9e4066Sahrens 			 * Update next's unique to include blocks which
1539fa9e4066Sahrens 			 * were previously shared by only this snapshot
1540fa9e4066Sahrens 			 * and it.  Those blocks will be born after the
1541fa9e4066Sahrens 			 * prev snap and before this snap, and will have
1542fa9e4066Sahrens 			 * died after the next snap and before the one
1543fa9e4066Sahrens 			 * after that (ie. be on the snap after next's
1544fa9e4066Sahrens 			 * deadlist).
1545fa9e4066Sahrens 			 *
1546fa9e4066Sahrens 			 * XXX we're doing this long task with the
1547fa9e4066Sahrens 			 * config lock held
1548fa9e4066Sahrens 			 */
1549fa9e4066Sahrens 			dsl_dataset_t *ds_after_next;
1550fa9e4066Sahrens 
1551*745cd3c5Smaybee 			VERIFY(0 == dsl_dataset_hold_obj(dp,
1552*745cd3c5Smaybee 			    ds_next->ds_phys->ds_next_snap_obj,
1553*745cd3c5Smaybee 			    FTAG, &ds_after_next));
1554fa9e4066Sahrens 			itor = 0;
1555fa9e4066Sahrens 			while (bplist_iterate(&ds_after_next->ds_deadlist,
1556fa9e4066Sahrens 			    &itor, &bp) == 0) {
1557fa9e4066Sahrens 				if (bp.blk_birth >
1558fa9e4066Sahrens 				    ds->ds_phys->ds_prev_snap_txg &&
1559fa9e4066Sahrens 				    bp.blk_birth <=
1560fa9e4066Sahrens 				    ds->ds_phys->ds_creation_txg) {
1561fa9e4066Sahrens 					ds_next->ds_phys->ds_unique_bytes +=
156299653d4eSeschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1563fa9e4066Sahrens 				}
1564fa9e4066Sahrens 			}
1565fa9e4066Sahrens 
1566*745cd3c5Smaybee 			dsl_dataset_rele(ds_after_next, FTAG);
1567fa9e4066Sahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
1568fa9e4066Sahrens 		} else {
			/*
			 * next has no next snapshot of its own.  Its
			 * ds_prev reference is to us; replace it with a
			 * reference to our previous snapshot, if any.
			 */
1569fa9e4066Sahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
1570*745cd3c5Smaybee 			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
1571*745cd3c5Smaybee 			ds_next->ds_prev = NULL;
1572fa9e4066Sahrens 			if (ds_prev) {
1573*745cd3c5Smaybee 				VERIFY(0 == dsl_dataset_get_ref(dp,
1574*745cd3c5Smaybee 				    ds->ds_phys->ds_prev_snap_obj,
1575*745cd3c5Smaybee 				    ds_next, &ds_next->ds_prev));
1576fa9e4066Sahrens 			}
1577a9799022Sck 
1578a9799022Sck 			dsl_dataset_recalc_head_uniq(ds_next);
1579a9799022Sck 
1580a9799022Sck 			/*
1581a9799022Sck 			 * Reduce the amount of our unconsumed refreservation
1582a9799022Sck 			 * being charged to our parent by the amount of
1583a9799022Sck 			 * new unique data we have gained.
1584a9799022Sck 			 */
1585a9799022Sck 			if (old_unique < ds_next->ds_reserved) {
1586a9799022Sck 				int64_t mrsdelta;
1587a9799022Sck 				uint64_t new_unique =
1588a9799022Sck 				    ds_next->ds_phys->ds_unique_bytes;
1589a9799022Sck 
1590a9799022Sck 				ASSERT(old_unique <= new_unique);
1591a9799022Sck 				mrsdelta = MIN(new_unique - old_unique,
1592a9799022Sck 				    ds_next->ds_reserved - old_unique);
1593a9799022Sck 				dsl_dir_diduse_space(ds->ds_dir, -mrsdelta,
1594a9799022Sck 				    0, 0, tx);
1595a9799022Sck 			}
1596fa9e4066Sahrens 		}
1597*745cd3c5Smaybee 		dsl_dataset_rele(ds_next, FTAG);
1598fa9e4066Sahrens 
1599fa9e4066Sahrens 		/*
1600a9799022Sck 		 * NB: unique_bytes might not be accurate for the head objset.
1601a9799022Sck 		 * Before SPA_VERSION 9, we didn't update its value when we
1602a9799022Sck 		 * deleted the most recent snapshot.
1603fa9e4066Sahrens 		 */
1604fa9e4066Sahrens 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
1605fa9e4066Sahrens 	} else {
1606fa9e4066Sahrens 		/*
1607fa9e4066Sahrens 		 * There's no next snapshot, so this is a head dataset.
1608fa9e4066Sahrens 		 * Destroy the deadlist.  Unless it's a clone, the
1609fa9e4066Sahrens 		 * deadlist should be empty.  (If it's a clone, it's
1610fa9e4066Sahrens 		 * safe to ignore the deadlist contents.)
1611fa9e4066Sahrens 		 */
1612fa9e4066Sahrens 		struct killarg ka;
1613fa9e4066Sahrens 
1614fa9e4066Sahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
1615fa9e4066Sahrens 		bplist_close(&ds->ds_deadlist);
1616fa9e4066Sahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1617fa9e4066Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1618fa9e4066Sahrens 
1619fa9e4066Sahrens 		/*
1620fa9e4066Sahrens 		 * Free everything that we point to (that's born after
1621fa9e4066Sahrens 		 * the previous snapshot, if we are a clone)
1622fa9e4066Sahrens 		 *
1623fa9e4066Sahrens 		 * XXX we're doing this long task with the config lock held
1624fa9e4066Sahrens 		 */
1625fa9e4066Sahrens 		ka.usedp = &used;
1626fa9e4066Sahrens 		ka.compressedp = &compressed;
1627fa9e4066Sahrens 		ka.uncompressedp = &uncompressed;
1628fa9e4066Sahrens 		ka.zio = zio;
1629fa9e4066Sahrens 		ka.tx = tx;
1630fa9e4066Sahrens 		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1631fa9e4066Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1632fa9e4066Sahrens 		ASSERT3U(err, ==, 0);
1633a9799022Sck 		ASSERT(spa_version(dp->dp_spa) <
1634a9799022Sck 		    SPA_VERSION_UNIQUE_ACCURATE ||
1635a9799022Sck 		    used == ds->ds_phys->ds_unique_bytes);
1636fa9e4066Sahrens 	}
1637fa9e4066Sahrens 
	/* Wait for the frees issued under the root zio. */
1638fa9e4066Sahrens 	err = zio_wait(zio);
1639fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1640fa9e4066Sahrens 
	/* Credit the space totalled above back to the containing dir. */
16411d452cf5Sahrens 	dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);
1642fa9e4066Sahrens 
16431d452cf5Sahrens 	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1644*745cd3c5Smaybee 		/* Erase the link in the dir */
16451d452cf5Sahrens 		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
16461d452cf5Sahrens 		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
1647*745cd3c5Smaybee 		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
1648*745cd3c5Smaybee 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1649*745cd3c5Smaybee 		ASSERT(err == 0);
1650fa9e4066Sahrens 	} else {
1651fa9e4066Sahrens 		/* remove from snapshot namespace */
1652fa9e4066Sahrens 		dsl_dataset_t *ds_head;
1653*745cd3c5Smaybee 		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
1654*745cd3c5Smaybee 		VERIFY(0 == dsl_dataset_hold_obj(dp,
1655*745cd3c5Smaybee 		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
16568660574dSahrens 		VERIFY(0 == dsl_dataset_get_snapname(ds));
1657fa9e4066Sahrens #ifdef ZFS_DEBUG
1658fa9e4066Sahrens 		{
1659fa9e4066Sahrens 			uint64_t val;
1660ab04eb8eStimh 
1661*745cd3c5Smaybee 			err = dsl_dataset_snap_lookup(ds_head,
1662ab04eb8eStimh 			    ds->ds_snapname, &val);
1663fa9e4066Sahrens 			ASSERT3U(err, ==, 0);
1664fa9e4066Sahrens 			ASSERT3U(val, ==, obj);
1665fa9e4066Sahrens 		}
1666fa9e4066Sahrens #endif
1667*745cd3c5Smaybee 		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
1668fa9e4066Sahrens 		ASSERT(err == 0);
1669*745cd3c5Smaybee 		dsl_dataset_rele(ds_head, FTAG);
1670fa9e4066Sahrens 	}
1671fa9e4066Sahrens 
	/* Release ds_prev only if we took our own hold on it above. */
1672fa9e4066Sahrens 	if (ds_prev && ds->ds_prev != ds_prev)
1673*745cd3c5Smaybee 		dsl_dataset_rele(ds_prev, FTAG);
1674fa9e4066Sahrens 
1675990b4856Slling 	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
1676ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
1677ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
1678ecd6cf80Smarks 
	/*
	 * Close the dir, wait for the dbuf's users to go away, and only
	 * then free the on-disk object (using the object number saved
	 * in 'obj' earlier).
	 */
1679*745cd3c5Smaybee 	dsl_dir_close(ds->ds_dir, ds);
1680*745cd3c5Smaybee 	ds->ds_dir = NULL;
1681*745cd3c5Smaybee 	dsl_dataset_drain_refs(ds, tag);
16821d452cf5Sahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
1683fa9e4066Sahrens }
1684fa9e4066Sahrens 
1685a9799022Sck static int
1686a9799022Sck dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
1687a9799022Sck {
1688a9799022Sck 	uint64_t asize;
1689a9799022Sck 
1690a9799022Sck 	if (!dmu_tx_is_syncing(tx))
1691a9799022Sck 		return (0);
1692a9799022Sck 
1693a9799022Sck 	/*
1694a9799022Sck 	 * If there's an fs-only reservation, any blocks that might become
1695a9799022Sck 	 * owned by the snapshot dataset must be accommodated by space
1696a9799022Sck 	 * outside of the reservation.
1697a9799022Sck 	 */
1698a9799022Sck 	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
1699a9799022Sck 	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
1700a9799022Sck 		return (ENOSPC);
1701a9799022Sck 
1702a9799022Sck 	/*
1703a9799022Sck 	 * Propogate any reserved space for this snapshot to other
1704a9799022Sck 	 * snapshot checks in this sync group.
1705a9799022Sck 	 */
1706a9799022Sck 	if (asize > 0)
1707a9799022Sck 		dsl_dir_willuse_space(ds->ds_dir, asize, tx);
1708a9799022Sck 
1709a9799022Sck 	return (0);
1710a9799022Sck }
1711a9799022Sck 
17121d452cf5Sahrens /* ARGSUSED */
1713fa9e4066Sahrens int
17141d452cf5Sahrens dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
1715fa9e4066Sahrens {
17163cb34c60Sahrens 	dsl_dataset_t *ds = arg1;
17171d452cf5Sahrens 	const char *snapname = arg2;
1718fa9e4066Sahrens 	int err;
17191d452cf5Sahrens 	uint64_t value;
1720fa9e4066Sahrens 
17211d452cf5Sahrens 	/*
17221d452cf5Sahrens 	 * We don't allow multiple snapshots of the same txg.  If there
17231d452cf5Sahrens 	 * is already one, try again.
17241d452cf5Sahrens 	 */
17251d452cf5Sahrens 	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
17261d452cf5Sahrens 		return (EAGAIN);
1727fa9e4066Sahrens 
17281d452cf5Sahrens 	/*
17291d452cf5Sahrens 	 * Check for conflicting name snapshot name.
17301d452cf5Sahrens 	 */
1731*745cd3c5Smaybee 	err = dsl_dataset_snap_lookup(ds, snapname, &value);
17321d452cf5Sahrens 	if (err == 0)
1733fa9e4066Sahrens 		return (EEXIST);
17341d452cf5Sahrens 	if (err != ENOENT)
17351d452cf5Sahrens 		return (err);
1736fa9e4066Sahrens 
1737b7661cccSmmusante 	/*
1738b7661cccSmmusante 	 * Check that the dataset's name is not too long.  Name consists
1739b7661cccSmmusante 	 * of the dataset's length + 1 for the @-sign + snapshot name's length
1740b7661cccSmmusante 	 */
1741b7661cccSmmusante 	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
1742b7661cccSmmusante 		return (ENAMETOOLONG);
1743b7661cccSmmusante 
1744a9799022Sck 	err = dsl_dataset_snapshot_reserve_space(ds, tx);
1745a9799022Sck 	if (err)
1746a9799022Sck 		return (err);
1747a9799022Sck 
17481d452cf5Sahrens 	ds->ds_trysnap_txg = tx->tx_txg;
17491d452cf5Sahrens 	return (0);
17501d452cf5Sahrens }
1751fa9e4066Sahrens 
/*
 * Sync-task callback that creates a snapshot of a dataset.
 *
 *	arg1	the dataset being snapshotted
 *	arg2	the snapshot name, used as the key in the dataset's
 *		snapnames zap object
 *	cr	credentials passed to the history log
 *	tx	the transaction in which all changes are made
 *
 * The pool's dp_config_rwlock must be held as writer (asserted below).
 */
17521d452cf5Sahrens void
1753ecd6cf80Smarks dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
17541d452cf5Sahrens {
17553cb34c60Sahrens 	dsl_dataset_t *ds = arg1;
17561d452cf5Sahrens 	const char *snapname = arg2;
17571d452cf5Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
17581d452cf5Sahrens 	dmu_buf_t *dbuf;
17591d452cf5Sahrens 	dsl_dataset_phys_t *dsphys;
17601d452cf5Sahrens 	uint64_t dsobj;
17611d452cf5Sahrens 	objset_t *mos = dp->dp_meta_objset;
17621d452cf5Sahrens 	int err;
1763fa9e4066Sahrens 
1764fa9e4066Sahrens 	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
17651d452cf5Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
1766fa9e4066Sahrens 
	/*
	 * Allocate the snapshot's dataset object in the MOS and fill in
	 * its phys, mostly by copying the current state of 'ds'.  The
	 * snapshot takes over ds's current deadlist object and block
	 * pointer, and its next-snapshot pointer refers to ds.
	 */
17671649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
17681649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
1769ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
1770fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
1771fa9e4066Sahrens 	dsphys = dbuf->db_data;
1772*745cd3c5Smaybee 	bzero(dsphys, sizeof (dsl_dataset_phys_t));
17731d452cf5Sahrens 	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
1774fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
1775fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
1776fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
1777fa9e4066Sahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
1778fa9e4066Sahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
1779fa9e4066Sahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
1780fa9e4066Sahrens 	dsphys->ds_num_children = 1;
1781fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
1782fa9e4066Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
1783fa9e4066Sahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
1784fa9e4066Sahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
1785fa9e4066Sahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
1786fa9e4066Sahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
178799653d4eSeschrock 	dsphys->ds_flags = ds->ds_phys->ds_flags;
1788fa9e4066Sahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
1789ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
1790fa9e4066Sahrens 
	/*
	 * If the previous snapshot's next pointer referred to ds,
	 * redirect it to the new snapshot.
	 */
17911d452cf5Sahrens 	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
17921d452cf5Sahrens 	if (ds->ds_prev) {
17931d452cf5Sahrens 		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
1794fa9e4066Sahrens 		    ds->ds_object ||
17951d452cf5Sahrens 		    ds->ds_prev->ds_phys->ds_num_children > 1);
17961d452cf5Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
17971d452cf5Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1798fa9e4066Sahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
17991d452cf5Sahrens 			    ds->ds_prev->ds_phys->ds_creation_txg);
18001d452cf5Sahrens 			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
1801fa9e4066Sahrens 		}
1802fa9e4066Sahrens 	}
1803fa9e4066Sahrens 
1804a9799022Sck 	/*
1805a9799022Sck 	 * If we have a reference-reservation on this dataset, we will
1806a9799022Sck 	 * need to increase the amount of refreservation being charged
1807a9799022Sck 	 * since our unique space is going to zero.
1808a9799022Sck 	 */
1809a9799022Sck 	if (ds->ds_reserved) {
1810a9799022Sck 		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
1811a9799022Sck 		dsl_dir_diduse_space(ds->ds_dir, add, 0, 0, tx);
1812a9799022Sck 	}
1813a9799022Sck 
	/*
	 * Update ds itself: its previous snapshot is now the new one,
	 * its unique byte count restarts at zero, and it gets a newly
	 * created deadlist (the old deadlist object number was copied
	 * into the snapshot's phys above).
	 */
1814fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
1815fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1816a4611edeSahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
1817fa9e4066Sahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
1818a4611edeSahrens 	ds->ds_phys->ds_prev_snap_txg = tx->tx_txg;
1819fa9e4066Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
1820a9799022Sck 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
1821a9799022Sck 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
1822fa9e4066Sahrens 	ds->ds_phys->ds_deadlist_obj =
1823fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
1824ea8dc4b6Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
1825ea8dc4b6Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1826fa9e4066Sahrens 
	/* Record snapname -> dsobj in the snapshot-names zap object. */
1827fa9e4066Sahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
1828fa9e4066Sahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
1829fa9e4066Sahrens 	    snapname, 8, 1, &dsobj, tx);
1830fa9e4066Sahrens 	ASSERT(err == 0);
1831fa9e4066Sahrens 
	/*
	 * Swap the in-core ds_prev reference: drop the old one (if any)
	 * and take one on the object now in ds_prev_snap_obj, i.e. the
	 * new snapshot.
	 */
1832fa9e4066Sahrens 	if (ds->ds_prev)
1833*745cd3c5Smaybee 		dsl_dataset_drop_ref(ds->ds_prev, ds);
1834*745cd3c5Smaybee 	VERIFY(0 == dsl_dataset_get_ref(dp,
1835*745cd3c5Smaybee 	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
1836ecd6cf80Smarks 
1837ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
183840feaa91Sahrens 	    "dataset = %llu", dsobj);
1839fa9e4066Sahrens }
1840fa9e4066Sahrens 
1841fa9e4066Sahrens void
1842c717a561Smaybee dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
1843fa9e4066Sahrens {
1844fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1845fa9e4066Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1846fa9e4066Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1847fa9e4066Sahrens 
184891ebeef5Sahrens 	/*
184991ebeef5Sahrens 	 * in case we had to change ds_fsid_guid when we opened it,
185091ebeef5Sahrens 	 * sync it out now.
185191ebeef5Sahrens 	 */
185291ebeef5Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
185391ebeef5Sahrens 	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
185491ebeef5Sahrens 
1855fa9e4066Sahrens 	dsl_dir_dirty(ds->ds_dir, tx);
1856c717a561Smaybee 	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
1857fa9e4066Sahrens }
1858fa9e4066Sahrens 
1859fa9e4066Sahrens void
1860a2eea2e1Sahrens dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1861fa9e4066Sahrens {
1862a9799022Sck 	uint64_t refd, avail, uobjs, aobjs;
1863a9799022Sck 
1864a2eea2e1Sahrens 	dsl_dir_stats(ds->ds_dir, nv);
1865fa9e4066Sahrens 
1866a9799022Sck 	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
1867a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
1868a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
1869a9799022Sck 
1870a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
1871a2eea2e1Sahrens 	    ds->ds_phys->ds_creation_time);
1872a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
1873a2eea2e1Sahrens 	    ds->ds_phys->ds_creation_txg);
1874a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
1875a9799022Sck 	    ds->ds_quota);
1876a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
1877a9799022Sck 	    ds->ds_reserved);
1878c5904d13Seschrock 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
1879c5904d13Seschrock 	    ds->ds_phys->ds_guid);
1880fa9e4066Sahrens 
1881fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1882fa9e4066Sahrens 		/*
1883fa9e4066Sahrens 		 * This is a snapshot; override the dd's space used with
1884a2eea2e1Sahrens 		 * our unique space and compression ratio.
1885fa9e4066Sahrens 		 */
1886a2eea2e1Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
1887a2eea2e1Sahrens 		    ds->ds_phys->ds_unique_bytes);
1888a2eea2e1Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
1889a2eea2e1Sahrens 		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
1890a2eea2e1Sahrens 		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
1891a2eea2e1Sahrens 		    ds->ds_phys->ds_compressed_bytes));
1892fa9e4066Sahrens 	}
1893fa9e4066Sahrens }
1894fa9e4066Sahrens 
1895a2eea2e1Sahrens void
1896a2eea2e1Sahrens dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1897a2eea2e1Sahrens {
1898a2eea2e1Sahrens 	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
1899a2eea2e1Sahrens 	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
19003cb34c60Sahrens 	stat->dds_guid = ds->ds_phys->ds_guid;
1901a2eea2e1Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1902a2eea2e1Sahrens 		stat->dds_is_snapshot = B_TRUE;
1903a2eea2e1Sahrens 		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
1904a2eea2e1Sahrens 	}
1905a2eea2e1Sahrens 
1906a2eea2e1Sahrens 	/* clone origin is really a dsl_dir thing... */
19074ccbb6e7Sahrens 	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
19083cb34c60Sahrens 	if (ds->ds_dir->dd_phys->dd_origin_obj) {
1909a2eea2e1Sahrens 		dsl_dataset_t *ods;
1910a2eea2e1Sahrens 
1911*745cd3c5Smaybee 		VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
1912*745cd3c5Smaybee 		    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
19133cb34c60Sahrens 		dsl_dataset_name(ods, stat->dds_origin);
1914*745cd3c5Smaybee 		dsl_dataset_drop_ref(ods, FTAG);
1915a2eea2e1Sahrens 	}
19164ccbb6e7Sahrens 	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
1917a2eea2e1Sahrens }
1918a2eea2e1Sahrens 
1919a2eea2e1Sahrens uint64_t
1920a2eea2e1Sahrens dsl_dataset_fsid_guid(dsl_dataset_t *ds)
1921a2eea2e1Sahrens {
192291ebeef5Sahrens 	return (ds->ds_fsid_guid);
1923a2eea2e1Sahrens }
1924a2eea2e1Sahrens 
1925a2eea2e1Sahrens void
1926a2eea2e1Sahrens dsl_dataset_space(dsl_dataset_t *ds,
1927a2eea2e1Sahrens     uint64_t *refdbytesp, uint64_t *availbytesp,
1928a2eea2e1Sahrens     uint64_t *usedobjsp, uint64_t *availobjsp)
1929fa9e4066Sahrens {
1930a2eea2e1Sahrens 	*refdbytesp = ds->ds_phys->ds_used_bytes;
1931a2eea2e1Sahrens 	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
1932a9799022Sck 	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
1933a9799022Sck 		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
1934a9799022Sck 	if (ds->ds_quota != 0) {
1935a9799022Sck 		/*
1936a9799022Sck 		 * Adjust available bytes according to refquota
1937a9799022Sck 		 */
1938a9799022Sck 		if (*refdbytesp < ds->ds_quota)
1939a9799022Sck 			*availbytesp = MIN(*availbytesp,
1940a9799022Sck 			    ds->ds_quota - *refdbytesp);
1941a9799022Sck 		else
1942a9799022Sck 			*availbytesp = 0;
1943a9799022Sck 	}
1944a2eea2e1Sahrens 	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
1945a2eea2e1Sahrens 	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
1946fa9e4066Sahrens }
1947fa9e4066Sahrens 
1948f18faf3fSek boolean_t
1949f18faf3fSek dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
1950f18faf3fSek {
1951f18faf3fSek 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1952f18faf3fSek 
1953f18faf3fSek 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
1954f18faf3fSek 	    dsl_pool_sync_context(dp));
1955f18faf3fSek 	if (ds->ds_prev == NULL)
1956f18faf3fSek 		return (B_FALSE);
1957f18faf3fSek 	if (ds->ds_phys->ds_bp.blk_birth >
1958f18faf3fSek 	    ds->ds_prev->ds_phys->ds_creation_txg)
1959f18faf3fSek 		return (B_TRUE);
1960f18faf3fSek 	return (B_FALSE);
1961f18faf3fSek }
1962f18faf3fSek 
19631d452cf5Sahrens /* ARGSUSED */
1964fa9e4066Sahrens static int
19651d452cf5Sahrens dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1966fa9e4066Sahrens {
19671d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
19681d452cf5Sahrens 	char *newsnapname = arg2;
19691d452cf5Sahrens 	dsl_dir_t *dd = ds->ds_dir;
19701d452cf5Sahrens 	dsl_dataset_t *hds;
1971fa9e4066Sahrens 	uint64_t val;
19721d452cf5Sahrens 	int err;
1973fa9e4066Sahrens 
1974*745cd3c5Smaybee 	err = dsl_dataset_hold_obj(dd->dd_pool,
1975*745cd3c5Smaybee 	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds);
1976fa9e4066Sahrens 	if (err)
1977fa9e4066Sahrens 		return (err);
1978fa9e4066Sahrens 
19791d452cf5Sahrens 	/* new name better not be in use */
1980*745cd3c5Smaybee 	err = dsl_dataset_snap_lookup(hds, newsnapname, &val);
1981*745cd3c5Smaybee 	dsl_dataset_rele(hds, FTAG);
19821d452cf5Sahrens 
19831d452cf5Sahrens 	if (err == 0)
19841d452cf5Sahrens 		err = EEXIST;
19851d452cf5Sahrens 	else if (err == ENOENT)
19861d452cf5Sahrens 		err = 0;
1987cdf5b4caSmmusante 
1988cdf5b4caSmmusante 	/* dataset name + 1 for the "@" + the new snapshot name must fit */
1989cdf5b4caSmmusante 	if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
1990cdf5b4caSmmusante 		err = ENAMETOOLONG;
1991cdf5b4caSmmusante 
19921d452cf5Sahrens 	return (err);
19931d452cf5Sahrens }
1994fa9e4066Sahrens 
19951d452cf5Sahrens static void
1996ecd6cf80Smarks dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2,
1997ecd6cf80Smarks     cred_t *cr, dmu_tx_t *tx)
19981d452cf5Sahrens {
19991d452cf5Sahrens 	dsl_dataset_t *ds = arg1;
2000ecd6cf80Smarks 	const char *newsnapname = arg2;
20011d452cf5Sahrens 	dsl_dir_t *dd = ds->ds_dir;
20021d452cf5Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
20031d452cf5Sahrens 	dsl_dataset_t *hds;
20041d452cf5Sahrens 	int err;
2005fa9e4066Sahrens 
20061d452cf5Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
2007fa9e4066Sahrens 
2008*745cd3c5Smaybee 	VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
2009*745cd3c5Smaybee 	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
2010fa9e4066Sahrens 
20111d452cf5Sahrens 	VERIFY(0 == dsl_dataset_get_snapname(ds));
2012*745cd3c5Smaybee 	err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
2013fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
20141d452cf5Sahrens 	mutex_enter(&ds->ds_lock);
20151d452cf5Sahrens 	(void) strcpy(ds->ds_snapname, newsnapname);
20161d452cf5Sahrens 	mutex_exit(&ds->ds_lock);
20171d452cf5Sahrens 	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
20181d452cf5Sahrens 	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
2019fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
2020fa9e4066Sahrens 
2021ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
2022ecd6cf80Smarks 	    cr, "dataset = %llu", ds->ds_object);
2023*745cd3c5Smaybee 	dsl_dataset_rele(hds, FTAG);
2024fa9e4066Sahrens }
2025fa9e4066Sahrens 
/*
 * Argument block shared by dsl_recursive_rename() and its per-dataset
 * callback dsl_snapshot_rename_one().
 */
2026f18faf3fSek struct renamesnaparg {
2027cdf5b4caSmmusante 	dsl_sync_task_group_t *dstg;	/* task group; also the hold tag */
2028cdf5b4caSmmusante 	char failed[MAXPATHLEN];	/* name recorded when a step fails */
2029cdf5b4caSmmusante 	char *oldsnap;			/* old snapshot name, after the '@' */
2030cdf5b4caSmmusante 	char *newsnap;			/* new snapshot name, after the '@' */
2031cdf5b4caSmmusante };
2032cdf5b4caSmmusante 
2033cdf5b4caSmmusante static int
2034cdf5b4caSmmusante dsl_snapshot_rename_one(char *name, void *arg)
2035cdf5b4caSmmusante {
2036f18faf3fSek 	struct renamesnaparg *ra = arg;
2037cdf5b4caSmmusante 	dsl_dataset_t *ds = NULL;
2038cdf5b4caSmmusante 	char *cp;
2039cdf5b4caSmmusante 	int err;
2040cdf5b4caSmmusante 
2041cdf5b4caSmmusante 	cp = name + strlen(name);
2042cdf5b4caSmmusante 	*cp = '@';
2043cdf5b4caSmmusante 	(void) strcpy(cp + 1, ra->oldsnap);
2044ecd6cf80Smarks 
2045ecd6cf80Smarks 	/*
2046ecd6cf80Smarks 	 * For recursive snapshot renames the parent won't be changing
2047ecd6cf80Smarks 	 * so we just pass name for both the to/from argument.
2048ecd6cf80Smarks 	 */
2049ecd6cf80Smarks 	if (err = zfs_secpolicy_rename_perms(name, name, CRED())) {
2050ecd6cf80Smarks 		(void) strcpy(ra->failed, name);
2051ecd6cf80Smarks 		return (err);
2052ecd6cf80Smarks 	}
2053ecd6cf80Smarks 
2054*745cd3c5Smaybee #ifdef _KERNEL
2055*745cd3c5Smaybee 	/*
2056*745cd3c5Smaybee 	 * For all filesystems undergoing rename, we'll need to unmount it.
2057*745cd3c5Smaybee 	 */
2058*745cd3c5Smaybee 	(void) zfs_unmount_snap(name, NULL);
2059*745cd3c5Smaybee #endif
2060*745cd3c5Smaybee 	err = dsl_dataset_hold(name, ra->dstg, &ds);
2061*745cd3c5Smaybee 	*cp = '\0';
2062cdf5b4caSmmusante 	if (err == ENOENT) {
2063cdf5b4caSmmusante 		return (0);
2064*745cd3c5Smaybee 	} else if (err) {
2065cdf5b4caSmmusante 		(void) strcpy(ra->failed, name);
2066cdf5b4caSmmusante 		return (err);
2067cdf5b4caSmmusante 	}
2068cdf5b4caSmmusante 
2069cdf5b4caSmmusante 	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
2070cdf5b4caSmmusante 	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
2071cdf5b4caSmmusante 
2072cdf5b4caSmmusante 	return (0);
2073cdf5b4caSmmusante }
2074cdf5b4caSmmusante 
2075cdf5b4caSmmusante static int
2076cdf5b4caSmmusante dsl_recursive_rename(char *oldname, const char *newname)
2077cdf5b4caSmmusante {
2078cdf5b4caSmmusante 	int err;
2079f18faf3fSek 	struct renamesnaparg *ra;
2080cdf5b4caSmmusante 	dsl_sync_task_t *dst;
2081cdf5b4caSmmusante 	spa_t *spa;
2082cdf5b4caSmmusante 	char *cp, *fsname = spa_strdup(oldname);
2083cdf5b4caSmmusante 	int len = strlen(oldname);
2084cdf5b4caSmmusante 
2085cdf5b4caSmmusante 	/* truncate the snapshot name to get the fsname */
2086cdf5b4caSmmusante 	cp = strchr(fsname, '@');
2087cdf5b4caSmmusante 	*cp = '\0';
2088cdf5b4caSmmusante 
208940feaa91Sahrens 	err = spa_open(fsname, &spa, FTAG);
2090cdf5b4caSmmusante 	if (err) {
2091cdf5b4caSmmusante 		kmem_free(fsname, len + 1);
2092cdf5b4caSmmusante 		return (err);
2093cdf5b4caSmmusante 	}
2094f18faf3fSek 	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
2095cdf5b4caSmmusante 	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
2096cdf5b4caSmmusante 
2097cdf5b4caSmmusante 	ra->oldsnap = strchr(oldname, '@') + 1;
2098cdf5b4caSmmusante 	ra->newsnap = strchr(newname, '@') + 1;
2099cdf5b4caSmmusante 	*ra->failed = '\0';
2100cdf5b4caSmmusante 
2101cdf5b4caSmmusante 	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
2102cdf5b4caSmmusante 	    DS_FIND_CHILDREN);
2103cdf5b4caSmmusante 	kmem_free(fsname, len + 1);
2104cdf5b4caSmmusante 
2105cdf5b4caSmmusante 	if (err == 0) {
2106cdf5b4caSmmusante 		err = dsl_sync_task_group_wait(ra->dstg);
2107cdf5b4caSmmusante 	}
2108cdf5b4caSmmusante 
2109cdf5b4caSmmusante 	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
2110cdf5b4caSmmusante 	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
2111cdf5b4caSmmusante 		dsl_dataset_t *ds = dst->dst_arg1;
2112cdf5b4caSmmusante 		if (dst->dst_err) {
2113cdf5b4caSmmusante 			dsl_dir_name(ds->ds_dir, ra->failed);
21142572aa4eSmmusante 			(void) strcat(ra->failed, "@");
21152572aa4eSmmusante 			(void) strcat(ra->failed, ra->newsnap);
2116cdf5b4caSmmusante 		}
2117*745cd3c5Smaybee 		dsl_dataset_rele(ds, ra->dstg);
2118cdf5b4caSmmusante 	}
2119cdf5b4caSmmusante 
2120ecd6cf80Smarks 	if (err)
2121ecd6cf80Smarks 		(void) strcpy(oldname, ra->failed);
2122cdf5b4caSmmusante 
2123cdf5b4caSmmusante 	dsl_sync_task_group_destroy(ra->dstg);
2124f18faf3fSek 	kmem_free(ra, sizeof (struct renamesnaparg));
2125cdf5b4caSmmusante 	spa_close(spa, FTAG);
2126cdf5b4caSmmusante 	return (err);
2127cdf5b4caSmmusante }
2128cdf5b4caSmmusante 
21293a5a36beSmmusante static int
21303a5a36beSmmusante dsl_valid_rename(char *oldname, void *arg)
21313a5a36beSmmusante {
21323a5a36beSmmusante 	int delta = *(int *)arg;
21333a5a36beSmmusante 
21343a5a36beSmmusante 	if (strlen(oldname) + delta >= MAXNAMELEN)
21353a5a36beSmmusante 		return (ENAMETOOLONG);
21363a5a36beSmmusante 
21373a5a36beSmmusante 	return (0);
21383a5a36beSmmusante }
21393a5a36beSmmusante 
2140fa9e4066Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
2141fa9e4066Sahrens int
2142*745cd3c5Smaybee dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive)
2143fa9e4066Sahrens {
2144fa9e4066Sahrens 	dsl_dir_t *dd;
21451d452cf5Sahrens 	dsl_dataset_t *ds;
2146fa9e4066Sahrens 	const char *tail;
2147fa9e4066Sahrens 	int err;
2148fa9e4066Sahrens 
21491d452cf5Sahrens 	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
2150ea8dc4b6Seschrock 	if (err)
2151ea8dc4b6Seschrock 		return (err);
2152fa9e4066Sahrens 	if (tail == NULL) {
21533a5a36beSmmusante 		int delta = strlen(newname) - strlen(oldname);
21543a5a36beSmmusante 
21553a5a36beSmmusante 		/* if we're growing, validate child size lengths */
21563a5a36beSmmusante 		if (delta > 0)
21573a5a36beSmmusante 			err = dmu_objset_find(oldname, dsl_valid_rename,
21583a5a36beSmmusante 			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
21593a5a36beSmmusante 
21603a5a36beSmmusante 		if (!err)
21613a5a36beSmmusante 			err = dsl_dir_rename(dd, newname);
2162fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
2163fa9e4066Sahrens 		return (err);
2164fa9e4066Sahrens 	}
2165fa9e4066Sahrens 	if (tail[0] != '@') {
2166fa9e4066Sahrens 		/* the name ended in a nonexistant component */
2167fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
2168fa9e4066Sahrens 		return (ENOENT);
2169fa9e4066Sahrens 	}
2170fa9e4066Sahrens 
2171fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
21721d452cf5Sahrens 
21731d452cf5Sahrens 	/* new name must be snapshot in same filesystem */
21741d452cf5Sahrens 	tail = strchr(newname, '@');
21751d452cf5Sahrens 	if (tail == NULL)
21761d452cf5Sahrens 		return (EINVAL);
21771d452cf5Sahrens 	tail++;
21781d452cf5Sahrens 	if (strncmp(oldname, newname, tail - newname) != 0)
21791d452cf5Sahrens 		return (EXDEV);
21801d452cf5Sahrens 
2181cdf5b4caSmmusante 	if (recursive) {
2182cdf5b4caSmmusante 		err = dsl_recursive_rename(oldname, newname);
2183cdf5b4caSmmusante 	} else {
2184*745cd3c5Smaybee 		err = dsl_dataset_hold(oldname, FTAG, &ds);
2185cdf5b4caSmmusante 		if (err)
2186cdf5b4caSmmusante 			return (err);
21871d452cf5Sahrens 
2188cdf5b4caSmmusante 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2189cdf5b4caSmmusante 		    dsl_dataset_snapshot_rename_check,
2190cdf5b4caSmmusante 		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
21911d452cf5Sahrens 
2192*745cd3c5Smaybee 		dsl_dataset_rele(ds, FTAG);
2193cdf5b4caSmmusante 	}
21941d452cf5Sahrens 
2195fa9e4066Sahrens 	return (err);
2196fa9e4066Sahrens }
219799653d4eSeschrock 
2198*745cd3c5Smaybee struct promotedsarg {
2199*745cd3c5Smaybee 	list_node_t link;
2200*745cd3c5Smaybee 	dsl_dataset_t *ds;
2201*745cd3c5Smaybee };
2202*745cd3c5Smaybee 
22031d452cf5Sahrens struct promotearg {
2204*745cd3c5Smaybee 	list_t snap_list;
2205*745cd3c5Smaybee 	dsl_dataset_t *clone_origin, *old_head;
22061d452cf5Sahrens 	uint64_t used, comp, uncomp, unique;
2207*745cd3c5Smaybee 	uint64_t newnext_obj;
22081d452cf5Sahrens };
22091d452cf5Sahrens 
2210ecd6cf80Smarks /* ARGSUSED */
221199653d4eSeschrock static int
22121d452cf5Sahrens dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
221399653d4eSeschrock {
22141d452cf5Sahrens 	dsl_dataset_t *hds = arg1;
22151d452cf5Sahrens 	struct promotearg *pa = arg2;
2216*745cd3c5Smaybee 	struct promotedsarg *snap = list_head(&pa->snap_list);
22171d452cf5Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
2218*745cd3c5Smaybee 	dsl_dataset_t *origin_ds = snap->ds;
2219*745cd3c5Smaybee 	dsl_dataset_t *newnext_ds;
2220*745cd3c5Smaybee 	char *name;
22211d452cf5Sahrens 	uint64_t itor = 0;
222299653d4eSeschrock 	blkptr_t bp;
2223*745cd3c5Smaybee 	int err;
22241d452cf5Sahrens 
222599653d4eSeschrock 	/* Check that it is a clone */
2226*745cd3c5Smaybee 	if (hds->ds_dir->dd_phys->dd_origin_obj == 0)
222799653d4eSeschrock 		return (EINVAL);
222899653d4eSeschrock 
22291d452cf5Sahrens 	/* Since this is so expensive, don't do the preliminary check */
22301d452cf5Sahrens 	if (!dmu_tx_is_syncing(tx))
22311d452cf5Sahrens 		return (0);
22321d452cf5Sahrens 
2233*745cd3c5Smaybee 	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)
2234*745cd3c5Smaybee 		return (EXDEV);
223599653d4eSeschrock 
22363cb34c60Sahrens 	/* find origin's new next ds */
2237*745cd3c5Smaybee 	newnext_ds = hds;
22383cb34c60Sahrens 	while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) {
223999653d4eSeschrock 		dsl_dataset_t *prev;
224099653d4eSeschrock 
2241*745cd3c5Smaybee 		err = dsl_dataset_hold_obj(dp,
2242*745cd3c5Smaybee 		    newnext_ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
2243*745cd3c5Smaybee 		if (newnext_ds != hds)
2244*745cd3c5Smaybee 			dsl_dataset_rele(newnext_ds, FTAG);
2245*745cd3c5Smaybee 		if (err)
2246*745cd3c5Smaybee 			return (err);
224799653d4eSeschrock 		newnext_ds = prev;
224899653d4eSeschrock 	}
22491d452cf5Sahrens 	pa->newnext_obj = newnext_ds->ds_object;
225099653d4eSeschrock 
22513cb34c60Sahrens 	/* compute origin's new unique space */
2252*745cd3c5Smaybee 	pa->unique = 0;
225399653d4eSeschrock 	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
225499653d4eSeschrock 	    &itor, &bp)) == 0) {
22553cb34c60Sahrens 		if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg)
2256*745cd3c5Smaybee 			pa->unique += bp_get_dasize(dp->dp_spa, &bp);
225799653d4eSeschrock 	}
2258*745cd3c5Smaybee 	if (newnext_ds != hds)
2259*745cd3c5Smaybee 		dsl_dataset_rele(newnext_ds, FTAG);
226099653d4eSeschrock 	if (err != ENOENT)
2261*745cd3c5Smaybee 		return (err);
226299653d4eSeschrock 
226399653d4eSeschrock 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2264*745cd3c5Smaybee 
2265*745cd3c5Smaybee 	/*
2266*745cd3c5Smaybee 	 * Walk the snapshots that we are moving
2267*745cd3c5Smaybee 	 *
2268*745cd3c5Smaybee 	 * Compute space to transfer.  Each snapshot gave birth to:
2269*745cd3c5Smaybee 	 * (my used) - (prev's used) + (deadlist's used)
2270*745cd3c5Smaybee 	 * So a sequence would look like:
2271*745cd3c5Smaybee 	 * uN - u(N-1) + dN + ... + u1 - u0 + d1 + u0 - 0 + d0
2272*745cd3c5Smaybee 	 * Which simplifies to:
2273*745cd3c5Smaybee 	 * uN + dN + ... + d1 + d0
2274*745cd3c5Smaybee 	 * Note however, if we stop before we reach the ORIGIN we get:
2275*745cd3c5Smaybee 	 * uN + dN + ... + dM - uM-1
2276*745cd3c5Smaybee 	 */
2277*745cd3c5Smaybee 	pa->used = origin_ds->ds_phys->ds_used_bytes;
2278*745cd3c5Smaybee 	pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
2279*745cd3c5Smaybee 	pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
2280*745cd3c5Smaybee 	do {
228199653d4eSeschrock 		uint64_t val, dlused, dlcomp, dluncomp;
2282*745cd3c5Smaybee 		dsl_dataset_t *ds = snap->ds;
228399653d4eSeschrock 
228499653d4eSeschrock 		/* Check that the snapshot name does not conflict */
228599653d4eSeschrock 		dsl_dataset_name(ds, name);
2286*745cd3c5Smaybee 		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
2287*745cd3c5Smaybee 		if (err == 0)
2288*745cd3c5Smaybee 			err = EEXIST;
2289*745cd3c5Smaybee 		if (err != ENOENT)
229099653d4eSeschrock 			break;
2291*745cd3c5Smaybee 		err = 0;
229299653d4eSeschrock 
2293*745cd3c5Smaybee 		/* The very first snapshot does not have a deadlist */
2294*745cd3c5Smaybee 		if (ds->ds_phys->ds_prev_snap_obj != 0) {
2295*745cd3c5Smaybee 			if (err = bplist_space(&ds->ds_deadlist,
2296*745cd3c5Smaybee 			    &dlused, &dlcomp, &dluncomp))
2297*745cd3c5Smaybee 				break;
2298*745cd3c5Smaybee 			pa->used += dlused;
2299*745cd3c5Smaybee 			pa->comp += dlcomp;
2300*745cd3c5Smaybee 			pa->uncomp += dluncomp;
230199653d4eSeschrock 		}
2302*745cd3c5Smaybee 	} while (snap = list_next(&pa->snap_list, snap));
2303*745cd3c5Smaybee 
2304*745cd3c5Smaybee 	/*
2305*745cd3c5Smaybee 	 * If we are a clone of a clone then we never reached ORIGIN,
2306*745cd3c5Smaybee 	 * so we need to subtract out the clone origin's used space.
2307*745cd3c5Smaybee 	 */
2308*745cd3c5Smaybee 	if (pa->clone_origin) {
2309*745cd3c5Smaybee 		pa->used -= pa->clone_origin->ds_phys->ds_used_bytes;
2310*745cd3c5Smaybee 		pa->comp -= pa->clone_origin->ds_phys->ds_compressed_bytes;
2311*745cd3c5Smaybee 		pa->uncomp -= pa->clone_origin->ds_phys->ds_uncompressed_bytes;
231299653d4eSeschrock 	}
231399653d4eSeschrock 
2314*745cd3c5Smaybee 	kmem_free(name, MAXPATHLEN);
2315*745cd3c5Smaybee 
231699653d4eSeschrock 	/* Check that there is enough space here */
2317*745cd3c5Smaybee 	if (err == 0) {
2318*745cd3c5Smaybee 		dsl_dir_t *odd = origin_ds->ds_dir;
2319*745cd3c5Smaybee 		err = dsl_dir_transfer_possible(odd, hds->ds_dir, pa->used);
2320*745cd3c5Smaybee 	}
23211d452cf5Sahrens 
23221d452cf5Sahrens 	return (err);
23231d452cf5Sahrens }
232499653d4eSeschrock 
23251d452cf5Sahrens static void
2326ecd6cf80Smarks dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
23271d452cf5Sahrens {
23281d452cf5Sahrens 	dsl_dataset_t *hds = arg1;
23291d452cf5Sahrens 	struct promotearg *pa = arg2;
2330*745cd3c5Smaybee 	struct promotedsarg *snap = list_head(&pa->snap_list);
2331*745cd3c5Smaybee 	dsl_dataset_t *origin_ds = snap->ds;
23321d452cf5Sahrens 	dsl_dir_t *dd = hds->ds_dir;
23331d452cf5Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
23343cb34c60Sahrens 	dsl_dir_t *odd = NULL;
23351d452cf5Sahrens 	char *name;
23361d452cf5Sahrens 
23371d452cf5Sahrens 	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
23381d452cf5Sahrens 
23390b69c2f0Sahrens 	/*
23403cb34c60Sahrens 	 * We need to explicitly open odd, since origin_ds's dd will be
23410b69c2f0Sahrens 	 * changing.
23420b69c2f0Sahrens 	 */
23433cb34c60Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
23443cb34c60Sahrens 	    NULL, FTAG, &odd));
234599653d4eSeschrock 
2346*745cd3c5Smaybee 	/* change origin's next snap */
2347*745cd3c5Smaybee 	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
2348*745cd3c5Smaybee 	origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;
2349*745cd3c5Smaybee 
2350*745cd3c5Smaybee 	/* change origin */
2351*745cd3c5Smaybee 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
2352*745cd3c5Smaybee 	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
2353*745cd3c5Smaybee 	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
2354*745cd3c5Smaybee 	dmu_buf_will_dirty(odd->dd_dbuf, tx);
2355*745cd3c5Smaybee 	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
2356*745cd3c5Smaybee 
235799653d4eSeschrock 	/* move snapshots to this dir */
23581d452cf5Sahrens 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2359*745cd3c5Smaybee 	do {
2360*745cd3c5Smaybee 		dsl_dataset_t *ds = snap->ds;
236199653d4eSeschrock 
236299653d4eSeschrock 		/* move snap name entry */
236399653d4eSeschrock 		dsl_dataset_name(ds, name);
2364*745cd3c5Smaybee 		VERIFY(0 == dsl_dataset_snap_remove(pa->old_head,
2365*745cd3c5Smaybee 		    ds->ds_snapname, tx));
23661d452cf5Sahrens 		VERIFY(0 == zap_add(dp->dp_meta_objset,
236799653d4eSeschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
236899653d4eSeschrock 		    8, 1, &ds->ds_object, tx));
236999653d4eSeschrock 
237099653d4eSeschrock 		/* change containing dsl_dir */
237199653d4eSeschrock 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
23723cb34c60Sahrens 		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
237399653d4eSeschrock 		ds->ds_phys->ds_dir_obj = dd->dd_object;
23743cb34c60Sahrens 		ASSERT3P(ds->ds_dir, ==, odd);
237599653d4eSeschrock 		dsl_dir_close(ds->ds_dir, ds);
23761d452cf5Sahrens 		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
237799653d4eSeschrock 		    NULL, ds, &ds->ds_dir));
237899653d4eSeschrock 
237999653d4eSeschrock 		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
2380*745cd3c5Smaybee 	} while (snap = list_next(&pa->snap_list, snap));
238199653d4eSeschrock 
238299653d4eSeschrock 	/* change space accounting */
23833cb34c60Sahrens 	dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx);
23841d452cf5Sahrens 	dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
23853cb34c60Sahrens 	origin_ds->ds_phys->ds_unique_bytes = pa->unique;
238699653d4eSeschrock 
2387ecd6cf80Smarks 	/* log history record */
2388ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
2389*745cd3c5Smaybee 	    cr, "dataset = %llu", hds->ds_object);
2390ecd6cf80Smarks 
23913cb34c60Sahrens 	dsl_dir_close(odd, FTAG);
23921d452cf5Sahrens 	kmem_free(name, MAXPATHLEN);
239399653d4eSeschrock }
239499653d4eSeschrock 
239599653d4eSeschrock int
239699653d4eSeschrock dsl_dataset_promote(const char *name)
239799653d4eSeschrock {
239899653d4eSeschrock 	dsl_dataset_t *ds;
2399*745cd3c5Smaybee 	dsl_dir_t *dd;
2400*745cd3c5Smaybee 	dsl_pool_t *dp;
240199653d4eSeschrock 	dmu_object_info_t doi;
24021d452cf5Sahrens 	struct promotearg pa;
2403*745cd3c5Smaybee 	struct promotedsarg *snap;
2404*745cd3c5Smaybee 	uint64_t snap_obj;
2405*745cd3c5Smaybee 	uint64_t last_snap = 0;
2406*745cd3c5Smaybee 	int err;
240799653d4eSeschrock 
2408*745cd3c5Smaybee 	err = dsl_dataset_hold(name, FTAG, &ds);
240999653d4eSeschrock 	if (err)
241099653d4eSeschrock 		return (err);
2411*745cd3c5Smaybee 	dd = ds->ds_dir;
2412*745cd3c5Smaybee 	dp = dd->dd_pool;
241399653d4eSeschrock 
2414*745cd3c5Smaybee 	err = dmu_object_info(dp->dp_meta_objset,
241599653d4eSeschrock 	    ds->ds_phys->ds_snapnames_zapobj, &doi);
241699653d4eSeschrock 	if (err) {
2417*745cd3c5Smaybee 		dsl_dataset_rele(ds, FTAG);
241899653d4eSeschrock 		return (err);
241999653d4eSeschrock 	}
242099653d4eSeschrock 
2421*745cd3c5Smaybee 	/*
2422*745cd3c5Smaybee 	 * We are going to inherit all the snapshots taken before our
2423*745cd3c5Smaybee 	 * origin (i.e., our new origin will be our parent's origin).
2424*745cd3c5Smaybee 	 * Take ownership of them so that we can rename them into our
2425*745cd3c5Smaybee 	 * namespace.
2426*745cd3c5Smaybee 	 */
2427*745cd3c5Smaybee 	pa.clone_origin = NULL;
2428*745cd3c5Smaybee 	list_create(&pa.snap_list,
2429*745cd3c5Smaybee 	    sizeof (struct promotedsarg), offsetof(struct promotedsarg, link));
2430*745cd3c5Smaybee 	rw_enter(&dp->dp_config_rwlock, RW_READER);
2431*745cd3c5Smaybee 	ASSERT(dd->dd_phys->dd_origin_obj != 0);
2432*745cd3c5Smaybee 	snap_obj = dd->dd_phys->dd_origin_obj;
2433*745cd3c5Smaybee 	while (snap_obj) {
2434*745cd3c5Smaybee 		snap = kmem_alloc(sizeof (struct promotedsarg), KM_SLEEP);
2435*745cd3c5Smaybee 		err = dsl_dataset_own_obj(dp, snap_obj, 0, FTAG, &snap->ds);
2436*745cd3c5Smaybee 		if (err == ENOENT) {
2437*745cd3c5Smaybee 			/* lost race with snapshot destroy */
2438*745cd3c5Smaybee 			struct promotedsarg *last = list_tail(&pa.snap_list);
2439*745cd3c5Smaybee 			ASSERT(snap_obj != last->ds->ds_phys->ds_prev_snap_obj);
2440*745cd3c5Smaybee 			snap_obj = last->ds->ds_phys->ds_prev_snap_obj;
2441*745cd3c5Smaybee 			kmem_free(snap, sizeof (struct promotedsarg));
2442*745cd3c5Smaybee 			continue;
2443*745cd3c5Smaybee 		} else if (err) {
2444*745cd3c5Smaybee 			kmem_free(snap, sizeof (struct promotedsarg));
2445*745cd3c5Smaybee 			rw_exit(&dp->dp_config_rwlock);
2446*745cd3c5Smaybee 			goto out;
2447*745cd3c5Smaybee 		}
2448*745cd3c5Smaybee 		/*
2449*745cd3c5Smaybee 		 * We could be a clone of a clone.  If we reach our
2450*745cd3c5Smaybee 		 * parent's branch point, we're done.
2451*745cd3c5Smaybee 		 */
2452*745cd3c5Smaybee 		if (last_snap &&
2453*745cd3c5Smaybee 		    snap->ds->ds_phys->ds_next_snap_obj != last_snap) {
2454*745cd3c5Smaybee 			pa.clone_origin = snap->ds;
2455*745cd3c5Smaybee 			kmem_free(snap, sizeof (struct promotedsarg));
2456*745cd3c5Smaybee 			snap_obj = 0;
2457*745cd3c5Smaybee 		} else {
2458*745cd3c5Smaybee 			list_insert_tail(&pa.snap_list, snap);
2459*745cd3c5Smaybee 			last_snap = snap_obj;
2460*745cd3c5Smaybee 			snap_obj = snap->ds->ds_phys->ds_prev_snap_obj;
2461*745cd3c5Smaybee 		}
2462*745cd3c5Smaybee 	}
2463*745cd3c5Smaybee 	snap = list_head(&pa.snap_list);
2464*745cd3c5Smaybee 	ASSERT(snap != NULL);
2465*745cd3c5Smaybee 	err = dsl_dataset_hold_obj(dp,
2466*745cd3c5Smaybee 	    snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &pa.old_head);
2467*745cd3c5Smaybee 	rw_exit(&dp->dp_config_rwlock);
2468*745cd3c5Smaybee 
2469*745cd3c5Smaybee 	if (err)
2470*745cd3c5Smaybee 		goto out;
2471*745cd3c5Smaybee 
247299653d4eSeschrock 	/*
247399653d4eSeschrock 	 * Add in 128x the snapnames zapobj size, since we will be moving
247499653d4eSeschrock 	 * a bunch of snapnames to the promoted ds, and dirtying their
247599653d4eSeschrock 	 * bonus buffers.
247699653d4eSeschrock 	 */
2477*745cd3c5Smaybee 	err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
24781d452cf5Sahrens 	    dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
2479*745cd3c5Smaybee 
2480*745cd3c5Smaybee 	dsl_dataset_rele(pa.old_head, FTAG);
2481*745cd3c5Smaybee out:
2482*745cd3c5Smaybee 	while ((snap = list_tail(&pa.snap_list)) != NULL) {
2483*745cd3c5Smaybee 		list_remove(&pa.snap_list, snap);
2484*745cd3c5Smaybee 		dsl_dataset_disown(snap->ds, FTAG);
2485*745cd3c5Smaybee 		kmem_free(snap, sizeof (struct promotedsarg));
2486*745cd3c5Smaybee 	}
2487*745cd3c5Smaybee 	list_destroy(&pa.snap_list);
2488*745cd3c5Smaybee 	if (pa.clone_origin)
2489*745cd3c5Smaybee 		dsl_dataset_disown(pa.clone_origin, FTAG);
2490*745cd3c5Smaybee 	dsl_dataset_rele(ds, FTAG);
249199653d4eSeschrock 	return (err);
249299653d4eSeschrock }
2493b1b8ab34Slling 
24943cb34c60Sahrens struct cloneswaparg {
24953cb34c60Sahrens 	dsl_dataset_t *cds; /* clone dataset */
24963cb34c60Sahrens 	dsl_dataset_t *ohds; /* origin's head dataset */
24973cb34c60Sahrens 	boolean_t force;
2498a9b821a0Sck 	int64_t unused_refres_delta; /* change in unconsumed refreservation */
24993cb34c60Sahrens };
2500f18faf3fSek 
2501f18faf3fSek /* ARGSUSED */
2502f18faf3fSek static int
2503f18faf3fSek dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
2504f18faf3fSek {
25053cb34c60Sahrens 	struct cloneswaparg *csa = arg1;
2506f18faf3fSek 
25073cb34c60Sahrens 	/* they should both be heads */
25083cb34c60Sahrens 	if (dsl_dataset_is_snapshot(csa->cds) ||
25093cb34c60Sahrens 	    dsl_dataset_is_snapshot(csa->ohds))
2510f18faf3fSek 		return (EINVAL);
2511f18faf3fSek 
25123cb34c60Sahrens 	/* the branch point should be just before them */
25133cb34c60Sahrens 	if (csa->cds->ds_prev != csa->ohds->ds_prev)
2514f18faf3fSek 		return (EINVAL);
2515f18faf3fSek 
25163cb34c60Sahrens 	/* cds should be the clone */
25173cb34c60Sahrens 	if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj !=
25183cb34c60Sahrens 	    csa->ohds->ds_object)
25193cb34c60Sahrens 		return (EINVAL);
2520f18faf3fSek 
25213cb34c60Sahrens 	/* the clone should be a child of the origin */
25223cb34c60Sahrens 	if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
25233cb34c60Sahrens 		return (EINVAL);
2524f18faf3fSek 
25253cb34c60Sahrens 	/* ohds shouldn't be modified unless 'force' */
25263cb34c60Sahrens 	if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
25273cb34c60Sahrens 		return (ETXTBSY);
2528a9b821a0Sck 
2529a9b821a0Sck 	/* adjust amount of any unconsumed refreservation */
2530a9b821a0Sck 	csa->unused_refres_delta =
2531a9b821a0Sck 	    (int64_t)MIN(csa->ohds->ds_reserved,
2532a9b821a0Sck 	    csa->ohds->ds_phys->ds_unique_bytes) -
2533a9b821a0Sck 	    (int64_t)MIN(csa->ohds->ds_reserved,
2534a9b821a0Sck 	    csa->cds->ds_phys->ds_unique_bytes);
2535a9b821a0Sck 
2536a9b821a0Sck 	if (csa->unused_refres_delta > 0 &&
2537a9b821a0Sck 	    csa->unused_refres_delta >
2538a9b821a0Sck 	    dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE))
2539a9b821a0Sck 		return (ENOSPC);
2540a9b821a0Sck 
25413cb34c60Sahrens 	return (0);
2542f18faf3fSek }
2543f18faf3fSek 
2544f18faf3fSek /* ARGSUSED */
2545f18faf3fSek static void
2546f18faf3fSek dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
2547f18faf3fSek {
25483cb34c60Sahrens 	struct cloneswaparg *csa = arg1;
25493cb34c60Sahrens 	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
2550f18faf3fSek 	uint64_t itor = 0;
2551f18faf3fSek 	blkptr_t bp;
2552f18faf3fSek 	uint64_t unique = 0;
2553f18faf3fSek 	int err;
2554f18faf3fSek 
2555a9b821a0Sck 	ASSERT(csa->cds->ds_reserved == 0);
2556a9b821a0Sck 	ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota);
2557a9b821a0Sck 
25583cb34c60Sahrens 	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
25593cb34c60Sahrens 	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
25603cb34c60Sahrens 	dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx);
2561f18faf3fSek 
25623cb34c60Sahrens 	if (csa->cds->ds_user_ptr != NULL) {
25633cb34c60Sahrens 		csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr);
25643cb34c60Sahrens 		csa->cds->ds_user_ptr = NULL;
25653cb34c60Sahrens 	}
2566f18faf3fSek 
25673cb34c60Sahrens 	if (csa->ohds->ds_user_ptr != NULL) {
25683cb34c60Sahrens 		csa->ohds->ds_user_evict_func(csa->ohds,
25693cb34c60Sahrens 		    csa->ohds->ds_user_ptr);
25703cb34c60Sahrens 		csa->ohds->ds_user_ptr = NULL;
25713cb34c60Sahrens 	}
2572f18faf3fSek 
2573f18faf3fSek 	/* compute unique space */
25743cb34c60Sahrens 	while ((err = bplist_iterate(&csa->cds->ds_deadlist,
25753cb34c60Sahrens 	    &itor, &bp)) == 0) {
25763cb34c60Sahrens 		if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg)
25773cb34c60Sahrens 			unique += bp_get_dasize(dp->dp_spa, &bp);
2578f18faf3fSek 	}
2579f18faf3fSek 	VERIFY(err == ENOENT);
2580f18faf3fSek 
2581f18faf3fSek 	/* reset origin's unique bytes */
25823cb34c60Sahrens 	csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique;
2583f18faf3fSek 
2584f18faf3fSek 	/* swap blkptrs */
2585f18faf3fSek 	{
2586f18faf3fSek 		blkptr_t tmp;
25873cb34c60Sahrens 		tmp = csa->ohds->ds_phys->ds_bp;
25883cb34c60Sahrens 		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
25893cb34c60Sahrens 		csa->cds->ds_phys->ds_bp = tmp;
2590f18faf3fSek 	}
2591f18faf3fSek 
2592f18faf3fSek 	/* set dd_*_bytes */
2593f18faf3fSek 	{
2594f18faf3fSek 		int64_t dused, dcomp, duncomp;
2595f18faf3fSek 		uint64_t cdl_used, cdl_comp, cdl_uncomp;
2596f18faf3fSek 		uint64_t odl_used, odl_comp, odl_uncomp;
2597f18faf3fSek 
25983cb34c60Sahrens 		VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used,
2599f18faf3fSek 		    &cdl_comp, &cdl_uncomp));
26003cb34c60Sahrens 		VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used,
2601f18faf3fSek 		    &odl_comp, &odl_uncomp));
26023cb34c60Sahrens 		dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
26033cb34c60Sahrens 		    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
26043cb34c60Sahrens 		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
26053cb34c60Sahrens 		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
26063cb34c60Sahrens 		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
26073cb34c60Sahrens 		    cdl_uncomp -
26083cb34c60Sahrens 		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
26093cb34c60Sahrens 
26103cb34c60Sahrens 		dsl_dir_diduse_space(csa->ohds->ds_dir,
26113cb34c60Sahrens 		    dused, dcomp, duncomp, tx);
26123cb34c60Sahrens 		dsl_dir_diduse_space(csa->cds->ds_dir,
26133cb34c60Sahrens 		    -dused, -dcomp, -duncomp, tx);
26143cb34c60Sahrens 	}
26153cb34c60Sahrens 
26163cb34c60Sahrens #define	SWITCH64(x, y) \
26173cb34c60Sahrens 	{ \
26183cb34c60Sahrens 		uint64_t __tmp = (x); \
26193cb34c60Sahrens 		(x) = (y); \
26203cb34c60Sahrens 		(y) = __tmp; \
2621f18faf3fSek 	}
2622f18faf3fSek 
2623f18faf3fSek 	/* swap ds_*_bytes */
26243cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
26253cb34c60Sahrens 	    csa->cds->ds_phys->ds_used_bytes);
26263cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
26273cb34c60Sahrens 	    csa->cds->ds_phys->ds_compressed_bytes);
26283cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
26293cb34c60Sahrens 	    csa->cds->ds_phys->ds_uncompressed_bytes);
2630a9b821a0Sck 	SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
2631a9b821a0Sck 	    csa->cds->ds_phys->ds_unique_bytes);
2632a9b821a0Sck 
2633a9b821a0Sck 	/* apply any parent delta for change in unconsumed refreservation */
2634a9b821a0Sck 	dsl_dir_diduse_space(csa->ohds->ds_dir, csa->unused_refres_delta,
2635a9b821a0Sck 	    0, 0, tx);
2636f18faf3fSek 
2637f18faf3fSek 	/* swap deadlists */
26383cb34c60Sahrens 	bplist_close(&csa->cds->ds_deadlist);
26393cb34c60Sahrens 	bplist_close(&csa->ohds->ds_deadlist);
26403cb34c60Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
26413cb34c60Sahrens 	    csa->cds->ds_phys->ds_deadlist_obj);
26423cb34c60Sahrens 	VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
26433cb34c60Sahrens 	    csa->cds->ds_phys->ds_deadlist_obj));
26443cb34c60Sahrens 	VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
26453cb34c60Sahrens 	    csa->ohds->ds_phys->ds_deadlist_obj));
2646f18faf3fSek }
2647f18faf3fSek 
2648f18faf3fSek /*
2649*745cd3c5Smaybee  * Swap 'clone' with its origin head file system.  Used at the end
2650*745cd3c5Smaybee  * of "online recv" to swizzle the file system to the new version.
2651f18faf3fSek  */
2652f18faf3fSek int
26533cb34c60Sahrens dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
26543cb34c60Sahrens     boolean_t force)
2655f18faf3fSek {
26563cb34c60Sahrens 	struct cloneswaparg csa;
2657*745cd3c5Smaybee 	int error;
2658f18faf3fSek 
2659*745cd3c5Smaybee 	ASSERT(clone->ds_owner);
2660*745cd3c5Smaybee 	ASSERT(origin_head->ds_owner);
2661*745cd3c5Smaybee retry:
2662*745cd3c5Smaybee 	/* Need exclusive access for the swap */
2663*745cd3c5Smaybee 	rw_enter(&clone->ds_rwlock, RW_WRITER);
2664*745cd3c5Smaybee 	if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
2665*745cd3c5Smaybee 		rw_exit(&clone->ds_rwlock);
2666*745cd3c5Smaybee 		rw_enter(&origin_head->ds_rwlock, RW_WRITER);
2667*745cd3c5Smaybee 		if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
2668*745cd3c5Smaybee 			rw_exit(&origin_head->ds_rwlock);
2669*745cd3c5Smaybee 			goto retry;
2670*745cd3c5Smaybee 		}
2671*745cd3c5Smaybee 	}
26723cb34c60Sahrens 	csa.cds = clone;
26733cb34c60Sahrens 	csa.ohds = origin_head;
26743cb34c60Sahrens 	csa.force = force;
2675*745cd3c5Smaybee 	error = dsl_sync_task_do(clone->ds_dir->dd_pool,
2676f18faf3fSek 	    dsl_dataset_clone_swap_check,
2677*745cd3c5Smaybee 	    dsl_dataset_clone_swap_sync, &csa, NULL, 9);
2678*745cd3c5Smaybee 	return (error);
2679f18faf3fSek }
2680f18faf3fSek 
2681b1b8ab34Slling /*
2682b1b8ab34Slling  * Given a pool name and a dataset object number in that pool,
2683b1b8ab34Slling  * return the name of that dataset.
2684b1b8ab34Slling  */
2685b1b8ab34Slling int
2686b1b8ab34Slling dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
2687b1b8ab34Slling {
2688b1b8ab34Slling 	spa_t *spa;
2689b1b8ab34Slling 	dsl_pool_t *dp;
2690*745cd3c5Smaybee 	dsl_dataset_t *ds;
2691b1b8ab34Slling 	int error;
2692b1b8ab34Slling 
2693b1b8ab34Slling 	if ((error = spa_open(pname, &spa, FTAG)) != 0)
2694b1b8ab34Slling 		return (error);
2695b1b8ab34Slling 	dp = spa_get_dsl(spa);
2696b1b8ab34Slling 	rw_enter(&dp->dp_config_rwlock, RW_READER);
2697*745cd3c5Smaybee 	if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) {
2698*745cd3c5Smaybee 		dsl_dataset_name(ds, buf);
2699*745cd3c5Smaybee 		dsl_dataset_rele(ds, FTAG);
2700b1b8ab34Slling 	}
2701b1b8ab34Slling 	rw_exit(&dp->dp_config_rwlock);
2702b1b8ab34Slling 	spa_close(spa, FTAG);
2703b1b8ab34Slling 
2704*745cd3c5Smaybee 	return (error);
2705b1b8ab34Slling }
2706a9799022Sck 
2707a9799022Sck int
2708a9799022Sck dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
2709*745cd3c5Smaybee     uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv)
2710a9799022Sck {
2711a9799022Sck 	int error = 0;
2712a9799022Sck 
2713a9799022Sck 	ASSERT3S(asize, >, 0);
2714a9799022Sck 
27159082849eSck 	/*
27169082849eSck 	 * *ref_rsrv is the portion of asize that will come from any
27179082849eSck 	 * unconsumed refreservation space.
27189082849eSck 	 */
27199082849eSck 	*ref_rsrv = 0;
27209082849eSck 
2721a9799022Sck 	mutex_enter(&ds->ds_lock);
2722a9799022Sck 	/*
2723a9799022Sck 	 * Make a space adjustment for reserved bytes.
2724a9799022Sck 	 */
2725a9799022Sck 	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
2726a9799022Sck 		ASSERT3U(*used, >=,
2727a9799022Sck 		    ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
2728a9799022Sck 		*used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
27299082849eSck 		*ref_rsrv =
27309082849eSck 		    asize - MIN(asize, parent_delta(ds, asize + inflight));
2731a9799022Sck 	}
2732a9799022Sck 
2733a9799022Sck 	if (!check_quota || ds->ds_quota == 0) {
2734a9799022Sck 		mutex_exit(&ds->ds_lock);
2735a9799022Sck 		return (0);
2736a9799022Sck 	}
2737a9799022Sck 	/*
2738a9799022Sck 	 * If they are requesting more space, and our current estimate
2739a9799022Sck 	 * is over quota, they get to try again unless the actual
2740a9799022Sck 	 * on-disk is over quota and there are no pending changes (which
2741a9799022Sck 	 * may free up space for us).
2742a9799022Sck 	 */
2743a9799022Sck 	if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
2744a9799022Sck 		if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
2745a9799022Sck 			error = ERESTART;
2746a9799022Sck 		else
2747a9799022Sck 			error = EDQUOT;
2748a9799022Sck 	}
2749a9799022Sck 	mutex_exit(&ds->ds_lock);
2750a9799022Sck 
2751a9799022Sck 	return (error);
2752a9799022Sck }
2753a9799022Sck 
2754a9799022Sck /* ARGSUSED */
2755a9799022Sck static int
2756a9799022Sck dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
2757a9799022Sck {
2758a9799022Sck 	dsl_dataset_t *ds = arg1;
2759a9799022Sck 	uint64_t *quotap = arg2;
2760a9799022Sck 	uint64_t new_quota = *quotap;
2761a9799022Sck 
2762a9799022Sck 	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
2763a9799022Sck 		return (ENOTSUP);
2764a9799022Sck 
2765a9799022Sck 	if (new_quota == 0)
2766a9799022Sck 		return (0);
2767a9799022Sck 
2768a9799022Sck 	if (new_quota < ds->ds_phys->ds_used_bytes ||
2769a9799022Sck 	    new_quota < ds->ds_reserved)
2770a9799022Sck 		return (ENOSPC);
2771a9799022Sck 
2772a9799022Sck 	return (0);
2773a9799022Sck }
2774a9799022Sck 
2775a9799022Sck /* ARGSUSED */
2776a9799022Sck void
2777a9799022Sck dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
2778a9799022Sck {
2779a9799022Sck 	dsl_dataset_t *ds = arg1;
2780a9799022Sck 	uint64_t *quotap = arg2;
2781a9799022Sck 	uint64_t new_quota = *quotap;
2782a9799022Sck 
2783a9799022Sck 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
2784a9799022Sck 
2785a9799022Sck 	ds->ds_quota = new_quota;
2786a9799022Sck 
2787a9799022Sck 	dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);
2788a9799022Sck 
2789a9799022Sck 	spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa,
2790a9799022Sck 	    tx, cr, "%lld dataset = %llu ",
2791*745cd3c5Smaybee 	    (longlong_t)new_quota, ds->ds_object);
2792a9799022Sck }
2793a9799022Sck 
2794a9799022Sck int
2795a9799022Sck dsl_dataset_set_quota(const char *dsname, uint64_t quota)
2796a9799022Sck {
2797a9799022Sck 	dsl_dataset_t *ds;
2798a9799022Sck 	int err;
2799a9799022Sck 
2800*745cd3c5Smaybee 	err = dsl_dataset_hold(dsname, FTAG, &ds);
2801a9799022Sck 	if (err)
2802a9799022Sck 		return (err);
2803a9799022Sck 
2804a9b821a0Sck 	if (quota != ds->ds_quota) {
2805a9b821a0Sck 		/*
2806a9b821a0Sck 		 * If someone removes a file, then tries to set the quota, we
2807a9b821a0Sck 		 * want to make sure the file freeing takes effect.
2808a9b821a0Sck 		 */
2809a9b821a0Sck 		txg_wait_open(ds->ds_dir->dd_pool, 0);
2810a9799022Sck 
2811a9b821a0Sck 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2812a9b821a0Sck 		    dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync,
2813a9b821a0Sck 		    ds, &quota, 0);
2814a9b821a0Sck 	}
2815*745cd3c5Smaybee 	dsl_dataset_rele(ds, FTAG);
2816a9799022Sck 	return (err);
2817a9799022Sck }
2818a9799022Sck 
2819a9799022Sck static int
2820a9799022Sck dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
2821a9799022Sck {
2822a9799022Sck 	dsl_dataset_t *ds = arg1;
2823a9799022Sck 	uint64_t *reservationp = arg2;
2824a9799022Sck 	uint64_t new_reservation = *reservationp;
2825a9799022Sck 	int64_t delta;
2826a9799022Sck 	uint64_t unique;
2827a9799022Sck 
2828a9799022Sck 	if (new_reservation > INT64_MAX)
2829a9799022Sck 		return (EOVERFLOW);
2830a9799022Sck 
2831a9799022Sck 	if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
2832a9799022Sck 	    SPA_VERSION_REFRESERVATION)
2833a9799022Sck 		return (ENOTSUP);
2834a9799022Sck 
2835a9799022Sck 	if (dsl_dataset_is_snapshot(ds))
2836a9799022Sck 		return (EINVAL);
2837a9799022Sck 
2838a9799022Sck 	/*
2839a9799022Sck 	 * If we are doing the preliminary check in open context, the
2840a9799022Sck 	 * space estimates may be inaccurate.
2841a9799022Sck 	 */
2842a9799022Sck 	if (!dmu_tx_is_syncing(tx))
2843a9799022Sck 		return (0);
2844a9799022Sck 
2845a9799022Sck 	mutex_enter(&ds->ds_lock);
2846a9799022Sck 	unique = dsl_dataset_unique(ds);
2847a9799022Sck 	delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved);
2848a9799022Sck 	mutex_exit(&ds->ds_lock);
2849a9799022Sck 
2850a9799022Sck 	if (delta > 0 &&
2851a9799022Sck 	    delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
2852a9799022Sck 		return (ENOSPC);
2853a9799022Sck 	if (delta > 0 && ds->ds_quota > 0 &&
2854a9799022Sck 	    new_reservation > ds->ds_quota)
2855a9799022Sck 		return (ENOSPC);
2856a9799022Sck 
2857a9799022Sck 	return (0);
2858a9799022Sck }
2859a9799022Sck 
2860a9799022Sck /* ARGSUSED */
2861a9799022Sck static void
2862a9799022Sck dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr,
2863a9799022Sck     dmu_tx_t *tx)
2864a9799022Sck {
2865a9799022Sck 	dsl_dataset_t *ds = arg1;
2866a9799022Sck 	uint64_t *reservationp = arg2;
2867a9799022Sck 	uint64_t new_reservation = *reservationp;
2868a9799022Sck 	uint64_t unique;
2869a9799022Sck 	int64_t delta;
2870a9799022Sck 
2871a9799022Sck 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
2872a9799022Sck 
2873a9799022Sck 	mutex_enter(&ds->ds_lock);
2874a9799022Sck 	unique = dsl_dataset_unique(ds);
2875a9799022Sck 	delta = MAX(0, (int64_t)(new_reservation - unique)) -
2876a9799022Sck 	    MAX(0, (int64_t)(ds->ds_reserved - unique));
2877a9799022Sck 	ds->ds_reserved = new_reservation;
2878a9799022Sck 	mutex_exit(&ds->ds_lock);
2879a9799022Sck 
2880a9799022Sck 	dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation",
2881a9799022Sck 	    new_reservation, cr, tx);
2882a9799022Sck 
2883a9799022Sck 	dsl_dir_diduse_space(ds->ds_dir, delta, 0, 0, tx);
2884a9799022Sck 
2885a9799022Sck 	spa_history_internal_log(LOG_DS_REFRESERV,
2886a9799022Sck 	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu",
2887a9799022Sck 	    (longlong_t)new_reservation,
2888a9799022Sck 	    ds->ds_dir->dd_phys->dd_head_dataset_obj);
2889a9799022Sck }
2890a9799022Sck 
2891a9799022Sck int
2892a9799022Sck dsl_dataset_set_reservation(const char *dsname, uint64_t reservation)
2893a9799022Sck {
2894a9799022Sck 	dsl_dataset_t *ds;
2895a9799022Sck 	int err;
2896a9799022Sck 
2897*745cd3c5Smaybee 	err = dsl_dataset_hold(dsname, FTAG, &ds);
2898a9799022Sck 	if (err)
2899a9799022Sck 		return (err);
2900a9799022Sck 
2901a9799022Sck 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2902a9799022Sck 	    dsl_dataset_set_reservation_check,
2903a9799022Sck 	    dsl_dataset_set_reservation_sync, ds, &reservation, 0);
2904*745cd3c5Smaybee 	dsl_dataset_rele(ds, FTAG);
2905a9799022Sck 	return (err);
2906a9799022Sck }
2907