1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5fa9e4066Sahrens  * Common Development and Distribution License, Version 1.0 only
6fa9e4066Sahrens  * (the "License").  You may not use this file except in compliance
7fa9e4066Sahrens  * with the License.
8fa9e4066Sahrens  *
9fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
11fa9e4066Sahrens  * See the License for the specific language governing permissions
12fa9e4066Sahrens  * and limitations under the License.
13fa9e4066Sahrens  *
14fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
15fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
17fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
18fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
19fa9e4066Sahrens  *
20fa9e4066Sahrens  * CDDL HEADER END
21fa9e4066Sahrens  */
22fa9e4066Sahrens /*
23fa9e4066Sahrens  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24fa9e4066Sahrens  * Use is subject to license terms.
25fa9e4066Sahrens  */
26fa9e4066Sahrens 
27fa9e4066Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
28fa9e4066Sahrens 
29fa9e4066Sahrens #include <sys/dmu_objset.h>
30fa9e4066Sahrens #include <sys/dsl_dataset.h>
31fa9e4066Sahrens #include <sys/dsl_dir.h>
32fa9e4066Sahrens #include <sys/dmu_traverse.h>
33fa9e4066Sahrens #include <sys/dmu_tx.h>
34fa9e4066Sahrens #include <sys/arc.h>
35fa9e4066Sahrens #include <sys/zio.h>
36fa9e4066Sahrens #include <sys/zap.h>
37fa9e4066Sahrens #include <sys/unique.h>
38fa9e4066Sahrens #include <sys/zfs_context.h>
39fa9e4066Sahrens 
/*
 * Weight of an EXCLUSIVE open, and the ceiling that the sum of all open
 * weights on a dataset (ds_open_refcount) may not exceed.
 */
#define	DOS_REF_MAX	(1ULL << 62)

/* Block size passed to bplist_create() when creating a dataset's deadlist. */
#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

/*
 * Size charged as "uncompressed" for a block pointer: the physical size
 * for indirect blocks (level > 0) and for object types flagged as
 * metadata in dmu_ot[], otherwise the logical size.
 *
 * The expansion is a plain parenthesized expression with no trailing
 * semicolon, so it can be used anywhere an expression is allowed.
 */
#define	BP_GET_UCSIZE(bp) \
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
47fa9e4066Sahrens 
48fa9e4066Sahrens /*
49fa9e4066Sahrens  * We use weighted reference counts to express the various forms of exclusion
50fa9e4066Sahrens  * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
51fa9e4066Sahrens  * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
52fa9e4066Sahrens  * This makes the exclusion logic simple: the total refcnt for all opens cannot
53fa9e4066Sahrens  * exceed DOS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
54fa9e4066Sahrens  * weight (DOS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
55fa9e4066Sahrens  * just over half of the refcnt space, so there can't be more than one, but it
56fa9e4066Sahrens  * can peacefully coexist with any number of STANDARD opens.
57fa9e4066Sahrens  */
58fa9e4066Sahrens static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
59fa9e4066Sahrens 	0,			/* DOS_MODE_NONE - invalid		*/
60fa9e4066Sahrens 	1,			/* DOS_MODE_STANDARD - unlimited number	*/
61fa9e4066Sahrens 	(DOS_REF_MAX >> 1) + 1,	/* DOS_MODE_PRIMARY - only one of these	*/
62fa9e4066Sahrens 	DOS_REF_MAX		/* DOS_MODE_EXCLUSIVE - no other opens	*/
63fa9e4066Sahrens };
64fa9e4066Sahrens 
65fa9e4066Sahrens 
66fa9e4066Sahrens void
67fa9e4066Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
68fa9e4066Sahrens {
69fa9e4066Sahrens 	int used = BP_GET_ASIZE(bp);
70fa9e4066Sahrens 	int compressed = BP_GET_PSIZE(bp);
71fa9e4066Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
72fa9e4066Sahrens 
73fa9e4066Sahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
74fa9e4066Sahrens 
75fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
76fa9e4066Sahrens 	/* It could have been compressed away to nothing */
77fa9e4066Sahrens 	if (BP_IS_HOLE(bp))
78fa9e4066Sahrens 		return;
79fa9e4066Sahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
80fa9e4066Sahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
81fa9e4066Sahrens 	if (ds == NULL) {
82fa9e4066Sahrens 		/*
83fa9e4066Sahrens 		 * Account for the meta-objset space in its placeholder
84fa9e4066Sahrens 		 * dsl_dir.
85fa9e4066Sahrens 		 */
86fa9e4066Sahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
87fa9e4066Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
88fa9e4066Sahrens 		    used, compressed, uncompressed, tx);
89fa9e4066Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
90fa9e4066Sahrens 		return;
91fa9e4066Sahrens 	}
92fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
93fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
94fa9e4066Sahrens 	ds->ds_phys->ds_used_bytes += used;
95fa9e4066Sahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
96fa9e4066Sahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
97fa9e4066Sahrens 	ds->ds_phys->ds_unique_bytes += used;
98fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
99fa9e4066Sahrens 	dsl_dir_diduse_space(ds->ds_dir,
100fa9e4066Sahrens 	    used, compressed, uncompressed, tx);
101fa9e4066Sahrens }
102fa9e4066Sahrens 
103fa9e4066Sahrens void
104fa9e4066Sahrens dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
105fa9e4066Sahrens {
106fa9e4066Sahrens 	int used = BP_GET_ASIZE(bp);
107fa9e4066Sahrens 	int compressed = BP_GET_PSIZE(bp);
108fa9e4066Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
109fa9e4066Sahrens 
110fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
111fa9e4066Sahrens 	if (BP_IS_HOLE(bp))
112fa9e4066Sahrens 		return;
113fa9e4066Sahrens 
114fa9e4066Sahrens 	ASSERT(used > 0);
115fa9e4066Sahrens 	if (ds == NULL) {
116fa9e4066Sahrens 		/*
117fa9e4066Sahrens 		 * Account for the meta-objset space in its placeholder
118fa9e4066Sahrens 		 * dataset.
119fa9e4066Sahrens 		 */
120fa9e4066Sahrens 		/* XXX this can fail, what do we do when it does? */
121fa9e4066Sahrens 		(void) arc_free(NULL, tx->tx_pool->dp_spa,
122fa9e4066Sahrens 		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
123fa9e4066Sahrens 		bzero(bp, sizeof (blkptr_t));
124fa9e4066Sahrens 
125fa9e4066Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
126fa9e4066Sahrens 		    -used, -compressed, -uncompressed, tx);
127fa9e4066Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
128fa9e4066Sahrens 		return;
129fa9e4066Sahrens 	}
130fa9e4066Sahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
131fa9e4066Sahrens 
132fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
133fa9e4066Sahrens 
134fa9e4066Sahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
135fa9e4066Sahrens 		dprintf_bp(bp, "freeing: %s", "");
136fa9e4066Sahrens 		/* XXX check return code? */
137fa9e4066Sahrens 		(void) arc_free(NULL, tx->tx_pool->dp_spa,
138fa9e4066Sahrens 		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
139fa9e4066Sahrens 
140fa9e4066Sahrens 		mutex_enter(&ds->ds_lock);
141fa9e4066Sahrens 		/* XXX unique_bytes is not accurate for head datasets */
142fa9e4066Sahrens 		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
143fa9e4066Sahrens 		ds->ds_phys->ds_unique_bytes -= used;
144fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
145fa9e4066Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
146fa9e4066Sahrens 		    -used, -compressed, -uncompressed, tx);
147fa9e4066Sahrens 	} else {
148fa9e4066Sahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
149fa9e4066Sahrens 		bplist_enqueue(&ds->ds_deadlist, bp, tx);
150fa9e4066Sahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
151fa9e4066Sahrens 		if (ds->ds_phys->ds_prev_snap_obj != 0) {
152fa9e4066Sahrens 			ASSERT3U(ds->ds_prev->ds_object, ==,
153fa9e4066Sahrens 			    ds->ds_phys->ds_prev_snap_obj);
154fa9e4066Sahrens 			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
155fa9e4066Sahrens 			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
156fa9e4066Sahrens 			    ds->ds_object &&
157fa9e4066Sahrens 			    bp->blk_birth >
158fa9e4066Sahrens 			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
159fa9e4066Sahrens 				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
160fa9e4066Sahrens 				mutex_enter(&ds->ds_prev->ds_lock);
161fa9e4066Sahrens 				ds->ds_prev->ds_phys->ds_unique_bytes +=
162fa9e4066Sahrens 				    used;
163fa9e4066Sahrens 				mutex_exit(&ds->ds_prev->ds_lock);
164fa9e4066Sahrens 			}
165fa9e4066Sahrens 		}
166fa9e4066Sahrens 	}
167fa9e4066Sahrens 	bzero(bp, sizeof (blkptr_t));
168fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
169fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
170fa9e4066Sahrens 	ds->ds_phys->ds_used_bytes -= used;
171fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
172fa9e4066Sahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
173fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
174fa9e4066Sahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
175fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
176fa9e4066Sahrens }
177fa9e4066Sahrens 
178fa9e4066Sahrens int
179fa9e4066Sahrens dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth, dmu_tx_t *tx)
180fa9e4066Sahrens {
181fa9e4066Sahrens 	uint64_t prev_snap_txg;
182fa9e4066Sahrens 	dsl_dir_t *dd;
183fa9e4066Sahrens 	/* ASSERT that it is not a snapshot */
184fa9e4066Sahrens 	if (ds == NULL)
185fa9e4066Sahrens 		return (TRUE);
186fa9e4066Sahrens 	/*
187fa9e4066Sahrens 	 * The snapshot creation could fail, but that would cause an
188fa9e4066Sahrens 	 * incorrect FALSE return, which would only result in an
189fa9e4066Sahrens 	 * overestimation of the amount of space that an operation would
190fa9e4066Sahrens 	 * consume, which is OK.
191fa9e4066Sahrens 	 *
192fa9e4066Sahrens 	 * There's also a small window where we could miss a pending
193fa9e4066Sahrens 	 * snapshot, because we could set the sync task in the quiescing
194fa9e4066Sahrens 	 * phase.  So this should only be used as a guess.
195fa9e4066Sahrens 	 */
196fa9e4066Sahrens 	dd = ds->ds_dir;
197fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
198fa9e4066Sahrens 	if (dd->dd_sync_func == dsl_dataset_snapshot_sync &&
199fa9e4066Sahrens 	    dd->dd_sync_txg < tx->tx_txg)
200fa9e4066Sahrens 		prev_snap_txg = dd->dd_sync_txg;
201fa9e4066Sahrens 	else
202fa9e4066Sahrens 		prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
203fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
204fa9e4066Sahrens 	return (blk_birth > prev_snap_txg);
205fa9e4066Sahrens }
206fa9e4066Sahrens 
207fa9e4066Sahrens /* ARGSUSED */
208fa9e4066Sahrens static void
209fa9e4066Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
210fa9e4066Sahrens {
211fa9e4066Sahrens 	dsl_dataset_t *ds = dsv;
212fa9e4066Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
213fa9e4066Sahrens 
214fa9e4066Sahrens 	/* open_refcount == DOS_REF_MAX when deleting */
215fa9e4066Sahrens 	ASSERT(ds->ds_open_refcount == 0 ||
216fa9e4066Sahrens 	    ds->ds_open_refcount == DOS_REF_MAX);
217fa9e4066Sahrens 
218fa9e4066Sahrens 	dprintf_ds(ds, "evicting %s\n", "");
219fa9e4066Sahrens 
220fa9e4066Sahrens 	unique_remove(ds->ds_phys->ds_fsid_guid);
221fa9e4066Sahrens 
222fa9e4066Sahrens 	if (ds->ds_user_ptr != NULL)
223fa9e4066Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
224fa9e4066Sahrens 
225fa9e4066Sahrens 	if (ds->ds_prev) {
226fa9e4066Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
227fa9e4066Sahrens 		ds->ds_prev = NULL;
228fa9e4066Sahrens 	}
229fa9e4066Sahrens 
230fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
231fa9e4066Sahrens 	dsl_dir_close(ds->ds_dir, ds);
232fa9e4066Sahrens 
233fa9e4066Sahrens 	if (list_link_active(&ds->ds_synced_link))
234fa9e4066Sahrens 		list_remove(&dp->dp_synced_objsets, ds);
235fa9e4066Sahrens 
236fa9e4066Sahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
237fa9e4066Sahrens }
238fa9e4066Sahrens 
239fa9e4066Sahrens static void
240fa9e4066Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
241fa9e4066Sahrens {
242fa9e4066Sahrens 	dsl_dataset_phys_t *headphys;
243fa9e4066Sahrens 	int err;
244fa9e4066Sahrens 	dmu_buf_t *headdbuf;
245fa9e4066Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
246fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
247fa9e4066Sahrens 
248fa9e4066Sahrens 	if (ds->ds_snapname[0])
249fa9e4066Sahrens 		return;
250fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
251fa9e4066Sahrens 		return;
252fa9e4066Sahrens 
253fa9e4066Sahrens 	headdbuf = dmu_bonus_hold_tag(mos,
254fa9e4066Sahrens 	    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG);
255fa9e4066Sahrens 	dmu_buf_read(headdbuf);
256fa9e4066Sahrens 	headphys = headdbuf->db_data;
257fa9e4066Sahrens 	err = zap_value_search(dp->dp_meta_objset,
258fa9e4066Sahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
259fa9e4066Sahrens 	ASSERT(err == 0);
260fa9e4066Sahrens 	dmu_buf_rele_tag(headdbuf, FTAG);
261fa9e4066Sahrens }
262fa9e4066Sahrens 
263fa9e4066Sahrens dsl_dataset_t *
264fa9e4066Sahrens dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
265fa9e4066Sahrens     int mode, void *tag)
266fa9e4066Sahrens {
267fa9e4066Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
268fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
269fa9e4066Sahrens 	dmu_buf_t *dbuf;
270fa9e4066Sahrens 	dsl_dataset_t *ds;
271fa9e4066Sahrens 
272fa9e4066Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
273fa9e4066Sahrens 	    dsl_pool_sync_context(dp));
274fa9e4066Sahrens 
275fa9e4066Sahrens 	dbuf = dmu_bonus_hold_tag(mos, dsobj, tag);
276fa9e4066Sahrens 	dmu_buf_read(dbuf);
277fa9e4066Sahrens 	ds = dmu_buf_get_user(dbuf);
278fa9e4066Sahrens 	if (ds == NULL) {
279fa9e4066Sahrens 		dsl_dataset_t *winner;
280fa9e4066Sahrens 
281fa9e4066Sahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
282fa9e4066Sahrens 		ds->ds_dbuf = dbuf;
283fa9e4066Sahrens 		ds->ds_object = dsobj;
284fa9e4066Sahrens 		ds->ds_phys = dbuf->db_data;
285fa9e4066Sahrens 		ds->ds_dir = dsl_dir_open_obj(dp,
286fa9e4066Sahrens 		    ds->ds_phys->ds_dir_obj, NULL, ds);
287fa9e4066Sahrens 
288fa9e4066Sahrens 		bplist_open(&ds->ds_deadlist,
289fa9e4066Sahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
290fa9e4066Sahrens 
291fa9e4066Sahrens 		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
292fa9e4066Sahrens 			ds->ds_snapname[0] = '\0';
293fa9e4066Sahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
294fa9e4066Sahrens 				ds->ds_prev =
295fa9e4066Sahrens 				    dsl_dataset_open_obj(dp,
296fa9e4066Sahrens 				    ds->ds_phys->ds_prev_snap_obj, NULL,
297fa9e4066Sahrens 				    DS_MODE_NONE, ds);
298fa9e4066Sahrens 			}
299fa9e4066Sahrens 		} else {
300fa9e4066Sahrens 			if (snapname) {
301fa9e4066Sahrens #ifdef ZFS_DEBUG
302fa9e4066Sahrens 				dsl_dataset_phys_t *headphys;
303fa9e4066Sahrens 				int err;
304fa9e4066Sahrens 				dmu_buf_t *headdbuf = dmu_bonus_hold_tag(mos,
305fa9e4066Sahrens 				    ds->ds_dir->dd_phys->
306fa9e4066Sahrens 				    dd_head_dataset_obj, FTAG);
307fa9e4066Sahrens 				dmu_buf_read(headdbuf);
308fa9e4066Sahrens 				headphys = headdbuf->db_data;
309fa9e4066Sahrens 				uint64_t foundobj;
310fa9e4066Sahrens 				err = zap_lookup(dp->dp_meta_objset,
311fa9e4066Sahrens 				    headphys->ds_snapnames_zapobj,
312fa9e4066Sahrens 				    snapname, sizeof (foundobj), 1, &foundobj);
313fa9e4066Sahrens 				ASSERT3U(err, ==, 0);
314fa9e4066Sahrens 				ASSERT3U(foundobj, ==, dsobj);
315fa9e4066Sahrens 				dmu_buf_rele_tag(headdbuf, FTAG);
316fa9e4066Sahrens #endif
317fa9e4066Sahrens 				(void) strcat(ds->ds_snapname, snapname);
318fa9e4066Sahrens 			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
319fa9e4066Sahrens 				dsl_dataset_get_snapname(ds);
320fa9e4066Sahrens 			}
321fa9e4066Sahrens 		}
322fa9e4066Sahrens 
323fa9e4066Sahrens 		winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
324fa9e4066Sahrens 		    dsl_dataset_evict);
325fa9e4066Sahrens 		if (winner) {
326fa9e4066Sahrens 			bplist_close(&ds->ds_deadlist);
327fa9e4066Sahrens 			if (ds->ds_prev) {
328fa9e4066Sahrens 				dsl_dataset_close(ds->ds_prev,
329fa9e4066Sahrens 				    DS_MODE_NONE, ds);
330fa9e4066Sahrens 			}
331fa9e4066Sahrens 			dsl_dir_close(ds->ds_dir, ds);
332fa9e4066Sahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
333fa9e4066Sahrens 			ds = winner;
334fa9e4066Sahrens 		} else {
335fa9e4066Sahrens 			uint64_t new =
336fa9e4066Sahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
337fa9e4066Sahrens 			if (new != ds->ds_phys->ds_fsid_guid) {
338fa9e4066Sahrens 				/* XXX it won't necessarily be synced... */
339fa9e4066Sahrens 				ds->ds_phys->ds_fsid_guid = new;
340fa9e4066Sahrens 			}
341fa9e4066Sahrens 		}
342fa9e4066Sahrens 	}
343fa9e4066Sahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
344fa9e4066Sahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
345fa9e4066Sahrens 
346fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
347fa9e4066Sahrens 	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
348fa9e4066Sahrens 	    ds->ds_phys->ds_restoring && !DS_MODE_IS_RESTORE(mode)) ||
349fa9e4066Sahrens 	    (ds->ds_open_refcount + weight > DOS_REF_MAX)) {
350fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
351fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
352fa9e4066Sahrens 		return (NULL);
353fa9e4066Sahrens 	}
354fa9e4066Sahrens 	ds->ds_open_refcount += weight;
355fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
356fa9e4066Sahrens 
357fa9e4066Sahrens 	return (ds);
358fa9e4066Sahrens }
359fa9e4066Sahrens 
360fa9e4066Sahrens int
361fa9e4066Sahrens dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
362fa9e4066Sahrens     void *tag, dsl_dataset_t **dsp)
363fa9e4066Sahrens {
364fa9e4066Sahrens 	dsl_dir_t *dd;
365fa9e4066Sahrens 	dsl_pool_t *dp;
366fa9e4066Sahrens 	const char *tail;
367fa9e4066Sahrens 	uint64_t obj;
368fa9e4066Sahrens 	dsl_dataset_t *ds = NULL;
369fa9e4066Sahrens 	int err = 0;
370fa9e4066Sahrens 
371fa9e4066Sahrens 	dd = dsl_dir_open_spa(spa, name, FTAG, &tail);
372fa9e4066Sahrens 	if (dd == NULL)
373fa9e4066Sahrens 		return (ENOENT);
374fa9e4066Sahrens 
375fa9e4066Sahrens 	dp = dd->dd_pool;
376fa9e4066Sahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
377fa9e4066Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
378fa9e4066Sahrens 	if (obj == 0) {
379fa9e4066Sahrens 		/* A dataset with no associated objset */
380fa9e4066Sahrens 		err = ENOENT;
381fa9e4066Sahrens 		goto out;
382fa9e4066Sahrens 	}
383fa9e4066Sahrens 
384fa9e4066Sahrens 	if (tail != NULL) {
385fa9e4066Sahrens 		objset_t *mos = dp->dp_meta_objset;
386fa9e4066Sahrens 
387fa9e4066Sahrens 		ds = dsl_dataset_open_obj(dp, obj, NULL, DS_MODE_NONE, tag);
388fa9e4066Sahrens 		obj = ds->ds_phys->ds_snapnames_zapobj;
389fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
390fa9e4066Sahrens 		ds = NULL;
391fa9e4066Sahrens 
392fa9e4066Sahrens 		if (tail[0] != '@') {
393fa9e4066Sahrens 			err = ENOENT;
394fa9e4066Sahrens 			goto out;
395fa9e4066Sahrens 		}
396fa9e4066Sahrens 		tail++;
397fa9e4066Sahrens 
398fa9e4066Sahrens 		/* Look for a snapshot */
399fa9e4066Sahrens 		if (!DS_MODE_IS_READONLY(mode)) {
400fa9e4066Sahrens 			err = EROFS;
401fa9e4066Sahrens 			goto out;
402fa9e4066Sahrens 		}
403fa9e4066Sahrens 		dprintf("looking for snapshot '%s'\n", tail);
404fa9e4066Sahrens 		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
405fa9e4066Sahrens 		if (err)
406fa9e4066Sahrens 			goto out;
407fa9e4066Sahrens 	}
408fa9e4066Sahrens 	ds = dsl_dataset_open_obj(dp, obj, tail, mode, tag);
409fa9e4066Sahrens 	if (ds == NULL)
410fa9e4066Sahrens 		err = EBUSY;
411fa9e4066Sahrens 
412fa9e4066Sahrens out:
413fa9e4066Sahrens 	rw_exit(&dp->dp_config_rwlock);
414fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
415fa9e4066Sahrens 
416fa9e4066Sahrens 	ASSERT3U((err == 0), ==, (ds != NULL));
417fa9e4066Sahrens 	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
418fa9e4066Sahrens 
419fa9e4066Sahrens 	*dsp = ds;
420fa9e4066Sahrens 	return (err);
421fa9e4066Sahrens }
422fa9e4066Sahrens 
423fa9e4066Sahrens int
424fa9e4066Sahrens dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
425fa9e4066Sahrens {
426fa9e4066Sahrens 	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
427fa9e4066Sahrens }
428fa9e4066Sahrens 
429fa9e4066Sahrens void
430fa9e4066Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
431fa9e4066Sahrens {
432fa9e4066Sahrens 	if (ds == NULL) {
433fa9e4066Sahrens 		(void) strcpy(name, "mos");
434fa9e4066Sahrens 	} else {
435fa9e4066Sahrens 		dsl_dir_name(ds->ds_dir, name);
436fa9e4066Sahrens 		dsl_dataset_get_snapname(ds);
437fa9e4066Sahrens 		if (ds->ds_snapname[0]) {
438fa9e4066Sahrens 			(void) strcat(name, "@");
439fa9e4066Sahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
440fa9e4066Sahrens 				/*
441fa9e4066Sahrens 				 * We use a "recursive" mutex so that we
442fa9e4066Sahrens 				 * can call dprintf_ds() with ds_lock held.
443fa9e4066Sahrens 				 */
444fa9e4066Sahrens 				mutex_enter(&ds->ds_lock);
445fa9e4066Sahrens 				(void) strcat(name, ds->ds_snapname);
446fa9e4066Sahrens 				mutex_exit(&ds->ds_lock);
447fa9e4066Sahrens 			} else {
448fa9e4066Sahrens 				(void) strcat(name, ds->ds_snapname);
449fa9e4066Sahrens 			}
450fa9e4066Sahrens 		}
451fa9e4066Sahrens 	}
452fa9e4066Sahrens }
453fa9e4066Sahrens 
454fa9e4066Sahrens void
455fa9e4066Sahrens dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
456fa9e4066Sahrens {
457fa9e4066Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
458fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
459fa9e4066Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, weight);
460fa9e4066Sahrens 	ds->ds_open_refcount -= weight;
461fa9e4066Sahrens 	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
462fa9e4066Sahrens 	    mode, ds->ds_open_refcount);
463fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
464fa9e4066Sahrens 
465fa9e4066Sahrens 	dmu_buf_rele_tag(ds->ds_dbuf, tag);
466fa9e4066Sahrens }
467fa9e4066Sahrens 
468fa9e4066Sahrens void
469fa9e4066Sahrens dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
470fa9e4066Sahrens {
471fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
472fa9e4066Sahrens 	dmu_buf_t *dbuf;
473fa9e4066Sahrens 	dsl_dataset_phys_t *dsphys;
474fa9e4066Sahrens 	dsl_dataset_t *ds;
475fa9e4066Sahrens 	uint64_t dsobj;
476fa9e4066Sahrens 	dsl_dir_t *dd;
477fa9e4066Sahrens 
478fa9e4066Sahrens 	dsl_dir_create_root(mos, ddobjp, tx);
479fa9e4066Sahrens 	dd = dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG);
480fa9e4066Sahrens 	ASSERT(dd != NULL);
481fa9e4066Sahrens 
482*1649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
483*1649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
484fa9e4066Sahrens 	dbuf = dmu_bonus_hold(mos, dsobj);
485fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
486fa9e4066Sahrens 	dsphys = dbuf->db_data;
487fa9e4066Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
488fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
489fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
490fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
491fa9e4066Sahrens 	dsphys->ds_snapnames_zapobj =
49287e5029aSahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
493fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
494fa9e4066Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
495fa9e4066Sahrens 	dsphys->ds_deadlist_obj =
496fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
497fa9e4066Sahrens 	dmu_buf_rele(dbuf);
498fa9e4066Sahrens 
499fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
500fa9e4066Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
501fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
502fa9e4066Sahrens 
503fa9e4066Sahrens 	ds = dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG);
504fa9e4066Sahrens 	(void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx);
505fa9e4066Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
506fa9e4066Sahrens }
507fa9e4066Sahrens 
508fa9e4066Sahrens int
509fa9e4066Sahrens dsl_dataset_create_sync(dsl_dir_t *pds, const char *fullname,
510fa9e4066Sahrens     const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
511fa9e4066Sahrens {
512fa9e4066Sahrens 	int err;
513fa9e4066Sahrens 	dsl_pool_t *dp = pds->dd_pool;
514fa9e4066Sahrens 	dmu_buf_t *dbuf;
515fa9e4066Sahrens 	dsl_dataset_phys_t *dsphys;
516fa9e4066Sahrens 	uint64_t dsobj;
517fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
518fa9e4066Sahrens 	dsl_dir_t *dd;
519fa9e4066Sahrens 
520fa9e4066Sahrens 	if (clone_parent != NULL) {
521fa9e4066Sahrens 		/*
522fa9e4066Sahrens 		 * You can't clone across pools.
523fa9e4066Sahrens 		 */
524fa9e4066Sahrens 		if (clone_parent->ds_dir->dd_pool != dp)
525fa9e4066Sahrens 			return (EXDEV);
526fa9e4066Sahrens 
527fa9e4066Sahrens 		/*
528fa9e4066Sahrens 		 * You can only clone snapshots, not the head datasets.
529fa9e4066Sahrens 		 */
530fa9e4066Sahrens 		if (clone_parent->ds_phys->ds_num_children == 0)
531fa9e4066Sahrens 			return (EINVAL);
532fa9e4066Sahrens 	}
533fa9e4066Sahrens 
534fa9e4066Sahrens 	ASSERT(lastname[0] != '@');
535fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
536fa9e4066Sahrens 
537fa9e4066Sahrens 	err = dsl_dir_create_sync(pds, lastname, tx);
538fa9e4066Sahrens 	if (err)
539fa9e4066Sahrens 		return (err);
540fa9e4066Sahrens 	dd = dsl_dir_open_spa(dp->dp_spa, fullname, FTAG, NULL);
541fa9e4066Sahrens 	ASSERT(dd != NULL);
542fa9e4066Sahrens 
543fa9e4066Sahrens 	/* This is the point of no (unsuccessful) return */
544fa9e4066Sahrens 
545*1649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
546*1649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
547fa9e4066Sahrens 	dbuf = dmu_bonus_hold(mos, dsobj);
548fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
549fa9e4066Sahrens 	dsphys = dbuf->db_data;
550fa9e4066Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
551fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
552fa9e4066Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
553fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
554fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
555fa9e4066Sahrens 	dsphys->ds_snapnames_zapobj =
55687e5029aSahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
557fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
558fa9e4066Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
559fa9e4066Sahrens 	dsphys->ds_deadlist_obj =
560fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
561fa9e4066Sahrens 	if (clone_parent) {
562fa9e4066Sahrens 		dsphys->ds_prev_snap_obj = clone_parent->ds_object;
563fa9e4066Sahrens 		dsphys->ds_prev_snap_txg =
564fa9e4066Sahrens 		    clone_parent->ds_phys->ds_creation_txg;
565fa9e4066Sahrens 		dsphys->ds_used_bytes =
566fa9e4066Sahrens 		    clone_parent->ds_phys->ds_used_bytes;
567fa9e4066Sahrens 		dsphys->ds_compressed_bytes =
568fa9e4066Sahrens 		    clone_parent->ds_phys->ds_compressed_bytes;
569fa9e4066Sahrens 		dsphys->ds_uncompressed_bytes =
570fa9e4066Sahrens 		    clone_parent->ds_phys->ds_uncompressed_bytes;
571fa9e4066Sahrens 		dsphys->ds_bp = clone_parent->ds_phys->ds_bp;
572fa9e4066Sahrens 
573fa9e4066Sahrens 		dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
574fa9e4066Sahrens 		clone_parent->ds_phys->ds_num_children++;
575fa9e4066Sahrens 
576fa9e4066Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
577fa9e4066Sahrens 		dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
578fa9e4066Sahrens 	}
579fa9e4066Sahrens 	dmu_buf_rele(dbuf);
580fa9e4066Sahrens 
581fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
582fa9e4066Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
583fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
584fa9e4066Sahrens 
585fa9e4066Sahrens 	return (0);
586fa9e4066Sahrens }
587fa9e4066Sahrens 
588fa9e4066Sahrens 
589fa9e4066Sahrens int
590fa9e4066Sahrens dsl_dataset_destroy(const char *name)
591fa9e4066Sahrens {
592fa9e4066Sahrens 	int err;
593fa9e4066Sahrens 	dsl_pool_t *dp;
594fa9e4066Sahrens 	dsl_dir_t *dd;
595fa9e4066Sahrens 	const char *tail;
596fa9e4066Sahrens 
597fa9e4066Sahrens 	dd = dsl_dir_open(name, FTAG, &tail);
598fa9e4066Sahrens 	if (dd == NULL)
599fa9e4066Sahrens 		return (ENOENT);
600fa9e4066Sahrens 
601fa9e4066Sahrens 	dp = dd->dd_pool;
602fa9e4066Sahrens 	if (tail != NULL) {
603fa9e4066Sahrens 		if (tail[0] != '@') {
604fa9e4066Sahrens 			dsl_dir_close(dd, FTAG);
605fa9e4066Sahrens 			return (ENOENT);
606fa9e4066Sahrens 		}
607fa9e4066Sahrens 		tail++;
608fa9e4066Sahrens 		/* Just blow away the snapshot */
609fa9e4066Sahrens 		do {
610fa9e4066Sahrens 			txg_wait_synced(dp, 0);
611fa9e4066Sahrens 			err = dsl_dir_sync_task(dd,
612fa9e4066Sahrens 			    dsl_dataset_destroy_sync, (void*)tail, 0);
613fa9e4066Sahrens 		} while (err == EAGAIN);
614fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
615fa9e4066Sahrens 	} else {
616fa9e4066Sahrens 		char buf[MAXNAMELEN];
617fa9e4066Sahrens 		char *cp;
618fa9e4066Sahrens 
619fa9e4066Sahrens 		dsl_dir_t *pds;
620fa9e4066Sahrens 		if (dd->dd_phys->dd_parent_obj == 0) {
621fa9e4066Sahrens 			dsl_dir_close(dd, FTAG);
622fa9e4066Sahrens 			return (EINVAL);
623fa9e4066Sahrens 		}
624fa9e4066Sahrens 		/*
625fa9e4066Sahrens 		 * Make sure it's not dirty before we destroy it.
626fa9e4066Sahrens 		 */
627fa9e4066Sahrens 		txg_wait_synced(dd->dd_pool, 0);
628fa9e4066Sahrens 		/*
629fa9e4066Sahrens 		 * Blow away the dsl_dir + head dataset.
630fa9e4066Sahrens 		 * dsl_dir_destroy_sync() will call
631fa9e4066Sahrens 		 * dsl_dataset_destroy_sync() to destroy the head dataset.
632fa9e4066Sahrens 		 */
633fa9e4066Sahrens 		rw_enter(&dp->dp_config_rwlock, RW_READER);
634fa9e4066Sahrens 		pds = dsl_dir_open_obj(dd->dd_pool,
635fa9e4066Sahrens 		    dd->dd_phys->dd_parent_obj, NULL, FTAG);
636fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
637fa9e4066Sahrens 		rw_exit(&dp->dp_config_rwlock);
638fa9e4066Sahrens 
639fa9e4066Sahrens 		(void) strcpy(buf, name);
640fa9e4066Sahrens 		cp = strrchr(buf, '/') + 1;
641fa9e4066Sahrens 		ASSERT(cp[0] != '\0');
642fa9e4066Sahrens 		do {
643fa9e4066Sahrens 			txg_wait_synced(dp, 0);
644fa9e4066Sahrens 			err = dsl_dir_sync_task(pds,
645fa9e4066Sahrens 			    dsl_dir_destroy_sync, cp, 0);
646fa9e4066Sahrens 		} while (err == EAGAIN);
647fa9e4066Sahrens 		dsl_dir_close(pds, FTAG);
648fa9e4066Sahrens 	}
649fa9e4066Sahrens 
650fa9e4066Sahrens 	return (err);
651fa9e4066Sahrens }
652fa9e4066Sahrens 
653fa9e4066Sahrens int
654fa9e4066Sahrens dsl_dataset_rollback(const char *name)
655fa9e4066Sahrens {
656fa9e4066Sahrens 	int err;
657fa9e4066Sahrens 	dsl_dir_t *dd;
658fa9e4066Sahrens 	const char *tail;
659fa9e4066Sahrens 
660fa9e4066Sahrens 	dd = dsl_dir_open(name, FTAG, &tail);
661fa9e4066Sahrens 	if (dd == NULL)
662fa9e4066Sahrens 		return (ENOENT);
663fa9e4066Sahrens 
664fa9e4066Sahrens 	if (tail != NULL) {
665fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
666fa9e4066Sahrens 		return (EINVAL);
667fa9e4066Sahrens 	}
668fa9e4066Sahrens 	do {
669fa9e4066Sahrens 		txg_wait_synced(dd->dd_pool, 0);
670fa9e4066Sahrens 		err = dsl_dir_sync_task(dd,
671fa9e4066Sahrens 		    dsl_dataset_rollback_sync, NULL, 0);
672fa9e4066Sahrens 	} while (err == EAGAIN);
673fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
674fa9e4066Sahrens 
675fa9e4066Sahrens 	return (err);
676fa9e4066Sahrens }
677fa9e4066Sahrens 
678fa9e4066Sahrens void *
679fa9e4066Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
680fa9e4066Sahrens     void *p, dsl_dataset_evict_func_t func)
681fa9e4066Sahrens {
682fa9e4066Sahrens 	void *old;
683fa9e4066Sahrens 
684fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
685fa9e4066Sahrens 	old = ds->ds_user_ptr;
686fa9e4066Sahrens 	if (old == NULL) {
687fa9e4066Sahrens 		ds->ds_user_ptr = p;
688fa9e4066Sahrens 		ds->ds_user_evict_func = func;
689fa9e4066Sahrens 	}
690fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
691fa9e4066Sahrens 	return (old);
692fa9e4066Sahrens }
693fa9e4066Sahrens 
694fa9e4066Sahrens void *
695fa9e4066Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
696fa9e4066Sahrens {
697fa9e4066Sahrens 	return (ds->ds_user_ptr);
698fa9e4066Sahrens }
699fa9e4066Sahrens 
700fa9e4066Sahrens 
701fa9e4066Sahrens void
702fa9e4066Sahrens dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp)
703fa9e4066Sahrens {
704fa9e4066Sahrens 	*bp = ds->ds_phys->ds_bp;
705fa9e4066Sahrens }
706fa9e4066Sahrens 
707fa9e4066Sahrens void
708fa9e4066Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
709fa9e4066Sahrens {
710fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
711fa9e4066Sahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
712fa9e4066Sahrens 	if (ds == NULL) {
713fa9e4066Sahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
714fa9e4066Sahrens 	} else {
715fa9e4066Sahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
716fa9e4066Sahrens 		ds->ds_phys->ds_bp = *bp;
717fa9e4066Sahrens 	}
718fa9e4066Sahrens }
719fa9e4066Sahrens 
720fa9e4066Sahrens spa_t *
721fa9e4066Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
722fa9e4066Sahrens {
723fa9e4066Sahrens 	return (ds->ds_dir->dd_pool->dp_spa);
724fa9e4066Sahrens }
725fa9e4066Sahrens 
726fa9e4066Sahrens void
727fa9e4066Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
728fa9e4066Sahrens {
729fa9e4066Sahrens 	dsl_pool_t *dp;
730fa9e4066Sahrens 
731fa9e4066Sahrens 	if (ds == NULL) /* this is the meta-objset */
732fa9e4066Sahrens 		return;
733fa9e4066Sahrens 
734fa9e4066Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
735fa9e4066Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
736fa9e4066Sahrens 
737fa9e4066Sahrens 	dp = ds->ds_dir->dd_pool;
738fa9e4066Sahrens 
739fa9e4066Sahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
740fa9e4066Sahrens 		/* up the hold count until we can be written out */
741fa9e4066Sahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
742fa9e4066Sahrens 	}
743fa9e4066Sahrens }
744fa9e4066Sahrens 
745fa9e4066Sahrens struct killarg {
746fa9e4066Sahrens 	uint64_t *usedp;
747fa9e4066Sahrens 	uint64_t *compressedp;
748fa9e4066Sahrens 	uint64_t *uncompressedp;
749fa9e4066Sahrens 	zio_t *zio;
750fa9e4066Sahrens 	dmu_tx_t *tx;
751fa9e4066Sahrens };
752fa9e4066Sahrens 
753fa9e4066Sahrens static int
754fa9e4066Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
755fa9e4066Sahrens {
756fa9e4066Sahrens 	struct killarg *ka = arg;
757fa9e4066Sahrens 	blkptr_t *bp = &bc->bc_blkptr;
758fa9e4066Sahrens 
759fa9e4066Sahrens 	ASSERT3U(bc->bc_errno, ==, 0);
760fa9e4066Sahrens 
761fa9e4066Sahrens 	/*
762fa9e4066Sahrens 	 * Since this callback is not called concurrently, no lock is
763fa9e4066Sahrens 	 * needed on the accounting values.
764fa9e4066Sahrens 	 */
765fa9e4066Sahrens 	*ka->usedp += BP_GET_ASIZE(bp);
766fa9e4066Sahrens 	*ka->compressedp += BP_GET_PSIZE(bp);
767fa9e4066Sahrens 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
768fa9e4066Sahrens 	/* XXX check for EIO? */
769fa9e4066Sahrens 	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
770fa9e4066Sahrens 	    ARC_NOWAIT);
771fa9e4066Sahrens 	return (0);
772fa9e4066Sahrens }
773fa9e4066Sahrens 
774fa9e4066Sahrens /* ARGSUSED */
775fa9e4066Sahrens int
776fa9e4066Sahrens dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
777fa9e4066Sahrens {
778fa9e4066Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
779fa9e4066Sahrens 	dsl_dataset_t *ds;
780fa9e4066Sahrens 
781fa9e4066Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == 0)
782fa9e4066Sahrens 		return (EINVAL);
783fa9e4066Sahrens 	ds = dsl_dataset_open_obj(dd->dd_pool,
784fa9e4066Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG);
785fa9e4066Sahrens 
786fa9e4066Sahrens 	if (ds->ds_phys->ds_prev_snap_txg == 0) {
787fa9e4066Sahrens 		/*
788fa9e4066Sahrens 		 * There's no previous snapshot.  I suppose we could
789fa9e4066Sahrens 		 * roll it back to being empty (and re-initialize the
790fa9e4066Sahrens 		 * upper (ZPL) layer).  But for now there's no way to do
791fa9e4066Sahrens 		 * this via the user interface.
792fa9e4066Sahrens 		 */
793fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
794fa9e4066Sahrens 		return (EINVAL);
795fa9e4066Sahrens 	}
796fa9e4066Sahrens 
797fa9e4066Sahrens 	mutex_enter(&ds->ds_lock);
798fa9e4066Sahrens 	if (ds->ds_open_refcount > 0) {
799fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
800fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
801fa9e4066Sahrens 		return (EBUSY);
802fa9e4066Sahrens 	}
803fa9e4066Sahrens 
804fa9e4066Sahrens 	/*
805fa9e4066Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
806fa9e4066Sahrens 	 * them.  Try again.
807fa9e4066Sahrens 	 */
808fa9e4066Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) {
809fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
810fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
811fa9e4066Sahrens 		return (EAGAIN);
812fa9e4066Sahrens 	}
813fa9e4066Sahrens 
814fa9e4066Sahrens 	/* THE POINT OF NO (unsuccessful) RETURN */
815fa9e4066Sahrens 	ds->ds_open_refcount = DOS_REF_MAX;
816fa9e4066Sahrens 	mutex_exit(&ds->ds_lock);
817fa9e4066Sahrens 
818fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
819fa9e4066Sahrens 
820fa9e4066Sahrens 	/* Zero out the deadlist. */
821fa9e4066Sahrens 	dprintf("old deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
822fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
823fa9e4066Sahrens 	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
824fa9e4066Sahrens 	ds->ds_phys->ds_deadlist_obj =
825fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
826fa9e4066Sahrens 	bplist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
827fa9e4066Sahrens 	dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
828fa9e4066Sahrens 
829fa9e4066Sahrens 	{
830fa9e4066Sahrens 		/* Free blkptrs that we gave birth to */
831fa9e4066Sahrens 		zio_t *zio;
832fa9e4066Sahrens 		uint64_t used = 0, compressed = 0, uncompressed = 0;
833fa9e4066Sahrens 		struct killarg ka;
834fa9e4066Sahrens 
835fa9e4066Sahrens 		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
836fa9e4066Sahrens 		    ZIO_FLAG_MUSTSUCCEED);
837fa9e4066Sahrens 		ka.usedp = &used;
838fa9e4066Sahrens 		ka.compressedp = &compressed;
839fa9e4066Sahrens 		ka.uncompressedp = &uncompressed;
840fa9e4066Sahrens 		ka.zio = zio;
841fa9e4066Sahrens 		ka.tx = tx;
842fa9e4066Sahrens 		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
843fa9e4066Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
844fa9e4066Sahrens 		(void) zio_wait(zio);
845fa9e4066Sahrens 
846fa9e4066Sahrens 		dsl_dir_diduse_space(dd,
847fa9e4066Sahrens 		    -used, -compressed, -uncompressed, tx);
848fa9e4066Sahrens 	}
849fa9e4066Sahrens 
850fa9e4066Sahrens 	/* Change our contents to that of the prev snapshot (finally!) */
851fa9e4066Sahrens 	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
852fa9e4066Sahrens 	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
853fa9e4066Sahrens 	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
854fa9e4066Sahrens 	ds->ds_phys->ds_compressed_bytes =
855fa9e4066Sahrens 	    ds->ds_prev->ds_phys->ds_compressed_bytes;
856fa9e4066Sahrens 	ds->ds_phys->ds_uncompressed_bytes =
857fa9e4066Sahrens 	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
858fa9e4066Sahrens 	ds->ds_phys->ds_restoring = ds->ds_prev->ds_phys->ds_restoring;
859fa9e4066Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
860fa9e4066Sahrens 
861fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
862fa9e4066Sahrens 	ds->ds_prev->ds_phys->ds_unique_bytes = 0;
863fa9e4066Sahrens 
864fa9e4066Sahrens 	dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
865fa9e4066Sahrens 	ds->ds_open_refcount = 0;
866fa9e4066Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
867fa9e4066Sahrens 
868fa9e4066Sahrens 	return (0);
869fa9e4066Sahrens }
870fa9e4066Sahrens 
871fa9e4066Sahrens int
872fa9e4066Sahrens dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
873fa9e4066Sahrens {
874fa9e4066Sahrens 	const char *snapname = arg;
875fa9e4066Sahrens 	uint64_t used = 0, compressed = 0, uncompressed = 0;
876fa9e4066Sahrens 	blkptr_t bp;
877fa9e4066Sahrens 	zio_t *zio;
878fa9e4066Sahrens 	int err;
879fa9e4066Sahrens 	int after_branch_point = FALSE;
880fa9e4066Sahrens 	int drop_lock = FALSE;
881fa9e4066Sahrens 	dsl_pool_t *dp = dd->dd_pool;
882fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
883fa9e4066Sahrens 	dsl_dataset_t *ds, *ds_prev = NULL;
884fa9e4066Sahrens 	uint64_t obj;
885fa9e4066Sahrens 
886fa9e4066Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == 0)
887fa9e4066Sahrens 		return (EINVAL);
888fa9e4066Sahrens 
889fa9e4066Sahrens 	if (!RW_WRITE_HELD(&dp->dp_config_rwlock)) {
890fa9e4066Sahrens 		rw_enter(&dp->dp_config_rwlock, RW_WRITER);
891fa9e4066Sahrens 		drop_lock = TRUE;
892fa9e4066Sahrens 	}
893fa9e4066Sahrens 
894fa9e4066Sahrens 	ds = dsl_dataset_open_obj(dd->dd_pool,
895fa9e4066Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL,
896fa9e4066Sahrens 	    snapname ? DS_MODE_NONE : DS_MODE_EXCLUSIVE, FTAG);
897fa9e4066Sahrens 
898fa9e4066Sahrens 	if (snapname) {
899fa9e4066Sahrens 		err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
900fa9e4066Sahrens 		    snapname, 8, 1, &obj);
901fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
902fa9e4066Sahrens 		if (err) {
903fa9e4066Sahrens 			if (drop_lock)
904fa9e4066Sahrens 				rw_exit(&dp->dp_config_rwlock);
905fa9e4066Sahrens 			return (err);
906fa9e4066Sahrens 		}
907fa9e4066Sahrens 
908fa9e4066Sahrens 		ds = dsl_dataset_open_obj(dd->dd_pool, obj, NULL,
909fa9e4066Sahrens 		    DS_MODE_EXCLUSIVE, FTAG);
910fa9e4066Sahrens 	}
911fa9e4066Sahrens 	if (ds == NULL) {
912fa9e4066Sahrens 		if (drop_lock)
913fa9e4066Sahrens 			rw_exit(&dp->dp_config_rwlock);
914fa9e4066Sahrens 		return (EBUSY);
915fa9e4066Sahrens 	}
916fa9e4066Sahrens 
917fa9e4066Sahrens 	obj = ds->ds_object;
918fa9e4066Sahrens 
919fa9e4066Sahrens 	/* Can't delete a branch point. */
920fa9e4066Sahrens 	if (ds->ds_phys->ds_num_children > 1) {
921fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
922fa9e4066Sahrens 		if (drop_lock)
923fa9e4066Sahrens 			rw_exit(&dp->dp_config_rwlock);
924fa9e4066Sahrens 		return (EINVAL);
925fa9e4066Sahrens 	}
926fa9e4066Sahrens 
927fa9e4066Sahrens 	/*
928fa9e4066Sahrens 	 * Can't delete a head dataset if there are snapshots of it.
929fa9e4066Sahrens 	 * (Except if the only snapshots are from the branch we cloned
930fa9e4066Sahrens 	 * from.)
931fa9e4066Sahrens 	 */
932fa9e4066Sahrens 	if (ds->ds_prev != NULL &&
933fa9e4066Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == obj) {
934fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
935fa9e4066Sahrens 		if (drop_lock)
936fa9e4066Sahrens 			rw_exit(&dp->dp_config_rwlock);
937fa9e4066Sahrens 		return (EINVAL);
938fa9e4066Sahrens 	}
939fa9e4066Sahrens 
940fa9e4066Sahrens 	/*
941fa9e4066Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
942fa9e4066Sahrens 	 * them.  Try again.
943fa9e4066Sahrens 	 */
944fa9e4066Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) {
945fa9e4066Sahrens 		mutex_exit(&ds->ds_lock);
946fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
94787e5029aSahrens 		if (drop_lock)
94887e5029aSahrens 			rw_exit(&dp->dp_config_rwlock);
949fa9e4066Sahrens 		return (EAGAIN);
950fa9e4066Sahrens 	}
951fa9e4066Sahrens 
952fa9e4066Sahrens 	/* THE POINT OF NO (unsuccessful) RETURN */
953fa9e4066Sahrens 
954fa9e4066Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
955fa9e4066Sahrens 		if (ds->ds_prev) {
956fa9e4066Sahrens 			ds_prev = ds->ds_prev;
957fa9e4066Sahrens 		} else {
958fa9e4066Sahrens 			ds_prev = dsl_dataset_open_obj(dd->dd_pool,
959fa9e4066Sahrens 			    ds->ds_phys->ds_prev_snap_obj, NULL,
960fa9e4066Sahrens 			    DS_MODE_NONE, FTAG);
961fa9e4066Sahrens 		}
962fa9e4066Sahrens 		after_branch_point =
963fa9e4066Sahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
964fa9e4066Sahrens 
965fa9e4066Sahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
966fa9e4066Sahrens 		if (after_branch_point &&
967fa9e4066Sahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
968fa9e4066Sahrens 			/* This clone is toast. */
969fa9e4066Sahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
970fa9e4066Sahrens 			ds_prev->ds_phys->ds_num_children--;
971fa9e4066Sahrens 		} else if (!after_branch_point) {
972fa9e4066Sahrens 			ds_prev->ds_phys->ds_next_snap_obj =
973fa9e4066Sahrens 			    ds->ds_phys->ds_next_snap_obj;
974fa9e4066Sahrens 		}
975fa9e4066Sahrens 	}
976fa9e4066Sahrens 
977fa9e4066Sahrens 	ASSERT3P(tx->tx_pool, ==, dd->dd_pool);
978fa9e4066Sahrens 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
979fa9e4066Sahrens 
980fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
981fa9e4066Sahrens 		dsl_dataset_t *ds_next;
982fa9e4066Sahrens 		uint64_t itor = 0;
983fa9e4066Sahrens 
984fa9e4066Sahrens 		spa_scrub_restart(dp->dp_spa, tx->tx_txg);
985fa9e4066Sahrens 
986fa9e4066Sahrens 		ds_next = dsl_dataset_open_obj(dd->dd_pool,
987fa9e4066Sahrens 		    ds->ds_phys->ds_next_snap_obj, NULL, DS_MODE_NONE, FTAG);
988fa9e4066Sahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
989fa9e4066Sahrens 
990fa9e4066Sahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
991fa9e4066Sahrens 		ds_next->ds_phys->ds_prev_snap_obj =
992fa9e4066Sahrens 		    ds->ds_phys->ds_prev_snap_obj;
993fa9e4066Sahrens 		ds_next->ds_phys->ds_prev_snap_txg =
994fa9e4066Sahrens 		    ds->ds_phys->ds_prev_snap_txg;
995fa9e4066Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
996fa9e4066Sahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
997fa9e4066Sahrens 
998fa9e4066Sahrens 		/*
999fa9e4066Sahrens 		 * Transfer to our deadlist (which will become next's
1000fa9e4066Sahrens 		 * new deadlist) any entries from next's current
1001fa9e4066Sahrens 		 * deadlist which were born before prev, and free the
1002fa9e4066Sahrens 		 * other entries.
1003fa9e4066Sahrens 		 *
1004fa9e4066Sahrens 		 * XXX we're doing this long task with the config lock held
1005fa9e4066Sahrens 		 */
1006fa9e4066Sahrens 		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
1007fa9e4066Sahrens 		    &bp) == 0) {
1008fa9e4066Sahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
1009fa9e4066Sahrens 				bplist_enqueue(&ds->ds_deadlist, &bp, tx);
1010fa9e4066Sahrens 				if (ds_prev && !after_branch_point &&
1011fa9e4066Sahrens 				    bp.blk_birth >
1012fa9e4066Sahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
1013fa9e4066Sahrens 					ds_prev->ds_phys->ds_unique_bytes +=
1014fa9e4066Sahrens 					    BP_GET_ASIZE(&bp);
1015fa9e4066Sahrens 				}
1016fa9e4066Sahrens 			} else {
1017fa9e4066Sahrens 				used += BP_GET_ASIZE(&bp);
1018fa9e4066Sahrens 				compressed += BP_GET_PSIZE(&bp);
1019fa9e4066Sahrens 				uncompressed += BP_GET_UCSIZE(&bp);
1020fa9e4066Sahrens 				/* XXX check return value? */
1021fa9e4066Sahrens 				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
1022fa9e4066Sahrens 				    &bp, NULL, NULL, ARC_NOWAIT);
1023fa9e4066Sahrens 			}
1024fa9e4066Sahrens 		}
1025fa9e4066Sahrens 
1026fa9e4066Sahrens 		/* free next's deadlist */
1027fa9e4066Sahrens 		bplist_close(&ds_next->ds_deadlist);
1028fa9e4066Sahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
1029fa9e4066Sahrens 
1030fa9e4066Sahrens 		/* set next's deadlist to our deadlist */
1031fa9e4066Sahrens 		ds_next->ds_phys->ds_deadlist_obj =
1032fa9e4066Sahrens 		    ds->ds_phys->ds_deadlist_obj;
1033fa9e4066Sahrens 		bplist_open(&ds_next->ds_deadlist, mos,
1034fa9e4066Sahrens 		    ds_next->ds_phys->ds_deadlist_obj);
1035fa9e4066Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1036fa9e4066Sahrens 
1037fa9e4066Sahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
1038fa9e4066Sahrens 			/*
1039fa9e4066Sahrens 			 * Update next's unique to include blocks which
1040fa9e4066Sahrens 			 * were previously shared by only this snapshot
1041fa9e4066Sahrens 			 * and it.  Those blocks will be born after the
1042fa9e4066Sahrens 			 * prev snap and before this snap, and will have
1043fa9e4066Sahrens 			 * died after the next snap and before the one
1044fa9e4066Sahrens 			 * after that (ie. be on the snap after next's
1045fa9e4066Sahrens 			 * deadlist).
1046fa9e4066Sahrens 			 *
1047fa9e4066Sahrens 			 * XXX we're doing this long task with the
1048fa9e4066Sahrens 			 * config lock held
1049fa9e4066Sahrens 			 */
1050fa9e4066Sahrens 			dsl_dataset_t *ds_after_next;
1051fa9e4066Sahrens 
1052fa9e4066Sahrens 			ds_after_next = dsl_dataset_open_obj(dd->dd_pool,
1053fa9e4066Sahrens 			    ds_next->ds_phys->ds_next_snap_obj, NULL,
1054fa9e4066Sahrens 			    DS_MODE_NONE, FTAG);
1055fa9e4066Sahrens 			itor = 0;
1056fa9e4066Sahrens 			while (bplist_iterate(&ds_after_next->ds_deadlist,
1057fa9e4066Sahrens 			    &itor, &bp) == 0) {
1058fa9e4066Sahrens 				if (bp.blk_birth >
1059fa9e4066Sahrens 				    ds->ds_phys->ds_prev_snap_txg &&
1060fa9e4066Sahrens 				    bp.blk_birth <=
1061fa9e4066Sahrens 				    ds->ds_phys->ds_creation_txg) {
1062fa9e4066Sahrens 					ds_next->ds_phys->ds_unique_bytes +=
1063fa9e4066Sahrens 					    BP_GET_ASIZE(&bp);
1064fa9e4066Sahrens 				}
1065fa9e4066Sahrens 			}
1066fa9e4066Sahrens 
1067fa9e4066Sahrens 			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
1068fa9e4066Sahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
1069fa9e4066Sahrens 		} else {
1070fa9e4066Sahrens 			/*
1071fa9e4066Sahrens 			 * It would be nice to update the head dataset's
1072fa9e4066Sahrens 			 * unique.  To do so we would have to traverse
1073fa9e4066Sahrens 			 * it for blocks born after ds_prev, which is
1074fa9e4066Sahrens 			 * pretty expensive just to maintain something
1075fa9e4066Sahrens 			 * for debugging purposes.
1076fa9e4066Sahrens 			 */
1077fa9e4066Sahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
1078fa9e4066Sahrens 			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
1079fa9e4066Sahrens 			    ds_next);
1080fa9e4066Sahrens 			if (ds_prev) {
1081fa9e4066Sahrens 				ds_next->ds_prev = dsl_dataset_open_obj(
1082fa9e4066Sahrens 				    dd->dd_pool, ds->ds_phys->ds_prev_snap_obj,
1083fa9e4066Sahrens 				    NULL, DS_MODE_NONE, ds_next);
1084fa9e4066Sahrens 			} else {
1085fa9e4066Sahrens 				ds_next->ds_prev = NULL;
1086fa9e4066Sahrens 			}
1087fa9e4066Sahrens 		}
1088fa9e4066Sahrens 		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);
1089fa9e4066Sahrens 
1090fa9e4066Sahrens 		/*
1091fa9e4066Sahrens 		 * NB: unique_bytes is not accurate for head objsets
1092fa9e4066Sahrens 		 * because we don't update it when we delete the most
1093fa9e4066Sahrens 		 * recent snapshot -- see above comment.
1094fa9e4066Sahrens 		 */
1095fa9e4066Sahrens 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
1096fa9e4066Sahrens 	} else {
1097fa9e4066Sahrens 		/*
1098fa9e4066Sahrens 		 * There's no next snapshot, so this is a head dataset.
1099fa9e4066Sahrens 		 * Destroy the deadlist.  Unless it's a clone, the
1100fa9e4066Sahrens 		 * deadlist should be empty.  (If it's a clone, it's
1101fa9e4066Sahrens 		 * safe to ignore the deadlist contents.)
1102fa9e4066Sahrens 		 */
1103fa9e4066Sahrens 		struct killarg ka;
1104fa9e4066Sahrens 
1105fa9e4066Sahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
1106fa9e4066Sahrens 		bplist_close(&ds->ds_deadlist);
1107fa9e4066Sahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1108fa9e4066Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1109fa9e4066Sahrens 
1110fa9e4066Sahrens 		/*
1111fa9e4066Sahrens 		 * Free everything that we point to (that's born after
1112fa9e4066Sahrens 		 * the previous snapshot, if we are a clone)
1113fa9e4066Sahrens 		 *
1114fa9e4066Sahrens 		 * XXX we're doing this long task with the config lock held
1115fa9e4066Sahrens 		 */
1116fa9e4066Sahrens 		ka.usedp = &used;
1117fa9e4066Sahrens 		ka.compressedp = &compressed;
1118fa9e4066Sahrens 		ka.uncompressedp = &uncompressed;
1119fa9e4066Sahrens 		ka.zio = zio;
1120fa9e4066Sahrens 		ka.tx = tx;
1121fa9e4066Sahrens 		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1122fa9e4066Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1123fa9e4066Sahrens 		ASSERT3U(err, ==, 0);
1124fa9e4066Sahrens 	}
1125fa9e4066Sahrens 
1126fa9e4066Sahrens 	err = zio_wait(zio);
1127fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1128fa9e4066Sahrens 
1129fa9e4066Sahrens 	dsl_dir_diduse_space(dd, -used, -compressed, -uncompressed, tx);
1130fa9e4066Sahrens 
1131fa9e4066Sahrens 	if (ds->ds_phys->ds_snapnames_zapobj) {
1132fa9e4066Sahrens 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1133fa9e4066Sahrens 		ASSERT(err == 0);
1134fa9e4066Sahrens 	}
1135fa9e4066Sahrens 
1136fa9e4066Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1137fa9e4066Sahrens 		/* Erase the link in the dataset */
1138fa9e4066Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
1139fa9e4066Sahrens 		dd->dd_phys->dd_head_dataset_obj = 0;
1140fa9e4066Sahrens 		/*
1141fa9e4066Sahrens 		 * dsl_dir_sync_destroy() called us, they'll destroy
1142fa9e4066Sahrens 		 * the dataset.
1143fa9e4066Sahrens 		 */
1144fa9e4066Sahrens 	} else {
1145fa9e4066Sahrens 		/* remove from snapshot namespace */
1146fa9e4066Sahrens 		dsl_dataset_t *ds_head;
1147fa9e4066Sahrens 		ds_head = dsl_dataset_open_obj(dd->dd_pool,
1148fa9e4066Sahrens 		    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG);
1149fa9e4066Sahrens #ifdef ZFS_DEBUG
1150fa9e4066Sahrens 		{
1151fa9e4066Sahrens 			uint64_t val;
1152fa9e4066Sahrens 			err = zap_lookup(mos,
1153fa9e4066Sahrens 			    ds_head->ds_phys->ds_snapnames_zapobj,
1154fa9e4066Sahrens 			    snapname, 8, 1, &val);
1155fa9e4066Sahrens 			ASSERT3U(err, ==, 0);
1156fa9e4066Sahrens 			ASSERT3U(val, ==, obj);
1157fa9e4066Sahrens 		}
1158fa9e4066Sahrens #endif
1159fa9e4066Sahrens 		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
1160fa9e4066Sahrens 		    snapname, tx);
1161fa9e4066Sahrens 		ASSERT(err == 0);
1162fa9e4066Sahrens 		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
1163fa9e4066Sahrens 	}
1164fa9e4066Sahrens 
1165fa9e4066Sahrens 	if (ds_prev && ds->ds_prev != ds_prev)
1166fa9e4066Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1167fa9e4066Sahrens 
1168fa9e4066Sahrens 	err = dmu_object_free(mos, obj, tx);
1169fa9e4066Sahrens 	ASSERT(err == 0);
1170fa9e4066Sahrens 
1171fa9e4066Sahrens 	/*
1172fa9e4066Sahrens 	 * Close the objset with mode NONE, thus leaving it with
1173fa9e4066Sahrens 	 * DOS_REF_MAX set, so that noone can access it.
1174fa9e4066Sahrens 	 */
1175fa9e4066Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
1176fa9e4066Sahrens 
1177fa9e4066Sahrens 	if (drop_lock)
1178fa9e4066Sahrens 		rw_exit(&dp->dp_config_rwlock);
1179fa9e4066Sahrens 	return (0);
1180fa9e4066Sahrens }
1181fa9e4066Sahrens 
1182fa9e4066Sahrens int
1183fa9e4066Sahrens dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
1184fa9e4066Sahrens {
1185fa9e4066Sahrens 	const char *snapname = arg;
1186fa9e4066Sahrens 	dsl_pool_t *dp = dd->dd_pool;
1187fa9e4066Sahrens 	dmu_buf_t *dbuf;
1188fa9e4066Sahrens 	dsl_dataset_phys_t *dsphys;
1189fa9e4066Sahrens 	uint64_t dsobj, value;
1190fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
1191fa9e4066Sahrens 	dsl_dataset_t *ds;
1192fa9e4066Sahrens 	int err;
1193fa9e4066Sahrens 
1194fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1195fa9e4066Sahrens 
1196fa9e4066Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == 0)
1197fa9e4066Sahrens 		return (EINVAL);
1198fa9e4066Sahrens 	ds = dsl_dataset_open_obj(dp, dd->dd_phys->dd_head_dataset_obj, NULL,
1199fa9e4066Sahrens 	    DS_MODE_NONE, FTAG);
1200fa9e4066Sahrens 
1201fa9e4066Sahrens 	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
1202fa9e4066Sahrens 	    snapname, 8, 1, &value);
1203fa9e4066Sahrens 	if (err == 0) {
1204fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
1205fa9e4066Sahrens 		return (EEXIST);
1206fa9e4066Sahrens 	}
1207fa9e4066Sahrens 	ASSERT(err == ENOENT);
1208fa9e4066Sahrens 
1209fa9e4066Sahrens 	/* The point of no (unsuccessful) return */
1210fa9e4066Sahrens 
1211fa9e4066Sahrens 	dprintf_dd(dd, "taking snapshot %s in txg %llu\n",
1212fa9e4066Sahrens 	    snapname, tx->tx_txg);
1213fa9e4066Sahrens 
1214fa9e4066Sahrens 	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
1215fa9e4066Sahrens 
1216fa9e4066Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
1217fa9e4066Sahrens 
1218*1649cd4bStabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
1219*1649cd4bStabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
1220fa9e4066Sahrens 	dbuf = dmu_bonus_hold(mos, dsobj);
1221fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
1222fa9e4066Sahrens 	dsphys = dbuf->db_data;
1223fa9e4066Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
1224fa9e4066Sahrens 	dsphys->ds_fsid_guid = unique_create();
1225fa9e4066Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
1226fa9e4066Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
1227fa9e4066Sahrens 	    sizeof (dsphys->ds_guid));
1228fa9e4066Sahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
1229fa9e4066Sahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
1230fa9e4066Sahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
1231fa9e4066Sahrens 	dsphys->ds_num_children = 1;
1232fa9e4066Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
1233fa9e4066Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
1234fa9e4066Sahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
1235fa9e4066Sahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
1236fa9e4066Sahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
1237fa9e4066Sahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
1238fa9e4066Sahrens 	dsphys->ds_restoring = ds->ds_phys->ds_restoring;
1239fa9e4066Sahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
1240fa9e4066Sahrens 	dmu_buf_rele(dbuf);
1241fa9e4066Sahrens 
1242fa9e4066Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
1243fa9e4066Sahrens 		dsl_dataset_t *ds_prev;
1244fa9e4066Sahrens 
1245fa9e4066Sahrens 		ds_prev = dsl_dataset_open_obj(dp,
1246fa9e4066Sahrens 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_NONE, FTAG);
1247fa9e4066Sahrens 		ASSERT(ds_prev->ds_phys->ds_next_snap_obj ==
1248fa9e4066Sahrens 		    ds->ds_object ||
1249fa9e4066Sahrens 		    ds_prev->ds_phys->ds_num_children > 1);
1250fa9e4066Sahrens 		if (ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
1251fa9e4066Sahrens 			dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1252fa9e4066Sahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1253fa9e4066Sahrens 			    ds_prev->ds_phys->ds_creation_txg);
1254fa9e4066Sahrens 			ds_prev->ds_phys->ds_next_snap_obj = dsobj;
1255fa9e4066Sahrens 		}
1256fa9e4066Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1257fa9e4066Sahrens 	} else {
1258fa9e4066Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 0);
1259fa9e4066Sahrens 	}
1260fa9e4066Sahrens 
1261fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
1262fa9e4066Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1263fa9e4066Sahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
1264fa9e4066Sahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
1265fa9e4066Sahrens 	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
1266fa9e4066Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
1267fa9e4066Sahrens 	ds->ds_phys->ds_deadlist_obj =
1268fa9e4066Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
1269fa9e4066Sahrens 	bplist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
1270fa9e4066Sahrens 
1271fa9e4066Sahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
1272fa9e4066Sahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
1273fa9e4066Sahrens 	    snapname, 8, 1, &dsobj, tx);
1274fa9e4066Sahrens 	ASSERT(err == 0);
1275fa9e4066Sahrens 
1276fa9e4066Sahrens 	if (ds->ds_prev)
1277fa9e4066Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
1278fa9e4066Sahrens 	ds->ds_prev = dsl_dataset_open_obj(dp,
1279fa9e4066Sahrens 	    ds->ds_phys->ds_prev_snap_obj, snapname, DS_MODE_NONE, ds);
1280fa9e4066Sahrens 
1281fa9e4066Sahrens 	rw_exit(&dp->dp_config_rwlock);
1282fa9e4066Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
1283fa9e4066Sahrens 
1284fa9e4066Sahrens 	return (0);
1285fa9e4066Sahrens }
1286fa9e4066Sahrens 
1287fa9e4066Sahrens void
1288fa9e4066Sahrens dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx)
1289fa9e4066Sahrens {
1290fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1291fa9e4066Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1292fa9e4066Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1293fa9e4066Sahrens 
1294fa9e4066Sahrens 	dmu_objset_sync(ds->ds_user_ptr, tx);
1295fa9e4066Sahrens 	dsl_dir_dirty(ds->ds_dir, tx);
1296fa9e4066Sahrens 	bplist_close(&ds->ds_deadlist);
1297fa9e4066Sahrens 
1298fa9e4066Sahrens 	dmu_buf_remove_ref(ds->ds_dbuf, ds);
1299fa9e4066Sahrens }
1300fa9e4066Sahrens 
1301fa9e4066Sahrens void
1302fa9e4066Sahrens dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds)
1303fa9e4066Sahrens {
1304fa9e4066Sahrens 	/* fill in properties crap */
1305fa9e4066Sahrens 	dsl_dir_stats(ds->ds_dir, dds);
1306fa9e4066Sahrens 
1307fa9e4066Sahrens 	if (ds->ds_phys->ds_num_children != 0) {
1308fa9e4066Sahrens 		dds->dds_is_snapshot = TRUE;
1309fa9e4066Sahrens 		dds->dds_num_clones = ds->ds_phys->ds_num_children - 1;
1310fa9e4066Sahrens 	}
1311fa9e4066Sahrens 
1312fa9e4066Sahrens 	dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth;
1313fa9e4066Sahrens 
1314fa9e4066Sahrens 	dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill;
1315fa9e4066Sahrens 	dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used;
1316fa9e4066Sahrens 
1317fa9e4066Sahrens 	/* We override the dataset's creation time... they should be the same */
1318fa9e4066Sahrens 	dds->dds_creation_time = ds->ds_phys->ds_creation_time;
1319fa9e4066Sahrens 	dds->dds_creation_txg = ds->ds_phys->ds_creation_txg;
1320fa9e4066Sahrens 	dds->dds_space_refd = ds->ds_phys->ds_used_bytes;
1321fa9e4066Sahrens 	dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid;
1322fa9e4066Sahrens 	dds->dds_guid = ds->ds_phys->ds_guid;
1323fa9e4066Sahrens 
1324fa9e4066Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1325fa9e4066Sahrens 		/*
1326fa9e4066Sahrens 		 * This is a snapshot; override the dd's space used with
1327fa9e4066Sahrens 		 * our unique space
1328fa9e4066Sahrens 		 */
1329fa9e4066Sahrens 		dds->dds_space_used = ds->ds_phys->ds_unique_bytes;
1330fa9e4066Sahrens 		dds->dds_compressed_bytes =
1331fa9e4066Sahrens 		    ds->ds_phys->ds_compressed_bytes;
1332fa9e4066Sahrens 		dds->dds_uncompressed_bytes =
1333fa9e4066Sahrens 		    ds->ds_phys->ds_uncompressed_bytes;
1334fa9e4066Sahrens 	}
1335fa9e4066Sahrens 
1336fa9e4066Sahrens 	dds->dds_objset_obj = ds->ds_object;
1337fa9e4066Sahrens }
1338fa9e4066Sahrens 
1339fa9e4066Sahrens dsl_pool_t *
1340fa9e4066Sahrens dsl_dataset_pool(dsl_dataset_t *ds)
1341fa9e4066Sahrens {
1342fa9e4066Sahrens 	return (ds->ds_dir->dd_pool);
1343fa9e4066Sahrens }
1344fa9e4066Sahrens 
/*
 * Argument for dsl_dataset_snapshot_rename_sync(): the existing name of
 * the snapshot (oldname) and the name it is to receive (newname).
 */
struct osrenamearg {
	const char *oldname, *newname;
};
1349fa9e4066Sahrens 
1350fa9e4066Sahrens static int
1351fa9e4066Sahrens dsl_dataset_snapshot_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
1352fa9e4066Sahrens {
1353fa9e4066Sahrens 	struct osrenamearg *ora = arg;
1354fa9e4066Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
1355fa9e4066Sahrens 	dsl_dir_t *nds;
1356fa9e4066Sahrens 	const char *tail;
1357fa9e4066Sahrens 	int err;
1358fa9e4066Sahrens 	dsl_dataset_t *snds, *fsds;
1359fa9e4066Sahrens 	uint64_t val;
1360fa9e4066Sahrens 
1361fa9e4066Sahrens 	err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, ora->oldname,
1362fa9e4066Sahrens 	    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &snds);
1363fa9e4066Sahrens 	if (err)
1364fa9e4066Sahrens 		return (err);
1365fa9e4066Sahrens 
1366fa9e4066Sahrens 	if (snds->ds_dir != dd) {
1367fa9e4066Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1368fa9e4066Sahrens 		return (EINVAL);
1369fa9e4066Sahrens 	}
1370fa9e4066Sahrens 
1371fa9e4066Sahrens 	/* better be changing a snapshot */
1372fa9e4066Sahrens 	if (snds->ds_phys->ds_next_snap_obj == 0) {
1373fa9e4066Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1374fa9e4066Sahrens 		return (EINVAL);
1375fa9e4066Sahrens 	}
1376fa9e4066Sahrens 
1377fa9e4066Sahrens 	/* new fs better exist */
1378fa9e4066Sahrens 	nds = dsl_dir_open_spa(dd->dd_pool->dp_spa, ora->newname, FTAG, &tail);
1379fa9e4066Sahrens 	if (nds == NULL) {
1380fa9e4066Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1381fa9e4066Sahrens 		return (ENOENT);
1382fa9e4066Sahrens 	}
1383fa9e4066Sahrens 
1384fa9e4066Sahrens 	dsl_dir_close(nds, FTAG);
1385fa9e4066Sahrens 
1386fa9e4066Sahrens 	/* new name better be in same fs */
1387fa9e4066Sahrens 	if (nds != dd) {
1388fa9e4066Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1389fa9e4066Sahrens 		return (EINVAL);
1390fa9e4066Sahrens 	}
1391fa9e4066Sahrens 
1392fa9e4066Sahrens 	/* new name better be a snapshot */
1393fa9e4066Sahrens 	if (tail == NULL || tail[0] != '@') {
1394fa9e4066Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1395fa9e4066Sahrens 		return (EINVAL);
1396fa9e4066Sahrens 	}
1397fa9e4066Sahrens 
1398fa9e4066Sahrens 	tail++;
1399fa9e4066Sahrens 
1400fa9e4066Sahrens 	fsds = dsl_dataset_open_obj(dd->dd_pool,
1401fa9e4066Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG);
1402fa9e4066Sahrens 
1403fa9e4066Sahrens 	/* new name better not be in use */
1404fa9e4066Sahrens 	err = zap_lookup(mos, fsds->ds_phys->ds_snapnames_zapobj,
1405fa9e4066Sahrens 	    tail, 8, 1, &val);
1406fa9e4066Sahrens 	if (err != ENOENT) {
1407fa9e4066Sahrens 		if (err == 0)
1408fa9e4066Sahrens 			err = EEXIST;
1409fa9e4066Sahrens 		dsl_dataset_close(fsds, DS_MODE_NONE, FTAG);
1410fa9e4066Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1411fa9e4066Sahrens 		return (EEXIST);
1412fa9e4066Sahrens 	}
1413fa9e4066Sahrens 
1414fa9e4066Sahrens 	/* The point of no (unsuccessful) return */
1415fa9e4066Sahrens 
1416fa9e4066Sahrens 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER);
1417fa9e4066Sahrens 	dsl_dataset_get_snapname(snds);
1418fa9e4066Sahrens 	err = zap_remove(mos, fsds->ds_phys->ds_snapnames_zapobj,
1419fa9e4066Sahrens 	    snds->ds_snapname, tx);
1420fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1421fa9e4066Sahrens 	mutex_enter(&snds->ds_lock);
1422fa9e4066Sahrens 	(void) strcpy(snds->ds_snapname, tail);
1423fa9e4066Sahrens 	mutex_exit(&snds->ds_lock);
1424fa9e4066Sahrens 	err = zap_add(mos, fsds->ds_phys->ds_snapnames_zapobj,
1425fa9e4066Sahrens 	    snds->ds_snapname, 8, 1, &snds->ds_object, tx);
1426fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1427fa9e4066Sahrens 	rw_exit(&dd->dd_pool->dp_config_rwlock);
1428fa9e4066Sahrens 
1429fa9e4066Sahrens 	dsl_dataset_close(fsds, DS_MODE_NONE, FTAG);
1430fa9e4066Sahrens 	dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1431fa9e4066Sahrens 	return (0);
1432fa9e4066Sahrens }
1433fa9e4066Sahrens 
1434fa9e4066Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
1435fa9e4066Sahrens int
1436fa9e4066Sahrens dsl_dataset_rename(const char *osname, const char *newname)
1437fa9e4066Sahrens {
1438fa9e4066Sahrens 	dsl_dir_t *dd;
1439fa9e4066Sahrens 	const char *tail;
1440fa9e4066Sahrens 	struct osrenamearg ora;
1441fa9e4066Sahrens 	int err;
1442fa9e4066Sahrens 
1443fa9e4066Sahrens 	dd = dsl_dir_open(osname, FTAG, &tail);
1444fa9e4066Sahrens 	if (dd == NULL)
1445fa9e4066Sahrens 		return (ENOENT);
1446fa9e4066Sahrens 	if (tail == NULL) {
1447fa9e4066Sahrens 		err = dsl_dir_sync_task(dd,
1448fa9e4066Sahrens 		    dsl_dir_rename_sync, (void*)newname, 1<<12);
1449fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
1450fa9e4066Sahrens 		return (err);
1451fa9e4066Sahrens 	}
1452fa9e4066Sahrens 	if (tail[0] != '@') {
1453fa9e4066Sahrens 		/* the name ended in a nonexistant component */
1454fa9e4066Sahrens 		dsl_dir_close(dd, FTAG);
1455fa9e4066Sahrens 		return (ENOENT);
1456fa9e4066Sahrens 	}
1457fa9e4066Sahrens 
1458fa9e4066Sahrens 	ora.oldname = osname;
1459fa9e4066Sahrens 	ora.newname = newname;
1460fa9e4066Sahrens 
1461fa9e4066Sahrens 	err = dsl_dir_sync_task(dd,
1462fa9e4066Sahrens 	    dsl_dataset_snapshot_rename_sync, &ora, 1<<12);
1463fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
1464fa9e4066Sahrens 	return (err);
1465fa9e4066Sahrens }
1466