xref: /illumos-gate/usr/src/uts/common/fs/zfs/dsl_dir.c (revision e7437265dc2a4920c197ed4337665539d358b22c)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
22b7661cccSmmusante  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23fa9e4066Sahrens  * Use is subject to license terms.
24fa9e4066Sahrens  */
25fa9e4066Sahrens 
26fa9e4066Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27fa9e4066Sahrens 
28fa9e4066Sahrens #include <sys/dmu.h>
29fa9e4066Sahrens #include <sys/dmu_tx.h>
30fa9e4066Sahrens #include <sys/dsl_dataset.h>
31fa9e4066Sahrens #include <sys/dsl_dir.h>
32fa9e4066Sahrens #include <sys/dsl_prop.h>
331d452cf5Sahrens #include <sys/dsl_synctask.h>
34ecd6cf80Smarks #include <sys/dsl_deleg.h>
35fa9e4066Sahrens #include <sys/spa.h>
36fa9e4066Sahrens #include <sys/zap.h>
37fa9e4066Sahrens #include <sys/zio.h>
38fa9e4066Sahrens #include <sys/arc.h>
39ecd6cf80Smarks #include <sys/sunddi.h>
40fa9e4066Sahrens #include "zfs_namecheck.h"
41fa9e4066Sahrens 
42fa9e4066Sahrens static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd);
43ecd6cf80Smarks static void dsl_dir_set_reservation_sync(void *arg1, void *arg2,
44ecd6cf80Smarks     cred_t *cr, dmu_tx_t *tx);
45fa9e4066Sahrens 
46fa9e4066Sahrens 
47fa9e4066Sahrens /* ARGSUSED */
48fa9e4066Sahrens static void
49fa9e4066Sahrens dsl_dir_evict(dmu_buf_t *db, void *arg)
50fa9e4066Sahrens {
51fa9e4066Sahrens 	dsl_dir_t *dd = arg;
52fa9e4066Sahrens 	dsl_pool_t *dp = dd->dd_pool;
53fa9e4066Sahrens 	int t;
54fa9e4066Sahrens 
55fa9e4066Sahrens 	for (t = 0; t < TXG_SIZE; t++) {
56fa9e4066Sahrens 		ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
57fa9e4066Sahrens 		ASSERT(dd->dd_tempreserved[t] == 0);
58fa9e4066Sahrens 		ASSERT(dd->dd_space_towrite[t] == 0);
59fa9e4066Sahrens 	}
60fa9e4066Sahrens 
61fa9e4066Sahrens 	ASSERT3U(dd->dd_used_bytes, ==, dd->dd_phys->dd_used_bytes);
62fa9e4066Sahrens 
63fa9e4066Sahrens 	if (dd->dd_parent)
64fa9e4066Sahrens 		dsl_dir_close(dd->dd_parent, dd);
65fa9e4066Sahrens 
66fa9e4066Sahrens 	spa_close(dd->dd_pool->dp_spa, dd);
67fa9e4066Sahrens 
68fa9e4066Sahrens 	/*
69fa9e4066Sahrens 	 * The props callback list should be empty since they hold the
70fa9e4066Sahrens 	 * dir open.
71fa9e4066Sahrens 	 */
72fa9e4066Sahrens 	list_destroy(&dd->dd_prop_cbs);
735ad82045Snd 	mutex_destroy(&dd->dd_lock);
74fa9e4066Sahrens 	kmem_free(dd, sizeof (dsl_dir_t));
75fa9e4066Sahrens }
76fa9e4066Sahrens 
77ea8dc4b6Seschrock int
78fa9e4066Sahrens dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
79ea8dc4b6Seschrock     const char *tail, void *tag, dsl_dir_t **ddp)
80fa9e4066Sahrens {
81fa9e4066Sahrens 	dmu_buf_t *dbuf;
82fa9e4066Sahrens 	dsl_dir_t *dd;
83ea8dc4b6Seschrock 	int err;
84fa9e4066Sahrens 
85fa9e4066Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
86fa9e4066Sahrens 	    dsl_pool_sync_context(dp));
87fa9e4066Sahrens 
88ea8dc4b6Seschrock 	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
89ea8dc4b6Seschrock 	if (err)
90ea8dc4b6Seschrock 		return (err);
91fa9e4066Sahrens 	dd = dmu_buf_get_user(dbuf);
92fa9e4066Sahrens #ifdef ZFS_DEBUG
93fa9e4066Sahrens 	{
94fa9e4066Sahrens 		dmu_object_info_t doi;
95fa9e4066Sahrens 		dmu_object_info_from_db(dbuf, &doi);
961649cd4bStabriz 		ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
97fa9e4066Sahrens 	}
98fa9e4066Sahrens #endif
99fa9e4066Sahrens 	/* XXX assert bonus buffer size is correct */
100fa9e4066Sahrens 	if (dd == NULL) {
101fa9e4066Sahrens 		dsl_dir_t *winner;
102fa9e4066Sahrens 		int err;
103fa9e4066Sahrens 
104fa9e4066Sahrens 		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
105fa9e4066Sahrens 		dd->dd_object = ddobj;
106fa9e4066Sahrens 		dd->dd_dbuf = dbuf;
107fa9e4066Sahrens 		dd->dd_pool = dp;
108fa9e4066Sahrens 		dd->dd_phys = dbuf->db_data;
109fa9e4066Sahrens 		dd->dd_used_bytes = dd->dd_phys->dd_used_bytes;
1105ad82045Snd 		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
111fa9e4066Sahrens 
112fa9e4066Sahrens 		list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
113fa9e4066Sahrens 		    offsetof(dsl_prop_cb_record_t, cbr_node));
114fa9e4066Sahrens 
115fa9e4066Sahrens 		if (dd->dd_phys->dd_parent_obj) {
116ea8dc4b6Seschrock 			err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
117ea8dc4b6Seschrock 			    NULL, dd, &dd->dd_parent);
118ea8dc4b6Seschrock 			if (err) {
1195ad82045Snd 				mutex_destroy(&dd->dd_lock);
120ea8dc4b6Seschrock 				kmem_free(dd, sizeof (dsl_dir_t));
121ea8dc4b6Seschrock 				dmu_buf_rele(dbuf, tag);
122ea8dc4b6Seschrock 				return (err);
123ea8dc4b6Seschrock 			}
124fa9e4066Sahrens 			if (tail) {
125fa9e4066Sahrens #ifdef ZFS_DEBUG
126fa9e4066Sahrens 				uint64_t foundobj;
127fa9e4066Sahrens 
128fa9e4066Sahrens 				err = zap_lookup(dp->dp_meta_objset,
129*e7437265Sahrens 				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
130fa9e4066Sahrens 				    tail, sizeof (foundobj), 1, &foundobj);
131ea8dc4b6Seschrock 				ASSERT(err || foundobj == ddobj);
132fa9e4066Sahrens #endif
133fa9e4066Sahrens 				(void) strcpy(dd->dd_myname, tail);
134fa9e4066Sahrens 			} else {
135fa9e4066Sahrens 				err = zap_value_search(dp->dp_meta_objset,
136*e7437265Sahrens 				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
137*e7437265Sahrens 				    ddobj, 0, dd->dd_myname);
138ea8dc4b6Seschrock 			}
139ea8dc4b6Seschrock 			if (err) {
140ea8dc4b6Seschrock 				dsl_dir_close(dd->dd_parent, dd);
1415ad82045Snd 				mutex_destroy(&dd->dd_lock);
142ea8dc4b6Seschrock 				kmem_free(dd, sizeof (dsl_dir_t));
143ea8dc4b6Seschrock 				dmu_buf_rele(dbuf, tag);
144ea8dc4b6Seschrock 				return (err);
145fa9e4066Sahrens 			}
146fa9e4066Sahrens 		} else {
147fa9e4066Sahrens 			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
148fa9e4066Sahrens 		}
149fa9e4066Sahrens 
150fa9e4066Sahrens 		winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
151fa9e4066Sahrens 		    dsl_dir_evict);
152fa9e4066Sahrens 		if (winner) {
153fa9e4066Sahrens 			if (dd->dd_parent)
154fa9e4066Sahrens 				dsl_dir_close(dd->dd_parent, dd);
1555ad82045Snd 			mutex_destroy(&dd->dd_lock);
156fa9e4066Sahrens 			kmem_free(dd, sizeof (dsl_dir_t));
157fa9e4066Sahrens 			dd = winner;
158fa9e4066Sahrens 		} else {
159fa9e4066Sahrens 			spa_open_ref(dp->dp_spa, dd);
160fa9e4066Sahrens 		}
161fa9e4066Sahrens 	}
162fa9e4066Sahrens 
163fa9e4066Sahrens 	/*
164fa9e4066Sahrens 	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
165fa9e4066Sahrens 	 * holds on the spa.  We need the open-to-close holds because
166fa9e4066Sahrens 	 * otherwise the spa_refcnt wouldn't change when we open a
167fa9e4066Sahrens 	 * dir which the spa also has open, so we could incorrectly
168fa9e4066Sahrens 	 * think it was OK to unload/export/destroy the pool.  We need
169fa9e4066Sahrens 	 * the instantiate-to-evict hold because the dsl_dir_t has a
170fa9e4066Sahrens 	 * pointer to the dd_pool, which has a pointer to the spa_t.
171fa9e4066Sahrens 	 */
172fa9e4066Sahrens 	spa_open_ref(dp->dp_spa, tag);
173fa9e4066Sahrens 	ASSERT3P(dd->dd_pool, ==, dp);
174fa9e4066Sahrens 	ASSERT3U(dd->dd_object, ==, ddobj);
175fa9e4066Sahrens 	ASSERT3P(dd->dd_dbuf, ==, dbuf);
176ea8dc4b6Seschrock 	*ddp = dd;
177ea8dc4b6Seschrock 	return (0);
178fa9e4066Sahrens }
179fa9e4066Sahrens 
180fa9e4066Sahrens void
181fa9e4066Sahrens dsl_dir_close(dsl_dir_t *dd, void *tag)
182fa9e4066Sahrens {
183fa9e4066Sahrens 	dprintf_dd(dd, "%s\n", "");
184fa9e4066Sahrens 	spa_close(dd->dd_pool->dp_spa, tag);
185ea8dc4b6Seschrock 	dmu_buf_rele(dd->dd_dbuf, tag);
186fa9e4066Sahrens }
187fa9e4066Sahrens 
18815f66a7fSek /* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
189fa9e4066Sahrens void
190fa9e4066Sahrens dsl_dir_name(dsl_dir_t *dd, char *buf)
191fa9e4066Sahrens {
192fa9e4066Sahrens 	if (dd->dd_parent) {
193fa9e4066Sahrens 		dsl_dir_name(dd->dd_parent, buf);
194fa9e4066Sahrens 		(void) strcat(buf, "/");
195fa9e4066Sahrens 	} else {
196fa9e4066Sahrens 		buf[0] = '\0';
197fa9e4066Sahrens 	}
198fa9e4066Sahrens 	if (!MUTEX_HELD(&dd->dd_lock)) {
199fa9e4066Sahrens 		/*
200fa9e4066Sahrens 		 * recursive mutex so that we can use
201fa9e4066Sahrens 		 * dprintf_dd() with dd_lock held
202fa9e4066Sahrens 		 */
203fa9e4066Sahrens 		mutex_enter(&dd->dd_lock);
204fa9e4066Sahrens 		(void) strcat(buf, dd->dd_myname);
205fa9e4066Sahrens 		mutex_exit(&dd->dd_lock);
206fa9e4066Sahrens 	} else {
207fa9e4066Sahrens 		(void) strcat(buf, dd->dd_myname);
208fa9e4066Sahrens 	}
209fa9e4066Sahrens }
210fa9e4066Sahrens 
211b7661cccSmmusante /* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */
212b7661cccSmmusante int
213b7661cccSmmusante dsl_dir_namelen(dsl_dir_t *dd)
214b7661cccSmmusante {
215b7661cccSmmusante 	int result = 0;
216b7661cccSmmusante 
217b7661cccSmmusante 	if (dd->dd_parent) {
218b7661cccSmmusante 		/* parent's name + 1 for the "/" */
219b7661cccSmmusante 		result = dsl_dir_namelen(dd->dd_parent) + 1;
220b7661cccSmmusante 	}
221b7661cccSmmusante 
222b7661cccSmmusante 	if (!MUTEX_HELD(&dd->dd_lock)) {
223b7661cccSmmusante 		/* see dsl_dir_name */
224b7661cccSmmusante 		mutex_enter(&dd->dd_lock);
225b7661cccSmmusante 		result += strlen(dd->dd_myname);
226b7661cccSmmusante 		mutex_exit(&dd->dd_lock);
227b7661cccSmmusante 	} else {
228b7661cccSmmusante 		result += strlen(dd->dd_myname);
229b7661cccSmmusante 	}
230b7661cccSmmusante 
231b7661cccSmmusante 	return (result);
232b7661cccSmmusante }
233b7661cccSmmusante 
234fa9e4066Sahrens int
235fa9e4066Sahrens dsl_dir_is_private(dsl_dir_t *dd)
236fa9e4066Sahrens {
237fa9e4066Sahrens 	int rv = FALSE;
238fa9e4066Sahrens 
239fa9e4066Sahrens 	if (dd->dd_parent && dsl_dir_is_private(dd->dd_parent))
240fa9e4066Sahrens 		rv = TRUE;
241fa9e4066Sahrens 	if (dataset_name_hidden(dd->dd_myname))
242fa9e4066Sahrens 		rv = TRUE;
243fa9e4066Sahrens 	return (rv);
244fa9e4066Sahrens }
245fa9e4066Sahrens 
246fa9e4066Sahrens 
247fa9e4066Sahrens static int
248fa9e4066Sahrens getcomponent(const char *path, char *component, const char **nextp)
249fa9e4066Sahrens {
250fa9e4066Sahrens 	char *p;
251fa9e4066Sahrens 	if (path == NULL)
252203a47d8Snd 		return (ENOENT);
253fa9e4066Sahrens 	/* This would be a good place to reserve some namespace... */
254fa9e4066Sahrens 	p = strpbrk(path, "/@");
255fa9e4066Sahrens 	if (p && (p[1] == '/' || p[1] == '@')) {
256fa9e4066Sahrens 		/* two separators in a row */
257fa9e4066Sahrens 		return (EINVAL);
258fa9e4066Sahrens 	}
259fa9e4066Sahrens 	if (p == NULL || p == path) {
260fa9e4066Sahrens 		/*
261fa9e4066Sahrens 		 * if the first thing is an @ or /, it had better be an
262fa9e4066Sahrens 		 * @ and it had better not have any more ats or slashes,
263fa9e4066Sahrens 		 * and it had better have something after the @.
264fa9e4066Sahrens 		 */
265fa9e4066Sahrens 		if (p != NULL &&
266fa9e4066Sahrens 		    (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
267fa9e4066Sahrens 			return (EINVAL);
268fa9e4066Sahrens 		if (strlen(path) >= MAXNAMELEN)
269fa9e4066Sahrens 			return (ENAMETOOLONG);
270fa9e4066Sahrens 		(void) strcpy(component, path);
271fa9e4066Sahrens 		p = NULL;
272fa9e4066Sahrens 	} else if (p[0] == '/') {
273fa9e4066Sahrens 		if (p-path >= MAXNAMELEN)
274fa9e4066Sahrens 			return (ENAMETOOLONG);
275fa9e4066Sahrens 		(void) strncpy(component, path, p - path);
276fa9e4066Sahrens 		component[p-path] = '\0';
277fa9e4066Sahrens 		p++;
278fa9e4066Sahrens 	} else if (p[0] == '@') {
279fa9e4066Sahrens 		/*
280fa9e4066Sahrens 		 * if the next separator is an @, there better not be
281fa9e4066Sahrens 		 * any more slashes.
282fa9e4066Sahrens 		 */
283fa9e4066Sahrens 		if (strchr(path, '/'))
284fa9e4066Sahrens 			return (EINVAL);
285fa9e4066Sahrens 		if (p-path >= MAXNAMELEN)
286fa9e4066Sahrens 			return (ENAMETOOLONG);
287fa9e4066Sahrens 		(void) strncpy(component, path, p - path);
288fa9e4066Sahrens 		component[p-path] = '\0';
289fa9e4066Sahrens 	} else {
290fa9e4066Sahrens 		ASSERT(!"invalid p");
291fa9e4066Sahrens 	}
292fa9e4066Sahrens 	*nextp = p;
293fa9e4066Sahrens 	return (0);
294fa9e4066Sahrens }
295fa9e4066Sahrens 
296fa9e4066Sahrens /*
297fa9e4066Sahrens  * same as dsl_open_dir, ignore the first component of name and use the
298fa9e4066Sahrens  * spa instead
299fa9e4066Sahrens  */
300ea8dc4b6Seschrock int
301ea8dc4b6Seschrock dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
302ea8dc4b6Seschrock     dsl_dir_t **ddp, const char **tailp)
303fa9e4066Sahrens {
304fa9e4066Sahrens 	char buf[MAXNAMELEN];
305fa9e4066Sahrens 	const char *next, *nextnext = NULL;
306fa9e4066Sahrens 	int err;
307fa9e4066Sahrens 	dsl_dir_t *dd;
308fa9e4066Sahrens 	dsl_pool_t *dp;
309fa9e4066Sahrens 	uint64_t ddobj;
310fa9e4066Sahrens 	int openedspa = FALSE;
311fa9e4066Sahrens 
312fa9e4066Sahrens 	dprintf("%s\n", name);
313fa9e4066Sahrens 
314fa9e4066Sahrens 	err = getcomponent(name, buf, &next);
315fa9e4066Sahrens 	if (err)
316ea8dc4b6Seschrock 		return (err);
317fa9e4066Sahrens 	if (spa == NULL) {
318fa9e4066Sahrens 		err = spa_open(buf, &spa, FTAG);
319fa9e4066Sahrens 		if (err) {
320fa9e4066Sahrens 			dprintf("spa_open(%s) failed\n", buf);
321ea8dc4b6Seschrock 			return (err);
322fa9e4066Sahrens 		}
323fa9e4066Sahrens 		openedspa = TRUE;
324fa9e4066Sahrens 
325fa9e4066Sahrens 		/* XXX this assertion belongs in spa_open */
326fa9e4066Sahrens 		ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
327fa9e4066Sahrens 	}
328fa9e4066Sahrens 
329fa9e4066Sahrens 	dp = spa_get_dsl(spa);
330fa9e4066Sahrens 
331fa9e4066Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
332ea8dc4b6Seschrock 	err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
333ea8dc4b6Seschrock 	if (err) {
334ea8dc4b6Seschrock 		rw_exit(&dp->dp_config_rwlock);
335ea8dc4b6Seschrock 		if (openedspa)
336ea8dc4b6Seschrock 			spa_close(spa, FTAG);
337ea8dc4b6Seschrock 		return (err);
338ea8dc4b6Seschrock 	}
339ea8dc4b6Seschrock 
340fa9e4066Sahrens 	while (next != NULL) {
341fa9e4066Sahrens 		dsl_dir_t *child_ds;
342fa9e4066Sahrens 		err = getcomponent(next, buf, &nextnext);
343ea8dc4b6Seschrock 		if (err)
344ea8dc4b6Seschrock 			break;
345fa9e4066Sahrens 		ASSERT(next[0] != '\0');
346fa9e4066Sahrens 		if (next[0] == '@')
347fa9e4066Sahrens 			break;
348fa9e4066Sahrens 		dprintf("looking up %s in obj%lld\n",
349fa9e4066Sahrens 		    buf, dd->dd_phys->dd_child_dir_zapobj);
350fa9e4066Sahrens 
351fa9e4066Sahrens 		err = zap_lookup(dp->dp_meta_objset,
352fa9e4066Sahrens 		    dd->dd_phys->dd_child_dir_zapobj,
353fa9e4066Sahrens 		    buf, sizeof (ddobj), 1, &ddobj);
354ea8dc4b6Seschrock 		if (err) {
355ea8dc4b6Seschrock 			if (err == ENOENT)
356ea8dc4b6Seschrock 				err = 0;
357fa9e4066Sahrens 			break;
358fa9e4066Sahrens 		}
359fa9e4066Sahrens 
360ea8dc4b6Seschrock 		err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
361ea8dc4b6Seschrock 		if (err)
362ea8dc4b6Seschrock 			break;
363fa9e4066Sahrens 		dsl_dir_close(dd, tag);
364fa9e4066Sahrens 		dd = child_ds;
365fa9e4066Sahrens 		next = nextnext;
366fa9e4066Sahrens 	}
367fa9e4066Sahrens 	rw_exit(&dp->dp_config_rwlock);
368fa9e4066Sahrens 
369ea8dc4b6Seschrock 	if (err) {
370ea8dc4b6Seschrock 		dsl_dir_close(dd, tag);
371ea8dc4b6Seschrock 		if (openedspa)
372ea8dc4b6Seschrock 			spa_close(spa, FTAG);
373ea8dc4b6Seschrock 		return (err);
374ea8dc4b6Seschrock 	}
375ea8dc4b6Seschrock 
376fa9e4066Sahrens 	/*
377fa9e4066Sahrens 	 * It's an error if there's more than one component left, or
378fa9e4066Sahrens 	 * tailp==NULL and there's any component left.
379fa9e4066Sahrens 	 */
380fa9e4066Sahrens 	if (next != NULL &&
381fa9e4066Sahrens 	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
382fa9e4066Sahrens 		/* bad path name */
383fa9e4066Sahrens 		dsl_dir_close(dd, tag);
384fa9e4066Sahrens 		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
385ea8dc4b6Seschrock 		err = ENOENT;
386fa9e4066Sahrens 	}
387fa9e4066Sahrens 	if (tailp)
388fa9e4066Sahrens 		*tailp = next;
389fa9e4066Sahrens 	if (openedspa)
390fa9e4066Sahrens 		spa_close(spa, FTAG);
391ea8dc4b6Seschrock 	*ddp = dd;
392ea8dc4b6Seschrock 	return (err);
393fa9e4066Sahrens }
394fa9e4066Sahrens 
395fa9e4066Sahrens /*
396fa9e4066Sahrens  * Return the dsl_dir_t, and possibly the last component which couldn't
397fa9e4066Sahrens  * be found in *tail.  Return NULL if the path is bogus, or if
398fa9e4066Sahrens  * tail==NULL and we couldn't parse the whole name.  (*tail)[0] == '@'
399fa9e4066Sahrens  * means that the last component is a snapshot.
400fa9e4066Sahrens  */
401ea8dc4b6Seschrock int
402ea8dc4b6Seschrock dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
403fa9e4066Sahrens {
404ea8dc4b6Seschrock 	return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
405fa9e4066Sahrens }
406fa9e4066Sahrens 
4071d452cf5Sahrens uint64_t
408fa9e4066Sahrens dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx)
409fa9e4066Sahrens {
410fa9e4066Sahrens 	objset_t *mos = pds->dd_pool->dp_meta_objset;
411fa9e4066Sahrens 	uint64_t ddobj;
412fa9e4066Sahrens 	dsl_dir_phys_t *dsphys;
413fa9e4066Sahrens 	dmu_buf_t *dbuf;
414fa9e4066Sahrens 
4151649cd4bStabriz 	ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
4161649cd4bStabriz 	    DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
4171d452cf5Sahrens 	VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
4181d452cf5Sahrens 	    name, sizeof (uint64_t), 1, &ddobj, tx));
419ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
420fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
421fa9e4066Sahrens 	dsphys = dbuf->db_data;
422fa9e4066Sahrens 
423fa9e4066Sahrens 	dsphys->dd_creation_time = gethrestime_sec();
424fa9e4066Sahrens 	dsphys->dd_parent_obj = pds->dd_object;
425fa9e4066Sahrens 	dsphys->dd_props_zapobj = zap_create(mos,
426fa9e4066Sahrens 	    DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
427fa9e4066Sahrens 	dsphys->dd_child_dir_zapobj = zap_create(mos,
42887e5029aSahrens 	    DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
429ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
430fa9e4066Sahrens 
4311d452cf5Sahrens 	return (ddobj);
432fa9e4066Sahrens }
433fa9e4066Sahrens 
4341d452cf5Sahrens /* ARGSUSED */
435fa9e4066Sahrens int
4361d452cf5Sahrens dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
437fa9e4066Sahrens {
4381d452cf5Sahrens 	dsl_dir_t *dd = arg1;
4391d452cf5Sahrens 	dsl_pool_t *dp = dd->dd_pool;
440fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
4411d452cf5Sahrens 	int err;
4421d452cf5Sahrens 	uint64_t count;
443fa9e4066Sahrens 
4441d452cf5Sahrens 	/*
4451d452cf5Sahrens 	 * There should be exactly two holds, both from
4461d452cf5Sahrens 	 * dsl_dataset_destroy: one on the dd directory, and one on its
4471d452cf5Sahrens 	 * head ds.  Otherwise, someone is trying to lookup something
4481d452cf5Sahrens 	 * inside this dir while we want to destroy it.  The
4491d452cf5Sahrens 	 * config_rwlock ensures that nobody else opens it after we
4501d452cf5Sahrens 	 * check.
4511d452cf5Sahrens 	 */
4521d452cf5Sahrens 	if (dmu_buf_refcount(dd->dd_dbuf) > 2)
4531d452cf5Sahrens 		return (EBUSY);
454fa9e4066Sahrens 
4551d452cf5Sahrens 	err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
456ea8dc4b6Seschrock 	if (err)
4571d452cf5Sahrens 		return (err);
4581d452cf5Sahrens 	if (count != 0)
4591d452cf5Sahrens 		return (EEXIST);
460fa9e4066Sahrens 
4611d452cf5Sahrens 	return (0);
4621d452cf5Sahrens }
463fa9e4066Sahrens 
4641d452cf5Sahrens void
465ecd6cf80Smarks dsl_dir_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
4661d452cf5Sahrens {
4671d452cf5Sahrens 	dsl_dir_t *dd = arg1;
4681d452cf5Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
4691d452cf5Sahrens 	uint64_t val, obj;
470fa9e4066Sahrens 
4711d452cf5Sahrens 	ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
472fa9e4066Sahrens 	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
473fa9e4066Sahrens 
4741d452cf5Sahrens 	/* Remove our reservation. */
475fa9e4066Sahrens 	val = 0;
476ecd6cf80Smarks 	dsl_dir_set_reservation_sync(dd, &val, cr, tx);
477fa9e4066Sahrens 	ASSERT3U(dd->dd_used_bytes, ==, 0);
478fa9e4066Sahrens 	ASSERT3U(dd->dd_phys->dd_reserved, ==, 0);
479fa9e4066Sahrens 
4801d452cf5Sahrens 	VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
4811d452cf5Sahrens 	VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
482ecd6cf80Smarks 	VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
4831d452cf5Sahrens 	VERIFY(0 == zap_remove(mos,
4841d452cf5Sahrens 	    dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
485fa9e4066Sahrens 
4861d452cf5Sahrens 	obj = dd->dd_object;
4871d452cf5Sahrens 	dsl_dir_close(dd, tag);
4881d452cf5Sahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
489fa9e4066Sahrens }
490fa9e4066Sahrens 
491fa9e4066Sahrens void
492fa9e4066Sahrens dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx)
493fa9e4066Sahrens {
494fa9e4066Sahrens 	dsl_dir_phys_t *dsp;
495fa9e4066Sahrens 	dmu_buf_t *dbuf;
496fa9e4066Sahrens 	int error;
497fa9e4066Sahrens 
4981649cd4bStabriz 	*ddobjp = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
4991649cd4bStabriz 	    DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
500fa9e4066Sahrens 
501fa9e4066Sahrens 	error = zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET,
502fa9e4066Sahrens 	    sizeof (uint64_t), 1, ddobjp, tx);
503fa9e4066Sahrens 	ASSERT3U(error, ==, 0);
504fa9e4066Sahrens 
505ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, *ddobjp, FTAG, &dbuf));
506fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
507fa9e4066Sahrens 	dsp = dbuf->db_data;
508fa9e4066Sahrens 
509fa9e4066Sahrens 	dsp->dd_creation_time = gethrestime_sec();
510fa9e4066Sahrens 	dsp->dd_props_zapobj = zap_create(mos,
511fa9e4066Sahrens 	    DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
512fa9e4066Sahrens 	dsp->dd_child_dir_zapobj = zap_create(mos,
51387e5029aSahrens 	    DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
514fa9e4066Sahrens 
515ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
516fa9e4066Sahrens }
517fa9e4066Sahrens 
518fa9e4066Sahrens void
519a2eea2e1Sahrens dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
520fa9e4066Sahrens {
521a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE,
522a2eea2e1Sahrens 	    dsl_dir_space_available(dd, NULL, 0, TRUE));
523fa9e4066Sahrens 
524fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
525a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dd->dd_used_bytes);
526a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA,
527a2eea2e1Sahrens 	    dd->dd_phys->dd_quota);
528a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
529a2eea2e1Sahrens 	    dd->dd_phys->dd_reserved);
530a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
531a2eea2e1Sahrens 	    dd->dd_phys->dd_compressed_bytes == 0 ? 100 :
532a2eea2e1Sahrens 	    (dd->dd_phys->dd_uncompressed_bytes * 100 /
533a2eea2e1Sahrens 	    dd->dd_phys->dd_compressed_bytes));
534fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
535fa9e4066Sahrens 
536fa9e4066Sahrens 	if (dd->dd_phys->dd_clone_parent_obj) {
537fa9e4066Sahrens 		dsl_dataset_t *ds;
538a2eea2e1Sahrens 		char buf[MAXNAMELEN];
539fa9e4066Sahrens 
540fa9e4066Sahrens 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
541ea8dc4b6Seschrock 		VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
542ea8dc4b6Seschrock 		    dd->dd_phys->dd_clone_parent_obj,
543ea8dc4b6Seschrock 		    NULL, DS_MODE_NONE, FTAG, &ds));
544a2eea2e1Sahrens 		dsl_dataset_name(ds, buf);
545fa9e4066Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
546fa9e4066Sahrens 		rw_exit(&dd->dd_pool->dp_config_rwlock);
547a2eea2e1Sahrens 
548a2eea2e1Sahrens 		dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
549fa9e4066Sahrens 	}
550fa9e4066Sahrens }
551fa9e4066Sahrens 
552fa9e4066Sahrens void
553fa9e4066Sahrens dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
554fa9e4066Sahrens {
555fa9e4066Sahrens 	dsl_pool_t *dp = dd->dd_pool;
556fa9e4066Sahrens 
557fa9e4066Sahrens 	ASSERT(dd->dd_phys);
558fa9e4066Sahrens 
559fa9e4066Sahrens 	if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) {
560fa9e4066Sahrens 		/* up the hold count until we can be written out */
561fa9e4066Sahrens 		dmu_buf_add_ref(dd->dd_dbuf, dd);
562fa9e4066Sahrens 	}
563fa9e4066Sahrens }
564fa9e4066Sahrens 
565fa9e4066Sahrens static int64_t
566fa9e4066Sahrens parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta)
567fa9e4066Sahrens {
568fa9e4066Sahrens 	uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved);
569fa9e4066Sahrens 	uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved);
570fa9e4066Sahrens 	return (new_accounted - old_accounted);
571fa9e4066Sahrens }
572fa9e4066Sahrens 
573fa9e4066Sahrens void
574fa9e4066Sahrens dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
575fa9e4066Sahrens {
576fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
577fa9e4066Sahrens 
578fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
579fa9e4066Sahrens 
580fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
581fa9e4066Sahrens 	ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0);
582fa9e4066Sahrens 	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
583fa9e4066Sahrens 	    dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
584fa9e4066Sahrens 	dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
585fa9e4066Sahrens 	dd->dd_phys->dd_used_bytes = dd->dd_used_bytes;
586fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
587fa9e4066Sahrens 
588fa9e4066Sahrens 	/* release the hold from dsl_dir_dirty */
589ea8dc4b6Seschrock 	dmu_buf_rele(dd->dd_dbuf, dd);
590fa9e4066Sahrens }
591fa9e4066Sahrens 
592fa9e4066Sahrens static uint64_t
593fa9e4066Sahrens dsl_dir_estimated_space(dsl_dir_t *dd)
594fa9e4066Sahrens {
595fa9e4066Sahrens 	int64_t space;
596fa9e4066Sahrens 	int i;
597fa9e4066Sahrens 
598fa9e4066Sahrens 	ASSERT(MUTEX_HELD(&dd->dd_lock));
599fa9e4066Sahrens 
600ea8dc4b6Seschrock 	space = dd->dd_phys->dd_used_bytes;
601fa9e4066Sahrens 	ASSERT(space >= 0);
602fa9e4066Sahrens 	for (i = 0; i < TXG_SIZE; i++) {
603fa9e4066Sahrens 		space += dd->dd_space_towrite[i&TXG_MASK];
604fa9e4066Sahrens 		ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
605fa9e4066Sahrens 	}
606fa9e4066Sahrens 	return (space);
607fa9e4066Sahrens }
608fa9e4066Sahrens 
609fa9e4066Sahrens /*
610fa9e4066Sahrens  * How much space would dd have available if ancestor had delta applied
611fa9e4066Sahrens  * to it?  If ondiskonly is set, we're only interested in what's
612fa9e4066Sahrens  * on-disk, not estimated pending changes.
613fa9e4066Sahrens  */
614a2eea2e1Sahrens uint64_t
615fa9e4066Sahrens dsl_dir_space_available(dsl_dir_t *dd,
616fa9e4066Sahrens     dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
617fa9e4066Sahrens {
618fa9e4066Sahrens 	uint64_t parentspace, myspace, quota, used;
619fa9e4066Sahrens 
620fa9e4066Sahrens 	/*
621fa9e4066Sahrens 	 * If there are no restrictions otherwise, assume we have
622fa9e4066Sahrens 	 * unlimited space available.
623fa9e4066Sahrens 	 */
624fa9e4066Sahrens 	quota = UINT64_MAX;
625fa9e4066Sahrens 	parentspace = UINT64_MAX;
626fa9e4066Sahrens 
627fa9e4066Sahrens 	if (dd->dd_parent != NULL) {
628fa9e4066Sahrens 		parentspace = dsl_dir_space_available(dd->dd_parent,
629fa9e4066Sahrens 		    ancestor, delta, ondiskonly);
630fa9e4066Sahrens 	}
631fa9e4066Sahrens 
632fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
633fa9e4066Sahrens 	if (dd->dd_phys->dd_quota != 0)
634fa9e4066Sahrens 		quota = dd->dd_phys->dd_quota;
635fa9e4066Sahrens 	if (ondiskonly) {
636fa9e4066Sahrens 		used = dd->dd_used_bytes;
637fa9e4066Sahrens 	} else {
638fa9e4066Sahrens 		used = dsl_dir_estimated_space(dd);
639fa9e4066Sahrens 	}
640fa9e4066Sahrens 	if (dd == ancestor)
641fa9e4066Sahrens 		used += delta;
642fa9e4066Sahrens 
643fa9e4066Sahrens 	if (dd->dd_parent == NULL) {
64499653d4eSeschrock 		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
645fa9e4066Sahrens 		quota = MIN(quota, poolsize);
646fa9e4066Sahrens 	}
647fa9e4066Sahrens 
648fa9e4066Sahrens 	if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) {
649fa9e4066Sahrens 		/*
650fa9e4066Sahrens 		 * We have some space reserved, in addition to what our
651fa9e4066Sahrens 		 * parent gave us.
652fa9e4066Sahrens 		 */
653fa9e4066Sahrens 		parentspace += dd->dd_phys->dd_reserved - used;
654fa9e4066Sahrens 	}
655fa9e4066Sahrens 
656fa9e4066Sahrens 	if (used > quota) {
657fa9e4066Sahrens 		/* over quota */
658fa9e4066Sahrens 		myspace = 0;
65999653d4eSeschrock 
66099653d4eSeschrock 		/*
66199653d4eSeschrock 		 * While it's OK to be a little over quota, if
66299653d4eSeschrock 		 * we think we are using more space than there
66399653d4eSeschrock 		 * is in the pool (which is already 1.6% more than
66499653d4eSeschrock 		 * dsl_pool_adjustedsize()), something is very
66599653d4eSeschrock 		 * wrong.
66699653d4eSeschrock 		 */
66799653d4eSeschrock 		ASSERT3U(used, <=, spa_get_space(dd->dd_pool->dp_spa));
668fa9e4066Sahrens 	} else {
669fa9e4066Sahrens 		/*
67099653d4eSeschrock 		 * the lesser of the space provided by our parent and
67199653d4eSeschrock 		 * the space left in our quota
672fa9e4066Sahrens 		 */
673fa9e4066Sahrens 		myspace = MIN(parentspace, quota - used);
674fa9e4066Sahrens 	}
675fa9e4066Sahrens 
676fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
677fa9e4066Sahrens 
678fa9e4066Sahrens 	return (myspace);
679fa9e4066Sahrens }
680fa9e4066Sahrens 
681fa9e4066Sahrens struct tempreserve {
682fa9e4066Sahrens 	list_node_t tr_node;
683fa9e4066Sahrens 	dsl_dir_t *tr_ds;
684fa9e4066Sahrens 	uint64_t tr_size;
685fa9e4066Sahrens };
686fa9e4066Sahrens 
687fa9e4066Sahrens /*
688fa9e4066Sahrens  * Reserve space in this dsl_dir, to be used in this tx's txg.
689fa9e4066Sahrens  * After the space has been dirtied (and thus
690fa9e4066Sahrens  * dsl_dir_willuse_space() has been called), the reservation should
691fa9e4066Sahrens  * be canceled, using dsl_dir_tempreserve_clear().
692fa9e4066Sahrens  */
693fa9e4066Sahrens static int
694fa9e4066Sahrens dsl_dir_tempreserve_impl(dsl_dir_t *dd,
695fa9e4066Sahrens     uint64_t asize, boolean_t netfree, list_t *tr_list, dmu_tx_t *tx)
696fa9e4066Sahrens {
697fa9e4066Sahrens 	uint64_t txg = tx->tx_txg;
698fa9e4066Sahrens 	uint64_t est_used, quota, parent_rsrv;
699fa9e4066Sahrens 	int edquot = EDQUOT;
700fa9e4066Sahrens 	int txgidx = txg & TXG_MASK;
701fa9e4066Sahrens 	int i;
702fa9e4066Sahrens 	struct tempreserve *tr;
703fa9e4066Sahrens 
704fa9e4066Sahrens 	ASSERT3U(txg, !=, 0);
705ea8dc4b6Seschrock 	ASSERT3S(asize, >=, 0);
706fa9e4066Sahrens 
707fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
708fa9e4066Sahrens 	/*
709fa9e4066Sahrens 	 * Check against the dsl_dir's quota.  We don't add in the delta
710fa9e4066Sahrens 	 * when checking for over-quota because they get one free hit.
711fa9e4066Sahrens 	 */
712fa9e4066Sahrens 	est_used = dsl_dir_estimated_space(dd);
713fa9e4066Sahrens 	for (i = 0; i < TXG_SIZE; i++)
714fa9e4066Sahrens 		est_used += dd->dd_tempreserved[i];
715fa9e4066Sahrens 
716fa9e4066Sahrens 	quota = UINT64_MAX;
717fa9e4066Sahrens 
718fa9e4066Sahrens 	if (dd->dd_phys->dd_quota)
719fa9e4066Sahrens 		quota = dd->dd_phys->dd_quota;
720fa9e4066Sahrens 
721fa9e4066Sahrens 	/*
722fa9e4066Sahrens 	 * If this transaction will result in a net free of space, we want
723fa9e4066Sahrens 	 * to let it through, but we have to be careful: the space that it
724fa9e4066Sahrens 	 * frees won't become available until *after* this txg syncs.
725fa9e4066Sahrens 	 * Therefore, to ensure that it's possible to remove files from
726fa9e4066Sahrens 	 * a full pool without inducing transient overcommits, we throttle
727fa9e4066Sahrens 	 * netfree transactions against a quota that is slightly larger,
728fa9e4066Sahrens 	 * but still within the pool's allocation slop.  In cases where
729fa9e4066Sahrens 	 * we're very close to full, this will allow a steady trickle of
730fa9e4066Sahrens 	 * removes to get through.
731fa9e4066Sahrens 	 */
732fa9e4066Sahrens 	if (dd->dd_parent == NULL) {
733fa9e4066Sahrens 		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
734fa9e4066Sahrens 		if (poolsize < quota) {
735fa9e4066Sahrens 			quota = poolsize;
736fa9e4066Sahrens 			edquot = ENOSPC;
737fa9e4066Sahrens 		}
738fa9e4066Sahrens 	} else if (netfree) {
739fa9e4066Sahrens 		quota = UINT64_MAX;
740fa9e4066Sahrens 	}
741fa9e4066Sahrens 
742fa9e4066Sahrens 	/*
743fa9e4066Sahrens 	 * If they are requesting more space, and our current estimate
744fa9e4066Sahrens 	 * is over quota.  They get to try again unless the actual
745ea8dc4b6Seschrock 	 * on-disk is over quota and there are no pending changes (which
746ea8dc4b6Seschrock 	 * may free up space for us).
747fa9e4066Sahrens 	 */
748fa9e4066Sahrens 	if (asize > 0 && est_used > quota) {
749ea8dc4b6Seschrock 		if (dd->dd_space_towrite[txg & TXG_MASK] != 0 ||
750ea8dc4b6Seschrock 		    dd->dd_space_towrite[(txg-1) & TXG_MASK] != 0 ||
751ea8dc4b6Seschrock 		    dd->dd_space_towrite[(txg-2) & TXG_MASK] != 0 ||
752ea8dc4b6Seschrock 		    dd->dd_used_bytes < quota)
753fa9e4066Sahrens 			edquot = ERESTART;
754fa9e4066Sahrens 		dprintf_dd(dd, "failing: used=%lluK est_used = %lluK "
755fa9e4066Sahrens 		    "quota=%lluK tr=%lluK err=%d\n",
756fa9e4066Sahrens 		    dd->dd_used_bytes>>10, est_used>>10,
757fa9e4066Sahrens 		    quota>>10, asize>>10, edquot);
758fa9e4066Sahrens 		mutex_exit(&dd->dd_lock);
759fa9e4066Sahrens 		return (edquot);
760fa9e4066Sahrens 	}
761fa9e4066Sahrens 
762fa9e4066Sahrens 	/* We need to up our estimated delta before dropping dd_lock */
763fa9e4066Sahrens 	dd->dd_tempreserved[txgidx] += asize;
764fa9e4066Sahrens 
765fa9e4066Sahrens 	parent_rsrv = parent_delta(dd, est_used, asize);
766fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
767fa9e4066Sahrens 
768fa9e4066Sahrens 	tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP);
769fa9e4066Sahrens 	tr->tr_ds = dd;
770fa9e4066Sahrens 	tr->tr_size = asize;
771fa9e4066Sahrens 	list_insert_tail(tr_list, tr);
772fa9e4066Sahrens 
773fa9e4066Sahrens 	/* see if it's OK with our parent */
774fa9e4066Sahrens 	if (dd->dd_parent && parent_rsrv) {
775fa9e4066Sahrens 		return (dsl_dir_tempreserve_impl(dd->dd_parent,
776fa9e4066Sahrens 		    parent_rsrv, netfree, tr_list, tx));
777fa9e4066Sahrens 	} else {
778fa9e4066Sahrens 		return (0);
779fa9e4066Sahrens 	}
780fa9e4066Sahrens }
781fa9e4066Sahrens 
782fa9e4066Sahrens /*
783fa9e4066Sahrens  * Reserve space in this dsl_dir, to be used in this tx's txg.
784fa9e4066Sahrens  * After the space has been dirtied (and thus
785fa9e4066Sahrens  * dsl_dir_willuse_space() has been called), the reservation should
786fa9e4066Sahrens  * be canceled, using dsl_dir_tempreserve_clear().
787fa9e4066Sahrens  */
788fa9e4066Sahrens int
789fa9e4066Sahrens dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize,
790fa9e4066Sahrens     uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx)
791fa9e4066Sahrens {
792fa9e4066Sahrens 	int err = 0;
793fa9e4066Sahrens 	list_t *tr_list;
794fa9e4066Sahrens 
795fa9e4066Sahrens 	tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
796fa9e4066Sahrens 	list_create(tr_list, sizeof (struct tempreserve),
797fa9e4066Sahrens 	    offsetof(struct tempreserve, tr_node));
798ea8dc4b6Seschrock 	ASSERT3S(asize, >=, 0);
799ea8dc4b6Seschrock 	ASSERT3S(fsize, >=, 0);
800fa9e4066Sahrens 
801fa9e4066Sahrens 	err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
802fa9e4066Sahrens 	    tr_list, tx);
803fa9e4066Sahrens 
804fa9e4066Sahrens 	if (err == 0) {
805fa9e4066Sahrens 		struct tempreserve *tr;
806fa9e4066Sahrens 
807fa9e4066Sahrens 		err = arc_tempreserve_space(lsize);
808fa9e4066Sahrens 		if (err == 0) {
809fa9e4066Sahrens 			tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP);
810fa9e4066Sahrens 			tr->tr_ds = NULL;
811fa9e4066Sahrens 			tr->tr_size = lsize;
812fa9e4066Sahrens 			list_insert_tail(tr_list, tr);
813fa9e4066Sahrens 		}
814fa9e4066Sahrens 	}
815fa9e4066Sahrens 
816fa9e4066Sahrens 	if (err)
817fa9e4066Sahrens 		dsl_dir_tempreserve_clear(tr_list, tx);
818fa9e4066Sahrens 	else
819fa9e4066Sahrens 		*tr_cookiep = tr_list;
820fa9e4066Sahrens 	return (err);
821fa9e4066Sahrens }
822fa9e4066Sahrens 
823fa9e4066Sahrens /*
824fa9e4066Sahrens  * Clear a temporary reservation that we previously made with
825fa9e4066Sahrens  * dsl_dir_tempreserve_space().
826fa9e4066Sahrens  */
827fa9e4066Sahrens void
828fa9e4066Sahrens dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
829fa9e4066Sahrens {
830fa9e4066Sahrens 	int txgidx = tx->tx_txg & TXG_MASK;
831fa9e4066Sahrens 	list_t *tr_list = tr_cookie;
832fa9e4066Sahrens 	struct tempreserve *tr;
833fa9e4066Sahrens 
834fa9e4066Sahrens 	ASSERT3U(tx->tx_txg, !=, 0);
835fa9e4066Sahrens 
836fa9e4066Sahrens 	while (tr = list_head(tr_list)) {
837fa9e4066Sahrens 		if (tr->tr_ds == NULL) {
838fa9e4066Sahrens 			arc_tempreserve_clear(tr->tr_size);
839fa9e4066Sahrens 		} else {
840fa9e4066Sahrens 			mutex_enter(&tr->tr_ds->dd_lock);
841fa9e4066Sahrens 			ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
842fa9e4066Sahrens 			    tr->tr_size);
843fa9e4066Sahrens 			tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
844fa9e4066Sahrens 			mutex_exit(&tr->tr_ds->dd_lock);
845fa9e4066Sahrens 		}
846fa9e4066Sahrens 		list_remove(tr_list, tr);
847fa9e4066Sahrens 		kmem_free(tr, sizeof (struct tempreserve));
848fa9e4066Sahrens 	}
849fa9e4066Sahrens 
850fa9e4066Sahrens 	kmem_free(tr_list, sizeof (list_t));
851fa9e4066Sahrens }
852fa9e4066Sahrens 
853fa9e4066Sahrens /*
854fa9e4066Sahrens  * Call in open context when we think we're going to write/free space,
855fa9e4066Sahrens  * eg. when dirtying data.  Be conservative (ie. OK to write less than
856fa9e4066Sahrens  * this or free more than this, but don't write more or free less).
857fa9e4066Sahrens  */
858fa9e4066Sahrens void
859fa9e4066Sahrens dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
860fa9e4066Sahrens {
861fa9e4066Sahrens 	int64_t parent_space;
862fa9e4066Sahrens 	uint64_t est_used;
863fa9e4066Sahrens 
864fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
865fa9e4066Sahrens 	if (space > 0)
866fa9e4066Sahrens 		dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;
867fa9e4066Sahrens 
868fa9e4066Sahrens 	est_used = dsl_dir_estimated_space(dd);
869fa9e4066Sahrens 	parent_space = parent_delta(dd, est_used, space);
870fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
871fa9e4066Sahrens 
872fa9e4066Sahrens 	/* Make sure that we clean up dd_space_to* */
873fa9e4066Sahrens 	dsl_dir_dirty(dd, tx);
874fa9e4066Sahrens 
875fa9e4066Sahrens 	/* XXX this is potentially expensive and unnecessary... */
876fa9e4066Sahrens 	if (parent_space && dd->dd_parent)
877fa9e4066Sahrens 		dsl_dir_willuse_space(dd->dd_parent, parent_space, tx);
878fa9e4066Sahrens }
879fa9e4066Sahrens 
880fa9e4066Sahrens /* call from syncing context when we actually write/free space for this dd */
881fa9e4066Sahrens void
882fa9e4066Sahrens dsl_dir_diduse_space(dsl_dir_t *dd,
883fa9e4066Sahrens     int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
884fa9e4066Sahrens {
885fa9e4066Sahrens 	int64_t accounted_delta;
886fa9e4066Sahrens 
887fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
888fa9e4066Sahrens 
889fa9e4066Sahrens 	dsl_dir_dirty(dd, tx);
890fa9e4066Sahrens 
891fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
892fa9e4066Sahrens 	accounted_delta = parent_delta(dd, dd->dd_used_bytes, used);
893fa9e4066Sahrens 	ASSERT(used >= 0 || dd->dd_used_bytes >= -used);
894fa9e4066Sahrens 	ASSERT(compressed >= 0 ||
895fa9e4066Sahrens 	    dd->dd_phys->dd_compressed_bytes >= -compressed);
896fa9e4066Sahrens 	ASSERT(uncompressed >= 0 ||
897fa9e4066Sahrens 	    dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
898fa9e4066Sahrens 	dd->dd_used_bytes += used;
899fa9e4066Sahrens 	dd->dd_phys->dd_uncompressed_bytes += uncompressed;
900fa9e4066Sahrens 	dd->dd_phys->dd_compressed_bytes += compressed;
901fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
902fa9e4066Sahrens 
903fa9e4066Sahrens 	if (dd->dd_parent != NULL) {
904fa9e4066Sahrens 		dsl_dir_diduse_space(dd->dd_parent,
905fa9e4066Sahrens 		    accounted_delta, compressed, uncompressed, tx);
906fa9e4066Sahrens 	}
907fa9e4066Sahrens }
908fa9e4066Sahrens 
909fa9e4066Sahrens static int
9101d452cf5Sahrens dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
911fa9e4066Sahrens {
9121d452cf5Sahrens 	dsl_dir_t *dd = arg1;
9131d452cf5Sahrens 	uint64_t *quotap = arg2;
914fa9e4066Sahrens 	uint64_t new_quota = *quotap;
915fa9e4066Sahrens 	int err = 0;
9161d452cf5Sahrens 	uint64_t towrite;
917fa9e4066Sahrens 
9181d452cf5Sahrens 	if (new_quota == 0)
9191d452cf5Sahrens 		return (0);
920fa9e4066Sahrens 
921fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
9221d452cf5Sahrens 	/*
9231d452cf5Sahrens 	 * If we are doing the preliminary check in open context, and
9241d452cf5Sahrens 	 * there are pending changes, then don't fail it, since the
9251d452cf5Sahrens 	 * pending changes could under-estimat the amount of space to be
9261d452cf5Sahrens 	 * freed up.
9271d452cf5Sahrens 	 */
9281d452cf5Sahrens 	towrite = dd->dd_space_towrite[0] + dd->dd_space_towrite[1] +
9291d452cf5Sahrens 	    dd->dd_space_towrite[2] + dd->dd_space_towrite[3];
9301d452cf5Sahrens 	if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
9311d452cf5Sahrens 	    (new_quota < dd->dd_phys->dd_reserved ||
932fa9e4066Sahrens 	    new_quota < dsl_dir_estimated_space(dd))) {
933fa9e4066Sahrens 		err = ENOSPC;
934fa9e4066Sahrens 	}
935fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
936fa9e4066Sahrens 	return (err);
937fa9e4066Sahrens }
938fa9e4066Sahrens 
939ecd6cf80Smarks /* ARGSUSED */
9401d452cf5Sahrens static void
941ecd6cf80Smarks dsl_dir_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
9421d452cf5Sahrens {
9431d452cf5Sahrens 	dsl_dir_t *dd = arg1;
9441d452cf5Sahrens 	uint64_t *quotap = arg2;
9451d452cf5Sahrens 	uint64_t new_quota = *quotap;
9461d452cf5Sahrens 
9471d452cf5Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
9481d452cf5Sahrens 
9491d452cf5Sahrens 	mutex_enter(&dd->dd_lock);
9501d452cf5Sahrens 	dd->dd_phys->dd_quota = new_quota;
9511d452cf5Sahrens 	mutex_exit(&dd->dd_lock);
952ecd6cf80Smarks 
953ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_QUOTA, dd->dd_pool->dp_spa,
954ecd6cf80Smarks 	    tx, cr, "%lld dataset = %llu ",
955ecd6cf80Smarks 	    (longlong_t)new_quota, dd->dd_phys->dd_head_dataset_obj);
9561d452cf5Sahrens }
9571d452cf5Sahrens 
958fa9e4066Sahrens int
959fa9e4066Sahrens dsl_dir_set_quota(const char *ddname, uint64_t quota)
960fa9e4066Sahrens {
961fa9e4066Sahrens 	dsl_dir_t *dd;
962fa9e4066Sahrens 	int err;
963fa9e4066Sahrens 
964ea8dc4b6Seschrock 	err = dsl_dir_open(ddname, FTAG, &dd, NULL);
965ea8dc4b6Seschrock 	if (err)
966ea8dc4b6Seschrock 		return (err);
967fa9e4066Sahrens 	/*
968fa9e4066Sahrens 	 * If someone removes a file, then tries to set the quota, we
969fa9e4066Sahrens 	 * want to make sure the file freeing takes effect.
970fa9e4066Sahrens 	 */
971fa9e4066Sahrens 	txg_wait_open(dd->dd_pool, 0);
972fa9e4066Sahrens 
9731d452cf5Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
9741d452cf5Sahrens 	    dsl_dir_set_quota_sync, dd, &quota, 0);
975fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
976fa9e4066Sahrens 	return (err);
977fa9e4066Sahrens }
978fa9e4066Sahrens 
979fa9e4066Sahrens static int
9801d452cf5Sahrens dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
981fa9e4066Sahrens {
9821d452cf5Sahrens 	dsl_dir_t *dd = arg1;
9831d452cf5Sahrens 	uint64_t *reservationp = arg2;
984fa9e4066Sahrens 	uint64_t new_reservation = *reservationp;
985fa9e4066Sahrens 	uint64_t used, avail;
986fa9e4066Sahrens 	int64_t delta;
987fa9e4066Sahrens 
988fa9e4066Sahrens 	if (new_reservation > INT64_MAX)
989fa9e4066Sahrens 		return (EOVERFLOW);
990fa9e4066Sahrens 
9911d452cf5Sahrens 	/*
9921d452cf5Sahrens 	 * If we are doing the preliminary check in open context, the
9931d452cf5Sahrens 	 * space estimates may be inaccurate.
9941d452cf5Sahrens 	 */
9951d452cf5Sahrens 	if (!dmu_tx_is_syncing(tx))
9961d452cf5Sahrens 		return (0);
9971d452cf5Sahrens 
998fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
999fa9e4066Sahrens 	used = dd->dd_used_bytes;
1000fa9e4066Sahrens 	delta = MAX(used, new_reservation) -
1001fa9e4066Sahrens 	    MAX(used, dd->dd_phys->dd_reserved);
1002fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
1003fa9e4066Sahrens 
1004fa9e4066Sahrens 	if (dd->dd_parent) {
1005fa9e4066Sahrens 		avail = dsl_dir_space_available(dd->dd_parent,
1006fa9e4066Sahrens 		    NULL, 0, FALSE);
1007fa9e4066Sahrens 	} else {
1008fa9e4066Sahrens 		avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
1009fa9e4066Sahrens 	}
1010fa9e4066Sahrens 
1011fa9e4066Sahrens 	if (delta > 0 && delta > avail)
1012fa9e4066Sahrens 		return (ENOSPC);
1013fa9e4066Sahrens 	if (delta > 0 && dd->dd_phys->dd_quota > 0 &&
1014fa9e4066Sahrens 	    new_reservation > dd->dd_phys->dd_quota)
1015fa9e4066Sahrens 		return (ENOSPC);
10161d452cf5Sahrens 	return (0);
10171d452cf5Sahrens }
10181d452cf5Sahrens 
1019ecd6cf80Smarks /* ARGSUSED */
10201d452cf5Sahrens static void
1021ecd6cf80Smarks dsl_dir_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
10221d452cf5Sahrens {
10231d452cf5Sahrens 	dsl_dir_t *dd = arg1;
10241d452cf5Sahrens 	uint64_t *reservationp = arg2;
10251d452cf5Sahrens 	uint64_t new_reservation = *reservationp;
10261d452cf5Sahrens 	uint64_t used;
10271d452cf5Sahrens 	int64_t delta;
10281d452cf5Sahrens 
10291d452cf5Sahrens 	mutex_enter(&dd->dd_lock);
10301d452cf5Sahrens 	used = dd->dd_used_bytes;
10311d452cf5Sahrens 	delta = MAX(used, new_reservation) -
10321d452cf5Sahrens 	    MAX(used, dd->dd_phys->dd_reserved);
10331d452cf5Sahrens 	mutex_exit(&dd->dd_lock);
1034fa9e4066Sahrens 
1035fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
1036fa9e4066Sahrens 	dd->dd_phys->dd_reserved = new_reservation;
1037fa9e4066Sahrens 
1038fa9e4066Sahrens 	if (dd->dd_parent != NULL) {
1039fa9e4066Sahrens 		/* Roll up this additional usage into our ancestors */
1040fa9e4066Sahrens 		dsl_dir_diduse_space(dd->dd_parent, delta, 0, 0, tx);
1041fa9e4066Sahrens 	}
1042ecd6cf80Smarks 
1043ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_RESERVATION, dd->dd_pool->dp_spa,
1044ecd6cf80Smarks 	    tx, cr, "%lld dataset = %llu",
1045ecd6cf80Smarks 	    (longlong_t)new_reservation, dd->dd_phys->dd_head_dataset_obj);
1046fa9e4066Sahrens }
1047fa9e4066Sahrens 
1048fa9e4066Sahrens int
1049fa9e4066Sahrens dsl_dir_set_reservation(const char *ddname, uint64_t reservation)
1050fa9e4066Sahrens {
1051fa9e4066Sahrens 	dsl_dir_t *dd;
1052fa9e4066Sahrens 	int err;
1053fa9e4066Sahrens 
1054ea8dc4b6Seschrock 	err = dsl_dir_open(ddname, FTAG, &dd, NULL);
1055ea8dc4b6Seschrock 	if (err)
1056ea8dc4b6Seschrock 		return (err);
10571d452cf5Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
10581d452cf5Sahrens 	    dsl_dir_set_reservation_sync, dd, &reservation, 0);
1059fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
1060fa9e4066Sahrens 	return (err);
1061fa9e4066Sahrens }
1062fa9e4066Sahrens 
1063fa9e4066Sahrens static dsl_dir_t *
1064fa9e4066Sahrens closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
1065fa9e4066Sahrens {
1066fa9e4066Sahrens 	for (; ds1; ds1 = ds1->dd_parent) {
1067fa9e4066Sahrens 		dsl_dir_t *dd;
1068fa9e4066Sahrens 		for (dd = ds2; dd; dd = dd->dd_parent) {
1069fa9e4066Sahrens 			if (ds1 == dd)
1070fa9e4066Sahrens 				return (dd);
1071fa9e4066Sahrens 		}
1072fa9e4066Sahrens 	}
1073fa9e4066Sahrens 	return (NULL);
1074fa9e4066Sahrens }
1075fa9e4066Sahrens 
1076fa9e4066Sahrens /*
1077fa9e4066Sahrens  * If delta is applied to dd, how much of that delta would be applied to
1078fa9e4066Sahrens  * ancestor?  Syncing context only.
1079fa9e4066Sahrens  */
1080fa9e4066Sahrens static int64_t
1081fa9e4066Sahrens would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
1082fa9e4066Sahrens {
1083fa9e4066Sahrens 	if (dd == ancestor)
1084fa9e4066Sahrens 		return (delta);
1085fa9e4066Sahrens 
1086fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
1087fa9e4066Sahrens 	delta = parent_delta(dd, dd->dd_used_bytes, delta);
1088fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
1089fa9e4066Sahrens 	return (would_change(dd->dd_parent, delta, ancestor));
1090fa9e4066Sahrens }
1091fa9e4066Sahrens 
10921d452cf5Sahrens struct renamearg {
10931d452cf5Sahrens 	dsl_dir_t *newparent;
10941d452cf5Sahrens 	const char *mynewname;
10951d452cf5Sahrens };
10961d452cf5Sahrens 
1097ecd6cf80Smarks /*ARGSUSED*/
10981d452cf5Sahrens static int
10991d452cf5Sahrens dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1100fa9e4066Sahrens {
11011d452cf5Sahrens 	dsl_dir_t *dd = arg1;
11021d452cf5Sahrens 	struct renamearg *ra = arg2;
1103fa9e4066Sahrens 	dsl_pool_t *dp = dd->dd_pool;
1104fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
11051d452cf5Sahrens 	int err;
11061d452cf5Sahrens 	uint64_t val;
1107fa9e4066Sahrens 
1108fa9e4066Sahrens 	/* There should be 2 references: the open and the dirty */
11091d452cf5Sahrens 	if (dmu_buf_refcount(dd->dd_dbuf) > 2)
1110fa9e4066Sahrens 		return (EBUSY);
1111fa9e4066Sahrens 
11121d452cf5Sahrens 	/* check for existing name */
11131d452cf5Sahrens 	err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
11141d452cf5Sahrens 	    ra->mynewname, 8, 1, &val);
11151d452cf5Sahrens 	if (err == 0)
11161d452cf5Sahrens 		return (EEXIST);
11171d452cf5Sahrens 	if (err != ENOENT)
11181d452cf5Sahrens 		return (err);
11191d452cf5Sahrens 
11201d452cf5Sahrens 	if (ra->newparent != dd->dd_parent) {
112199653d4eSeschrock 		/* is there enough space? */
112299653d4eSeschrock 		uint64_t myspace =
112399653d4eSeschrock 		    MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved);
1124fa9e4066Sahrens 
11251d452cf5Sahrens 		/* no rename into our descendant */
11261d452cf5Sahrens 		if (closest_common_ancestor(dd, ra->newparent) == dd)
1127fa9e4066Sahrens 			return (EINVAL);
1128fa9e4066Sahrens 
11291d452cf5Sahrens 		if (err = dsl_dir_transfer_possible(dd->dd_parent,
11301d452cf5Sahrens 		    ra->newparent, myspace))
113199653d4eSeschrock 			return (err);
11321d452cf5Sahrens 	}
1133fa9e4066Sahrens 
11341d452cf5Sahrens 	return (0);
11351d452cf5Sahrens }
11361d452cf5Sahrens 
11371d452cf5Sahrens static void
1138ecd6cf80Smarks dsl_dir_rename_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
11391d452cf5Sahrens {
11401d452cf5Sahrens 	dsl_dir_t *dd = arg1;
11411d452cf5Sahrens 	struct renamearg *ra = arg2;
11421d452cf5Sahrens 	dsl_pool_t *dp = dd->dd_pool;
11431d452cf5Sahrens 	objset_t *mos = dp->dp_meta_objset;
11441d452cf5Sahrens 	int err;
11451d452cf5Sahrens 
11461d452cf5Sahrens 	ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
11471d452cf5Sahrens 
11481d452cf5Sahrens 	if (ra->newparent != dd->dd_parent) {
11491d452cf5Sahrens 		uint64_t myspace =
11501d452cf5Sahrens 		    MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved);
1151fa9e4066Sahrens 
1152fa9e4066Sahrens 		dsl_dir_diduse_space(dd->dd_parent, -myspace,
1153fa9e4066Sahrens 		    -dd->dd_phys->dd_compressed_bytes,
1154fa9e4066Sahrens 		    -dd->dd_phys->dd_uncompressed_bytes, tx);
11551d452cf5Sahrens 		dsl_dir_diduse_space(ra->newparent, myspace,
1156fa9e4066Sahrens 		    dd->dd_phys->dd_compressed_bytes,
1157fa9e4066Sahrens 		    dd->dd_phys->dd_uncompressed_bytes, tx);
1158fa9e4066Sahrens 	}
1159fa9e4066Sahrens 
1160fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
1161fa9e4066Sahrens 
1162fa9e4066Sahrens 	/* remove from old parent zapobj */
1163fa9e4066Sahrens 	err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
1164fa9e4066Sahrens 	    dd->dd_myname, tx);
1165fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1166fa9e4066Sahrens 
11671d452cf5Sahrens 	(void) strcpy(dd->dd_myname, ra->mynewname);
1168fa9e4066Sahrens 	dsl_dir_close(dd->dd_parent, dd);
11691d452cf5Sahrens 	dd->dd_phys->dd_parent_obj = ra->newparent->dd_object;
1170ea8dc4b6Seschrock 	VERIFY(0 == dsl_dir_open_obj(dd->dd_pool,
11711d452cf5Sahrens 	    ra->newparent->dd_object, NULL, dd, &dd->dd_parent));
1172fa9e4066Sahrens 
1173fa9e4066Sahrens 	/* add to new parent zapobj */
11741d452cf5Sahrens 	err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
1175fa9e4066Sahrens 	    dd->dd_myname, 8, 1, &dd->dd_object, tx);
1176fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1177ecd6cf80Smarks 
1178ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa,
1179ecd6cf80Smarks 	    tx, cr, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
11801d452cf5Sahrens }
1181fa9e4066Sahrens 
11821d452cf5Sahrens int
11831d452cf5Sahrens dsl_dir_rename(dsl_dir_t *dd, const char *newname)
11841d452cf5Sahrens {
11851d452cf5Sahrens 	struct renamearg ra;
11861d452cf5Sahrens 	int err;
11871d452cf5Sahrens 
11881d452cf5Sahrens 	/* new parent should exist */
11891d452cf5Sahrens 	err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname);
11901d452cf5Sahrens 	if (err)
11911d452cf5Sahrens 		return (err);
11921d452cf5Sahrens 
11931d452cf5Sahrens 	/* can't rename to different pool */
11941d452cf5Sahrens 	if (dd->dd_pool != ra.newparent->dd_pool) {
11951d452cf5Sahrens 		err = ENXIO;
11961d452cf5Sahrens 		goto out;
11971d452cf5Sahrens 	}
11981d452cf5Sahrens 
11991d452cf5Sahrens 	/* new name should not already exist */
12001d452cf5Sahrens 	if (ra.mynewname == NULL) {
12011d452cf5Sahrens 		err = EEXIST;
12021d452cf5Sahrens 		goto out;
12031d452cf5Sahrens 	}
12041d452cf5Sahrens 
12051d452cf5Sahrens 	err = dsl_sync_task_do(dd->dd_pool,
12061d452cf5Sahrens 	    dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);
12071d452cf5Sahrens 
12081d452cf5Sahrens out:
12091d452cf5Sahrens 	dsl_dir_close(ra.newparent, FTAG);
12101d452cf5Sahrens 	return (err);
1211fa9e4066Sahrens }
121299653d4eSeschrock 
121399653d4eSeschrock int
121499653d4eSeschrock dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
121599653d4eSeschrock {
121699653d4eSeschrock 	dsl_dir_t *ancestor;
121799653d4eSeschrock 	int64_t adelta;
121899653d4eSeschrock 	uint64_t avail;
121999653d4eSeschrock 
122099653d4eSeschrock 	ancestor = closest_common_ancestor(sdd, tdd);
122199653d4eSeschrock 	adelta = would_change(sdd, -space, ancestor);
122299653d4eSeschrock 	avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
122399653d4eSeschrock 	if (avail < space)
122499653d4eSeschrock 		return (ENOSPC);
122599653d4eSeschrock 
122699653d4eSeschrock 	return (0);
122799653d4eSeschrock }
1228