xref: /illumos-gate/usr/src/uts/common/fs/zfs/dsl_dir.c (revision 74e7dc986c89efca1f2e4451c7a572e05e4a6e4f)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
25fa9e4066Sahrens 
26fa9e4066Sahrens #include <sys/dmu.h>
27a9799022Sck #include <sys/dmu_objset.h>
28fa9e4066Sahrens #include <sys/dmu_tx.h>
29fa9e4066Sahrens #include <sys/dsl_dataset.h>
30fa9e4066Sahrens #include <sys/dsl_dir.h>
31fa9e4066Sahrens #include <sys/dsl_prop.h>
321d452cf5Sahrens #include <sys/dsl_synctask.h>
33ecd6cf80Smarks #include <sys/dsl_deleg.h>
34fa9e4066Sahrens #include <sys/spa.h>
35fa9e4066Sahrens #include <sys/zap.h>
36fa9e4066Sahrens #include <sys/zio.h>
37fa9e4066Sahrens #include <sys/arc.h>
38ecd6cf80Smarks #include <sys/sunddi.h>
39fa9e4066Sahrens #include "zfs_namecheck.h"
40fa9e4066Sahrens 
41a9799022Sck static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
42ecd6cf80Smarks static void dsl_dir_set_reservation_sync(void *arg1, void *arg2,
43ecd6cf80Smarks     cred_t *cr, dmu_tx_t *tx);
44fa9e4066Sahrens 
45fa9e4066Sahrens 
46fa9e4066Sahrens /* ARGSUSED */
47fa9e4066Sahrens static void
48fa9e4066Sahrens dsl_dir_evict(dmu_buf_t *db, void *arg)
49fa9e4066Sahrens {
50fa9e4066Sahrens 	dsl_dir_t *dd = arg;
51fa9e4066Sahrens 	dsl_pool_t *dp = dd->dd_pool;
52fa9e4066Sahrens 	int t;
53fa9e4066Sahrens 
54fa9e4066Sahrens 	for (t = 0; t < TXG_SIZE; t++) {
55fa9e4066Sahrens 		ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
56fa9e4066Sahrens 		ASSERT(dd->dd_tempreserved[t] == 0);
57fa9e4066Sahrens 		ASSERT(dd->dd_space_towrite[t] == 0);
58fa9e4066Sahrens 	}
59fa9e4066Sahrens 
60fa9e4066Sahrens 	if (dd->dd_parent)
61fa9e4066Sahrens 		dsl_dir_close(dd->dd_parent, dd);
62fa9e4066Sahrens 
63fa9e4066Sahrens 	spa_close(dd->dd_pool->dp_spa, dd);
64fa9e4066Sahrens 
65fa9e4066Sahrens 	/*
66fa9e4066Sahrens 	 * The props callback list should be empty since they hold the
67fa9e4066Sahrens 	 * dir open.
68fa9e4066Sahrens 	 */
69fa9e4066Sahrens 	list_destroy(&dd->dd_prop_cbs);
705ad82045Snd 	mutex_destroy(&dd->dd_lock);
71fa9e4066Sahrens 	kmem_free(dd, sizeof (dsl_dir_t));
72fa9e4066Sahrens }
73fa9e4066Sahrens 
74ea8dc4b6Seschrock int
75fa9e4066Sahrens dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
76ea8dc4b6Seschrock     const char *tail, void *tag, dsl_dir_t **ddp)
77fa9e4066Sahrens {
78fa9e4066Sahrens 	dmu_buf_t *dbuf;
79fa9e4066Sahrens 	dsl_dir_t *dd;
80ea8dc4b6Seschrock 	int err;
81fa9e4066Sahrens 
82fa9e4066Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
83fa9e4066Sahrens 	    dsl_pool_sync_context(dp));
84fa9e4066Sahrens 
85ea8dc4b6Seschrock 	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
86ea8dc4b6Seschrock 	if (err)
87ea8dc4b6Seschrock 		return (err);
88fa9e4066Sahrens 	dd = dmu_buf_get_user(dbuf);
89fa9e4066Sahrens #ifdef ZFS_DEBUG
90fa9e4066Sahrens 	{
91fa9e4066Sahrens 		dmu_object_info_t doi;
92fa9e4066Sahrens 		dmu_object_info_from_db(dbuf, &doi);
931649cd4bStabriz 		ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
94*74e7dc98SMatthew Ahrens 		ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
95fa9e4066Sahrens 	}
96fa9e4066Sahrens #endif
97fa9e4066Sahrens 	if (dd == NULL) {
98fa9e4066Sahrens 		dsl_dir_t *winner;
99fa9e4066Sahrens 		int err;
100fa9e4066Sahrens 
101fa9e4066Sahrens 		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
102fa9e4066Sahrens 		dd->dd_object = ddobj;
103fa9e4066Sahrens 		dd->dd_dbuf = dbuf;
104fa9e4066Sahrens 		dd->dd_pool = dp;
105fa9e4066Sahrens 		dd->dd_phys = dbuf->db_data;
1065ad82045Snd 		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
107fa9e4066Sahrens 
108fa9e4066Sahrens 		list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
109fa9e4066Sahrens 		    offsetof(dsl_prop_cb_record_t, cbr_node));
110fa9e4066Sahrens 
111fa9e4066Sahrens 		if (dd->dd_phys->dd_parent_obj) {
112ea8dc4b6Seschrock 			err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
113ea8dc4b6Seschrock 			    NULL, dd, &dd->dd_parent);
114*74e7dc98SMatthew Ahrens 			if (err)
115*74e7dc98SMatthew Ahrens 				goto errout;
116fa9e4066Sahrens 			if (tail) {
117fa9e4066Sahrens #ifdef ZFS_DEBUG
118fa9e4066Sahrens 				uint64_t foundobj;
119fa9e4066Sahrens 
120fa9e4066Sahrens 				err = zap_lookup(dp->dp_meta_objset,
121e7437265Sahrens 				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
122fa9e4066Sahrens 				    tail, sizeof (foundobj), 1, &foundobj);
123ea8dc4b6Seschrock 				ASSERT(err || foundobj == ddobj);
124fa9e4066Sahrens #endif
125fa9e4066Sahrens 				(void) strcpy(dd->dd_myname, tail);
126fa9e4066Sahrens 			} else {
127fa9e4066Sahrens 				err = zap_value_search(dp->dp_meta_objset,
128e7437265Sahrens 				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
129e7437265Sahrens 				    ddobj, 0, dd->dd_myname);
130ea8dc4b6Seschrock 			}
131*74e7dc98SMatthew Ahrens 			if (err)
132*74e7dc98SMatthew Ahrens 				goto errout;
133fa9e4066Sahrens 		} else {
134fa9e4066Sahrens 			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
135fa9e4066Sahrens 		}
136fa9e4066Sahrens 
137fa9e4066Sahrens 		winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
138fa9e4066Sahrens 		    dsl_dir_evict);
139fa9e4066Sahrens 		if (winner) {
140fa9e4066Sahrens 			if (dd->dd_parent)
141fa9e4066Sahrens 				dsl_dir_close(dd->dd_parent, dd);
1425ad82045Snd 			mutex_destroy(&dd->dd_lock);
143fa9e4066Sahrens 			kmem_free(dd, sizeof (dsl_dir_t));
144fa9e4066Sahrens 			dd = winner;
145fa9e4066Sahrens 		} else {
146fa9e4066Sahrens 			spa_open_ref(dp->dp_spa, dd);
147fa9e4066Sahrens 		}
148fa9e4066Sahrens 	}
149fa9e4066Sahrens 
150fa9e4066Sahrens 	/*
151fa9e4066Sahrens 	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
152fa9e4066Sahrens 	 * holds on the spa.  We need the open-to-close holds because
153fa9e4066Sahrens 	 * otherwise the spa_refcnt wouldn't change when we open a
154fa9e4066Sahrens 	 * dir which the spa also has open, so we could incorrectly
155fa9e4066Sahrens 	 * think it was OK to unload/export/destroy the pool.  We need
156fa9e4066Sahrens 	 * the instantiate-to-evict hold because the dsl_dir_t has a
157fa9e4066Sahrens 	 * pointer to the dd_pool, which has a pointer to the spa_t.
158fa9e4066Sahrens 	 */
159fa9e4066Sahrens 	spa_open_ref(dp->dp_spa, tag);
160fa9e4066Sahrens 	ASSERT3P(dd->dd_pool, ==, dp);
161fa9e4066Sahrens 	ASSERT3U(dd->dd_object, ==, ddobj);
162fa9e4066Sahrens 	ASSERT3P(dd->dd_dbuf, ==, dbuf);
163ea8dc4b6Seschrock 	*ddp = dd;
164ea8dc4b6Seschrock 	return (0);
165*74e7dc98SMatthew Ahrens 
166*74e7dc98SMatthew Ahrens errout:
167*74e7dc98SMatthew Ahrens 	if (dd->dd_parent)
168*74e7dc98SMatthew Ahrens 		dsl_dir_close(dd->dd_parent, dd);
169*74e7dc98SMatthew Ahrens 	mutex_destroy(&dd->dd_lock);
170*74e7dc98SMatthew Ahrens 	kmem_free(dd, sizeof (dsl_dir_t));
171*74e7dc98SMatthew Ahrens 	dmu_buf_rele(dbuf, tag);
172*74e7dc98SMatthew Ahrens 	return (err);
173*74e7dc98SMatthew Ahrens 
174fa9e4066Sahrens }
175fa9e4066Sahrens 
176fa9e4066Sahrens void
177fa9e4066Sahrens dsl_dir_close(dsl_dir_t *dd, void *tag)
178fa9e4066Sahrens {
179fa9e4066Sahrens 	dprintf_dd(dd, "%s\n", "");
180fa9e4066Sahrens 	spa_close(dd->dd_pool->dp_spa, tag);
181ea8dc4b6Seschrock 	dmu_buf_rele(dd->dd_dbuf, tag);
182fa9e4066Sahrens }
183fa9e4066Sahrens 
18415f66a7fSek /* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
185fa9e4066Sahrens void
186fa9e4066Sahrens dsl_dir_name(dsl_dir_t *dd, char *buf)
187fa9e4066Sahrens {
188fa9e4066Sahrens 	if (dd->dd_parent) {
189fa9e4066Sahrens 		dsl_dir_name(dd->dd_parent, buf);
190fa9e4066Sahrens 		(void) strcat(buf, "/");
191fa9e4066Sahrens 	} else {
192fa9e4066Sahrens 		buf[0] = '\0';
193fa9e4066Sahrens 	}
194fa9e4066Sahrens 	if (!MUTEX_HELD(&dd->dd_lock)) {
195fa9e4066Sahrens 		/*
196fa9e4066Sahrens 		 * recursive mutex so that we can use
197fa9e4066Sahrens 		 * dprintf_dd() with dd_lock held
198fa9e4066Sahrens 		 */
199fa9e4066Sahrens 		mutex_enter(&dd->dd_lock);
200fa9e4066Sahrens 		(void) strcat(buf, dd->dd_myname);
201fa9e4066Sahrens 		mutex_exit(&dd->dd_lock);
202fa9e4066Sahrens 	} else {
203fa9e4066Sahrens 		(void) strcat(buf, dd->dd_myname);
204fa9e4066Sahrens 	}
205fa9e4066Sahrens }
206fa9e4066Sahrens 
207b7661cccSmmusante /* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */
208b7661cccSmmusante int
209b7661cccSmmusante dsl_dir_namelen(dsl_dir_t *dd)
210b7661cccSmmusante {
211b7661cccSmmusante 	int result = 0;
212b7661cccSmmusante 
213b7661cccSmmusante 	if (dd->dd_parent) {
214b7661cccSmmusante 		/* parent's name + 1 for the "/" */
215b7661cccSmmusante 		result = dsl_dir_namelen(dd->dd_parent) + 1;
216b7661cccSmmusante 	}
217b7661cccSmmusante 
218b7661cccSmmusante 	if (!MUTEX_HELD(&dd->dd_lock)) {
219b7661cccSmmusante 		/* see dsl_dir_name */
220b7661cccSmmusante 		mutex_enter(&dd->dd_lock);
221b7661cccSmmusante 		result += strlen(dd->dd_myname);
222b7661cccSmmusante 		mutex_exit(&dd->dd_lock);
223b7661cccSmmusante 	} else {
224b7661cccSmmusante 		result += strlen(dd->dd_myname);
225b7661cccSmmusante 	}
226b7661cccSmmusante 
227b7661cccSmmusante 	return (result);
228b7661cccSmmusante }
229b7661cccSmmusante 
230fa9e4066Sahrens int
231fa9e4066Sahrens dsl_dir_is_private(dsl_dir_t *dd)
232fa9e4066Sahrens {
233fa9e4066Sahrens 	int rv = FALSE;
234fa9e4066Sahrens 
235fa9e4066Sahrens 	if (dd->dd_parent && dsl_dir_is_private(dd->dd_parent))
236fa9e4066Sahrens 		rv = TRUE;
237fa9e4066Sahrens 	if (dataset_name_hidden(dd->dd_myname))
238fa9e4066Sahrens 		rv = TRUE;
239fa9e4066Sahrens 	return (rv);
240fa9e4066Sahrens }
241fa9e4066Sahrens 
242fa9e4066Sahrens 
243fa9e4066Sahrens static int
244fa9e4066Sahrens getcomponent(const char *path, char *component, const char **nextp)
245fa9e4066Sahrens {
246fa9e4066Sahrens 	char *p;
247fa9e4066Sahrens 	if (path == NULL)
248203a47d8Snd 		return (ENOENT);
249fa9e4066Sahrens 	/* This would be a good place to reserve some namespace... */
250fa9e4066Sahrens 	p = strpbrk(path, "/@");
251fa9e4066Sahrens 	if (p && (p[1] == '/' || p[1] == '@')) {
252fa9e4066Sahrens 		/* two separators in a row */
253fa9e4066Sahrens 		return (EINVAL);
254fa9e4066Sahrens 	}
255fa9e4066Sahrens 	if (p == NULL || p == path) {
256fa9e4066Sahrens 		/*
257fa9e4066Sahrens 		 * if the first thing is an @ or /, it had better be an
258fa9e4066Sahrens 		 * @ and it had better not have any more ats or slashes,
259fa9e4066Sahrens 		 * and it had better have something after the @.
260fa9e4066Sahrens 		 */
261fa9e4066Sahrens 		if (p != NULL &&
262fa9e4066Sahrens 		    (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
263fa9e4066Sahrens 			return (EINVAL);
264fa9e4066Sahrens 		if (strlen(path) >= MAXNAMELEN)
265fa9e4066Sahrens 			return (ENAMETOOLONG);
266fa9e4066Sahrens 		(void) strcpy(component, path);
267fa9e4066Sahrens 		p = NULL;
268fa9e4066Sahrens 	} else if (p[0] == '/') {
269fa9e4066Sahrens 		if (p-path >= MAXNAMELEN)
270fa9e4066Sahrens 			return (ENAMETOOLONG);
271fa9e4066Sahrens 		(void) strncpy(component, path, p - path);
272fa9e4066Sahrens 		component[p-path] = '\0';
273fa9e4066Sahrens 		p++;
274fa9e4066Sahrens 	} else if (p[0] == '@') {
275fa9e4066Sahrens 		/*
276fa9e4066Sahrens 		 * if the next separator is an @, there better not be
277fa9e4066Sahrens 		 * any more slashes.
278fa9e4066Sahrens 		 */
279fa9e4066Sahrens 		if (strchr(path, '/'))
280fa9e4066Sahrens 			return (EINVAL);
281fa9e4066Sahrens 		if (p-path >= MAXNAMELEN)
282fa9e4066Sahrens 			return (ENAMETOOLONG);
283fa9e4066Sahrens 		(void) strncpy(component, path, p - path);
284fa9e4066Sahrens 		component[p-path] = '\0';
285fa9e4066Sahrens 	} else {
286fa9e4066Sahrens 		ASSERT(!"invalid p");
287fa9e4066Sahrens 	}
288fa9e4066Sahrens 	*nextp = p;
289fa9e4066Sahrens 	return (0);
290fa9e4066Sahrens }
291fa9e4066Sahrens 
292fa9e4066Sahrens /*
293fa9e4066Sahrens  * same as dsl_open_dir, ignore the first component of name and use the
294fa9e4066Sahrens  * spa instead
295fa9e4066Sahrens  */
296ea8dc4b6Seschrock int
297ea8dc4b6Seschrock dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
298ea8dc4b6Seschrock     dsl_dir_t **ddp, const char **tailp)
299fa9e4066Sahrens {
300fa9e4066Sahrens 	char buf[MAXNAMELEN];
301fa9e4066Sahrens 	const char *next, *nextnext = NULL;
302fa9e4066Sahrens 	int err;
303fa9e4066Sahrens 	dsl_dir_t *dd;
304fa9e4066Sahrens 	dsl_pool_t *dp;
305fa9e4066Sahrens 	uint64_t ddobj;
306fa9e4066Sahrens 	int openedspa = FALSE;
307fa9e4066Sahrens 
308fa9e4066Sahrens 	dprintf("%s\n", name);
309fa9e4066Sahrens 
310fa9e4066Sahrens 	err = getcomponent(name, buf, &next);
311fa9e4066Sahrens 	if (err)
312ea8dc4b6Seschrock 		return (err);
313fa9e4066Sahrens 	if (spa == NULL) {
314fa9e4066Sahrens 		err = spa_open(buf, &spa, FTAG);
315fa9e4066Sahrens 		if (err) {
316fa9e4066Sahrens 			dprintf("spa_open(%s) failed\n", buf);
317ea8dc4b6Seschrock 			return (err);
318fa9e4066Sahrens 		}
319fa9e4066Sahrens 		openedspa = TRUE;
320fa9e4066Sahrens 
321fa9e4066Sahrens 		/* XXX this assertion belongs in spa_open */
322fa9e4066Sahrens 		ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
323fa9e4066Sahrens 	}
324fa9e4066Sahrens 
325fa9e4066Sahrens 	dp = spa_get_dsl(spa);
326fa9e4066Sahrens 
327fa9e4066Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
328ea8dc4b6Seschrock 	err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
329ea8dc4b6Seschrock 	if (err) {
330ea8dc4b6Seschrock 		rw_exit(&dp->dp_config_rwlock);
331ea8dc4b6Seschrock 		if (openedspa)
332ea8dc4b6Seschrock 			spa_close(spa, FTAG);
333ea8dc4b6Seschrock 		return (err);
334ea8dc4b6Seschrock 	}
335ea8dc4b6Seschrock 
336fa9e4066Sahrens 	while (next != NULL) {
337fa9e4066Sahrens 		dsl_dir_t *child_ds;
338fa9e4066Sahrens 		err = getcomponent(next, buf, &nextnext);
339ea8dc4b6Seschrock 		if (err)
340ea8dc4b6Seschrock 			break;
341fa9e4066Sahrens 		ASSERT(next[0] != '\0');
342fa9e4066Sahrens 		if (next[0] == '@')
343fa9e4066Sahrens 			break;
344fa9e4066Sahrens 		dprintf("looking up %s in obj%lld\n",
345fa9e4066Sahrens 		    buf, dd->dd_phys->dd_child_dir_zapobj);
346fa9e4066Sahrens 
347fa9e4066Sahrens 		err = zap_lookup(dp->dp_meta_objset,
348fa9e4066Sahrens 		    dd->dd_phys->dd_child_dir_zapobj,
349fa9e4066Sahrens 		    buf, sizeof (ddobj), 1, &ddobj);
350ea8dc4b6Seschrock 		if (err) {
351ea8dc4b6Seschrock 			if (err == ENOENT)
352ea8dc4b6Seschrock 				err = 0;
353fa9e4066Sahrens 			break;
354fa9e4066Sahrens 		}
355fa9e4066Sahrens 
356ea8dc4b6Seschrock 		err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
357ea8dc4b6Seschrock 		if (err)
358ea8dc4b6Seschrock 			break;
359fa9e4066Sahrens 		dsl_dir_close(dd, tag);
360fa9e4066Sahrens 		dd = child_ds;
361fa9e4066Sahrens 		next = nextnext;
362fa9e4066Sahrens 	}
363fa9e4066Sahrens 	rw_exit(&dp->dp_config_rwlock);
364fa9e4066Sahrens 
365ea8dc4b6Seschrock 	if (err) {
366ea8dc4b6Seschrock 		dsl_dir_close(dd, tag);
367ea8dc4b6Seschrock 		if (openedspa)
368ea8dc4b6Seschrock 			spa_close(spa, FTAG);
369ea8dc4b6Seschrock 		return (err);
370ea8dc4b6Seschrock 	}
371ea8dc4b6Seschrock 
372fa9e4066Sahrens 	/*
373fa9e4066Sahrens 	 * It's an error if there's more than one component left, or
374fa9e4066Sahrens 	 * tailp==NULL and there's any component left.
375fa9e4066Sahrens 	 */
376fa9e4066Sahrens 	if (next != NULL &&
377fa9e4066Sahrens 	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
378fa9e4066Sahrens 		/* bad path name */
379fa9e4066Sahrens 		dsl_dir_close(dd, tag);
380fa9e4066Sahrens 		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
381ea8dc4b6Seschrock 		err = ENOENT;
382fa9e4066Sahrens 	}
383fa9e4066Sahrens 	if (tailp)
384fa9e4066Sahrens 		*tailp = next;
385fa9e4066Sahrens 	if (openedspa)
386fa9e4066Sahrens 		spa_close(spa, FTAG);
387ea8dc4b6Seschrock 	*ddp = dd;
388ea8dc4b6Seschrock 	return (err);
389fa9e4066Sahrens }
390fa9e4066Sahrens 
391fa9e4066Sahrens /*
392fa9e4066Sahrens  * Return the dsl_dir_t, and possibly the last component which couldn't
393fa9e4066Sahrens  * be found in *tail.  Return NULL if the path is bogus, or if
394fa9e4066Sahrens  * tail==NULL and we couldn't parse the whole name.  (*tail)[0] == '@'
395fa9e4066Sahrens  * means that the last component is a snapshot.
396fa9e4066Sahrens  */
397ea8dc4b6Seschrock int
398ea8dc4b6Seschrock dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
399fa9e4066Sahrens {
400ea8dc4b6Seschrock 	return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
401fa9e4066Sahrens }
402fa9e4066Sahrens 
4031d452cf5Sahrens uint64_t
404088f3894Sahrens dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
405088f3894Sahrens     dmu_tx_t *tx)
406fa9e4066Sahrens {
407088f3894Sahrens 	objset_t *mos = dp->dp_meta_objset;
408fa9e4066Sahrens 	uint64_t ddobj;
409fa9e4066Sahrens 	dsl_dir_phys_t *dsphys;
410fa9e4066Sahrens 	dmu_buf_t *dbuf;
411fa9e4066Sahrens 
4121649cd4bStabriz 	ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
4131649cd4bStabriz 	    DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
414088f3894Sahrens 	if (pds) {
415088f3894Sahrens 		VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
416088f3894Sahrens 		    name, sizeof (uint64_t), 1, &ddobj, tx));
417088f3894Sahrens 	} else {
418088f3894Sahrens 		/* it's the root dir */
419088f3894Sahrens 		VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
420088f3894Sahrens 		    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
421088f3894Sahrens 	}
422ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
423fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
424fa9e4066Sahrens 	dsphys = dbuf->db_data;
425fa9e4066Sahrens 
426fa9e4066Sahrens 	dsphys->dd_creation_time = gethrestime_sec();
427088f3894Sahrens 	if (pds)
428088f3894Sahrens 		dsphys->dd_parent_obj = pds->dd_object;
429fa9e4066Sahrens 	dsphys->dd_props_zapobj = zap_create(mos,
430fa9e4066Sahrens 	    DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
431fa9e4066Sahrens 	dsphys->dd_child_dir_zapobj = zap_create(mos,
43287e5029aSahrens 	    DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
433*74e7dc98SMatthew Ahrens 	if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
434*74e7dc98SMatthew Ahrens 		dsphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
435ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
436fa9e4066Sahrens 
4371d452cf5Sahrens 	return (ddobj);
438fa9e4066Sahrens }
439fa9e4066Sahrens 
4401d452cf5Sahrens /* ARGSUSED */
441fa9e4066Sahrens int
4421d452cf5Sahrens dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
443fa9e4066Sahrens {
4441d452cf5Sahrens 	dsl_dir_t *dd = arg1;
4451d452cf5Sahrens 	dsl_pool_t *dp = dd->dd_pool;
446fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
4471d452cf5Sahrens 	int err;
4481d452cf5Sahrens 	uint64_t count;
449fa9e4066Sahrens 
4501d452cf5Sahrens 	/*
4511d452cf5Sahrens 	 * There should be exactly two holds, both from
4521d452cf5Sahrens 	 * dsl_dataset_destroy: one on the dd directory, and one on its
4531d452cf5Sahrens 	 * head ds.  Otherwise, someone is trying to lookup something
4541d452cf5Sahrens 	 * inside this dir while we want to destroy it.  The
4551d452cf5Sahrens 	 * config_rwlock ensures that nobody else opens it after we
4561d452cf5Sahrens 	 * check.
4571d452cf5Sahrens 	 */
4581d452cf5Sahrens 	if (dmu_buf_refcount(dd->dd_dbuf) > 2)
4591d452cf5Sahrens 		return (EBUSY);
460fa9e4066Sahrens 
4611d452cf5Sahrens 	err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
462ea8dc4b6Seschrock 	if (err)
4631d452cf5Sahrens 		return (err);
4641d452cf5Sahrens 	if (count != 0)
4651d452cf5Sahrens 		return (EEXIST);
466fa9e4066Sahrens 
4671d452cf5Sahrens 	return (0);
4681d452cf5Sahrens }
469fa9e4066Sahrens 
4701d452cf5Sahrens void
471ecd6cf80Smarks dsl_dir_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
4721d452cf5Sahrens {
4731d452cf5Sahrens 	dsl_dir_t *dd = arg1;
4741d452cf5Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
4751d452cf5Sahrens 	uint64_t val, obj;
476*74e7dc98SMatthew Ahrens 	dd_used_t t;
477fa9e4066Sahrens 
4781d452cf5Sahrens 	ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
479fa9e4066Sahrens 	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
480fa9e4066Sahrens 
4811d452cf5Sahrens 	/* Remove our reservation. */
482fa9e4066Sahrens 	val = 0;
483ecd6cf80Smarks 	dsl_dir_set_reservation_sync(dd, &val, cr, tx);
484*74e7dc98SMatthew Ahrens 	ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0);
485fa9e4066Sahrens 	ASSERT3U(dd->dd_phys->dd_reserved, ==, 0);
486*74e7dc98SMatthew Ahrens 	for (t = 0; t < DD_USED_NUM; t++)
487*74e7dc98SMatthew Ahrens 		ASSERT3U(dd->dd_phys->dd_used_breakdown[t], ==, 0);
488fa9e4066Sahrens 
4891d452cf5Sahrens 	VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
4901d452cf5Sahrens 	VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
491ecd6cf80Smarks 	VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
4921d452cf5Sahrens 	VERIFY(0 == zap_remove(mos,
4931d452cf5Sahrens 	    dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
494fa9e4066Sahrens 
4951d452cf5Sahrens 	obj = dd->dd_object;
4961d452cf5Sahrens 	dsl_dir_close(dd, tag);
4971d452cf5Sahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
498fa9e4066Sahrens }
499fa9e4066Sahrens 
500088f3894Sahrens boolean_t
501088f3894Sahrens dsl_dir_is_clone(dsl_dir_t *dd)
502fa9e4066Sahrens {
503088f3894Sahrens 	return (dd->dd_phys->dd_origin_obj &&
504088f3894Sahrens 	    (dd->dd_pool->dp_origin_snap == NULL ||
505088f3894Sahrens 	    dd->dd_phys->dd_origin_obj !=
506088f3894Sahrens 	    dd->dd_pool->dp_origin_snap->ds_object));
507fa9e4066Sahrens }
508fa9e4066Sahrens 
509fa9e4066Sahrens void
510a2eea2e1Sahrens dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
511fa9e4066Sahrens {
512fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
513*74e7dc98SMatthew Ahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
514*74e7dc98SMatthew Ahrens 	    dd->dd_phys->dd_used_bytes);
515a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota);
516a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
517a2eea2e1Sahrens 	    dd->dd_phys->dd_reserved);
518a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
519a2eea2e1Sahrens 	    dd->dd_phys->dd_compressed_bytes == 0 ? 100 :
520a2eea2e1Sahrens 	    (dd->dd_phys->dd_uncompressed_bytes * 100 /
521a2eea2e1Sahrens 	    dd->dd_phys->dd_compressed_bytes));
522*74e7dc98SMatthew Ahrens 	if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
523*74e7dc98SMatthew Ahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP,
524*74e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]);
525*74e7dc98SMatthew Ahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS,
526*74e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]);
527*74e7dc98SMatthew Ahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV,
528*74e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]);
529*74e7dc98SMatthew Ahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD,
530*74e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] +
531*74e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]);
532*74e7dc98SMatthew Ahrens 	}
533fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
534fa9e4066Sahrens 
5354ccbb6e7Sahrens 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
536088f3894Sahrens 	if (dsl_dir_is_clone(dd)) {
537fa9e4066Sahrens 		dsl_dataset_t *ds;
538a2eea2e1Sahrens 		char buf[MAXNAMELEN];
539fa9e4066Sahrens 
540745cd3c5Smaybee 		VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
541745cd3c5Smaybee 		    dd->dd_phys->dd_origin_obj, FTAG, &ds));
542a2eea2e1Sahrens 		dsl_dataset_name(ds, buf);
543745cd3c5Smaybee 		dsl_dataset_rele(ds, FTAG);
544a2eea2e1Sahrens 		dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
545fa9e4066Sahrens 	}
5464ccbb6e7Sahrens 	rw_exit(&dd->dd_pool->dp_config_rwlock);
547fa9e4066Sahrens }
548fa9e4066Sahrens 
549fa9e4066Sahrens void
550fa9e4066Sahrens dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
551fa9e4066Sahrens {
552fa9e4066Sahrens 	dsl_pool_t *dp = dd->dd_pool;
553fa9e4066Sahrens 
554fa9e4066Sahrens 	ASSERT(dd->dd_phys);
555fa9e4066Sahrens 
556fa9e4066Sahrens 	if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) {
557fa9e4066Sahrens 		/* up the hold count until we can be written out */
558fa9e4066Sahrens 		dmu_buf_add_ref(dd->dd_dbuf, dd);
559fa9e4066Sahrens 	}
560fa9e4066Sahrens }
561fa9e4066Sahrens 
562fa9e4066Sahrens static int64_t
563fa9e4066Sahrens parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta)
564fa9e4066Sahrens {
565fa9e4066Sahrens 	uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved);
566fa9e4066Sahrens 	uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved);
567fa9e4066Sahrens 	return (new_accounted - old_accounted);
568fa9e4066Sahrens }
569fa9e4066Sahrens 
570fa9e4066Sahrens void
571fa9e4066Sahrens dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
572fa9e4066Sahrens {
573fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
574fa9e4066Sahrens 
575fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
576fa9e4066Sahrens 
577fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
578fa9e4066Sahrens 	ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0);
579fa9e4066Sahrens 	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
580fa9e4066Sahrens 	    dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
581fa9e4066Sahrens 	dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
582fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
583fa9e4066Sahrens 
584fa9e4066Sahrens 	/* release the hold from dsl_dir_dirty */
585ea8dc4b6Seschrock 	dmu_buf_rele(dd->dd_dbuf, dd);
586fa9e4066Sahrens }
587fa9e4066Sahrens 
588fa9e4066Sahrens static uint64_t
589a9799022Sck dsl_dir_space_towrite(dsl_dir_t *dd)
590fa9e4066Sahrens {
591a9799022Sck 	uint64_t space = 0;
592fa9e4066Sahrens 	int i;
593fa9e4066Sahrens 
594fa9e4066Sahrens 	ASSERT(MUTEX_HELD(&dd->dd_lock));
595fa9e4066Sahrens 
596fa9e4066Sahrens 	for (i = 0; i < TXG_SIZE; i++) {
597fa9e4066Sahrens 		space += dd->dd_space_towrite[i&TXG_MASK];
598fa9e4066Sahrens 		ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
599fa9e4066Sahrens 	}
600fa9e4066Sahrens 	return (space);
601fa9e4066Sahrens }
602fa9e4066Sahrens 
603fa9e4066Sahrens /*
604fa9e4066Sahrens  * How much space would dd have available if ancestor had delta applied
605fa9e4066Sahrens  * to it?  If ondiskonly is set, we're only interested in what's
606fa9e4066Sahrens  * on-disk, not estimated pending changes.
607fa9e4066Sahrens  */
608a2eea2e1Sahrens uint64_t
609fa9e4066Sahrens dsl_dir_space_available(dsl_dir_t *dd,
610fa9e4066Sahrens     dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
611fa9e4066Sahrens {
612fa9e4066Sahrens 	uint64_t parentspace, myspace, quota, used;
613fa9e4066Sahrens 
614fa9e4066Sahrens 	/*
615fa9e4066Sahrens 	 * If there are no restrictions otherwise, assume we have
616fa9e4066Sahrens 	 * unlimited space available.
617fa9e4066Sahrens 	 */
618fa9e4066Sahrens 	quota = UINT64_MAX;
619fa9e4066Sahrens 	parentspace = UINT64_MAX;
620fa9e4066Sahrens 
621fa9e4066Sahrens 	if (dd->dd_parent != NULL) {
622fa9e4066Sahrens 		parentspace = dsl_dir_space_available(dd->dd_parent,
623fa9e4066Sahrens 		    ancestor, delta, ondiskonly);
624fa9e4066Sahrens 	}
625fa9e4066Sahrens 
626fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
627fa9e4066Sahrens 	if (dd->dd_phys->dd_quota != 0)
628fa9e4066Sahrens 		quota = dd->dd_phys->dd_quota;
629*74e7dc98SMatthew Ahrens 	used = dd->dd_phys->dd_used_bytes;
630a9799022Sck 	if (!ondiskonly)
631a9799022Sck 		used += dsl_dir_space_towrite(dd);
632fa9e4066Sahrens 
633fa9e4066Sahrens 	if (dd->dd_parent == NULL) {
63499653d4eSeschrock 		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
635fa9e4066Sahrens 		quota = MIN(quota, poolsize);
636fa9e4066Sahrens 	}
637fa9e4066Sahrens 
638fa9e4066Sahrens 	if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) {
639fa9e4066Sahrens 		/*
640fa9e4066Sahrens 		 * We have some space reserved, in addition to what our
641fa9e4066Sahrens 		 * parent gave us.
642fa9e4066Sahrens 		 */
643fa9e4066Sahrens 		parentspace += dd->dd_phys->dd_reserved - used;
644fa9e4066Sahrens 	}
645fa9e4066Sahrens 
646*74e7dc98SMatthew Ahrens 	if (dd == ancestor) {
647*74e7dc98SMatthew Ahrens 		ASSERT(delta <= 0);
648*74e7dc98SMatthew Ahrens 		ASSERT(used >= -delta);
649*74e7dc98SMatthew Ahrens 		used += delta;
650*74e7dc98SMatthew Ahrens 		if (parentspace != UINT64_MAX)
651*74e7dc98SMatthew Ahrens 			parentspace -= delta;
652*74e7dc98SMatthew Ahrens 	}
653*74e7dc98SMatthew Ahrens 
654fa9e4066Sahrens 	if (used > quota) {
655fa9e4066Sahrens 		/* over quota */
656fa9e4066Sahrens 		myspace = 0;
65799653d4eSeschrock 
65899653d4eSeschrock 		/*
65999653d4eSeschrock 		 * While it's OK to be a little over quota, if
66099653d4eSeschrock 		 * we think we are using more space than there
66199653d4eSeschrock 		 * is in the pool (which is already 1.6% more than
66299653d4eSeschrock 		 * dsl_pool_adjustedsize()), something is very
66399653d4eSeschrock 		 * wrong.
66499653d4eSeschrock 		 */
66599653d4eSeschrock 		ASSERT3U(used, <=, spa_get_space(dd->dd_pool->dp_spa));
666fa9e4066Sahrens 	} else {
667fa9e4066Sahrens 		/*
66899653d4eSeschrock 		 * the lesser of the space provided by our parent and
66999653d4eSeschrock 		 * the space left in our quota
670fa9e4066Sahrens 		 */
671fa9e4066Sahrens 		myspace = MIN(parentspace, quota - used);
672fa9e4066Sahrens 	}
673fa9e4066Sahrens 
674fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
675fa9e4066Sahrens 
676fa9e4066Sahrens 	return (myspace);
677fa9e4066Sahrens }
678fa9e4066Sahrens 
/*
 * One record of a temporary space reservation.
 * NOTE(review): the code that fills in and consumes these records is
 * outside this view; the field notes below are inferred from names and
 * types and should be confirmed against dsl_dir_tempreserve_*().
 */
struct tempreserve {
	list_node_t tr_node;	/* list linkage; presumably for a tr_list */
	dsl_pool_t *tr_dp;	/* presumably the pool involved -- confirm */
	dsl_dir_t *tr_ds;	/* a dsl_dir_t, despite the "ds" in the name */
	uint64_t tr_size;	/* presumably the reserved size -- confirm */
};
685fa9e4066Sahrens 
686fa9e4066Sahrens static int
687a9799022Sck dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
688a9799022Sck     boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
6896df6c3bcSck     dmu_tx_t *tx, boolean_t first)
690fa9e4066Sahrens {
691fa9e4066Sahrens 	uint64_t txg = tx->tx_txg;
692a9799022Sck 	uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
693a9799022Sck 	struct tempreserve *tr;
694c3fdb13aSmaybee 	int enospc = EDQUOT;
695fa9e4066Sahrens 	int txgidx = txg & TXG_MASK;
696fa9e4066Sahrens 	int i;
6979082849eSck 	uint64_t ref_rsrv = 0;
698fa9e4066Sahrens 
699fa9e4066Sahrens 	ASSERT3U(txg, !=, 0);
700a9799022Sck 	ASSERT3S(asize, >, 0);
701fa9e4066Sahrens 
702fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
703a9799022Sck 
704fa9e4066Sahrens 	/*
705fa9e4066Sahrens 	 * Check against the dsl_dir's quota.  We don't add in the delta
706fa9e4066Sahrens 	 * when checking for over-quota because they get one free hit.
707fa9e4066Sahrens 	 */
708a9799022Sck 	est_inflight = dsl_dir_space_towrite(dd);
709fa9e4066Sahrens 	for (i = 0; i < TXG_SIZE; i++)
710a9799022Sck 		est_inflight += dd->dd_tempreserved[i];
711*74e7dc98SMatthew Ahrens 	used_on_disk = dd->dd_phys->dd_used_bytes;
712fa9e4066Sahrens 
713f4d2e9e6Smaybee 	/*
7146df6c3bcSck 	 * On the first iteration, fetch the dataset's used-on-disk and
7156df6c3bcSck 	 * refreservation values. Also, if checkrefquota is set, test if
7166df6c3bcSck 	 * allocating this space would exceed the dataset's refquota.
717f4d2e9e6Smaybee 	 */
7186df6c3bcSck 	if (first && tx->tx_objset) {
719c3fdb13aSmaybee 		int error;
720a9799022Sck 		dsl_dataset_t *ds = tx->tx_objset->os->os_dsl_dataset;
7219082849eSck 
722a9799022Sck 		error = dsl_dataset_check_quota(ds, checkrefquota,
7239082849eSck 		    asize, est_inflight, &used_on_disk, &ref_rsrv);
724a9799022Sck 		if (error) {
725a9799022Sck 			mutex_exit(&dd->dd_lock);
726a9799022Sck 			return (error);
727a9799022Sck 		}
728a9799022Sck 	}
729a9799022Sck 
730a9799022Sck 	/*
731a9799022Sck 	 * If this transaction will result in a net free of space,
732a9799022Sck 	 * we want to let it through.
733a9799022Sck 	 */
734a9799022Sck 	if (ignorequota || netfree || dd->dd_phys->dd_quota == 0)
735f4d2e9e6Smaybee 		quota = UINT64_MAX;
736f4d2e9e6Smaybee 	else
737fa9e4066Sahrens 		quota = dd->dd_phys->dd_quota;
738fa9e4066Sahrens 
739fa9e4066Sahrens 	/*
740f4d2e9e6Smaybee 	 * Adjust the quota against the actual pool size at the root.
741f4d2e9e6Smaybee 	 * To ensure that it's possible to remove files from a full
742f4d2e9e6Smaybee 	 * pool without inducing transient overcommits, we throttle
743fa9e4066Sahrens 	 * netfree transactions against a quota that is slightly larger,
744fa9e4066Sahrens 	 * but still within the pool's allocation slop.  In cases where
745fa9e4066Sahrens 	 * we're very close to full, this will allow a steady trickle of
746fa9e4066Sahrens 	 * removes to get through.
747fa9e4066Sahrens 	 */
7481934e92fSmaybee 	if (dd->dd_parent == NULL) {
749fa9e4066Sahrens 		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
750fa9e4066Sahrens 		if (poolsize < quota) {
751fa9e4066Sahrens 			quota = poolsize;
752c3fdb13aSmaybee 			enospc = ENOSPC;
753fa9e4066Sahrens 		}
754fa9e4066Sahrens 	}
755fa9e4066Sahrens 
756fa9e4066Sahrens 	/*
757fa9e4066Sahrens 	 * If they are requesting more space, and our current estimate
758a9799022Sck 	 * is over quota, they get to try again unless the actual
759ea8dc4b6Seschrock 	 * on-disk is over quota and there are no pending changes (which
760ea8dc4b6Seschrock 	 * may free up space for us).
761fa9e4066Sahrens 	 */
762a9799022Sck 	if (used_on_disk + est_inflight > quota) {
763a9799022Sck 		if (est_inflight > 0 || used_on_disk < quota)
764c3fdb13aSmaybee 			enospc = ERESTART;
765a9799022Sck 		dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
766fa9e4066Sahrens 		    "quota=%lluK tr=%lluK err=%d\n",
767a9799022Sck 		    used_on_disk>>10, est_inflight>>10,
768c3fdb13aSmaybee 		    quota>>10, asize>>10, enospc);
769fa9e4066Sahrens 		mutex_exit(&dd->dd_lock);
770c3fdb13aSmaybee 		return (enospc);
771fa9e4066Sahrens 	}
772fa9e4066Sahrens 
773fa9e4066Sahrens 	/* We need to up our estimated delta before dropping dd_lock */
774fa9e4066Sahrens 	dd->dd_tempreserved[txgidx] += asize;
775fa9e4066Sahrens 
7769082849eSck 	parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
7779082849eSck 	    asize - ref_rsrv);
778fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
779fa9e4066Sahrens 
7801ab7f2deSmaybee 	tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
781fa9e4066Sahrens 	tr->tr_ds = dd;
782fa9e4066Sahrens 	tr->tr_size = asize;
783fa9e4066Sahrens 	list_insert_tail(tr_list, tr);
784fa9e4066Sahrens 
785fa9e4066Sahrens 	/* see if it's OK with our parent */
7861934e92fSmaybee 	if (dd->dd_parent && parent_rsrv) {
7871934e92fSmaybee 		boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0);
7881934e92fSmaybee 
789fa9e4066Sahrens 		return (dsl_dir_tempreserve_impl(dd->dd_parent,
7906df6c3bcSck 		    parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE));
791fa9e4066Sahrens 	} else {
792fa9e4066Sahrens 		return (0);
793fa9e4066Sahrens 	}
794fa9e4066Sahrens }
795fa9e4066Sahrens 
796fa9e4066Sahrens /*
797fa9e4066Sahrens  * Reserve space in this dsl_dir, to be used in this tx's txg.
798a9799022Sck  * After the space has been dirtied (and dsl_dir_willuse_space()
799a9799022Sck  * has been called), the reservation should be canceled, using
800a9799022Sck  * dsl_dir_tempreserve_clear().
801fa9e4066Sahrens  */
802fa9e4066Sahrens int
803a9799022Sck dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
804a9799022Sck     uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx)
805fa9e4066Sahrens {
8061ab7f2deSmaybee 	int err;
807fa9e4066Sahrens 	list_t *tr_list;
808fa9e4066Sahrens 
809a9799022Sck 	if (asize == 0) {
810a9799022Sck 		*tr_cookiep = NULL;
811a9799022Sck 		return (0);
812a9799022Sck 	}
813a9799022Sck 
814fa9e4066Sahrens 	tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
815fa9e4066Sahrens 	list_create(tr_list, sizeof (struct tempreserve),
816fa9e4066Sahrens 	    offsetof(struct tempreserve, tr_node));
817a9799022Sck 	ASSERT3S(asize, >, 0);
818ea8dc4b6Seschrock 	ASSERT3S(fsize, >=, 0);
819fa9e4066Sahrens 
8201ab7f2deSmaybee 	err = arc_tempreserve_space(lsize, tx->tx_txg);
821fa9e4066Sahrens 	if (err == 0) {
822fa9e4066Sahrens 		struct tempreserve *tr;
823fa9e4066Sahrens 
8241ab7f2deSmaybee 		tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
8251ab7f2deSmaybee 		tr->tr_size = lsize;
8261ab7f2deSmaybee 		list_insert_tail(tr_list, tr);
8271ab7f2deSmaybee 
8281ab7f2deSmaybee 		err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx);
8291ab7f2deSmaybee 	} else {
8301ab7f2deSmaybee 		if (err == EAGAIN) {
8311ab7f2deSmaybee 			txg_delay(dd->dd_pool, tx->tx_txg, 1);
8321ab7f2deSmaybee 			err = ERESTART;
833fa9e4066Sahrens 		}
8341ab7f2deSmaybee 		dsl_pool_memory_pressure(dd->dd_pool);
8351ab7f2deSmaybee 	}
8361ab7f2deSmaybee 
8371ab7f2deSmaybee 	if (err == 0) {
8381ab7f2deSmaybee 		struct tempreserve *tr;
8391ab7f2deSmaybee 
8401ab7f2deSmaybee 		tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
8411ab7f2deSmaybee 		tr->tr_dp = dd->dd_pool;
8421ab7f2deSmaybee 		tr->tr_size = asize;
8431ab7f2deSmaybee 		list_insert_tail(tr_list, tr);
8441ab7f2deSmaybee 
8451ab7f2deSmaybee 		err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
8466df6c3bcSck 		    FALSE, asize > usize, tr_list, tx, TRUE);
847fa9e4066Sahrens 	}
848fa9e4066Sahrens 
849fa9e4066Sahrens 	if (err)
850fa9e4066Sahrens 		dsl_dir_tempreserve_clear(tr_list, tx);
851fa9e4066Sahrens 	else
852fa9e4066Sahrens 		*tr_cookiep = tr_list;
8531ab7f2deSmaybee 
854fa9e4066Sahrens 	return (err);
855fa9e4066Sahrens }
856fa9e4066Sahrens 
857fa9e4066Sahrens /*
858fa9e4066Sahrens  * Clear a temporary reservation that we previously made with
859fa9e4066Sahrens  * dsl_dir_tempreserve_space().
860fa9e4066Sahrens  */
861fa9e4066Sahrens void
862fa9e4066Sahrens dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
863fa9e4066Sahrens {
864fa9e4066Sahrens 	int txgidx = tx->tx_txg & TXG_MASK;
865fa9e4066Sahrens 	list_t *tr_list = tr_cookie;
866fa9e4066Sahrens 	struct tempreserve *tr;
867fa9e4066Sahrens 
868fa9e4066Sahrens 	ASSERT3U(tx->tx_txg, !=, 0);
869fa9e4066Sahrens 
870a9799022Sck 	if (tr_cookie == NULL)
871a9799022Sck 		return;
872a9799022Sck 
873fa9e4066Sahrens 	while (tr = list_head(tr_list)) {
8741ab7f2deSmaybee 		if (tr->tr_dp) {
8751ab7f2deSmaybee 			dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx);
8761ab7f2deSmaybee 		} else if (tr->tr_ds) {
877fa9e4066Sahrens 			mutex_enter(&tr->tr_ds->dd_lock);
878fa9e4066Sahrens 			ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
879fa9e4066Sahrens 			    tr->tr_size);
880fa9e4066Sahrens 			tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
881fa9e4066Sahrens 			mutex_exit(&tr->tr_ds->dd_lock);
8821ab7f2deSmaybee 		} else {
8831ab7f2deSmaybee 			arc_tempreserve_clear(tr->tr_size);
884fa9e4066Sahrens 		}
885fa9e4066Sahrens 		list_remove(tr_list, tr);
886fa9e4066Sahrens 		kmem_free(tr, sizeof (struct tempreserve));
887fa9e4066Sahrens 	}
888fa9e4066Sahrens 
889fa9e4066Sahrens 	kmem_free(tr_list, sizeof (list_t));
890fa9e4066Sahrens }
891fa9e4066Sahrens 
8921ab7f2deSmaybee static void
8931ab7f2deSmaybee dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
894fa9e4066Sahrens {
895fa9e4066Sahrens 	int64_t parent_space;
896fa9e4066Sahrens 	uint64_t est_used;
897fa9e4066Sahrens 
898fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
899fa9e4066Sahrens 	if (space > 0)
900fa9e4066Sahrens 		dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;
901fa9e4066Sahrens 
902*74e7dc98SMatthew Ahrens 	est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes;
903fa9e4066Sahrens 	parent_space = parent_delta(dd, est_used, space);
904fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
905fa9e4066Sahrens 
906fa9e4066Sahrens 	/* Make sure that we clean up dd_space_to* */
907fa9e4066Sahrens 	dsl_dir_dirty(dd, tx);
908fa9e4066Sahrens 
909fa9e4066Sahrens 	/* XXX this is potentially expensive and unnecessary... */
910fa9e4066Sahrens 	if (parent_space && dd->dd_parent)
9111ab7f2deSmaybee 		dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx);
9121ab7f2deSmaybee }
9131ab7f2deSmaybee 
9141ab7f2deSmaybee /*
9151ab7f2deSmaybee  * Call in open context when we think we're going to write/free space,
9161ab7f2deSmaybee  * eg. when dirtying data.  Be conservative (ie. OK to write less than
9171ab7f2deSmaybee  * this or free more than this, but don't write more or free less).
9181ab7f2deSmaybee  */
9191ab7f2deSmaybee void
9201ab7f2deSmaybee dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
9211ab7f2deSmaybee {
9221ab7f2deSmaybee 	dsl_pool_willuse_space(dd->dd_pool, space, tx);
9231ab7f2deSmaybee 	dsl_dir_willuse_space_impl(dd, space, tx);
924fa9e4066Sahrens }
925fa9e4066Sahrens 
926fa9e4066Sahrens /* call from syncing context when we actually write/free space for this dd */
927fa9e4066Sahrens void
928*74e7dc98SMatthew Ahrens dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
929fa9e4066Sahrens     int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
930fa9e4066Sahrens {
931fa9e4066Sahrens 	int64_t accounted_delta;
932fa9e4066Sahrens 
933fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
934*74e7dc98SMatthew Ahrens 	ASSERT(type < DD_USED_NUM);
935fa9e4066Sahrens 
936fa9e4066Sahrens 	dsl_dir_dirty(dd, tx);
937fa9e4066Sahrens 
938fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
939*74e7dc98SMatthew Ahrens 	accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used);
940*74e7dc98SMatthew Ahrens 	ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used);
941fa9e4066Sahrens 	ASSERT(compressed >= 0 ||
942fa9e4066Sahrens 	    dd->dd_phys->dd_compressed_bytes >= -compressed);
943fa9e4066Sahrens 	ASSERT(uncompressed >= 0 ||
944fa9e4066Sahrens 	    dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
945*74e7dc98SMatthew Ahrens 	dd->dd_phys->dd_used_bytes += used;
946fa9e4066Sahrens 	dd->dd_phys->dd_uncompressed_bytes += uncompressed;
947fa9e4066Sahrens 	dd->dd_phys->dd_compressed_bytes += compressed;
948*74e7dc98SMatthew Ahrens 
949*74e7dc98SMatthew Ahrens 	if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
950*74e7dc98SMatthew Ahrens 		ASSERT(used > 0 ||
951*74e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_breakdown[type] >= -used);
952*74e7dc98SMatthew Ahrens 		dd->dd_phys->dd_used_breakdown[type] += used;
953*74e7dc98SMatthew Ahrens #ifdef DEBUG
954*74e7dc98SMatthew Ahrens 		dd_used_t t;
955*74e7dc98SMatthew Ahrens 		uint64_t u = 0;
956*74e7dc98SMatthew Ahrens 		for (t = 0; t < DD_USED_NUM; t++)
957*74e7dc98SMatthew Ahrens 			u += dd->dd_phys->dd_used_breakdown[t];
958*74e7dc98SMatthew Ahrens 		ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes);
959*74e7dc98SMatthew Ahrens #endif
960*74e7dc98SMatthew Ahrens 	}
961fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
962fa9e4066Sahrens 
963fa9e4066Sahrens 	if (dd->dd_parent != NULL) {
964*74e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
965fa9e4066Sahrens 		    accounted_delta, compressed, uncompressed, tx);
966*74e7dc98SMatthew Ahrens 		dsl_dir_transfer_space(dd->dd_parent,
967*74e7dc98SMatthew Ahrens 		    used - accounted_delta,
968*74e7dc98SMatthew Ahrens 		    DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
969fa9e4066Sahrens 	}
970fa9e4066Sahrens }
971fa9e4066Sahrens 
972*74e7dc98SMatthew Ahrens void
973*74e7dc98SMatthew Ahrens dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
974*74e7dc98SMatthew Ahrens     dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
975*74e7dc98SMatthew Ahrens {
976*74e7dc98SMatthew Ahrens 	ASSERT(dmu_tx_is_syncing(tx));
977*74e7dc98SMatthew Ahrens 	ASSERT(oldtype < DD_USED_NUM);
978*74e7dc98SMatthew Ahrens 	ASSERT(newtype < DD_USED_NUM);
979*74e7dc98SMatthew Ahrens 
980*74e7dc98SMatthew Ahrens 	if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN))
981*74e7dc98SMatthew Ahrens 		return;
982*74e7dc98SMatthew Ahrens 
983*74e7dc98SMatthew Ahrens 	dsl_dir_dirty(dd, tx);
984*74e7dc98SMatthew Ahrens 	mutex_enter(&dd->dd_lock);
985*74e7dc98SMatthew Ahrens 	ASSERT(delta > 0 ?
986*74e7dc98SMatthew Ahrens 	    dd->dd_phys->dd_used_breakdown[oldtype] >= delta :
987*74e7dc98SMatthew Ahrens 	    dd->dd_phys->dd_used_breakdown[newtype] >= -delta);
988*74e7dc98SMatthew Ahrens 	ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta));
989*74e7dc98SMatthew Ahrens 	dd->dd_phys->dd_used_breakdown[oldtype] -= delta;
990*74e7dc98SMatthew Ahrens 	dd->dd_phys->dd_used_breakdown[newtype] += delta;
991*74e7dc98SMatthew Ahrens 	mutex_exit(&dd->dd_lock);
992*74e7dc98SMatthew Ahrens }
993*74e7dc98SMatthew Ahrens 
994*74e7dc98SMatthew Ahrens 
995fa9e4066Sahrens static int
9961d452cf5Sahrens dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
997fa9e4066Sahrens {
9981d452cf5Sahrens 	dsl_dir_t *dd = arg1;
9991d452cf5Sahrens 	uint64_t *quotap = arg2;
1000fa9e4066Sahrens 	uint64_t new_quota = *quotap;
1001fa9e4066Sahrens 	int err = 0;
10021d452cf5Sahrens 	uint64_t towrite;
1003fa9e4066Sahrens 
10041d452cf5Sahrens 	if (new_quota == 0)
10051d452cf5Sahrens 		return (0);
1006fa9e4066Sahrens 
1007fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
10081d452cf5Sahrens 	/*
10091d452cf5Sahrens 	 * If we are doing the preliminary check in open context, and
10101d452cf5Sahrens 	 * there are pending changes, then don't fail it, since the
1011a9799022Sck 	 * pending changes could under-estimate the amount of space to be
10121d452cf5Sahrens 	 * freed up.
10131d452cf5Sahrens 	 */
1014a9799022Sck 	towrite = dsl_dir_space_towrite(dd);
10151d452cf5Sahrens 	if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
10161d452cf5Sahrens 	    (new_quota < dd->dd_phys->dd_reserved ||
1017*74e7dc98SMatthew Ahrens 	    new_quota < dd->dd_phys->dd_used_bytes + towrite)) {
1018fa9e4066Sahrens 		err = ENOSPC;
1019fa9e4066Sahrens 	}
1020fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
1021fa9e4066Sahrens 	return (err);
1022fa9e4066Sahrens }
1023fa9e4066Sahrens 
1024ecd6cf80Smarks /* ARGSUSED */
10251d452cf5Sahrens static void
1026ecd6cf80Smarks dsl_dir_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
10271d452cf5Sahrens {
10281d452cf5Sahrens 	dsl_dir_t *dd = arg1;
10291d452cf5Sahrens 	uint64_t *quotap = arg2;
10301d452cf5Sahrens 	uint64_t new_quota = *quotap;
10311d452cf5Sahrens 
10321d452cf5Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
10331d452cf5Sahrens 
10341d452cf5Sahrens 	mutex_enter(&dd->dd_lock);
10351d452cf5Sahrens 	dd->dd_phys->dd_quota = new_quota;
10361d452cf5Sahrens 	mutex_exit(&dd->dd_lock);
1037ecd6cf80Smarks 
1038ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_QUOTA, dd->dd_pool->dp_spa,
1039ecd6cf80Smarks 	    tx, cr, "%lld dataset = %llu ",
1040ecd6cf80Smarks 	    (longlong_t)new_quota, dd->dd_phys->dd_head_dataset_obj);
10411d452cf5Sahrens }
10421d452cf5Sahrens 
1043fa9e4066Sahrens int
1044fa9e4066Sahrens dsl_dir_set_quota(const char *ddname, uint64_t quota)
1045fa9e4066Sahrens {
1046fa9e4066Sahrens 	dsl_dir_t *dd;
1047fa9e4066Sahrens 	int err;
1048fa9e4066Sahrens 
1049ea8dc4b6Seschrock 	err = dsl_dir_open(ddname, FTAG, &dd, NULL);
1050ea8dc4b6Seschrock 	if (err)
1051ea8dc4b6Seschrock 		return (err);
1052fa9e4066Sahrens 
1053a9b821a0Sck 	if (quota != dd->dd_phys->dd_quota) {
1054a9b821a0Sck 		/*
1055a9b821a0Sck 		 * If someone removes a file, then tries to set the quota, we
1056a9b821a0Sck 		 * want to make sure the file freeing takes effect.
1057a9b821a0Sck 		 */
1058a9b821a0Sck 		txg_wait_open(dd->dd_pool, 0);
1059a9b821a0Sck 
1060a9b821a0Sck 		err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
1061a9b821a0Sck 		    dsl_dir_set_quota_sync, dd, &quota, 0);
1062a9b821a0Sck 	}
1063fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
1064fa9e4066Sahrens 	return (err);
1065fa9e4066Sahrens }
1066fa9e4066Sahrens 
1067a9799022Sck int
10681d452cf5Sahrens dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
1069fa9e4066Sahrens {
10701d452cf5Sahrens 	dsl_dir_t *dd = arg1;
10711d452cf5Sahrens 	uint64_t *reservationp = arg2;
1072fa9e4066Sahrens 	uint64_t new_reservation = *reservationp;
1073fa9e4066Sahrens 	uint64_t used, avail;
1074fa9e4066Sahrens 	int64_t delta;
1075fa9e4066Sahrens 
1076fa9e4066Sahrens 	if (new_reservation > INT64_MAX)
1077fa9e4066Sahrens 		return (EOVERFLOW);
1078fa9e4066Sahrens 
10791d452cf5Sahrens 	/*
10801d452cf5Sahrens 	 * If we are doing the preliminary check in open context, the
10811d452cf5Sahrens 	 * space estimates may be inaccurate.
10821d452cf5Sahrens 	 */
10831d452cf5Sahrens 	if (!dmu_tx_is_syncing(tx))
10841d452cf5Sahrens 		return (0);
10851d452cf5Sahrens 
1086fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
1087*74e7dc98SMatthew Ahrens 	used = dd->dd_phys->dd_used_bytes;
1088fa9e4066Sahrens 	delta = MAX(used, new_reservation) -
1089fa9e4066Sahrens 	    MAX(used, dd->dd_phys->dd_reserved);
1090fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
1091fa9e4066Sahrens 
1092fa9e4066Sahrens 	if (dd->dd_parent) {
1093fa9e4066Sahrens 		avail = dsl_dir_space_available(dd->dd_parent,
1094fa9e4066Sahrens 		    NULL, 0, FALSE);
1095fa9e4066Sahrens 	} else {
1096fa9e4066Sahrens 		avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
1097fa9e4066Sahrens 	}
1098fa9e4066Sahrens 
1099fa9e4066Sahrens 	if (delta > 0 && delta > avail)
1100fa9e4066Sahrens 		return (ENOSPC);
1101fa9e4066Sahrens 	if (delta > 0 && dd->dd_phys->dd_quota > 0 &&
1102fa9e4066Sahrens 	    new_reservation > dd->dd_phys->dd_quota)
1103fa9e4066Sahrens 		return (ENOSPC);
11041d452cf5Sahrens 	return (0);
11051d452cf5Sahrens }
11061d452cf5Sahrens 
1107ecd6cf80Smarks /* ARGSUSED */
11081d452cf5Sahrens static void
1109ecd6cf80Smarks dsl_dir_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
11101d452cf5Sahrens {
11111d452cf5Sahrens 	dsl_dir_t *dd = arg1;
11121d452cf5Sahrens 	uint64_t *reservationp = arg2;
11131d452cf5Sahrens 	uint64_t new_reservation = *reservationp;
11141d452cf5Sahrens 	uint64_t used;
11151d452cf5Sahrens 	int64_t delta;
11161d452cf5Sahrens 
1117a9799022Sck 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
1118a9799022Sck 
11191d452cf5Sahrens 	mutex_enter(&dd->dd_lock);
1120*74e7dc98SMatthew Ahrens 	used = dd->dd_phys->dd_used_bytes;
11211d452cf5Sahrens 	delta = MAX(used, new_reservation) -
11221d452cf5Sahrens 	    MAX(used, dd->dd_phys->dd_reserved);
1123fa9e4066Sahrens 	dd->dd_phys->dd_reserved = new_reservation;
1124a9799022Sck 	mutex_exit(&dd->dd_lock);
1125fa9e4066Sahrens 
1126fa9e4066Sahrens 	if (dd->dd_parent != NULL) {
1127fa9e4066Sahrens 		/* Roll up this additional usage into our ancestors */
1128*74e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
1129*74e7dc98SMatthew Ahrens 		    delta, 0, 0, tx);
1130fa9e4066Sahrens 	}
1131ecd6cf80Smarks 
1132ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_RESERVATION, dd->dd_pool->dp_spa,
1133ecd6cf80Smarks 	    tx, cr, "%lld dataset = %llu",
1134ecd6cf80Smarks 	    (longlong_t)new_reservation, dd->dd_phys->dd_head_dataset_obj);
1135fa9e4066Sahrens }
1136fa9e4066Sahrens 
1137fa9e4066Sahrens int
1138fa9e4066Sahrens dsl_dir_set_reservation(const char *ddname, uint64_t reservation)
1139fa9e4066Sahrens {
1140fa9e4066Sahrens 	dsl_dir_t *dd;
1141fa9e4066Sahrens 	int err;
1142fa9e4066Sahrens 
1143ea8dc4b6Seschrock 	err = dsl_dir_open(ddname, FTAG, &dd, NULL);
1144ea8dc4b6Seschrock 	if (err)
1145ea8dc4b6Seschrock 		return (err);
11461d452cf5Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
11471d452cf5Sahrens 	    dsl_dir_set_reservation_sync, dd, &reservation, 0);
1148fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
1149fa9e4066Sahrens 	return (err);
1150fa9e4066Sahrens }
1151fa9e4066Sahrens 
1152fa9e4066Sahrens static dsl_dir_t *
1153fa9e4066Sahrens closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
1154fa9e4066Sahrens {
1155fa9e4066Sahrens 	for (; ds1; ds1 = ds1->dd_parent) {
1156fa9e4066Sahrens 		dsl_dir_t *dd;
1157fa9e4066Sahrens 		for (dd = ds2; dd; dd = dd->dd_parent) {
1158fa9e4066Sahrens 			if (ds1 == dd)
1159fa9e4066Sahrens 				return (dd);
1160fa9e4066Sahrens 		}
1161fa9e4066Sahrens 	}
1162fa9e4066Sahrens 	return (NULL);
1163fa9e4066Sahrens }
1164fa9e4066Sahrens 
1165fa9e4066Sahrens /*
1166fa9e4066Sahrens  * If delta is applied to dd, how much of that delta would be applied to
1167fa9e4066Sahrens  * ancestor?  Syncing context only.
1168fa9e4066Sahrens  */
1169fa9e4066Sahrens static int64_t
1170fa9e4066Sahrens would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
1171fa9e4066Sahrens {
1172fa9e4066Sahrens 	if (dd == ancestor)
1173fa9e4066Sahrens 		return (delta);
1174fa9e4066Sahrens 
1175fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
1176*74e7dc98SMatthew Ahrens 	delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta);
1177fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
1178fa9e4066Sahrens 	return (would_change(dd->dd_parent, delta, ancestor));
1179fa9e4066Sahrens }
1180fa9e4066Sahrens 
11811d452cf5Sahrens struct renamearg {
11821d452cf5Sahrens 	dsl_dir_t *newparent;
11831d452cf5Sahrens 	const char *mynewname;
11841d452cf5Sahrens };
11851d452cf5Sahrens 
1186ecd6cf80Smarks /*ARGSUSED*/
11871d452cf5Sahrens static int
11881d452cf5Sahrens dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1189fa9e4066Sahrens {
11901d452cf5Sahrens 	dsl_dir_t *dd = arg1;
11911d452cf5Sahrens 	struct renamearg *ra = arg2;
1192fa9e4066Sahrens 	dsl_pool_t *dp = dd->dd_pool;
1193fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
11941d452cf5Sahrens 	int err;
11951d452cf5Sahrens 	uint64_t val;
1196fa9e4066Sahrens 
1197fa9e4066Sahrens 	/* There should be 2 references: the open and the dirty */
11981d452cf5Sahrens 	if (dmu_buf_refcount(dd->dd_dbuf) > 2)
1199fa9e4066Sahrens 		return (EBUSY);
1200fa9e4066Sahrens 
12011d452cf5Sahrens 	/* check for existing name */
12021d452cf5Sahrens 	err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
12031d452cf5Sahrens 	    ra->mynewname, 8, 1, &val);
12041d452cf5Sahrens 	if (err == 0)
12051d452cf5Sahrens 		return (EEXIST);
12061d452cf5Sahrens 	if (err != ENOENT)
12071d452cf5Sahrens 		return (err);
12081d452cf5Sahrens 
12091d452cf5Sahrens 	if (ra->newparent != dd->dd_parent) {
121099653d4eSeschrock 		/* is there enough space? */
121199653d4eSeschrock 		uint64_t myspace =
1212*74e7dc98SMatthew Ahrens 		    MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
1213fa9e4066Sahrens 
12141d452cf5Sahrens 		/* no rename into our descendant */
12151d452cf5Sahrens 		if (closest_common_ancestor(dd, ra->newparent) == dd)
1216fa9e4066Sahrens 			return (EINVAL);
1217fa9e4066Sahrens 
12181d452cf5Sahrens 		if (err = dsl_dir_transfer_possible(dd->dd_parent,
12191d452cf5Sahrens 		    ra->newparent, myspace))
122099653d4eSeschrock 			return (err);
12211d452cf5Sahrens 	}
1222fa9e4066Sahrens 
12231d452cf5Sahrens 	return (0);
12241d452cf5Sahrens }
12251d452cf5Sahrens 
12261d452cf5Sahrens static void
1227ecd6cf80Smarks dsl_dir_rename_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
12281d452cf5Sahrens {
12291d452cf5Sahrens 	dsl_dir_t *dd = arg1;
12301d452cf5Sahrens 	struct renamearg *ra = arg2;
12311d452cf5Sahrens 	dsl_pool_t *dp = dd->dd_pool;
12321d452cf5Sahrens 	objset_t *mos = dp->dp_meta_objset;
12331d452cf5Sahrens 	int err;
12341d452cf5Sahrens 
12351d452cf5Sahrens 	ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
12361d452cf5Sahrens 
12371d452cf5Sahrens 	if (ra->newparent != dd->dd_parent) {
1238*74e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
1239*74e7dc98SMatthew Ahrens 		    -dd->dd_phys->dd_used_bytes,
1240fa9e4066Sahrens 		    -dd->dd_phys->dd_compressed_bytes,
1241fa9e4066Sahrens 		    -dd->dd_phys->dd_uncompressed_bytes, tx);
1242*74e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD,
1243*74e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_bytes,
1244fa9e4066Sahrens 		    dd->dd_phys->dd_compressed_bytes,
1245fa9e4066Sahrens 		    dd->dd_phys->dd_uncompressed_bytes, tx);
1246*74e7dc98SMatthew Ahrens 
1247*74e7dc98SMatthew Ahrens 		if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) {
1248*74e7dc98SMatthew Ahrens 			uint64_t unused_rsrv = dd->dd_phys->dd_reserved -
1249*74e7dc98SMatthew Ahrens 			    dd->dd_phys->dd_used_bytes;
1250*74e7dc98SMatthew Ahrens 
1251*74e7dc98SMatthew Ahrens 			dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
1252*74e7dc98SMatthew Ahrens 			    -unused_rsrv, 0, 0, tx);
1253*74e7dc98SMatthew Ahrens 			dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV,
1254*74e7dc98SMatthew Ahrens 			    unused_rsrv, 0, 0, tx);
1255*74e7dc98SMatthew Ahrens 		}
1256fa9e4066Sahrens 	}
1257fa9e4066Sahrens 
1258fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
1259fa9e4066Sahrens 
1260fa9e4066Sahrens 	/* remove from old parent zapobj */
1261fa9e4066Sahrens 	err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
1262fa9e4066Sahrens 	    dd->dd_myname, tx);
1263fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1264fa9e4066Sahrens 
12651d452cf5Sahrens 	(void) strcpy(dd->dd_myname, ra->mynewname);
1266fa9e4066Sahrens 	dsl_dir_close(dd->dd_parent, dd);
12671d452cf5Sahrens 	dd->dd_phys->dd_parent_obj = ra->newparent->dd_object;
1268ea8dc4b6Seschrock 	VERIFY(0 == dsl_dir_open_obj(dd->dd_pool,
12691d452cf5Sahrens 	    ra->newparent->dd_object, NULL, dd, &dd->dd_parent));
1270fa9e4066Sahrens 
1271fa9e4066Sahrens 	/* add to new parent zapobj */
12721d452cf5Sahrens 	err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
1273fa9e4066Sahrens 	    dd->dd_myname, 8, 1, &dd->dd_object, tx);
1274fa9e4066Sahrens 	ASSERT3U(err, ==, 0);
1275ecd6cf80Smarks 
1276ecd6cf80Smarks 	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa,
1277ecd6cf80Smarks 	    tx, cr, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
12781d452cf5Sahrens }
1279fa9e4066Sahrens 
12801d452cf5Sahrens int
12811d452cf5Sahrens dsl_dir_rename(dsl_dir_t *dd, const char *newname)
12821d452cf5Sahrens {
12831d452cf5Sahrens 	struct renamearg ra;
12841d452cf5Sahrens 	int err;
12851d452cf5Sahrens 
12861d452cf5Sahrens 	/* new parent should exist */
12871d452cf5Sahrens 	err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname);
12881d452cf5Sahrens 	if (err)
12891d452cf5Sahrens 		return (err);
12901d452cf5Sahrens 
12911d452cf5Sahrens 	/* can't rename to different pool */
12921d452cf5Sahrens 	if (dd->dd_pool != ra.newparent->dd_pool) {
12931d452cf5Sahrens 		err = ENXIO;
12941d452cf5Sahrens 		goto out;
12951d452cf5Sahrens 	}
12961d452cf5Sahrens 
12971d452cf5Sahrens 	/* new name should not already exist */
12981d452cf5Sahrens 	if (ra.mynewname == NULL) {
12991d452cf5Sahrens 		err = EEXIST;
13001d452cf5Sahrens 		goto out;
13011d452cf5Sahrens 	}
13021d452cf5Sahrens 
13031d452cf5Sahrens 	err = dsl_sync_task_do(dd->dd_pool,
13041d452cf5Sahrens 	    dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);
13051d452cf5Sahrens 
13061d452cf5Sahrens out:
13071d452cf5Sahrens 	dsl_dir_close(ra.newparent, FTAG);
13081d452cf5Sahrens 	return (err);
1309fa9e4066Sahrens }
131099653d4eSeschrock 
131199653d4eSeschrock int
131299653d4eSeschrock dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
131399653d4eSeschrock {
131499653d4eSeschrock 	dsl_dir_t *ancestor;
131599653d4eSeschrock 	int64_t adelta;
131699653d4eSeschrock 	uint64_t avail;
131799653d4eSeschrock 
131899653d4eSeschrock 	ancestor = closest_common_ancestor(sdd, tdd);
131999653d4eSeschrock 	adelta = would_change(sdd, -space, ancestor);
132099653d4eSeschrock 	avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
132199653d4eSeschrock 	if (avail < space)
132299653d4eSeschrock 		return (ENOSPC);
132399653d4eSeschrock 
132499653d4eSeschrock 	return (0);
132599653d4eSeschrock }
1326