xref: /illumos-gate/usr/src/uts/common/fs/zfs/dsl_dir.c (revision ce636f8b38e8c9ff484e880d9abb27251a882860)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */
25fa9e4066Sahrens 
26fa9e4066Sahrens #include <sys/dmu.h>
27a9799022Sck #include <sys/dmu_objset.h>
28fa9e4066Sahrens #include <sys/dmu_tx.h>
29fa9e4066Sahrens #include <sys/dsl_dataset.h>
30fa9e4066Sahrens #include <sys/dsl_dir.h>
31fa9e4066Sahrens #include <sys/dsl_prop.h>
321d452cf5Sahrens #include <sys/dsl_synctask.h>
33ecd6cf80Smarks #include <sys/dsl_deleg.h>
34fa9e4066Sahrens #include <sys/spa.h>
35b24ab676SJeff Bonwick #include <sys/metaslab.h>
36fa9e4066Sahrens #include <sys/zap.h>
37fa9e4066Sahrens #include <sys/zio.h>
38fa9e4066Sahrens #include <sys/arc.h>
39ecd6cf80Smarks #include <sys/sunddi.h>
40fa9e4066Sahrens #include "zfs_namecheck.h"
41fa9e4066Sahrens 
42a9799022Sck static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
434445fffbSMatthew Ahrens static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd,
444445fffbSMatthew Ahrens     uint64_t value, dmu_tx_t *tx);
45fa9e4066Sahrens 
46fa9e4066Sahrens /* ARGSUSED */
47fa9e4066Sahrens static void
48fa9e4066Sahrens dsl_dir_evict(dmu_buf_t *db, void *arg)
49fa9e4066Sahrens {
50fa9e4066Sahrens 	dsl_dir_t *dd = arg;
51fa9e4066Sahrens 	dsl_pool_t *dp = dd->dd_pool;
52fa9e4066Sahrens 	int t;
53fa9e4066Sahrens 
54fa9e4066Sahrens 	for (t = 0; t < TXG_SIZE; t++) {
55fa9e4066Sahrens 		ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
56fa9e4066Sahrens 		ASSERT(dd->dd_tempreserved[t] == 0);
57fa9e4066Sahrens 		ASSERT(dd->dd_space_towrite[t] == 0);
58fa9e4066Sahrens 	}
59fa9e4066Sahrens 
60fa9e4066Sahrens 	if (dd->dd_parent)
61fa9e4066Sahrens 		dsl_dir_close(dd->dd_parent, dd);
62fa9e4066Sahrens 
63fa9e4066Sahrens 	spa_close(dd->dd_pool->dp_spa, dd);
64fa9e4066Sahrens 
65fa9e4066Sahrens 	/*
663f9d6ad7SLin Ling 	 * The props callback list should have been cleaned up by
673f9d6ad7SLin Ling 	 * objset_evict().
68fa9e4066Sahrens 	 */
69fa9e4066Sahrens 	list_destroy(&dd->dd_prop_cbs);
705ad82045Snd 	mutex_destroy(&dd->dd_lock);
71fa9e4066Sahrens 	kmem_free(dd, sizeof (dsl_dir_t));
72fa9e4066Sahrens }
73fa9e4066Sahrens 
74ea8dc4b6Seschrock int
75fa9e4066Sahrens dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
76ea8dc4b6Seschrock     const char *tail, void *tag, dsl_dir_t **ddp)
77fa9e4066Sahrens {
78fa9e4066Sahrens 	dmu_buf_t *dbuf;
79fa9e4066Sahrens 	dsl_dir_t *dd;
80ea8dc4b6Seschrock 	int err;
81fa9e4066Sahrens 
82fa9e4066Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
83fa9e4066Sahrens 	    dsl_pool_sync_context(dp));
84fa9e4066Sahrens 
85ea8dc4b6Seschrock 	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
86ea8dc4b6Seschrock 	if (err)
87ea8dc4b6Seschrock 		return (err);
88fa9e4066Sahrens 	dd = dmu_buf_get_user(dbuf);
89fa9e4066Sahrens #ifdef ZFS_DEBUG
90fa9e4066Sahrens 	{
91fa9e4066Sahrens 		dmu_object_info_t doi;
92fa9e4066Sahrens 		dmu_object_info_from_db(dbuf, &doi);
931649cd4bStabriz 		ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
9474e7dc98SMatthew Ahrens 		ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
95fa9e4066Sahrens 	}
96fa9e4066Sahrens #endif
97fa9e4066Sahrens 	if (dd == NULL) {
98fa9e4066Sahrens 		dsl_dir_t *winner;
99fa9e4066Sahrens 
100fa9e4066Sahrens 		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
101fa9e4066Sahrens 		dd->dd_object = ddobj;
102fa9e4066Sahrens 		dd->dd_dbuf = dbuf;
103fa9e4066Sahrens 		dd->dd_pool = dp;
104fa9e4066Sahrens 		dd->dd_phys = dbuf->db_data;
1055ad82045Snd 		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
106fa9e4066Sahrens 
107fa9e4066Sahrens 		list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
108fa9e4066Sahrens 		    offsetof(dsl_prop_cb_record_t, cbr_node));
109fa9e4066Sahrens 
11071eb0538SChris Kirby 		dsl_dir_snap_cmtime_update(dd);
11171eb0538SChris Kirby 
112fa9e4066Sahrens 		if (dd->dd_phys->dd_parent_obj) {
113ea8dc4b6Seschrock 			err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
114ea8dc4b6Seschrock 			    NULL, dd, &dd->dd_parent);
11574e7dc98SMatthew Ahrens 			if (err)
11674e7dc98SMatthew Ahrens 				goto errout;
117fa9e4066Sahrens 			if (tail) {
118fa9e4066Sahrens #ifdef ZFS_DEBUG
119fa9e4066Sahrens 				uint64_t foundobj;
120fa9e4066Sahrens 
121fa9e4066Sahrens 				err = zap_lookup(dp->dp_meta_objset,
122e7437265Sahrens 				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
123fa9e4066Sahrens 				    tail, sizeof (foundobj), 1, &foundobj);
124ea8dc4b6Seschrock 				ASSERT(err || foundobj == ddobj);
125fa9e4066Sahrens #endif
126fa9e4066Sahrens 				(void) strcpy(dd->dd_myname, tail);
127fa9e4066Sahrens 			} else {
128fa9e4066Sahrens 				err = zap_value_search(dp->dp_meta_objset,
129e7437265Sahrens 				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
130e7437265Sahrens 				    ddobj, 0, dd->dd_myname);
131ea8dc4b6Seschrock 			}
13274e7dc98SMatthew Ahrens 			if (err)
13374e7dc98SMatthew Ahrens 				goto errout;
134fa9e4066Sahrens 		} else {
135fa9e4066Sahrens 			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
136fa9e4066Sahrens 		}
137fa9e4066Sahrens 
1383f9d6ad7SLin Ling 		if (dsl_dir_is_clone(dd)) {
1393f9d6ad7SLin Ling 			dmu_buf_t *origin_bonus;
1403f9d6ad7SLin Ling 			dsl_dataset_phys_t *origin_phys;
1413f9d6ad7SLin Ling 
1423f9d6ad7SLin Ling 			/*
1433f9d6ad7SLin Ling 			 * We can't open the origin dataset, because
1443f9d6ad7SLin Ling 			 * that would require opening this dsl_dir.
1453f9d6ad7SLin Ling 			 * Just look at its phys directly instead.
1463f9d6ad7SLin Ling 			 */
1473f9d6ad7SLin Ling 			err = dmu_bonus_hold(dp->dp_meta_objset,
1483f9d6ad7SLin Ling 			    dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
1493f9d6ad7SLin Ling 			if (err)
1503f9d6ad7SLin Ling 				goto errout;
1513f9d6ad7SLin Ling 			origin_phys = origin_bonus->db_data;
1523f9d6ad7SLin Ling 			dd->dd_origin_txg =
1533f9d6ad7SLin Ling 			    origin_phys->ds_creation_txg;
1543f9d6ad7SLin Ling 			dmu_buf_rele(origin_bonus, FTAG);
1553f9d6ad7SLin Ling 		}
1563f9d6ad7SLin Ling 
157fa9e4066Sahrens 		winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
158fa9e4066Sahrens 		    dsl_dir_evict);
159fa9e4066Sahrens 		if (winner) {
160fa9e4066Sahrens 			if (dd->dd_parent)
161fa9e4066Sahrens 				dsl_dir_close(dd->dd_parent, dd);
1625ad82045Snd 			mutex_destroy(&dd->dd_lock);
163fa9e4066Sahrens 			kmem_free(dd, sizeof (dsl_dir_t));
164fa9e4066Sahrens 			dd = winner;
165fa9e4066Sahrens 		} else {
166fa9e4066Sahrens 			spa_open_ref(dp->dp_spa, dd);
167fa9e4066Sahrens 		}
168fa9e4066Sahrens 	}
169fa9e4066Sahrens 
170fa9e4066Sahrens 	/*
171fa9e4066Sahrens 	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
172fa9e4066Sahrens 	 * holds on the spa.  We need the open-to-close holds because
173fa9e4066Sahrens 	 * otherwise the spa_refcnt wouldn't change when we open a
174fa9e4066Sahrens 	 * dir which the spa also has open, so we could incorrectly
175fa9e4066Sahrens 	 * think it was OK to unload/export/destroy the pool.  We need
176fa9e4066Sahrens 	 * the instantiate-to-evict hold because the dsl_dir_t has a
177fa9e4066Sahrens 	 * pointer to the dd_pool, which has a pointer to the spa_t.
178fa9e4066Sahrens 	 */
179fa9e4066Sahrens 	spa_open_ref(dp->dp_spa, tag);
180fa9e4066Sahrens 	ASSERT3P(dd->dd_pool, ==, dp);
181fa9e4066Sahrens 	ASSERT3U(dd->dd_object, ==, ddobj);
182fa9e4066Sahrens 	ASSERT3P(dd->dd_dbuf, ==, dbuf);
183ea8dc4b6Seschrock 	*ddp = dd;
184ea8dc4b6Seschrock 	return (0);
18574e7dc98SMatthew Ahrens 
18674e7dc98SMatthew Ahrens errout:
18774e7dc98SMatthew Ahrens 	if (dd->dd_parent)
18874e7dc98SMatthew Ahrens 		dsl_dir_close(dd->dd_parent, dd);
18974e7dc98SMatthew Ahrens 	mutex_destroy(&dd->dd_lock);
19074e7dc98SMatthew Ahrens 	kmem_free(dd, sizeof (dsl_dir_t));
19174e7dc98SMatthew Ahrens 	dmu_buf_rele(dbuf, tag);
19274e7dc98SMatthew Ahrens 	return (err);
193fa9e4066Sahrens }
194fa9e4066Sahrens 
195fa9e4066Sahrens void
196fa9e4066Sahrens dsl_dir_close(dsl_dir_t *dd, void *tag)
197fa9e4066Sahrens {
198fa9e4066Sahrens 	dprintf_dd(dd, "%s\n", "");
199fa9e4066Sahrens 	spa_close(dd->dd_pool->dp_spa, tag);
200ea8dc4b6Seschrock 	dmu_buf_rele(dd->dd_dbuf, tag);
201fa9e4066Sahrens }
202fa9e4066Sahrens 
20315f66a7fSek /* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
204fa9e4066Sahrens void
205fa9e4066Sahrens dsl_dir_name(dsl_dir_t *dd, char *buf)
206fa9e4066Sahrens {
207fa9e4066Sahrens 	if (dd->dd_parent) {
208fa9e4066Sahrens 		dsl_dir_name(dd->dd_parent, buf);
209fa9e4066Sahrens 		(void) strcat(buf, "/");
210fa9e4066Sahrens 	} else {
211fa9e4066Sahrens 		buf[0] = '\0';
212fa9e4066Sahrens 	}
213fa9e4066Sahrens 	if (!MUTEX_HELD(&dd->dd_lock)) {
214fa9e4066Sahrens 		/*
215fa9e4066Sahrens 		 * recursive mutex so that we can use
216fa9e4066Sahrens 		 * dprintf_dd() with dd_lock held
217fa9e4066Sahrens 		 */
218fa9e4066Sahrens 		mutex_enter(&dd->dd_lock);
219fa9e4066Sahrens 		(void) strcat(buf, dd->dd_myname);
220fa9e4066Sahrens 		mutex_exit(&dd->dd_lock);
221fa9e4066Sahrens 	} else {
222fa9e4066Sahrens 		(void) strcat(buf, dd->dd_myname);
223fa9e4066Sahrens 	}
224fa9e4066Sahrens }
225fa9e4066Sahrens 
226*ce636f8bSMatthew Ahrens /* Calculate name length, avoiding all the strcat calls of dsl_dir_name */
227b7661cccSmmusante int
228b7661cccSmmusante dsl_dir_namelen(dsl_dir_t *dd)
229b7661cccSmmusante {
230b7661cccSmmusante 	int result = 0;
231b7661cccSmmusante 
232b7661cccSmmusante 	if (dd->dd_parent) {
233b7661cccSmmusante 		/* parent's name + 1 for the "/" */
234b7661cccSmmusante 		result = dsl_dir_namelen(dd->dd_parent) + 1;
235b7661cccSmmusante 	}
236b7661cccSmmusante 
237b7661cccSmmusante 	if (!MUTEX_HELD(&dd->dd_lock)) {
238b7661cccSmmusante 		/* see dsl_dir_name */
239b7661cccSmmusante 		mutex_enter(&dd->dd_lock);
240b7661cccSmmusante 		result += strlen(dd->dd_myname);
241b7661cccSmmusante 		mutex_exit(&dd->dd_lock);
242b7661cccSmmusante 	} else {
243b7661cccSmmusante 		result += strlen(dd->dd_myname);
244b7661cccSmmusante 	}
245b7661cccSmmusante 
246b7661cccSmmusante 	return (result);
247b7661cccSmmusante }
248b7661cccSmmusante 
249fa9e4066Sahrens static int
250fa9e4066Sahrens getcomponent(const char *path, char *component, const char **nextp)
251fa9e4066Sahrens {
252fa9e4066Sahrens 	char *p;
253ccba0801SRich Morris 	if ((path == NULL) || (path[0] == '\0'))
254203a47d8Snd 		return (ENOENT);
255fa9e4066Sahrens 	/* This would be a good place to reserve some namespace... */
256fa9e4066Sahrens 	p = strpbrk(path, "/@");
257fa9e4066Sahrens 	if (p && (p[1] == '/' || p[1] == '@')) {
258fa9e4066Sahrens 		/* two separators in a row */
259fa9e4066Sahrens 		return (EINVAL);
260fa9e4066Sahrens 	}
261fa9e4066Sahrens 	if (p == NULL || p == path) {
262fa9e4066Sahrens 		/*
263fa9e4066Sahrens 		 * if the first thing is an @ or /, it had better be an
264fa9e4066Sahrens 		 * @ and it had better not have any more ats or slashes,
265fa9e4066Sahrens 		 * and it had better have something after the @.
266fa9e4066Sahrens 		 */
267fa9e4066Sahrens 		if (p != NULL &&
268fa9e4066Sahrens 		    (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
269fa9e4066Sahrens 			return (EINVAL);
270fa9e4066Sahrens 		if (strlen(path) >= MAXNAMELEN)
271fa9e4066Sahrens 			return (ENAMETOOLONG);
272fa9e4066Sahrens 		(void) strcpy(component, path);
273fa9e4066Sahrens 		p = NULL;
274fa9e4066Sahrens 	} else if (p[0] == '/') {
275fa9e4066Sahrens 		if (p-path >= MAXNAMELEN)
276fa9e4066Sahrens 			return (ENAMETOOLONG);
277fa9e4066Sahrens 		(void) strncpy(component, path, p - path);
278fa9e4066Sahrens 		component[p-path] = '\0';
279fa9e4066Sahrens 		p++;
280fa9e4066Sahrens 	} else if (p[0] == '@') {
281fa9e4066Sahrens 		/*
282fa9e4066Sahrens 		 * if the next separator is an @, there better not be
283fa9e4066Sahrens 		 * any more slashes.
284fa9e4066Sahrens 		 */
285fa9e4066Sahrens 		if (strchr(path, '/'))
286fa9e4066Sahrens 			return (EINVAL);
287fa9e4066Sahrens 		if (p-path >= MAXNAMELEN)
288fa9e4066Sahrens 			return (ENAMETOOLONG);
289fa9e4066Sahrens 		(void) strncpy(component, path, p - path);
290fa9e4066Sahrens 		component[p-path] = '\0';
291fa9e4066Sahrens 	} else {
292fa9e4066Sahrens 		ASSERT(!"invalid p");
293fa9e4066Sahrens 	}
294fa9e4066Sahrens 	*nextp = p;
295fa9e4066Sahrens 	return (0);
296fa9e4066Sahrens }
297fa9e4066Sahrens 
298fa9e4066Sahrens /*
299fa9e4066Sahrens  * same as dsl_open_dir, ignore the first component of name and use the
300fa9e4066Sahrens  * spa instead
301fa9e4066Sahrens  */
302ea8dc4b6Seschrock int
303ea8dc4b6Seschrock dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
304ea8dc4b6Seschrock     dsl_dir_t **ddp, const char **tailp)
305fa9e4066Sahrens {
306fa9e4066Sahrens 	char buf[MAXNAMELEN];
307fa9e4066Sahrens 	const char *next, *nextnext = NULL;
308fa9e4066Sahrens 	int err;
309fa9e4066Sahrens 	dsl_dir_t *dd;
310fa9e4066Sahrens 	dsl_pool_t *dp;
311fa9e4066Sahrens 	uint64_t ddobj;
312fa9e4066Sahrens 	int openedspa = FALSE;
313fa9e4066Sahrens 
314fa9e4066Sahrens 	dprintf("%s\n", name);
315fa9e4066Sahrens 
316fa9e4066Sahrens 	err = getcomponent(name, buf, &next);
317fa9e4066Sahrens 	if (err)
318ea8dc4b6Seschrock 		return (err);
319fa9e4066Sahrens 	if (spa == NULL) {
320fa9e4066Sahrens 		err = spa_open(buf, &spa, FTAG);
321fa9e4066Sahrens 		if (err) {
322fa9e4066Sahrens 			dprintf("spa_open(%s) failed\n", buf);
323ea8dc4b6Seschrock 			return (err);
324fa9e4066Sahrens 		}
325fa9e4066Sahrens 		openedspa = TRUE;
326fa9e4066Sahrens 
327fa9e4066Sahrens 		/* XXX this assertion belongs in spa_open */
328fa9e4066Sahrens 		ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
329fa9e4066Sahrens 	}
330fa9e4066Sahrens 
331fa9e4066Sahrens 	dp = spa_get_dsl(spa);
332fa9e4066Sahrens 
333fa9e4066Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
334ea8dc4b6Seschrock 	err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
335ea8dc4b6Seschrock 	if (err) {
336ea8dc4b6Seschrock 		rw_exit(&dp->dp_config_rwlock);
337ea8dc4b6Seschrock 		if (openedspa)
338ea8dc4b6Seschrock 			spa_close(spa, FTAG);
339ea8dc4b6Seschrock 		return (err);
340ea8dc4b6Seschrock 	}
341ea8dc4b6Seschrock 
342fa9e4066Sahrens 	while (next != NULL) {
343fa9e4066Sahrens 		dsl_dir_t *child_ds;
344fa9e4066Sahrens 		err = getcomponent(next, buf, &nextnext);
345ea8dc4b6Seschrock 		if (err)
346ea8dc4b6Seschrock 			break;
347fa9e4066Sahrens 		ASSERT(next[0] != '\0');
348fa9e4066Sahrens 		if (next[0] == '@')
349fa9e4066Sahrens 			break;
350fa9e4066Sahrens 		dprintf("looking up %s in obj%lld\n",
351fa9e4066Sahrens 		    buf, dd->dd_phys->dd_child_dir_zapobj);
352fa9e4066Sahrens 
353fa9e4066Sahrens 		err = zap_lookup(dp->dp_meta_objset,
354fa9e4066Sahrens 		    dd->dd_phys->dd_child_dir_zapobj,
355fa9e4066Sahrens 		    buf, sizeof (ddobj), 1, &ddobj);
356ea8dc4b6Seschrock 		if (err) {
357ea8dc4b6Seschrock 			if (err == ENOENT)
358ea8dc4b6Seschrock 				err = 0;
359fa9e4066Sahrens 			break;
360fa9e4066Sahrens 		}
361fa9e4066Sahrens 
362ea8dc4b6Seschrock 		err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
363ea8dc4b6Seschrock 		if (err)
364ea8dc4b6Seschrock 			break;
365fa9e4066Sahrens 		dsl_dir_close(dd, tag);
366fa9e4066Sahrens 		dd = child_ds;
367fa9e4066Sahrens 		next = nextnext;
368fa9e4066Sahrens 	}
369fa9e4066Sahrens 	rw_exit(&dp->dp_config_rwlock);
370fa9e4066Sahrens 
371ea8dc4b6Seschrock 	if (err) {
372ea8dc4b6Seschrock 		dsl_dir_close(dd, tag);
373ea8dc4b6Seschrock 		if (openedspa)
374ea8dc4b6Seschrock 			spa_close(spa, FTAG);
375ea8dc4b6Seschrock 		return (err);
376ea8dc4b6Seschrock 	}
377ea8dc4b6Seschrock 
378fa9e4066Sahrens 	/*
379fa9e4066Sahrens 	 * It's an error if there's more than one component left, or
380fa9e4066Sahrens 	 * tailp==NULL and there's any component left.
381fa9e4066Sahrens 	 */
382fa9e4066Sahrens 	if (next != NULL &&
383fa9e4066Sahrens 	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
384fa9e4066Sahrens 		/* bad path name */
385fa9e4066Sahrens 		dsl_dir_close(dd, tag);
386fa9e4066Sahrens 		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
387ea8dc4b6Seschrock 		err = ENOENT;
388fa9e4066Sahrens 	}
389fa9e4066Sahrens 	if (tailp)
390fa9e4066Sahrens 		*tailp = next;
391fa9e4066Sahrens 	if (openedspa)
392fa9e4066Sahrens 		spa_close(spa, FTAG);
393ea8dc4b6Seschrock 	*ddp = dd;
394ea8dc4b6Seschrock 	return (err);
395fa9e4066Sahrens }
396fa9e4066Sahrens 
397fa9e4066Sahrens /*
398fa9e4066Sahrens  * Return the dsl_dir_t, and possibly the last component which couldn't
399fa9e4066Sahrens  * be found in *tail.  Return NULL if the path is bogus, or if
400fa9e4066Sahrens  * tail==NULL and we couldn't parse the whole name.  (*tail)[0] == '@'
401fa9e4066Sahrens  * means that the last component is a snapshot.
402fa9e4066Sahrens  */
403ea8dc4b6Seschrock int
404ea8dc4b6Seschrock dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
405fa9e4066Sahrens {
406ea8dc4b6Seschrock 	return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
407fa9e4066Sahrens }
408fa9e4066Sahrens 
4091d452cf5Sahrens uint64_t
410088f3894Sahrens dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
411088f3894Sahrens     dmu_tx_t *tx)
412fa9e4066Sahrens {
413088f3894Sahrens 	objset_t *mos = dp->dp_meta_objset;
414fa9e4066Sahrens 	uint64_t ddobj;
415cde58dbcSMatthew Ahrens 	dsl_dir_phys_t *ddphys;
416fa9e4066Sahrens 	dmu_buf_t *dbuf;
417fa9e4066Sahrens 
4181649cd4bStabriz 	ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
4191649cd4bStabriz 	    DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
420088f3894Sahrens 	if (pds) {
421088f3894Sahrens 		VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
422088f3894Sahrens 		    name, sizeof (uint64_t), 1, &ddobj, tx));
423088f3894Sahrens 	} else {
424088f3894Sahrens 		/* it's the root dir */
425088f3894Sahrens 		VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
426088f3894Sahrens 		    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
427088f3894Sahrens 	}
428ea8dc4b6Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
429fa9e4066Sahrens 	dmu_buf_will_dirty(dbuf, tx);
430cde58dbcSMatthew Ahrens 	ddphys = dbuf->db_data;
431fa9e4066Sahrens 
432cde58dbcSMatthew Ahrens 	ddphys->dd_creation_time = gethrestime_sec();
433088f3894Sahrens 	if (pds)
434cde58dbcSMatthew Ahrens 		ddphys->dd_parent_obj = pds->dd_object;
435cde58dbcSMatthew Ahrens 	ddphys->dd_props_zapobj = zap_create(mos,
436fa9e4066Sahrens 	    DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
437cde58dbcSMatthew Ahrens 	ddphys->dd_child_dir_zapobj = zap_create(mos,
43887e5029aSahrens 	    DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
43974e7dc98SMatthew Ahrens 	if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
440cde58dbcSMatthew Ahrens 		ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
441ea8dc4b6Seschrock 	dmu_buf_rele(dbuf, FTAG);
442fa9e4066Sahrens 
4431d452cf5Sahrens 	return (ddobj);
444fa9e4066Sahrens }
445fa9e4066Sahrens 
4461d452cf5Sahrens /* ARGSUSED */
447fa9e4066Sahrens int
4481d452cf5Sahrens dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
449fa9e4066Sahrens {
4504445fffbSMatthew Ahrens 	dsl_dir_t *dd = arg1;
4511d452cf5Sahrens 	dsl_pool_t *dp = dd->dd_pool;
452fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
4531d452cf5Sahrens 	int err;
4541d452cf5Sahrens 	uint64_t count;
455fa9e4066Sahrens 
4561d452cf5Sahrens 	/*
4571d452cf5Sahrens 	 * There should be exactly two holds, both from
4581d452cf5Sahrens 	 * dsl_dataset_destroy: one on the dd directory, and one on its
4591d452cf5Sahrens 	 * head ds.  Otherwise, someone is trying to lookup something
4601d452cf5Sahrens 	 * inside this dir while we want to destroy it.  The
4611d452cf5Sahrens 	 * config_rwlock ensures that nobody else opens it after we
4621d452cf5Sahrens 	 * check.
4631d452cf5Sahrens 	 */
4641d452cf5Sahrens 	if (dmu_buf_refcount(dd->dd_dbuf) > 2)
4651d452cf5Sahrens 		return (EBUSY);
466fa9e4066Sahrens 
4671d452cf5Sahrens 	err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
468ea8dc4b6Seschrock 	if (err)
4691d452cf5Sahrens 		return (err);
4701d452cf5Sahrens 	if (count != 0)
4711d452cf5Sahrens 		return (EEXIST);
472fa9e4066Sahrens 
4731d452cf5Sahrens 	return (0);
4741d452cf5Sahrens }
475fa9e4066Sahrens 
4761d452cf5Sahrens void
4773f9d6ad7SLin Ling dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
4781d452cf5Sahrens {
4794445fffbSMatthew Ahrens 	dsl_dir_t *dd = arg1;
4801d452cf5Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
48192241e0bSTom Erickson 	uint64_t obj;
48274e7dc98SMatthew Ahrens 	dd_used_t t;
483fa9e4066Sahrens 
4841d452cf5Sahrens 	ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
485fa9e4066Sahrens 	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
486fa9e4066Sahrens 
4874445fffbSMatthew Ahrens 	/*
4884445fffbSMatthew Ahrens 	 * Remove our reservation. The impl() routine avoids setting the
4894445fffbSMatthew Ahrens 	 * actual property, which would require the (already destroyed) ds.
4904445fffbSMatthew Ahrens 	 */
4914445fffbSMatthew Ahrens 	dsl_dir_set_reservation_sync_impl(dd, 0, tx);
49292241e0bSTom Erickson 
493b420f3adSRichard Lowe 	ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0);
494b420f3adSRichard Lowe 	ASSERT3U(dd->dd_phys->dd_reserved, ==, 0);
49574e7dc98SMatthew Ahrens 	for (t = 0; t < DD_USED_NUM; t++)
496b420f3adSRichard Lowe 		ASSERT3U(dd->dd_phys->dd_used_breakdown[t], ==, 0);
497fa9e4066Sahrens 
4981d452cf5Sahrens 	VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
4991d452cf5Sahrens 	VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
500ecd6cf80Smarks 	VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
5011d452cf5Sahrens 	VERIFY(0 == zap_remove(mos,
5021d452cf5Sahrens 	    dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
503fa9e4066Sahrens 
5041d452cf5Sahrens 	obj = dd->dd_object;
5051d452cf5Sahrens 	dsl_dir_close(dd, tag);
5061d452cf5Sahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
507fa9e4066Sahrens }
508fa9e4066Sahrens 
509088f3894Sahrens boolean_t
510088f3894Sahrens dsl_dir_is_clone(dsl_dir_t *dd)
511fa9e4066Sahrens {
512088f3894Sahrens 	return (dd->dd_phys->dd_origin_obj &&
513088f3894Sahrens 	    (dd->dd_pool->dp_origin_snap == NULL ||
514088f3894Sahrens 	    dd->dd_phys->dd_origin_obj !=
515088f3894Sahrens 	    dd->dd_pool->dp_origin_snap->ds_object));
516fa9e4066Sahrens }
517fa9e4066Sahrens 
518fa9e4066Sahrens void
519a2eea2e1Sahrens dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
520fa9e4066Sahrens {
521fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
52274e7dc98SMatthew Ahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
52374e7dc98SMatthew Ahrens 	    dd->dd_phys->dd_used_bytes);
524a9799022Sck 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota);
525a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
526a2eea2e1Sahrens 	    dd->dd_phys->dd_reserved);
527a2eea2e1Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
528a2eea2e1Sahrens 	    dd->dd_phys->dd_compressed_bytes == 0 ? 100 :
529a2eea2e1Sahrens 	    (dd->dd_phys->dd_uncompressed_bytes * 100 /
530a2eea2e1Sahrens 	    dd->dd_phys->dd_compressed_bytes));
53174e7dc98SMatthew Ahrens 	if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
53274e7dc98SMatthew Ahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP,
53374e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]);
53474e7dc98SMatthew Ahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS,
53574e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]);
53674e7dc98SMatthew Ahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV,
53774e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]);
53874e7dc98SMatthew Ahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD,
53974e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] +
54074e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]);
54174e7dc98SMatthew Ahrens 	}
542fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
543fa9e4066Sahrens 
5444ccbb6e7Sahrens 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
545088f3894Sahrens 	if (dsl_dir_is_clone(dd)) {
546fa9e4066Sahrens 		dsl_dataset_t *ds;
547a2eea2e1Sahrens 		char buf[MAXNAMELEN];
548fa9e4066Sahrens 
549745cd3c5Smaybee 		VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
550745cd3c5Smaybee 		    dd->dd_phys->dd_origin_obj, FTAG, &ds));
551a2eea2e1Sahrens 		dsl_dataset_name(ds, buf);
552745cd3c5Smaybee 		dsl_dataset_rele(ds, FTAG);
553a2eea2e1Sahrens 		dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
554fa9e4066Sahrens 	}
5554ccbb6e7Sahrens 	rw_exit(&dd->dd_pool->dp_config_rwlock);
556fa9e4066Sahrens }
557fa9e4066Sahrens 
558fa9e4066Sahrens void
559fa9e4066Sahrens dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
560fa9e4066Sahrens {
561fa9e4066Sahrens 	dsl_pool_t *dp = dd->dd_pool;
562fa9e4066Sahrens 
563fa9e4066Sahrens 	ASSERT(dd->dd_phys);
564fa9e4066Sahrens 
565fa9e4066Sahrens 	if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) {
566fa9e4066Sahrens 		/* up the hold count until we can be written out */
567fa9e4066Sahrens 		dmu_buf_add_ref(dd->dd_dbuf, dd);
568fa9e4066Sahrens 	}
569fa9e4066Sahrens }
570fa9e4066Sahrens 
571fa9e4066Sahrens static int64_t
572fa9e4066Sahrens parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta)
573fa9e4066Sahrens {
574fa9e4066Sahrens 	uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved);
575fa9e4066Sahrens 	uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved);
576fa9e4066Sahrens 	return (new_accounted - old_accounted);
577fa9e4066Sahrens }
578fa9e4066Sahrens 
579fa9e4066Sahrens void
580fa9e4066Sahrens dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
581fa9e4066Sahrens {
582fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
583fa9e4066Sahrens 
584fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
585b420f3adSRichard Lowe 	ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0);
586fa9e4066Sahrens 	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
587fa9e4066Sahrens 	    dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
588fa9e4066Sahrens 	dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
589fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
590fa9e4066Sahrens 
591fa9e4066Sahrens 	/* release the hold from dsl_dir_dirty */
592ea8dc4b6Seschrock 	dmu_buf_rele(dd->dd_dbuf, dd);
593fa9e4066Sahrens }
594fa9e4066Sahrens 
595fa9e4066Sahrens static uint64_t
596a9799022Sck dsl_dir_space_towrite(dsl_dir_t *dd)
597fa9e4066Sahrens {
598a9799022Sck 	uint64_t space = 0;
599fa9e4066Sahrens 	int i;
600fa9e4066Sahrens 
601fa9e4066Sahrens 	ASSERT(MUTEX_HELD(&dd->dd_lock));
602fa9e4066Sahrens 
603fa9e4066Sahrens 	for (i = 0; i < TXG_SIZE; i++) {
604fa9e4066Sahrens 		space += dd->dd_space_towrite[i&TXG_MASK];
605fa9e4066Sahrens 		ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
606fa9e4066Sahrens 	}
607fa9e4066Sahrens 	return (space);
608fa9e4066Sahrens }
609fa9e4066Sahrens 
610fa9e4066Sahrens /*
611fa9e4066Sahrens  * How much space would dd have available if ancestor had delta applied
612fa9e4066Sahrens  * to it?  If ondiskonly is set, we're only interested in what's
613fa9e4066Sahrens  * on-disk, not estimated pending changes.
614fa9e4066Sahrens  */
615a2eea2e1Sahrens uint64_t
616fa9e4066Sahrens dsl_dir_space_available(dsl_dir_t *dd,
617fa9e4066Sahrens     dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
618fa9e4066Sahrens {
619fa9e4066Sahrens 	uint64_t parentspace, myspace, quota, used;
620fa9e4066Sahrens 
621fa9e4066Sahrens 	/*
622fa9e4066Sahrens 	 * If there are no restrictions otherwise, assume we have
623fa9e4066Sahrens 	 * unlimited space available.
624fa9e4066Sahrens 	 */
625fa9e4066Sahrens 	quota = UINT64_MAX;
626fa9e4066Sahrens 	parentspace = UINT64_MAX;
627fa9e4066Sahrens 
628fa9e4066Sahrens 	if (dd->dd_parent != NULL) {
629fa9e4066Sahrens 		parentspace = dsl_dir_space_available(dd->dd_parent,
630fa9e4066Sahrens 		    ancestor, delta, ondiskonly);
631fa9e4066Sahrens 	}
632fa9e4066Sahrens 
633fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
634fa9e4066Sahrens 	if (dd->dd_phys->dd_quota != 0)
635fa9e4066Sahrens 		quota = dd->dd_phys->dd_quota;
63674e7dc98SMatthew Ahrens 	used = dd->dd_phys->dd_used_bytes;
637a9799022Sck 	if (!ondiskonly)
638a9799022Sck 		used += dsl_dir_space_towrite(dd);
639fa9e4066Sahrens 
640fa9e4066Sahrens 	if (dd->dd_parent == NULL) {
64199653d4eSeschrock 		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
642fa9e4066Sahrens 		quota = MIN(quota, poolsize);
643fa9e4066Sahrens 	}
644fa9e4066Sahrens 
645fa9e4066Sahrens 	if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) {
646fa9e4066Sahrens 		/*
647fa9e4066Sahrens 		 * We have some space reserved, in addition to what our
648fa9e4066Sahrens 		 * parent gave us.
649fa9e4066Sahrens 		 */
650fa9e4066Sahrens 		parentspace += dd->dd_phys->dd_reserved - used;
651fa9e4066Sahrens 	}
652fa9e4066Sahrens 
65374e7dc98SMatthew Ahrens 	if (dd == ancestor) {
65474e7dc98SMatthew Ahrens 		ASSERT(delta <= 0);
65574e7dc98SMatthew Ahrens 		ASSERT(used >= -delta);
65674e7dc98SMatthew Ahrens 		used += delta;
65774e7dc98SMatthew Ahrens 		if (parentspace != UINT64_MAX)
65874e7dc98SMatthew Ahrens 			parentspace -= delta;
65974e7dc98SMatthew Ahrens 	}
66074e7dc98SMatthew Ahrens 
661fa9e4066Sahrens 	if (used > quota) {
662fa9e4066Sahrens 		/* over quota */
663fa9e4066Sahrens 		myspace = 0;
664fa9e4066Sahrens 	} else {
665fa9e4066Sahrens 		/*
66699653d4eSeschrock 		 * the lesser of the space provided by our parent and
66799653d4eSeschrock 		 * the space left in our quota
668fa9e4066Sahrens 		 */
669fa9e4066Sahrens 		myspace = MIN(parentspace, quota - used);
670fa9e4066Sahrens 	}
671fa9e4066Sahrens 
672fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
673fa9e4066Sahrens 
674fa9e4066Sahrens 	return (myspace);
675fa9e4066Sahrens }
676fa9e4066Sahrens 
677fa9e4066Sahrens struct tempreserve {
678fa9e4066Sahrens 	list_node_t tr_node;
6791ab7f2deSmaybee 	dsl_pool_t *tr_dp;
680fa9e4066Sahrens 	dsl_dir_t *tr_ds;
681fa9e4066Sahrens 	uint64_t tr_size;
682fa9e4066Sahrens };
683fa9e4066Sahrens 
684fa9e4066Sahrens static int
685a9799022Sck dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
686a9799022Sck     boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
6876df6c3bcSck     dmu_tx_t *tx, boolean_t first)
688fa9e4066Sahrens {
689fa9e4066Sahrens 	uint64_t txg = tx->tx_txg;
690a9799022Sck 	uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
691468c413aSTim Haley 	uint64_t deferred = 0;
692a9799022Sck 	struct tempreserve *tr;
693468c413aSTim Haley 	int retval = EDQUOT;
694fa9e4066Sahrens 	int txgidx = txg & TXG_MASK;
695fa9e4066Sahrens 	int i;
6969082849eSck 	uint64_t ref_rsrv = 0;
697fa9e4066Sahrens 
698fa9e4066Sahrens 	ASSERT3U(txg, !=, 0);
699a9799022Sck 	ASSERT3S(asize, >, 0);
700fa9e4066Sahrens 
701fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
702a9799022Sck 
703fa9e4066Sahrens 	/*
704fa9e4066Sahrens 	 * Check against the dsl_dir's quota.  We don't add in the delta
705fa9e4066Sahrens 	 * when checking for over-quota because they get one free hit.
706fa9e4066Sahrens 	 */
707a9799022Sck 	est_inflight = dsl_dir_space_towrite(dd);
708fa9e4066Sahrens 	for (i = 0; i < TXG_SIZE; i++)
709a9799022Sck 		est_inflight += dd->dd_tempreserved[i];
71074e7dc98SMatthew Ahrens 	used_on_disk = dd->dd_phys->dd_used_bytes;
711fa9e4066Sahrens 
712f4d2e9e6Smaybee 	/*
7136df6c3bcSck 	 * On the first iteration, fetch the dataset's used-on-disk and
7146df6c3bcSck 	 * refreservation values. Also, if checkrefquota is set, test if
7156df6c3bcSck 	 * allocating this space would exceed the dataset's refquota.
716f4d2e9e6Smaybee 	 */
7176df6c3bcSck 	if (first && tx->tx_objset) {
718c3fdb13aSmaybee 		int error;
719503ad85cSMatthew Ahrens 		dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
7209082849eSck 
721a9799022Sck 		error = dsl_dataset_check_quota(ds, checkrefquota,
7229082849eSck 		    asize, est_inflight, &used_on_disk, &ref_rsrv);
723a9799022Sck 		if (error) {
724a9799022Sck 			mutex_exit(&dd->dd_lock);
725a9799022Sck 			return (error);
726a9799022Sck 		}
727a9799022Sck 	}
728a9799022Sck 
729a9799022Sck 	/*
730a9799022Sck 	 * If this transaction will result in a net free of space,
731a9799022Sck 	 * we want to let it through.
732a9799022Sck 	 */
733a9799022Sck 	if (ignorequota || netfree || dd->dd_phys->dd_quota == 0)
734f4d2e9e6Smaybee 		quota = UINT64_MAX;
735f4d2e9e6Smaybee 	else
736fa9e4066Sahrens 		quota = dd->dd_phys->dd_quota;
737fa9e4066Sahrens 
738fa9e4066Sahrens 	/*
739468c413aSTim Haley 	 * Adjust the quota against the actual pool size at the root
740468c413aSTim Haley 	 * minus any outstanding deferred frees.
741f4d2e9e6Smaybee 	 * To ensure that it's possible to remove files from a full
742f4d2e9e6Smaybee 	 * pool without inducing transient overcommits, we throttle
743fa9e4066Sahrens 	 * netfree transactions against a quota that is slightly larger,
744fa9e4066Sahrens 	 * but still within the pool's allocation slop.  In cases where
745fa9e4066Sahrens 	 * we're very close to full, this will allow a steady trickle of
746fa9e4066Sahrens 	 * removes to get through.
747fa9e4066Sahrens 	 */
7481934e92fSmaybee 	if (dd->dd_parent == NULL) {
749b24ab676SJeff Bonwick 		spa_t *spa = dd->dd_pool->dp_spa;
750fa9e4066Sahrens 		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
751b24ab676SJeff Bonwick 		deferred = metaslab_class_get_deferred(spa_normal_class(spa));
752468c413aSTim Haley 		if (poolsize - deferred < quota) {
753468c413aSTim Haley 			quota = poolsize - deferred;
754468c413aSTim Haley 			retval = ENOSPC;
755fa9e4066Sahrens 		}
756fa9e4066Sahrens 	}
757fa9e4066Sahrens 
758fa9e4066Sahrens 	/*
759fa9e4066Sahrens 	 * If they are requesting more space, and our current estimate
760a9799022Sck 	 * is over quota, they get to try again unless the actual
761ea8dc4b6Seschrock 	 * on-disk is over quota and there are no pending changes (which
762ea8dc4b6Seschrock 	 * may free up space for us).
763fa9e4066Sahrens 	 */
764468c413aSTim Haley 	if (used_on_disk + est_inflight >= quota) {
765468c413aSTim Haley 		if (est_inflight > 0 || used_on_disk < quota ||
766468c413aSTim Haley 		    (retval == ENOSPC && used_on_disk < quota + deferred))
767468c413aSTim Haley 			retval = ERESTART;
768a9799022Sck 		dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
769fa9e4066Sahrens 		    "quota=%lluK tr=%lluK err=%d\n",
770a9799022Sck 		    used_on_disk>>10, est_inflight>>10,
771468c413aSTim Haley 		    quota>>10, asize>>10, retval);
772fa9e4066Sahrens 		mutex_exit(&dd->dd_lock);
773468c413aSTim Haley 		return (retval);
774fa9e4066Sahrens 	}
775fa9e4066Sahrens 
776fa9e4066Sahrens 	/* We need to up our estimated delta before dropping dd_lock */
777fa9e4066Sahrens 	dd->dd_tempreserved[txgidx] += asize;
778fa9e4066Sahrens 
7799082849eSck 	parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
7809082849eSck 	    asize - ref_rsrv);
781fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
782fa9e4066Sahrens 
7831ab7f2deSmaybee 	tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
784fa9e4066Sahrens 	tr->tr_ds = dd;
785fa9e4066Sahrens 	tr->tr_size = asize;
786fa9e4066Sahrens 	list_insert_tail(tr_list, tr);
787fa9e4066Sahrens 
788fa9e4066Sahrens 	/* see if it's OK with our parent */
7891934e92fSmaybee 	if (dd->dd_parent && parent_rsrv) {
7901934e92fSmaybee 		boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0);
7911934e92fSmaybee 
792fa9e4066Sahrens 		return (dsl_dir_tempreserve_impl(dd->dd_parent,
7936df6c3bcSck 		    parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE));
794fa9e4066Sahrens 	} else {
795fa9e4066Sahrens 		return (0);
796fa9e4066Sahrens 	}
797fa9e4066Sahrens }
798fa9e4066Sahrens 
799fa9e4066Sahrens /*
800fa9e4066Sahrens  * Reserve space in this dsl_dir, to be used in this tx's txg.
801a9799022Sck  * After the space has been dirtied (and dsl_dir_willuse_space()
802a9799022Sck  * has been called), the reservation should be canceled, using
803a9799022Sck  * dsl_dir_tempreserve_clear().
804fa9e4066Sahrens  */
805fa9e4066Sahrens int
806a9799022Sck dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
807a9799022Sck     uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx)
808fa9e4066Sahrens {
8091ab7f2deSmaybee 	int err;
810fa9e4066Sahrens 	list_t *tr_list;
811fa9e4066Sahrens 
812a9799022Sck 	if (asize == 0) {
813a9799022Sck 		*tr_cookiep = NULL;
814a9799022Sck 		return (0);
815a9799022Sck 	}
816a9799022Sck 
817fa9e4066Sahrens 	tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
818fa9e4066Sahrens 	list_create(tr_list, sizeof (struct tempreserve),
819fa9e4066Sahrens 	    offsetof(struct tempreserve, tr_node));
820a9799022Sck 	ASSERT3S(asize, >, 0);
821ea8dc4b6Seschrock 	ASSERT3S(fsize, >=, 0);
822fa9e4066Sahrens 
8231ab7f2deSmaybee 	err = arc_tempreserve_space(lsize, tx->tx_txg);
824fa9e4066Sahrens 	if (err == 0) {
825fa9e4066Sahrens 		struct tempreserve *tr;
826fa9e4066Sahrens 
8271ab7f2deSmaybee 		tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
8281ab7f2deSmaybee 		tr->tr_size = lsize;
8291ab7f2deSmaybee 		list_insert_tail(tr_list, tr);
8301ab7f2deSmaybee 
8311ab7f2deSmaybee 		err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx);
8321ab7f2deSmaybee 	} else {
8331ab7f2deSmaybee 		if (err == EAGAIN) {
8341ab7f2deSmaybee 			txg_delay(dd->dd_pool, tx->tx_txg, 1);
8351ab7f2deSmaybee 			err = ERESTART;
836fa9e4066Sahrens 		}
8371ab7f2deSmaybee 		dsl_pool_memory_pressure(dd->dd_pool);
8381ab7f2deSmaybee 	}
8391ab7f2deSmaybee 
8401ab7f2deSmaybee 	if (err == 0) {
8411ab7f2deSmaybee 		struct tempreserve *tr;
8421ab7f2deSmaybee 
8431ab7f2deSmaybee 		tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
8441ab7f2deSmaybee 		tr->tr_dp = dd->dd_pool;
8451ab7f2deSmaybee 		tr->tr_size = asize;
8461ab7f2deSmaybee 		list_insert_tail(tr_list, tr);
8471ab7f2deSmaybee 
8481ab7f2deSmaybee 		err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
8496df6c3bcSck 		    FALSE, asize > usize, tr_list, tx, TRUE);
850fa9e4066Sahrens 	}
851fa9e4066Sahrens 
852fa9e4066Sahrens 	if (err)
853fa9e4066Sahrens 		dsl_dir_tempreserve_clear(tr_list, tx);
854fa9e4066Sahrens 	else
855fa9e4066Sahrens 		*tr_cookiep = tr_list;
8561ab7f2deSmaybee 
857fa9e4066Sahrens 	return (err);
858fa9e4066Sahrens }
859fa9e4066Sahrens 
860fa9e4066Sahrens /*
861fa9e4066Sahrens  * Clear a temporary reservation that we previously made with
862fa9e4066Sahrens  * dsl_dir_tempreserve_space().
863fa9e4066Sahrens  */
864fa9e4066Sahrens void
865fa9e4066Sahrens dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
866fa9e4066Sahrens {
867fa9e4066Sahrens 	int txgidx = tx->tx_txg & TXG_MASK;
868fa9e4066Sahrens 	list_t *tr_list = tr_cookie;
869fa9e4066Sahrens 	struct tempreserve *tr;
870fa9e4066Sahrens 
871fa9e4066Sahrens 	ASSERT3U(tx->tx_txg, !=, 0);
872fa9e4066Sahrens 
873a9799022Sck 	if (tr_cookie == NULL)
874a9799022Sck 		return;
875a9799022Sck 
876fa9e4066Sahrens 	while (tr = list_head(tr_list)) {
8771ab7f2deSmaybee 		if (tr->tr_dp) {
8781ab7f2deSmaybee 			dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx);
8791ab7f2deSmaybee 		} else if (tr->tr_ds) {
880fa9e4066Sahrens 			mutex_enter(&tr->tr_ds->dd_lock);
881fa9e4066Sahrens 			ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
882fa9e4066Sahrens 			    tr->tr_size);
883fa9e4066Sahrens 			tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
884fa9e4066Sahrens 			mutex_exit(&tr->tr_ds->dd_lock);
8851ab7f2deSmaybee 		} else {
8861ab7f2deSmaybee 			arc_tempreserve_clear(tr->tr_size);
887fa9e4066Sahrens 		}
888fa9e4066Sahrens 		list_remove(tr_list, tr);
889fa9e4066Sahrens 		kmem_free(tr, sizeof (struct tempreserve));
890fa9e4066Sahrens 	}
891fa9e4066Sahrens 
892fa9e4066Sahrens 	kmem_free(tr_list, sizeof (list_t));
893fa9e4066Sahrens }
894fa9e4066Sahrens 
8951ab7f2deSmaybee static void
8961ab7f2deSmaybee dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
897fa9e4066Sahrens {
898fa9e4066Sahrens 	int64_t parent_space;
899fa9e4066Sahrens 	uint64_t est_used;
900fa9e4066Sahrens 
901fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
902fa9e4066Sahrens 	if (space > 0)
903fa9e4066Sahrens 		dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;
904fa9e4066Sahrens 
90574e7dc98SMatthew Ahrens 	est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes;
906fa9e4066Sahrens 	parent_space = parent_delta(dd, est_used, space);
907fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
908fa9e4066Sahrens 
909fa9e4066Sahrens 	/* Make sure that we clean up dd_space_to* */
910fa9e4066Sahrens 	dsl_dir_dirty(dd, tx);
911fa9e4066Sahrens 
912fa9e4066Sahrens 	/* XXX this is potentially expensive and unnecessary... */
913fa9e4066Sahrens 	if (parent_space && dd->dd_parent)
9141ab7f2deSmaybee 		dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx);
9151ab7f2deSmaybee }
9161ab7f2deSmaybee 
9171ab7f2deSmaybee /*
9181ab7f2deSmaybee  * Call in open context when we think we're going to write/free space,
9191ab7f2deSmaybee  * eg. when dirtying data.  Be conservative (ie. OK to write less than
9201ab7f2deSmaybee  * this or free more than this, but don't write more or free less).
9211ab7f2deSmaybee  */
9221ab7f2deSmaybee void
9231ab7f2deSmaybee dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
9241ab7f2deSmaybee {
9251ab7f2deSmaybee 	dsl_pool_willuse_space(dd->dd_pool, space, tx);
9261ab7f2deSmaybee 	dsl_dir_willuse_space_impl(dd, space, tx);
927fa9e4066Sahrens }
928fa9e4066Sahrens 
929fa9e4066Sahrens /* call from syncing context when we actually write/free space for this dd */
930fa9e4066Sahrens void
93174e7dc98SMatthew Ahrens dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
932fa9e4066Sahrens     int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
933fa9e4066Sahrens {
934fa9e4066Sahrens 	int64_t accounted_delta;
93502c8f3f0SMatthew Ahrens 	boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
936fa9e4066Sahrens 
937fa9e4066Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
93874e7dc98SMatthew Ahrens 	ASSERT(type < DD_USED_NUM);
939fa9e4066Sahrens 
94002c8f3f0SMatthew Ahrens 	if (needlock)
94102c8f3f0SMatthew Ahrens 		mutex_enter(&dd->dd_lock);
94274e7dc98SMatthew Ahrens 	accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used);
94374e7dc98SMatthew Ahrens 	ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used);
944fa9e4066Sahrens 	ASSERT(compressed >= 0 ||
945fa9e4066Sahrens 	    dd->dd_phys->dd_compressed_bytes >= -compressed);
946fa9e4066Sahrens 	ASSERT(uncompressed >= 0 ||
947fa9e4066Sahrens 	    dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
948*ce636f8bSMatthew Ahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
94974e7dc98SMatthew Ahrens 	dd->dd_phys->dd_used_bytes += used;
950fa9e4066Sahrens 	dd->dd_phys->dd_uncompressed_bytes += uncompressed;
951fa9e4066Sahrens 	dd->dd_phys->dd_compressed_bytes += compressed;
95274e7dc98SMatthew Ahrens 
95374e7dc98SMatthew Ahrens 	if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
95474e7dc98SMatthew Ahrens 		ASSERT(used > 0 ||
95574e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_breakdown[type] >= -used);
95674e7dc98SMatthew Ahrens 		dd->dd_phys->dd_used_breakdown[type] += used;
95774e7dc98SMatthew Ahrens #ifdef DEBUG
95874e7dc98SMatthew Ahrens 		dd_used_t t;
95974e7dc98SMatthew Ahrens 		uint64_t u = 0;
96074e7dc98SMatthew Ahrens 		for (t = 0; t < DD_USED_NUM; t++)
96174e7dc98SMatthew Ahrens 			u += dd->dd_phys->dd_used_breakdown[t];
96274e7dc98SMatthew Ahrens 		ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes);
96374e7dc98SMatthew Ahrens #endif
96474e7dc98SMatthew Ahrens 	}
96502c8f3f0SMatthew Ahrens 	if (needlock)
96602c8f3f0SMatthew Ahrens 		mutex_exit(&dd->dd_lock);
967fa9e4066Sahrens 
968fa9e4066Sahrens 	if (dd->dd_parent != NULL) {
96974e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
970fa9e4066Sahrens 		    accounted_delta, compressed, uncompressed, tx);
97174e7dc98SMatthew Ahrens 		dsl_dir_transfer_space(dd->dd_parent,
97274e7dc98SMatthew Ahrens 		    used - accounted_delta,
97374e7dc98SMatthew Ahrens 		    DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
974fa9e4066Sahrens 	}
975fa9e4066Sahrens }
976fa9e4066Sahrens 
97774e7dc98SMatthew Ahrens void
97874e7dc98SMatthew Ahrens dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
97974e7dc98SMatthew Ahrens     dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
98074e7dc98SMatthew Ahrens {
98102c8f3f0SMatthew Ahrens 	boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
98202c8f3f0SMatthew Ahrens 
98374e7dc98SMatthew Ahrens 	ASSERT(dmu_tx_is_syncing(tx));
98474e7dc98SMatthew Ahrens 	ASSERT(oldtype < DD_USED_NUM);
98574e7dc98SMatthew Ahrens 	ASSERT(newtype < DD_USED_NUM);
98674e7dc98SMatthew Ahrens 
98774e7dc98SMatthew Ahrens 	if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN))
98874e7dc98SMatthew Ahrens 		return;
98974e7dc98SMatthew Ahrens 
99002c8f3f0SMatthew Ahrens 	if (needlock)
99102c8f3f0SMatthew Ahrens 		mutex_enter(&dd->dd_lock);
99274e7dc98SMatthew Ahrens 	ASSERT(delta > 0 ?
99374e7dc98SMatthew Ahrens 	    dd->dd_phys->dd_used_breakdown[oldtype] >= delta :
99474e7dc98SMatthew Ahrens 	    dd->dd_phys->dd_used_breakdown[newtype] >= -delta);
99574e7dc98SMatthew Ahrens 	ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta));
996*ce636f8bSMatthew Ahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
99774e7dc98SMatthew Ahrens 	dd->dd_phys->dd_used_breakdown[oldtype] -= delta;
99874e7dc98SMatthew Ahrens 	dd->dd_phys->dd_used_breakdown[newtype] += delta;
99902c8f3f0SMatthew Ahrens 	if (needlock)
100002c8f3f0SMatthew Ahrens 		mutex_exit(&dd->dd_lock);
100174e7dc98SMatthew Ahrens }
100274e7dc98SMatthew Ahrens 
1003fa9e4066Sahrens static int
10041d452cf5Sahrens dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
1005fa9e4066Sahrens {
100692241e0bSTom Erickson 	dsl_dataset_t *ds = arg1;
100792241e0bSTom Erickson 	dsl_dir_t *dd = ds->ds_dir;
100892241e0bSTom Erickson 	dsl_prop_setarg_t *psa = arg2;
100992241e0bSTom Erickson 	int err;
10101d452cf5Sahrens 	uint64_t towrite;
1011fa9e4066Sahrens 
101292241e0bSTom Erickson 	if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
101392241e0bSTom Erickson 		return (err);
101492241e0bSTom Erickson 
101592241e0bSTom Erickson 	if (psa->psa_effective_value == 0)
10161d452cf5Sahrens 		return (0);
1017fa9e4066Sahrens 
1018fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
10191d452cf5Sahrens 	/*
10201d452cf5Sahrens 	 * If we are doing the preliminary check in open context, and
10211d452cf5Sahrens 	 * there are pending changes, then don't fail it, since the
1022a9799022Sck 	 * pending changes could under-estimate the amount of space to be
10231d452cf5Sahrens 	 * freed up.
10241d452cf5Sahrens 	 */
1025a9799022Sck 	towrite = dsl_dir_space_towrite(dd);
10261d452cf5Sahrens 	if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
102792241e0bSTom Erickson 	    (psa->psa_effective_value < dd->dd_phys->dd_reserved ||
102892241e0bSTom Erickson 	    psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) {
1029fa9e4066Sahrens 		err = ENOSPC;
1030fa9e4066Sahrens 	}
1031fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
1032fa9e4066Sahrens 	return (err);
1033fa9e4066Sahrens }
1034fa9e4066Sahrens 
10353f9d6ad7SLin Ling extern dsl_syncfunc_t dsl_prop_set_sync;
103692241e0bSTom Erickson 
10371d452cf5Sahrens static void
10383f9d6ad7SLin Ling dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
10391d452cf5Sahrens {
104092241e0bSTom Erickson 	dsl_dataset_t *ds = arg1;
104192241e0bSTom Erickson 	dsl_dir_t *dd = ds->ds_dir;
104292241e0bSTom Erickson 	dsl_prop_setarg_t *psa = arg2;
104392241e0bSTom Erickson 	uint64_t effective_value = psa->psa_effective_value;
104492241e0bSTom Erickson 
10453f9d6ad7SLin Ling 	dsl_prop_set_sync(ds, psa, tx);
104692241e0bSTom Erickson 	DSL_PROP_CHECK_PREDICTION(dd, psa);
10471d452cf5Sahrens 
10481d452cf5Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
10491d452cf5Sahrens 
10501d452cf5Sahrens 	mutex_enter(&dd->dd_lock);
105192241e0bSTom Erickson 	dd->dd_phys->dd_quota = effective_value;
10521d452cf5Sahrens 	mutex_exit(&dd->dd_lock);
1053ecd6cf80Smarks 
10544445fffbSMatthew Ahrens 	spa_history_log_internal_dd(dd, "set quota", tx,
10554445fffbSMatthew Ahrens 	    "quota=%lld", (longlong_t)effective_value);
10561d452cf5Sahrens }
10571d452cf5Sahrens 
1058fa9e4066Sahrens int
105992241e0bSTom Erickson dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
1060fa9e4066Sahrens {
1061fa9e4066Sahrens 	dsl_dir_t *dd;
106292241e0bSTom Erickson 	dsl_dataset_t *ds;
106392241e0bSTom Erickson 	dsl_prop_setarg_t psa;
1064fa9e4066Sahrens 	int err;
1065fa9e4066Sahrens 
106692241e0bSTom Erickson 	dsl_prop_setarg_init_uint64(&psa, "quota", source, &quota);
106792241e0bSTom Erickson 
106892241e0bSTom Erickson 	err = dsl_dataset_hold(ddname, FTAG, &ds);
1069ea8dc4b6Seschrock 	if (err)
1070ea8dc4b6Seschrock 		return (err);
1071fa9e4066Sahrens 
107292241e0bSTom Erickson 	err = dsl_dir_open(ddname, FTAG, &dd, NULL);
107392241e0bSTom Erickson 	if (err) {
107492241e0bSTom Erickson 		dsl_dataset_rele(ds, FTAG);
107592241e0bSTom Erickson 		return (err);
1076a9b821a0Sck 	}
107792241e0bSTom Erickson 
107892241e0bSTom Erickson 	ASSERT(ds->ds_dir == dd);
107992241e0bSTom Erickson 
108092241e0bSTom Erickson 	/*
108192241e0bSTom Erickson 	 * If someone removes a file, then tries to set the quota, we want to
108292241e0bSTom Erickson 	 * make sure the file freeing takes effect.
108392241e0bSTom Erickson 	 */
108492241e0bSTom Erickson 	txg_wait_open(dd->dd_pool, 0);
108592241e0bSTom Erickson 
108692241e0bSTom Erickson 	err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
108792241e0bSTom Erickson 	    dsl_dir_set_quota_sync, ds, &psa, 0);
108892241e0bSTom Erickson 
1089fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
109092241e0bSTom Erickson 	dsl_dataset_rele(ds, FTAG);
1091fa9e4066Sahrens 	return (err);
1092fa9e4066Sahrens }
1093fa9e4066Sahrens 
1094a9799022Sck int
10951d452cf5Sahrens dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
1096fa9e4066Sahrens {
109792241e0bSTom Erickson 	dsl_dataset_t *ds = arg1;
109892241e0bSTom Erickson 	dsl_dir_t *dd = ds->ds_dir;
109992241e0bSTom Erickson 	dsl_prop_setarg_t *psa = arg2;
110092241e0bSTom Erickson 	uint64_t effective_value;
1101fa9e4066Sahrens 	uint64_t used, avail;
110292241e0bSTom Erickson 	int err;
110392241e0bSTom Erickson 
110492241e0bSTom Erickson 	if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
110592241e0bSTom Erickson 		return (err);
110692241e0bSTom Erickson 
110792241e0bSTom Erickson 	effective_value = psa->psa_effective_value;
1108fa9e4066Sahrens 
11091d452cf5Sahrens 	/*
11101d452cf5Sahrens 	 * If we are doing the preliminary check in open context, the
11111d452cf5Sahrens 	 * space estimates may be inaccurate.
11121d452cf5Sahrens 	 */
11131d452cf5Sahrens 	if (!dmu_tx_is_syncing(tx))
11141d452cf5Sahrens 		return (0);
11151d452cf5Sahrens 
1116fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
111774e7dc98SMatthew Ahrens 	used = dd->dd_phys->dd_used_bytes;
1118fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
1119fa9e4066Sahrens 
1120fa9e4066Sahrens 	if (dd->dd_parent) {
1121fa9e4066Sahrens 		avail = dsl_dir_space_available(dd->dd_parent,
1122fa9e4066Sahrens 		    NULL, 0, FALSE);
1123fa9e4066Sahrens 	} else {
1124fa9e4066Sahrens 		avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
1125fa9e4066Sahrens 	}
1126fa9e4066Sahrens 
112792241e0bSTom Erickson 	if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) {
112892241e0bSTom Erickson 		uint64_t delta = MAX(used, effective_value) -
1129379c004dSEric Schrock 		    MAX(used, dd->dd_phys->dd_reserved);
1130379c004dSEric Schrock 
1131379c004dSEric Schrock 		if (delta > avail)
1132379c004dSEric Schrock 			return (ENOSPC);
1133379c004dSEric Schrock 		if (dd->dd_phys->dd_quota > 0 &&
113492241e0bSTom Erickson 		    effective_value > dd->dd_phys->dd_quota)
1135379c004dSEric Schrock 			return (ENOSPC);
1136379c004dSEric Schrock 	}
1137379c004dSEric Schrock 
11381d452cf5Sahrens 	return (0);
11391d452cf5Sahrens }
11401d452cf5Sahrens 
11411d452cf5Sahrens static void
11424445fffbSMatthew Ahrens dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
11431d452cf5Sahrens {
11441d452cf5Sahrens 	uint64_t used;
11451d452cf5Sahrens 	int64_t delta;
11461d452cf5Sahrens 
1147a9799022Sck 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
1148a9799022Sck 
11491d452cf5Sahrens 	mutex_enter(&dd->dd_lock);
115074e7dc98SMatthew Ahrens 	used = dd->dd_phys->dd_used_bytes;
11514445fffbSMatthew Ahrens 	delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved);
11524445fffbSMatthew Ahrens 	dd->dd_phys->dd_reserved = value;
1153fa9e4066Sahrens 
1154fa9e4066Sahrens 	if (dd->dd_parent != NULL) {
1155fa9e4066Sahrens 		/* Roll up this additional usage into our ancestors */
115674e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
115774e7dc98SMatthew Ahrens 		    delta, 0, 0, tx);
1158fa9e4066Sahrens 	}
115902c8f3f0SMatthew Ahrens 	mutex_exit(&dd->dd_lock);
11604445fffbSMatthew Ahrens }
11614445fffbSMatthew Ahrens 
11624445fffbSMatthew Ahrens 
11634445fffbSMatthew Ahrens static void
11644445fffbSMatthew Ahrens dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
11654445fffbSMatthew Ahrens {
11664445fffbSMatthew Ahrens 	dsl_dataset_t *ds = arg1;
11674445fffbSMatthew Ahrens 	dsl_dir_t *dd = ds->ds_dir;
11684445fffbSMatthew Ahrens 	dsl_prop_setarg_t *psa = arg2;
11694445fffbSMatthew Ahrens 	uint64_t value = psa->psa_effective_value;
11704445fffbSMatthew Ahrens 
11714445fffbSMatthew Ahrens 	dsl_prop_set_sync(ds, psa, tx);
11724445fffbSMatthew Ahrens 	DSL_PROP_CHECK_PREDICTION(dd, psa);
1173ecd6cf80Smarks 
11744445fffbSMatthew Ahrens 	dsl_dir_set_reservation_sync_impl(dd, value, tx);
11754445fffbSMatthew Ahrens 
11764445fffbSMatthew Ahrens 	spa_history_log_internal_dd(dd, "set reservation", tx,
11774445fffbSMatthew Ahrens 	    "reservation=%lld", (longlong_t)value);
1178fa9e4066Sahrens }
1179fa9e4066Sahrens 
1180fa9e4066Sahrens int
118192241e0bSTom Erickson dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
118292241e0bSTom Erickson     uint64_t reservation)
1183fa9e4066Sahrens {
1184fa9e4066Sahrens 	dsl_dir_t *dd;
118592241e0bSTom Erickson 	dsl_dataset_t *ds;
118692241e0bSTom Erickson 	dsl_prop_setarg_t psa;
1187fa9e4066Sahrens 	int err;
1188fa9e4066Sahrens 
118992241e0bSTom Erickson 	dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation);
119092241e0bSTom Erickson 
119192241e0bSTom Erickson 	err = dsl_dataset_hold(ddname, FTAG, &ds);
1192ea8dc4b6Seschrock 	if (err)
1193ea8dc4b6Seschrock 		return (err);
119492241e0bSTom Erickson 
119592241e0bSTom Erickson 	err = dsl_dir_open(ddname, FTAG, &dd, NULL);
119692241e0bSTom Erickson 	if (err) {
119792241e0bSTom Erickson 		dsl_dataset_rele(ds, FTAG);
119892241e0bSTom Erickson 		return (err);
119992241e0bSTom Erickson 	}
120092241e0bSTom Erickson 
120192241e0bSTom Erickson 	ASSERT(ds->ds_dir == dd);
120292241e0bSTom Erickson 
12031d452cf5Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
120492241e0bSTom Erickson 	    dsl_dir_set_reservation_sync, ds, &psa, 0);
120592241e0bSTom Erickson 
1206fa9e4066Sahrens 	dsl_dir_close(dd, FTAG);
120792241e0bSTom Erickson 	dsl_dataset_rele(ds, FTAG);
1208fa9e4066Sahrens 	return (err);
1209fa9e4066Sahrens }
1210fa9e4066Sahrens 
1211fa9e4066Sahrens static dsl_dir_t *
1212fa9e4066Sahrens closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
1213fa9e4066Sahrens {
1214fa9e4066Sahrens 	for (; ds1; ds1 = ds1->dd_parent) {
1215fa9e4066Sahrens 		dsl_dir_t *dd;
1216fa9e4066Sahrens 		for (dd = ds2; dd; dd = dd->dd_parent) {
1217fa9e4066Sahrens 			if (ds1 == dd)
1218fa9e4066Sahrens 				return (dd);
1219fa9e4066Sahrens 		}
1220fa9e4066Sahrens 	}
1221fa9e4066Sahrens 	return (NULL);
1222fa9e4066Sahrens }
1223fa9e4066Sahrens 
1224fa9e4066Sahrens /*
1225fa9e4066Sahrens  * If delta is applied to dd, how much of that delta would be applied to
1226fa9e4066Sahrens  * ancestor?  Syncing context only.
1227fa9e4066Sahrens  */
1228fa9e4066Sahrens static int64_t
1229fa9e4066Sahrens would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
1230fa9e4066Sahrens {
1231fa9e4066Sahrens 	if (dd == ancestor)
1232fa9e4066Sahrens 		return (delta);
1233fa9e4066Sahrens 
1234fa9e4066Sahrens 	mutex_enter(&dd->dd_lock);
123574e7dc98SMatthew Ahrens 	delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta);
1236fa9e4066Sahrens 	mutex_exit(&dd->dd_lock);
1237fa9e4066Sahrens 	return (would_change(dd->dd_parent, delta, ancestor));
1238fa9e4066Sahrens }
1239fa9e4066Sahrens 
12401d452cf5Sahrens struct renamearg {
12411d452cf5Sahrens 	dsl_dir_t *newparent;
12421d452cf5Sahrens 	const char *mynewname;
12431d452cf5Sahrens };
12441d452cf5Sahrens 
12451d452cf5Sahrens static int
12461d452cf5Sahrens dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1247fa9e4066Sahrens {
12481d452cf5Sahrens 	dsl_dir_t *dd = arg1;
12491d452cf5Sahrens 	struct renamearg *ra = arg2;
1250fa9e4066Sahrens 	dsl_pool_t *dp = dd->dd_pool;
1251fa9e4066Sahrens 	objset_t *mos = dp->dp_meta_objset;
12521d452cf5Sahrens 	int err;
12531d452cf5Sahrens 	uint64_t val;
1254fa9e4066Sahrens 
1255b91a2f0bSMatthew Ahrens 	/*
1256b91a2f0bSMatthew Ahrens 	 * There should only be one reference, from dmu_objset_rename().
1257b91a2f0bSMatthew Ahrens 	 * Fleeting holds are also possible (eg, from "zfs list" getting
1258b91a2f0bSMatthew Ahrens 	 * stats), but any that are present in open context will likely
1259b91a2f0bSMatthew Ahrens 	 * be gone by syncing context, so only fail from syncing
1260b91a2f0bSMatthew Ahrens 	 * context.
1261b91a2f0bSMatthew Ahrens 	 */
1262b91a2f0bSMatthew Ahrens 	if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 1)
1263fa9e4066Sahrens 		return (EBUSY);
1264fa9e4066Sahrens 
12651d452cf5Sahrens 	/* check for existing name */
12661d452cf5Sahrens 	err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
12671d452cf5Sahrens 	    ra->mynewname, 8, 1, &val);
12681d452cf5Sahrens 	if (err == 0)
12691d452cf5Sahrens 		return (EEXIST);
12701d452cf5Sahrens 	if (err != ENOENT)
12711d452cf5Sahrens 		return (err);
12721d452cf5Sahrens 
12731d452cf5Sahrens 	if (ra->newparent != dd->dd_parent) {
127499653d4eSeschrock 		/* is there enough space? */
127599653d4eSeschrock 		uint64_t myspace =
127674e7dc98SMatthew Ahrens 		    MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
1277fa9e4066Sahrens 
12781d452cf5Sahrens 		/* no rename into our descendant */
12791d452cf5Sahrens 		if (closest_common_ancestor(dd, ra->newparent) == dd)
1280fa9e4066Sahrens 			return (EINVAL);
1281fa9e4066Sahrens 
12821d452cf5Sahrens 		if (err = dsl_dir_transfer_possible(dd->dd_parent,
12831d452cf5Sahrens 		    ra->newparent, myspace))
128499653d4eSeschrock 			return (err);
12851d452cf5Sahrens 	}
1286fa9e4066Sahrens 
12871d452cf5Sahrens 	return (0);
12881d452cf5Sahrens }
12891d452cf5Sahrens 
12901d452cf5Sahrens static void
12913f9d6ad7SLin Ling dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
12921d452cf5Sahrens {
12931d452cf5Sahrens 	dsl_dir_t *dd = arg1;
12941d452cf5Sahrens 	struct renamearg *ra = arg2;
12951d452cf5Sahrens 	dsl_pool_t *dp = dd->dd_pool;
12961d452cf5Sahrens 	objset_t *mos = dp->dp_meta_objset;
12971d452cf5Sahrens 	int err;
12984445fffbSMatthew Ahrens 	char namebuf[MAXNAMELEN];
12991d452cf5Sahrens 
13001d452cf5Sahrens 	ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
13011d452cf5Sahrens 
13024445fffbSMatthew Ahrens 	/* Log this before we change the name. */
13034445fffbSMatthew Ahrens 	dsl_dir_name(ra->newparent, namebuf);
13044445fffbSMatthew Ahrens 	spa_history_log_internal_dd(dd, "rename", tx,
13054445fffbSMatthew Ahrens 	    "-> %s/%s", namebuf, ra->mynewname);
13064445fffbSMatthew Ahrens 
13071d452cf5Sahrens 	if (ra->newparent != dd->dd_parent) {
130874e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
130974e7dc98SMatthew Ahrens 		    -dd->dd_phys->dd_used_bytes,
1310fa9e4066Sahrens 		    -dd->dd_phys->dd_compressed_bytes,
1311fa9e4066Sahrens 		    -dd->dd_phys->dd_uncompressed_bytes, tx);
131274e7dc98SMatthew Ahrens 		dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD,
131374e7dc98SMatthew Ahrens 		    dd->dd_phys->dd_used_bytes,
1314fa9e4066Sahrens 		    dd->dd_phys->dd_compressed_bytes,
1315fa9e4066Sahrens 		    dd->dd_phys->dd_uncompressed_bytes, tx);
131674e7dc98SMatthew Ahrens 
131774e7dc98SMatthew Ahrens 		if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) {
131874e7dc98SMatthew Ahrens 			uint64_t unused_rsrv = dd->dd_phys->dd_reserved -
131974e7dc98SMatthew Ahrens 			    dd->dd_phys->dd_used_bytes;
132074e7dc98SMatthew Ahrens 
132174e7dc98SMatthew Ahrens 			dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
132274e7dc98SMatthew Ahrens 			    -unused_rsrv, 0, 0, tx);
132374e7dc98SMatthew Ahrens 			dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV,
132474e7dc98SMatthew Ahrens 			    unused_rsrv, 0, 0, tx);
132574e7dc98SMatthew Ahrens 		}
1326fa9e4066Sahrens 	}
1327fa9e4066Sahrens 
1328fa9e4066Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
1329fa9e4066Sahrens 
1330fa9e4066Sahrens 	/* remove from old parent zapobj */
1331fa9e4066Sahrens 	err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
1332fa9e4066Sahrens 	    dd->dd_myname, tx);
1333b420f3adSRichard Lowe 	ASSERT3U(err, ==, 0);
1334fa9e4066Sahrens 
13351d452cf5Sahrens 	(void) strcpy(dd->dd_myname, ra->mynewname);
1336fa9e4066Sahrens 	dsl_dir_close(dd->dd_parent, dd);
13371d452cf5Sahrens 	dd->dd_phys->dd_parent_obj = ra->newparent->dd_object;
1338ea8dc4b6Seschrock 	VERIFY(0 == dsl_dir_open_obj(dd->dd_pool,
13391d452cf5Sahrens 	    ra->newparent->dd_object, NULL, dd, &dd->dd_parent));
1340fa9e4066Sahrens 
1341fa9e4066Sahrens 	/* add to new parent zapobj */
13421d452cf5Sahrens 	err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
1343fa9e4066Sahrens 	    dd->dd_myname, 8, 1, &dd->dd_object, tx);
1344b420f3adSRichard Lowe 	ASSERT3U(err, ==, 0);
1345ecd6cf80Smarks 
13461d452cf5Sahrens }
1347fa9e4066Sahrens 
13481d452cf5Sahrens int
13491d452cf5Sahrens dsl_dir_rename(dsl_dir_t *dd, const char *newname)
13501d452cf5Sahrens {
13511d452cf5Sahrens 	struct renamearg ra;
13521d452cf5Sahrens 	int err;
13531d452cf5Sahrens 
13541d452cf5Sahrens 	/* new parent should exist */
13551d452cf5Sahrens 	err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname);
13561d452cf5Sahrens 	if (err)
13571d452cf5Sahrens 		return (err);
13581d452cf5Sahrens 
13591d452cf5Sahrens 	/* can't rename to different pool */
13601d452cf5Sahrens 	if (dd->dd_pool != ra.newparent->dd_pool) {
13611d452cf5Sahrens 		err = ENXIO;
13621d452cf5Sahrens 		goto out;
13631d452cf5Sahrens 	}
13641d452cf5Sahrens 
13651d452cf5Sahrens 	/* new name should not already exist */
13661d452cf5Sahrens 	if (ra.mynewname == NULL) {
13671d452cf5Sahrens 		err = EEXIST;
13681d452cf5Sahrens 		goto out;
13691d452cf5Sahrens 	}
13701d452cf5Sahrens 
13711d452cf5Sahrens 	err = dsl_sync_task_do(dd->dd_pool,
13721d452cf5Sahrens 	    dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);
13731d452cf5Sahrens 
13741d452cf5Sahrens out:
13751d452cf5Sahrens 	dsl_dir_close(ra.newparent, FTAG);
13761d452cf5Sahrens 	return (err);
1377fa9e4066Sahrens }
137899653d4eSeschrock 
137999653d4eSeschrock int
138099653d4eSeschrock dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
138199653d4eSeschrock {
138299653d4eSeschrock 	dsl_dir_t *ancestor;
138399653d4eSeschrock 	int64_t adelta;
138499653d4eSeschrock 	uint64_t avail;
138599653d4eSeschrock 
138699653d4eSeschrock 	ancestor = closest_common_ancestor(sdd, tdd);
138799653d4eSeschrock 	adelta = would_change(sdd, -space, ancestor);
138899653d4eSeschrock 	avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
138999653d4eSeschrock 	if (avail < space)
139099653d4eSeschrock 		return (ENOSPC);
139199653d4eSeschrock 
139299653d4eSeschrock 	return (0);
139399653d4eSeschrock }
139471eb0538SChris Kirby 
139571eb0538SChris Kirby timestruc_t
139671eb0538SChris Kirby dsl_dir_snap_cmtime(dsl_dir_t *dd)
139771eb0538SChris Kirby {
139871eb0538SChris Kirby 	timestruc_t t;
139971eb0538SChris Kirby 
140071eb0538SChris Kirby 	mutex_enter(&dd->dd_lock);
140171eb0538SChris Kirby 	t = dd->dd_snap_cmtime;
140271eb0538SChris Kirby 	mutex_exit(&dd->dd_lock);
140371eb0538SChris Kirby 
140471eb0538SChris Kirby 	return (t);
140571eb0538SChris Kirby }
140671eb0538SChris Kirby 
140771eb0538SChris Kirby void
140871eb0538SChris Kirby dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
140971eb0538SChris Kirby {
141071eb0538SChris Kirby 	timestruc_t t;
141171eb0538SChris Kirby 
141271eb0538SChris Kirby 	gethrestime(&t);
141371eb0538SChris Kirby 	mutex_enter(&dd->dd_lock);
141471eb0538SChris Kirby 	dd->dd_snap_cmtime = t;
141571eb0538SChris Kirby 	mutex_exit(&dd->dd_lock);
141671eb0538SChris Kirby }
1417