1fa9e406ahrens/*
2fa9e406ahrens * CDDL HEADER START
3fa9e406ahrens *
4fa9e406ahrens * The contents of this file are subject to the terms of the
5ea8dc4beschrock * Common Development and Distribution License (the "License").
6ea8dc4beschrock * You may not use this file except in compliance with the License.
7fa9e406ahrens *
8fa9e406ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e406ahrens * or http://www.opensolaris.org/os/licensing.
10fa9e406ahrens * See the License for the specific language governing permissions
11fa9e406ahrens * and limitations under the License.
12fa9e406ahrens *
13fa9e406ahrens * When distributing Covered Code, include this CDDL HEADER in each
14fa9e406ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e406ahrens * If applicable, add the following below this CDDL HEADER, with the
16fa9e406ahrens * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e406ahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e406ahrens *
19fa9e406ahrens * CDDL HEADER END
20fa9e406ahrens */
21fa9e406ahrens/*
223f9d6adLin Ling * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
238671400Serapheim Dimitropoulos * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
24013023dMartin Matuska * Copyright (c) 2013 Martin Matuska. All rights reserved.
25a2afb61Jerry Jelinek * Copyright (c) 2014 Joyent, Inc. All rights reserved.
26bc9014eJustin Gibbs * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
2703b1c29Alexander Eremin * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
28fa9e406ahrens */
29fa9e406ahrens
30fa9e406ahrens#include <sys/dmu.h>
31a979902ck#include <sys/dmu_objset.h>
32fa9e406ahrens#include <sys/dmu_tx.h>
33fa9e406ahrens#include <sys/dsl_dataset.h>
34fa9e406ahrens#include <sys/dsl_dir.h>
35fa9e406ahrens#include <sys/dsl_prop.h>
361d452cfahrens#include <sys/dsl_synctask.h>
37ecd6cf8marks#include <sys/dsl_deleg.h>
382acef22Matthew Ahrens#include <sys/dmu_impl.h>
39fa9e406ahrens#include <sys/spa.h>
40eb63303Tom Caputi#include <sys/spa_impl.h>
41b24ab67Jeff Bonwick#include <sys/metaslab.h>
42fa9e406ahrens#include <sys/zap.h>
43fa9e406ahrens#include <sys/zio.h>
44fa9e406ahrens#include <sys/arc.h>
45ecd6cf8marks#include <sys/sunddi.h>
46a2afb61Jerry Jelinek#include <sys/zfeature.h>
47a2afb61Jerry Jelinek#include <sys/policy.h>
48a2afb61Jerry Jelinek#include <sys/zfs_znode.h>
49fa9e406ahrens#include "zfs_namecheck.h"
50a2afb61Jerry Jelinek#include "zfs_prop.h"
51a2afb61Jerry Jelinek
52a2afb61Jerry Jelinek/*
53a2afb61Jerry Jelinek * Filesystem and Snapshot Limits
54a2afb61Jerry Jelinek * ------------------------------
55a2afb61Jerry Jelinek *
56a2afb61Jerry Jelinek * These limits are used to restrict the number of filesystems and/or snapshots
57a2afb61Jerry Jelinek * that can be created at a given level in the tree or below. A typical
58a2afb61Jerry Jelinek * use-case is with a delegated dataset where the administrator wants to ensure
59a2afb61Jerry Jelinek * that a user within the zone is not creating too many additional filesystems
60a2afb61Jerry Jelinek * or snapshots, even though they're not exceeding their space quota.
61a2afb61Jerry Jelinek *
62a2afb61Jerry Jelinek * The filesystem and snapshot counts are stored as extensible properties. This
63a2afb61Jerry Jelinek * capability is controlled by a feature flag and must be enabled to be used.
64a2afb61Jerry Jelinek * Once enabled, the feature is not active until the first limit is set. At
65a2afb61Jerry Jelinek * that point, future operations to create/destroy filesystems or snapshots
66a2afb61Jerry Jelinek * will validate and update the counts.
67a2afb61Jerry Jelinek *
68a2afb61Jerry Jelinek * Because the count properties will not exist before the feature is active,
69a2afb61Jerry Jelinek * the counts are updated when a limit is first set on an uninitialized
70a2afb61Jerry Jelinek * dsl_dir node in the tree (The filesystem/snapshot count on a node includes
71a2afb61Jerry Jelinek * all of the nested filesystems/snapshots. Thus, a new leaf node has a
72a2afb61Jerry Jelinek * filesystem count of 0 and a snapshot count of 0. Non-existent filesystem and
73a2afb61Jerry Jelinek * snapshot count properties on a node indicate uninitialized counts on that
74a2afb61Jerry Jelinek * node.) When first setting a limit on an uninitialized node, the code starts
75a2afb61Jerry Jelinek * at the filesystem with the new limit and descends into all sub-filesystems
76a2afb61Jerry Jelinek * to add the count properties.
77a2afb61Jerry Jelinek *
78a2afb61Jerry Jelinek * In practice this is lightweight since a limit is typically set when the
79a2afb61Jerry Jelinek * filesystem is created and thus has no children. Once valid, changing the
80a2afb61Jerry Jelinek * limit value won't require a re-traversal since the counts are already valid.
81a2afb61Jerry Jelinek * When recursively fixing the counts, if a node with a limit is encountered
82a2afb61Jerry Jelinek * during the descent, the counts are known to be valid and there is no need to
83a2afb61Jerry Jelinek * descend into that filesystem's children. The counts on filesystems above the
84a2afb61Jerry Jelinek * one with the new limit will still be uninitialized, unless a limit is
85a2afb61Jerry Jelinek * eventually set on one of those filesystems. The counts are always recursively
86a2afb61Jerry Jelinek * updated when a limit is set on a dataset, unless there is already a limit.
87a2afb61Jerry Jelinek * When a new limit value is set on a filesystem with an existing limit, it is
88a2afb61Jerry Jelinek * possible for the new limit to be less than the current count at that level
89a2afb61Jerry Jelinek * since a user who can change the limit is also allowed to exceed the limit.
90a2afb61Jerry Jelinek *
91a2afb61Jerry Jelinek * Once the feature is active, then whenever a filesystem or snapshot is
92a2afb61Jerry Jelinek * created, the code recurses up the tree, validating the new count against the
93a2afb61Jerry Jelinek * limit at each initialized level. In practice, most levels will not have a
94a2afb61Jerry Jelinek * limit set. If there is a limit at any initialized level up the tree, the
95a2afb61Jerry Jelinek * check must pass or the creation will fail. Likewise, when a filesystem or
96a2afb61Jerry Jelinek * snapshot is destroyed, the counts are recursively adjusted all the way up
97a2afb61Jerry Jelinek * the initialized nodes in the tree. Renaming a filesystem into a different
98a2afb61Jerry Jelinek * point in the tree will first validate, then update the counts on each
99a2afb61Jerry Jelinek * branch up to the common ancestor. A receive will also validate the counts
100a2afb61Jerry Jelinek * and then update them.
101a2afb61Jerry Jelinek *
102a2afb61Jerry Jelinek * An exception to the above behavior is that the limit is not enforced if the
103a2afb61Jerry Jelinek * user has permission to modify the limit. This is primarily so that
104a2afb61Jerry Jelinek * recursive snapshots in the global zone always work. We want to prevent a
105a2afb61Jerry Jelinek * denial-of-service in which a lower level delegated dataset could max out its
106a2afb61Jerry Jelinek * limit and thus block recursive snapshots from being taken in the global zone.
107a2afb61Jerry Jelinek * Because of this, it is possible for the snapshot count to be over the limit
108a2afb61Jerry Jelinek * and snapshots taken in the global zone could cause a lower level dataset to
109a2afb61Jerry Jelinek * hit or exceed its limit. The administrator taking the global zone recursive
110a2afb61Jerry Jelinek * snapshot should be aware of this side-effect and behave accordingly.
111a2afb61Jerry Jelinek * For consistency, the filesystem limit is also not enforced if the user can
112a2afb61Jerry Jelinek * modify the limit.
113a2afb61Jerry Jelinek *
114a2afb61Jerry Jelinek * The filesystem and snapshot limits are validated by dsl_fs_ss_limit_check()
115a2afb61Jerry Jelinek * and updated by dsl_fs_ss_count_adjust(). A new limit value is setup in
116a2afb61Jerry Jelinek * dsl_dir_activate_fs_ss_limit() and the counts are adjusted, if necessary, by
117a2afb61Jerry Jelinek * dsl_dir_init_fs_ss_count().
118a2afb61Jerry Jelinek *
119a2afb61Jerry Jelinek * There is a special case when we receive a filesystem that already exists. In
120a2afb61Jerry Jelinek * this case a temporary clone name of %X is created (see dmu_recv_begin). We
121a2afb61Jerry Jelinek * never update the filesystem counts for temporary clones.
122a2afb61Jerry Jelinek *
123a2afb61Jerry Jelinek * Likewise, we do not update the snapshot counts for temporary snapshots,
124a2afb61Jerry Jelinek * such as those created by zfs diff.
125a2afb61Jerry Jelinek */
126fa9e406ahrens
127c137962Justin T. Gibbsextern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd);
128c137962Justin T. Gibbs
129a979902ckstatic uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
130fa9e406ahrens
/*
 * Argument bundle pairing a dsl_dir_t with a transaction group number.
 * NOTE(review): the consumer is not visible in this part of the file;
 * presumably this is handed to a sync task as its opaque argument -- confirm.
 * Note that the two members do not share the same prefix spelling
 * ("ddulrta_" vs. "ddlrta_"); users must spell each one exactly.
 */
typedef struct ddulrt_arg {
	dsl_dir_t	*ddulrta_dd;	/* the dsl_dir_t */
	uint64_t	ddlrta_txg;	/* the txg */
} ddulrt_arg_t;
1355cabbc6Prashanth Sreenivasa
136fa9e406ahrensstatic void
13740510e8Josef 'Jeff' Sipekdsl_dir_evict_async(void *dbu)
138fa9e406ahrens{
139bc9014eJustin Gibbs	dsl_dir_t *dd = dbu;
140fa9e406ahrens	dsl_pool_t *dp = dd->dd_pool;
141fa9e406ahrens	int t;
142fa9e406ahrens
143bc9014eJustin Gibbs	dd->dd_dbuf = NULL;
144bc9014eJustin Gibbs
145fa9e406ahrens	for (t = 0; t < TXG_SIZE; t++) {
146fa9e406ahrens		ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
147fa9e406ahrens		ASSERT(dd->dd_tempreserved[t] == 0);
148fa9e406ahrens		ASSERT(dd->dd_space_towrite[t] == 0);
149fa9e406ahrens	}
150fa9e406ahrens
151fa9e406ahrens	if (dd->dd_parent)
152bc9014eJustin Gibbs		dsl_dir_async_rele(dd->dd_parent, dd);
153fa9e406ahrens
154bc9014eJustin Gibbs	spa_async_close(dd->dd_pool->dp_spa, dd);
155fa9e406ahrens
15603bad06Justin Gibbs	dsl_prop_fini(dd);
1575ad8204nd	mutex_destroy(&dd->dd_lock);
158fa9e406ahrens	kmem_free(dd, sizeof (dsl_dir_t));
159fa9e406ahrens}
160fa9e406ahrens
161ea8dc4beschrockint
1623b2aab1Matthew Ahrensdsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
163ea8dc4beschrock    const char *tail, void *tag, dsl_dir_t **ddp)
164fa9e406ahrens{
165fa9e406ahrens	dmu_buf_t *dbuf;
166fa9e406ahrens	dsl_dir_t *dd;
167eb63303Tom Caputi	dmu_object_info_t doi;
168ea8dc4beschrock	int err;
169fa9e406ahrens
1703b2aab1Matthew Ahrens	ASSERT(dsl_pool_config_held(dp));
171fa9e406ahrens
172ea8dc4beschrock	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
1733b2aab1Matthew Ahrens	if (err != 0)
174ea8dc4beschrock		return (err);
175fa9e406ahrens	dd = dmu_buf_get_user(dbuf);
176eb63303Tom Caputi
177eb63303Tom Caputi	dmu_object_info_from_db(dbuf, &doi);
178eb63303Tom Caputi	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR);
179eb63303Tom Caputi	ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
180eb63303Tom Caputi
181fa9e406ahrens	if (dd == NULL) {
182fa9e406ahrens		dsl_dir_t *winner;
183fa9e406ahrens
184fa9e406ahrens		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
185fa9e406ahrens		dd->dd_object = ddobj;
186fa9e406ahrens		dd->dd_dbuf = dbuf;
187fa9e406ahrens		dd->dd_pool = dp;
188eb63303Tom Caputi
189eb63303Tom Caputi		if (dsl_dir_is_zapified(dd) &&
190eb63303Tom Caputi		    zap_contains(dp->dp_meta_objset, ddobj,
191eb63303Tom Caputi		    DD_FIELD_CRYPTO_KEY_OBJ) == 0) {
192eb63303Tom Caputi			VERIFY0(zap_lookup(dp->dp_meta_objset,
193eb63303Tom Caputi			    ddobj, DD_FIELD_CRYPTO_KEY_OBJ,
194eb63303Tom Caputi			    sizeof (uint64_t), 1, &dd->dd_crypto_obj));
195eb63303Tom Caputi
196eb63303Tom Caputi			/* check for on-disk format errata */
197eb63303Tom Caputi			if (dsl_dir_incompatible_encryption_version(dd)) {
198eb63303Tom Caputi				dp->dp_spa->spa_errata =
199eb63303Tom Caputi				    ZPOOL_ERRATA_ZOL_6845_ENCRYPTION;
200eb63303Tom Caputi			}
201eb63303Tom Caputi		}
202eb63303Tom Caputi
2035ad8204nd		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
20403bad06Justin Gibbs		dsl_prop_init(dd);
205fa9e406ahrens
20671eb053Chris Kirby		dsl_dir_snap_cmtime_update(dd);
20771eb053Chris Kirby
208c137962Justin T. Gibbs		if (dsl_dir_phys(dd)->dd_parent_obj) {
209c137962Justin T. Gibbs			err = dsl_dir_hold_obj(dp,
210c137962Justin T. Gibbs			    dsl_dir_phys(dd)->dd_parent_obj, NULL, dd,
211c137962Justin T. Gibbs			    &dd->dd_parent);
2123b2aab1Matthew Ahrens			if (err != 0)
21374e7dc9Matthew Ahrens				goto errout;
214fa9e406ahrens			if (tail) {
215fa9e406ahrens#ifdef ZFS_DEBUG
216fa9e406ahrens				uint64_t foundobj;
217fa9e406ahrens
218fa9e406ahrens				err = zap_lookup(dp->dp_meta_objset,
219c137962Justin T. Gibbs				    dsl_dir_phys(dd->dd_parent)->
220c137962Justin T. Gibbs				    dd_child_dir_zapobj, tail,
221c137962Justin T. Gibbs				    sizeof (foundobj), 1, &foundobj);
222ea8dc4beschrock				ASSERT(err || foundobj == ddobj);
223fa9e406ahrens#endif
224fa9e406ahrens				(void) strcpy(dd->dd_myname, tail);
225fa9e406ahrens			} else {
226fa9e406ahrens				err = zap_value_search(dp->dp_meta_objset,
227c137962Justin T. Gibbs				    dsl_dir_phys(dd->dd_parent)->
228c137962Justin T. Gibbs				    dd_child_dir_zapobj,
229e743726ahrens				    ddobj, 0, dd->dd_myname);
230ea8dc4beschrock			}
2313b2aab1Matthew Ahrens			if (err != 0)
23274e7dc9Matthew Ahrens				goto errout;
233fa9e406ahrens		} else {
234fa9e406ahrens			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
235fa9e406ahrens		}
236fa9e406ahrens
2373f9d6adLin Ling		if (dsl_dir_is_clone(dd)) {
2383f9d6adLin Ling			dmu_buf_t *origin_bonus;
2393f9d6adLin Ling			dsl_dataset_phys_t *origin_phys;
2403f9d6adLin Ling
2413f9d6adLin Ling			/*
2423f9d6adLin Ling			 * We can't open the origin dataset, because
2433f9d6adLin Ling			 * that would require opening this dsl_dir.
2443f9d6adLin Ling			 * Just look at its phys directly instead.
2453f9d6adLin Ling			 */
2463f9d6adLin Ling			err = dmu_bonus_hold(dp->dp_meta_objset,
247c137962Justin T. Gibbs			    dsl_dir_phys(dd)->dd_origin_obj, FTAG,
248c137962Justin T. Gibbs			    &origin_bonus);
2493b2aab1Matthew Ahrens			if (err != 0)
2503f9d6adLin Ling				goto errout;
2513f9d6adLin Ling			origin_phys = origin_bonus->db_data;
2523f9d6adLin Ling			dd->dd_origin_txg =
2533f9d6adLin Ling			    origin_phys->ds_creation_txg;
2543f9d6adLin Ling			dmu_buf_rele(origin_bonus, FTAG);
2553f9d6adLin Ling		}
2563f9d6adLin Ling
25740510e8Josef 'Jeff' Sipek		dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async,
25840510e8Josef 'Jeff' Sipek		    &dd->dd_dbuf);
259bc9014eJustin Gibbs		winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu);
260bc9014eJustin Gibbs		if (winner != NULL) {
261fa9e406ahrens			if (dd->dd_parent)
2623b2aab1Matthew Ahrens				dsl_dir_rele(dd->dd_parent, dd);
26303bad06Justin Gibbs			dsl_prop_fini(dd);
2645ad8204nd			mutex_destroy(&dd->dd_lock);
265fa9e406ahrens			kmem_free(dd, sizeof (dsl_dir_t));
266fa9e406ahrens			dd = winner;
267fa9e406ahrens		} else {
268fa9e406ahrens			spa_open_ref(dp->dp_spa, dd);
269fa9e406ahrens		}
270fa9e406ahrens	}
271fa9e406ahrens
272fa9e406ahrens	/*
273fa9e406ahrens	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
274fa9e406ahrens	 * holds on the spa.  We need the open-to-close holds because
275fa9e406ahrens	 * otherwise the spa_refcnt wouldn't change when we open a
276fa9e406ahrens	 * dir which the spa also has open, so we could incorrectly
277fa9e406ahrens	 * think it was OK to unload/export/destroy the pool.  We need
278fa9e406ahrens	 * the instantiate-to-evict hold because the dsl_dir_t has a
279fa9e406ahrens	 * pointer to the dd_pool, which has a pointer to the spa_t.
280fa9e406ahrens	 */
281fa9e406ahrens	spa_open_ref(dp->dp_spa, tag);
282fa9e406ahrens	ASSERT3P(dd->dd_pool, ==, dp);
283fa9e406ahrens	ASSERT3U(dd->dd_object, ==, ddobj);
284fa9e406ahrens	ASSERT3P(dd->dd_dbuf, ==, dbuf);
285ea8dc4beschrock	*ddp = dd;
286ea8dc4beschrock	return (0);
28774e7dc9Matthew Ahrens
28874e7dc9Matthew Ahrenserrout:
28974e7dc9Matthew Ahrens	if (dd->dd_parent)
2903b2aab1Matthew Ahrens		dsl_dir_rele(dd->dd_parent, dd);
29103bad06Justin Gibbs	dsl_prop_fini(dd);
29274e7dc9Matthew Ahrens	mutex_destroy(&dd->dd_lock);
29374e7dc9Matthew Ahrens	kmem_free(dd, sizeof (dsl_dir_t));
29474e7dc9Matthew Ahrens	dmu_buf_rele(dbuf, tag);
29574e7dc9Matthew Ahrens	return (err);
296fa9e406ahrens}
297fa9e406ahrens
298fa9e406ahrensvoid
2993b2aab1Matthew Ahrensdsl_dir_rele(dsl_dir_t *dd, void *tag)
300fa9e406ahrens{
301fa9e406ahrens	dprintf_dd(dd, "%s\n", "");
302fa9e406ahrens	spa_close(dd->dd_pool->dp_spa, tag);
303ea8dc4beschrock	dmu_buf_rele(dd->dd_dbuf, tag);
304fa9e406ahrens}
305fa9e406ahrens
306bc9014eJustin Gibbs/*
307bc9014eJustin Gibbs * Remove a reference to the given dsl dir that is being asynchronously
308bc9014eJustin Gibbs * released.  Async releases occur from a taskq performing eviction of
309bc9014eJustin Gibbs * dsl datasets and dirs.  This process is identical to a normal release
310bc9014eJustin Gibbs * with the exception of using the async API for releasing the reference on
311bc9014eJustin Gibbs * the spa.
312bc9014eJustin Gibbs */
313bc9014eJustin Gibbsvoid
314bc9014eJustin Gibbsdsl_dir_async_rele(dsl_dir_t *dd, void *tag)
315bc9014eJustin Gibbs{
316bc9014eJustin Gibbs	dprintf_dd(dd, "%s\n", "");
317bc9014eJustin Gibbs	spa_async_close(dd->dd_pool->dp_spa, tag);
318bc9014eJustin Gibbs	dmu_buf_rele(dd->dd_dbuf, tag);
319bc9014eJustin Gibbs}
320bc9014eJustin Gibbs
3219adfa60Matthew Ahrens/* buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes */
322fa9e406ahrensvoid
323fa9e406ahrensdsl_dir_name(dsl_dir_t *dd, char *buf)
324fa9e406ahrens{
325fa9e406ahrens	if (dd->dd_parent) {
326fa9e406ahrens		dsl_dir_name(dd->dd_parent, buf);
3279adfa60Matthew Ahrens		VERIFY3U(strlcat(buf, "/", ZFS_MAX_DATASET_NAME_LEN), <,
3289adfa60Matthew Ahrens		    ZFS_MAX_DATASET_NAME_LEN);
329fa9e406ahrens	} else {
330fa9e406ahrens		buf[0] = '\0';
331fa9e406ahrens	}
332fa9e406ahrens	if (!MUTEX_HELD(&dd->dd_lock)) {
333fa9e406ahrens		/*
334fa9e406ahrens		 * recursive mutex so that we can use
335fa9e406ahrens		 * dprintf_dd() with dd_lock held
336fa9e406ahrens		 */
337fa9e406ahrens		mutex_enter(&dd->dd_lock);
3389adfa60Matthew Ahrens		VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN),
3399adfa60Matthew Ahrens		    <, ZFS_MAX_DATASET_NAME_LEN);
340fa9e406ahrens		mutex_exit(&dd->dd_lock);
341fa9e406ahrens	} else {
3429adfa60Matthew Ahrens		VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN),
3439adfa60Matthew Ahrens		    <, ZFS_MAX_DATASET_NAME_LEN);
344fa9e406ahrens	}
345fa9e406ahrens}
346fa9e406ahrens
347ce636f8Matthew Ahrens/* Calculate name length, avoiding all the strcat calls of dsl_dir_name */
348b7661ccmmusanteint
349b7661ccmmusantedsl_dir_namelen(dsl_dir_t *dd)
350b7661ccmmusante{
351b7661ccmmusante	int result = 0;
352b7661ccmmusante
353b7661ccmmusante	if (dd->dd_parent) {
354b7661ccmmusante		/* parent's name + 1 for the "/" */
355b7661ccmmusante		result = dsl_dir_namelen(dd->dd_parent) + 1;
356b7661ccmmusante	}
357b7661ccmmusante
358b7661ccmmusante	if (!MUTEX_HELD(&dd->dd_lock)) {
359b7661ccmmusante		/* see dsl_dir_name */
360b7661ccmmusante		mutex_enter(&dd->dd_lock);
361b7661ccmmusante		result += strlen(dd->dd_myname);
362b7661ccmmusante		mutex_exit(&dd->dd_lock);
363b7661ccmmusante	} else {
364b7661ccmmusante		result += strlen(dd->dd_myname);
365b7661ccmmusante	}
366b7661ccmmusante
367b7661ccmmusante	return (result);
368b7661ccmmusante}
369b7661ccmmusante
370fa9e406ahrensstatic int
371fa9e406ahrensgetcomponent(const char *path, char *component, const char **nextp)
372fa9e406ahrens{
373fa9e406ahrens	char *p;
3743b2aab1Matthew Ahrens
375ccba080Rich Morris	if ((path == NULL) || (path[0] == '\0'))
376be6fd75Matthew Ahrens		return (SET_ERROR(ENOENT));
377fa9e406ahrens	/* This would be a good place to reserve some namespace... */
378fa9e406ahrens	p = strpbrk(path, "/@");
379fa9e406ahrens	if (p && (p[1] == '/' || p[1] == '@')) {
380fa9e406ahrens		/* two separators in a row */
381be6fd75Matthew Ahrens		return (SET_ERROR(EINVAL));
382fa9e406ahrens	}
383fa9e406ahrens	if (p == NULL || p == path) {
384fa9e406ahrens		/*
385fa9e406ahrens		 * if the first thing is an @ or /, it had better be an
386fa9e406ahrens		 * @ and it had better not have any more ats or slashes,
387fa9e406ahrens		 * and it had better have something after the @.
388fa9e406ahrens		 */
389fa9e406ahrens		if (p != NULL &&
390fa9e406ahrens		    (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
391be6fd75Matthew Ahrens			return (SET_ERROR(EINVAL));
3929adfa60Matthew Ahrens		if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN)
393be6fd75Matthew Ahrens			return (SET_ERROR(ENAMETOOLONG));
394fa9e406ahrens		(void) strcpy(component, path);
395fa9e406ahrens		p = NULL;
396fa9e406ahrens	} else if (p[0] == '/') {
3979adfa60Matthew Ahrens		if (p - path >= ZFS_MAX_DATASET_NAME_LEN)
398be6fd75Matthew Ahrens			return (SET_ERROR(ENAMETOOLONG));
399fa9e406ahrens		(void) strncpy(component, path, p - path);
4003b2aab1Matthew Ahrens		component[p - path] = '\0';
401fa9e406ahrens		p++;
402fa9e406ahrens	} else if (p[0] == '@') {
403fa9e406ahrens		/*
404fa9e406ahrens		 * if the next separator is an @, there better not be
405fa9e406ahrens		 * any more slashes.
406fa9e406ahrens		 */
407fa9e406ahrens		if (strchr(path, '/'))
408be6fd75Matthew Ahrens			return (SET_ERROR(EINVAL));
4099adfa60Matthew Ahrens		if (p - path >= ZFS_MAX_DATASET_NAME_LEN)
410be6fd75Matthew Ahrens			return (SET_ERROR(ENAMETOOLONG));
411fa9e406ahrens		(void) strncpy(component, path, p - path);
4123b2aab1Matthew Ahrens		component[p - path] = '\0';
413fa9e406ahrens	} else {
4143b2aab1Matthew Ahrens		panic("invalid p=%p", (void *)p);
415fa9e406ahrens	}
416fa9e406ahrens	*nextp = p;
417fa9e406ahrens	return (0);
418fa9e406ahrens}
419fa9e406ahrens
420fa9e406ahrens/*
4213b2aab1Matthew Ahrens * Return the dsl_dir_t, and possibly the last component which couldn't
4223b2aab1Matthew Ahrens * be found in *tail.  The name must be in the specified dsl_pool_t.  This
4233b2aab1Matthew Ahrens * thread must hold the dp_config_rwlock for the pool.  Returns NULL if the
4243b2aab1Matthew Ahrens * path is bogus, or if tail==NULL and we couldn't parse the whole name.
4253b2aab1Matthew Ahrens * (*tail)[0] == '@' means that the last component is a snapshot.
426fa9e406ahrens */
427ea8dc4beschrockint
4283b2aab1Matthew Ahrensdsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
429ea8dc4beschrock    dsl_dir_t **ddp, const char **tailp)
430fa9e406ahrens{
4319adfa60Matthew Ahrens	char buf[ZFS_MAX_DATASET_NAME_LEN];
4323b2aab1Matthew Ahrens	const char *spaname, *next, *nextnext = NULL;
433fa9e406ahrens	int err;
434fa9e406ahrens	dsl_dir_t *dd;
435fa9e406ahrens	uint64_t ddobj;
436fa9e406ahrens
437fa9e406ahrens	err = getcomponent(name, buf, &next);
4383b2aab1Matthew Ahrens	if (err != 0)
439ea8dc4beschrock		return (err);
440fa9e406ahrens
4413b2aab1Matthew Ahrens	/* Make sure the name is in the specified pool. */
4423b2aab1Matthew Ahrens	spaname = spa_name(dp->dp_spa);
4433b2aab1Matthew Ahrens	if (strcmp(buf, spaname) != 0)
44403b1c29Alexander Eremin		return (SET_ERROR(EXDEV));
445fa9e406ahrens
4463b2aab1Matthew Ahrens	ASSERT(dsl_pool_config_held(dp));
447fa9e406ahrens
4483b2aab1Matthew Ahrens	err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
4493b2aab1Matthew Ahrens	if (err != 0) {
450ea8dc4beschrock		return (err);
451ea8dc4beschrock	}
452ea8dc4beschrock
453fa9e406ahrens	while (next != NULL) {
454bc9014eJustin Gibbs		dsl_dir_t *child_dd;
455fa9e406ahrens		err = getcomponent(next, buf, &nextnext);
4563b2aab1Matthew Ahrens		if (err != 0)
457ea8dc4beschrock			break;
458fa9e406ahrens		ASSERT(next[0] != '\0');
459fa9e406ahrens		if (next[0] == '@')
460fa9e406ahrens			break;
461fa9e406ahrens		dprintf("looking up %s in obj%lld\n",
462c137962Justin T. Gibbs		    buf, dsl_dir_phys(dd)->dd_child_dir_zapobj);
463fa9e406ahrens
464fa9e406ahrens		err = zap_lookup(dp->dp_meta_objset,
465c137962Justin T. Gibbs		    dsl_dir_phys(dd)->dd_child_dir_zapobj,
466fa9e406ahrens		    buf, sizeof (ddobj), 1, &ddobj);
4673b2aab1Matthew Ahrens		if (err != 0) {
468ea8dc4beschrock			if (err == ENOENT)
469ea8dc4beschrock				err = 0;
470fa9e406ahrens			break;
471fa9e406ahrens		}
472fa9e406ahrens
473bc9014eJustin Gibbs		err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_dd);
4743b2aab1Matthew Ahrens		if (err != 0)
475ea8dc4beschrock			break;
4763b2aab1Matthew Ahrens		dsl_dir_rele(dd, tag);
477bc9014eJustin Gibbs		dd = child_dd;
478fa9e406ahrens		next = nextnext;
479fa9e406ahrens	}
480fa9e406ahrens
4813b2aab1Matthew Ahrens	if (err != 0) {
4823b2aab1Matthew Ahrens		dsl_dir_rele(dd, tag);
483ea8dc4beschrock		return (err);
484ea8dc4beschrock	}
485ea8dc4beschrock
486fa9e406ahrens	/*
487fa9e406ahrens	 * It's an error if there's more than one component left, or
488fa9e406ahrens	 * tailp==NULL and there's any component left.
489fa9e406ahrens	 */
490fa9e406ahrens	if (next != NULL &&
491fa9e406ahrens	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
492fa9e406ahrens		/* bad path name */
4933b2aab1Matthew Ahrens		dsl_dir_rele(dd, tag);
494fa9e406ahrens		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
495be6fd75Matthew Ahrens		err = SET_ERROR(ENOENT);
496fa9e406ahrens	}
4973b2aab1Matthew Ahrens	if (tailp != NULL)
498fa9e406ahrens		*tailp = next;
499ea8dc4beschrock	*ddp = dd;
500ea8dc4beschrock	return (err);
501fa9e406ahrens}
502fa9e406ahrens
503a2afb61Jerry Jelinek/*
504a2afb61Jerry Jelinek * If the counts are already initialized for this filesystem and its
505a2afb61Jerry Jelinek * descendants then do nothing, otherwise initialize the counts.
506a2afb61Jerry Jelinek *
507a2afb61Jerry Jelinek * The counts on this filesystem, and those below, may be uninitialized due to
508a2afb61Jerry Jelinek * either the use of a pre-existing pool which did not support the
509a2afb61Jerry Jelinek * filesystem/snapshot limit feature, or one in which the feature had not yet
510a2afb61Jerry Jelinek * been enabled.
511a2afb61Jerry Jelinek *
512a2afb61Jerry Jelinek * Recursively descend the filesystem tree and update the filesystem/snapshot
513a2afb61Jerry Jelinek * counts on each filesystem below, then update the cumulative count on the
514a2afb61Jerry Jelinek * current filesystem. If the filesystem already has a count set on it,
515a2afb61Jerry Jelinek * then we know that its counts, and the counts on the filesystems below it,
516a2afb61Jerry Jelinek * are already correct, so we don't have to update this filesystem.
517a2afb61Jerry Jelinek */
518a2afb61Jerry Jelinekstatic void
519a2afb61Jerry Jelinekdsl_dir_init_fs_ss_count(dsl_dir_t *dd, dmu_tx_t *tx)
520a2afb61Jerry Jelinek{
521a2afb61Jerry Jelinek	uint64_t my_fs_cnt = 0;
522a2afb61Jerry Jelinek	uint64_t my_ss_cnt = 0;
523a2afb61Jerry Jelinek	dsl_pool_t *dp = dd->dd_pool;
524a2afb61Jerry Jelinek	objset_t *os = dp->dp_meta_objset;
525a2afb61Jerry Jelinek	zap_cursor_t *zc;
526a2afb61Jerry Jelinek	zap_attribute_t *za;
527a2afb61Jerry Jelinek	dsl_dataset_t *ds;
528a2afb61Jerry Jelinek
529adf3407Jerry Jelinek	ASSERT(spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT));
530a2afb61Jerry Jelinek	ASSERT(dsl_pool_config_held(dp));
531a2afb61Jerry Jelinek	ASSERT(dmu_tx_is_syncing(tx));
532a2afb61Jerry Jelinek
533a2afb61Jerry Jelinek	dsl_dir_zapify(dd, tx);
534a2afb61Jerry Jelinek
535a2afb61Jerry Jelinek	/*
536a2afb61Jerry Jelinek	 * If the filesystem count has already been initialized then we
537a2afb61Jerry Jelinek	 * don't need to recurse down any further.
538a2afb61Jerry Jelinek	 */
539a2afb61Jerry Jelinek	if (zap_contains(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT) == 0)
540a2afb61Jerry Jelinek		return;
541a2afb61Jerry Jelinek
542a2afb61Jerry Jelinek	zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
543a2afb61Jerry Jelinek	za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
544a2afb61Jerry Jelinek
545a2afb61Jerry Jelinek	/* Iterate my child dirs */
546c137962Justin T. Gibbs	for (zap_cursor_init(zc, os, dsl_dir_phys(dd)->dd_child_dir_zapobj);
547a2afb61Jerry Jelinek	    zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) {
548a2afb61Jerry Jelinek		dsl_dir_t *chld_dd;
549a2afb61Jerry Jelinek		uint64_t count;
550a2afb61Jerry Jelinek
551a2afb61Jerry Jelinek		VERIFY0(dsl_dir_hold_obj(dp, za->za_first_integer, NULL, FTAG,
552a2afb61Jerry Jelinek		    &chld_dd));
553a2afb61Jerry Jelinek
554a2afb61Jerry Jelinek		/*
555a2afb61Jerry Jelinek		 * Ignore hidden ($FREE, $MOS & $ORIGIN) objsets and
556a2afb61Jerry Jelinek		 * temporary datasets.
557a2afb61Jerry Jelinek		 */
558a2afb61Jerry Jelinek		if (chld_dd->dd_myname[0] == '$' ||
559a2afb61Jerry Jelinek		    chld_dd->dd_myname[0] == '%') {
560a2afb61Jerry Jelinek			dsl_dir_rele(chld_dd, FTAG);
561a2afb61Jerry Jelinek			continue;
562a2afb61Jerry Jelinek		}
563a2afb61Jerry Jelinek
564a2afb61Jerry Jelinek		my_fs_cnt++;	/* count this child */
565a2afb61Jerry Jelinek
566a2afb61Jerry Jelinek		dsl_dir_init_fs_ss_count(chld_dd, tx);
567a2afb61Jerry Jelinek
568a2afb61Jerry Jelinek		VERIFY0(zap_lookup(os, chld_dd->dd_object,
569a2afb61Jerry Jelinek		    DD_FIELD_FILESYSTEM_COUNT, sizeof (count), 1, &count));
570a2afb61Jerry Jelinek		my_fs_cnt += count;
571a2afb61Jerry Jelinek		VERIFY0(zap_lookup(os, chld_dd->dd_object,
572a2afb61Jerry Jelinek		    DD_FIELD_SNAPSHOT_COUNT, sizeof (count), 1, &count));
573a2afb61Jerry Jelinek		my_ss_cnt += count;
574a2afb61Jerry Jelinek
575a2afb61Jerry Jelinek		dsl_dir_rele(chld_dd, FTAG);
576a2afb61Jerry Jelinek	}
577a2afb61Jerry Jelinek	zap_cursor_fini(zc);
578a2afb61Jerry Jelinek	/* Count my snapshots (we counted children's snapshots above) */
579a2afb61Jerry Jelinek	VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
580c137962Justin T. Gibbs	    dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds));
581a2afb61Jerry Jelinek
582c137962Justin T. Gibbs	for (zap_cursor_init(zc, os, dsl_dataset_phys(ds)->ds_snapnames_zapobj);
583a2afb61Jerry Jelinek	    zap_cursor_retrieve(zc, za) == 0;
584a2afb61Jerry Jelinek	    zap_cursor_advance(zc)) {
585a2afb61Jerry Jelinek		/* Don't count temporary snapshots */
586a2afb61Jerry Jelinek		if (za->za_name[0] != '%')
587a2afb61Jerry Jelinek			my_ss_cnt++;
588a2afb61Jerry Jelinek	}
589fb7001fAlex Reece	zap_cursor_fini(zc);
590a2afb61Jerry Jelinek
591a2afb61Jerry Jelinek	dsl_dataset_rele(ds, FTAG);
592a2afb61Jerry Jelinek
593a2afb61Jerry Jelinek	kmem_free(zc, sizeof (zap_cursor_t));
594a2afb61Jerry Jelinek	kmem_free(za, sizeof (zap_attribute_t));
595a2afb61Jerry Jelinek
596a2afb61Jerry Jelinek	/* we're in a sync task, update counts */
597a2afb61Jerry Jelinek	dmu_buf_will_dirty(dd->dd_dbuf, tx);
598a2afb61Jerry Jelinek	VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
599a2afb61Jerry Jelinek	    sizeof (my_fs_cnt), 1, &my_fs_cnt, tx));
600a2afb61Jerry Jelinek	VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
601a2afb61Jerry Jelinek	    sizeof (my_ss_cnt), 1, &my_ss_cnt, tx));
602a2afb61Jerry Jelinek}
603a2afb61Jerry Jelinek
604a2afb61Jerry Jelinekstatic int
605a2afb61Jerry Jelinekdsl_dir_actv_fs_ss_limit_check(void *arg, dmu_tx_t *tx)
606a2afb61Jerry Jelinek{
607a2afb61Jerry Jelinek	char *ddname = (char *)arg;
608a2afb61Jerry Jelinek	dsl_pool_t *dp = dmu_tx_pool(tx);
609a2afb61Jerry Jelinek	dsl_dataset_t *ds;
610a2afb61Jerry Jelinek	dsl_dir_t *dd;
611a2afb61Jerry Jelinek	int error;
612a2afb61Jerry Jelinek
613a2afb61Jerry Jelinek	error = dsl_dataset_hold(dp, ddname, FTAG, &ds);
614a2afb61Jerry Jelinek	if (error != 0)
615a2afb61Jerry Jelinek		return (error);
616a2afb61Jerry Jelinek
617a2afb61Jerry Jelinek	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) {
618a2afb61Jerry Jelinek		dsl_dataset_rele(ds, FTAG);
619a2afb61Jerry Jelinek		return (SET_ERROR(ENOTSUP));
620a2afb61Jerry Jelinek	}
621a2afb61Jerry Jelinek
622a2afb61Jerry Jelinek	dd = ds->ds_dir;
623a2afb61Jerry Jelinek	if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT) &&
624a2afb61Jerry Jelinek	    dsl_dir_is_zapified(dd) &&
625a2afb61Jerry Jelinek	    zap_contains(dp->dp_meta_objset, dd->dd_object,
626a2afb61Jerry Jelinek	    DD_FIELD_FILESYSTEM_COUNT) == 0) {
627a2afb61Jerry Jelinek		dsl_dataset_rele(ds, FTAG);
628a2afb61Jerry Jelinek		return (SET_ERROR(EALREADY));
629a2afb61Jerry Jelinek	}
630a2afb61Jerry Jelinek
631a2afb61Jerry Jelinek	dsl_dataset_rele(ds, FTAG);
632a2afb61Jerry Jelinek	return (0);
633a2afb61Jerry Jelinek}
634a2afb61Jerry Jelinek
635a2afb61Jerry Jelinekstatic void
636a2afb61Jerry Jelinekdsl_dir_actv_fs_ss_limit_sync(void *arg, dmu_tx_t *tx)
637a2afb61Jerry Jelinek{
638a2afb61Jerry Jelinek	char *ddname = (char *)arg;
639a2afb61Jerry Jelinek	dsl_pool_t *dp = dmu_tx_pool(tx);
640a2afb61Jerry Jelinek	dsl_dataset_t *ds;
641a2afb61Jerry Jelinek	spa_t *spa;
642a2afb61Jerry Jelinek
643a2afb61Jerry Jelinek	VERIFY0(dsl_dataset_hold(dp, ddname, FTAG, &ds));
644a2afb61Jerry Jelinek
645a2afb61Jerry Jelinek	spa = dsl_dataset_get_spa(ds);
646a2afb61Jerry Jelinek
647a2afb61Jerry Jelinek	if (!spa_feature_is_active(spa, SPA_FEATURE_FS_SS_LIMIT)) {
648a2afb61Jerry Jelinek		/*
649a2afb61Jerry Jelinek		 * Since the feature was not active and we're now setting a
650a2afb61Jerry Jelinek		 * limit, increment the feature-active counter so that the
651a2afb61Jerry Jelinek		 * feature becomes active for the first time.
652a2afb61Jerry Jelinek		 *
653a2afb61Jerry Jelinek		 * We are already in a sync task so we can update the MOS.
654a2afb61Jerry Jelinek		 */
655a2afb61Jerry Jelinek		spa_feature_incr(spa, SPA_FEATURE_FS_SS_LIMIT, tx);
656a2afb61Jerry Jelinek	}
657a2afb61Jerry Jelinek
658a2afb61Jerry Jelinek	/*
659a2afb61Jerry Jelinek	 * Since we are now setting a non-UINT64_MAX limit on the filesystem,
660a2afb61Jerry Jelinek	 * we need to ensure the counts are correct. Descend down the tree from
661a2afb61Jerry Jelinek	 * this point and update all of the counts to be accurate.
662a2afb61Jerry Jelinek	 */
663a2afb61Jerry Jelinek	dsl_dir_init_fs_ss_count(ds->ds_dir, tx);
664a2afb61Jerry Jelinek
665a2afb61Jerry Jelinek	dsl_dataset_rele(ds, FTAG);
666a2afb61Jerry Jelinek}
667a2afb61Jerry Jelinek
668a2afb61Jerry Jelinek/*
669a2afb61Jerry Jelinek * Make sure the feature is enabled and activate it if necessary.
670a2afb61Jerry Jelinek * Since we're setting a limit, ensure the on-disk counts are valid.
671a2afb61Jerry Jelinek * This is only called by the ioctl path when setting a limit value.
672a2afb61Jerry Jelinek *
673a2afb61Jerry Jelinek * We do not need to validate the new limit, since users who can change the
674a2afb61Jerry Jelinek * limit are also allowed to exceed the limit.
675a2afb61Jerry Jelinek */
676a2afb61Jerry Jelinekint
677a2afb61Jerry Jelinekdsl_dir_activate_fs_ss_limit(const char *ddname)
678a2afb61Jerry Jelinek{
679a2afb61Jerry Jelinek	int error;
680a2afb61Jerry Jelinek
681a2afb61Jerry Jelinek	error = dsl_sync_task(ddname, dsl_dir_actv_fs_ss_limit_check,
6827d46dc6Matthew Ahrens	    dsl_dir_actv_fs_ss_limit_sync, (void *)ddname, 0,
6837d46dc6Matthew Ahrens	    ZFS_SPACE_CHECK_RESERVED);
684a2afb61Jerry Jelinek
685a2afb61Jerry Jelinek	if (error == EALREADY)
686a2afb61Jerry Jelinek		error = 0;
687a2afb61Jerry Jelinek
688a2afb61Jerry Jelinek	return (error);
689a2afb61Jerry Jelinek}
690a2afb61Jerry Jelinek
691a2afb61Jerry Jelinek/*
692a2afb61Jerry Jelinek * Used to determine if the filesystem_limit or snapshot_limit should be
693a2afb61Jerry Jelinek * enforced. We allow the limit to be exceeded if the user has permission to
694a2afb61Jerry Jelinek * write the property value. We pass in the creds that we got in the open
695a2afb61Jerry Jelinek * context since we will always be the GZ root in syncing context. We also have
696a2afb61Jerry Jelinek * to handle the case where we are allowed to change the limit on the current
697a2afb61Jerry Jelinek * dataset, but there may be another limit in the tree above.
698a2afb61Jerry Jelinek *
699a2afb61Jerry Jelinek * We can never modify these two properties within a non-global zone. In
700a2afb61Jerry Jelinek * addition, the other checks are modeled on zfs_secpolicy_write_perms. We
701a2afb61Jerry Jelinek * can't use that function since we are already holding the dp_config_rwlock.
702a2afb61Jerry Jelinek * In addition, we already have the dd and dealing with snapshots is simplified
703a2afb61Jerry Jelinek * in this code.
704a2afb61Jerry Jelinek */
705a2afb61Jerry Jelinek
/* Result of dsl_enforce_ds_ss_limits(). */
typedef enum {
	/* No exemption applies; this is also the fallback on any failure. */
	ENFORCE_ALWAYS,
	/* Caller passed secpolicy_zfs(); limit checks are skipped entirely. */
	ENFORCE_NEVER,
	/*
	 * Caller has delegated permission to set the limit property on this
	 * dataset; limits elsewhere in the tree above may still apply.
	 */
	ENFORCE_ABOVE
} enforce_res_t;
711a2afb61Jerry Jelinek
712a2afb61Jerry Jelinekstatic enforce_res_t
713a2afb61Jerry Jelinekdsl_enforce_ds_ss_limits(dsl_dir_t *dd, zfs_prop_t prop, cred_t *cr)
714a2afb61Jerry Jelinek{
715a2afb61Jerry Jelinek	enforce_res_t enforce = ENFORCE_ALWAYS;
716a2afb61Jerry Jelinek	uint64_t obj;
717a2afb61Jerry Jelinek	dsl_dataset_t *ds;
718a2afb61Jerry Jelinek	uint64_t zoned;
719a2afb61Jerry Jelinek
720a2afb61Jerry Jelinek	ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT ||
721a2afb61Jerry Jelinek	    prop == ZFS_PROP_SNAPSHOT_LIMIT);
722a2afb61Jerry Jelinek
723a2afb61Jerry Jelinek#ifdef _KERNEL
724a2afb61Jerry Jelinek	if (crgetzoneid(cr) != GLOBAL_ZONEID)
725a2afb61Jerry Jelinek		return (ENFORCE_ALWAYS);
726a2afb61Jerry Jelinek
727a2afb61Jerry Jelinek	if (secpolicy_zfs(cr) == 0)
728a2afb61Jerry Jelinek		return (ENFORCE_NEVER);
729a2afb61Jerry Jelinek#endif
730a2afb61Jerry Jelinek
731c137962Justin T. Gibbs	if ((obj = dsl_dir_phys(dd)->dd_head_dataset_obj) == 0)
732a2afb61Jerry Jelinek		return (ENFORCE_ALWAYS);
733a2afb61Jerry Jelinek
734a2afb61Jerry Jelinek	ASSERT(dsl_pool_config_held(dd->dd_pool));
735a2afb61Jerry Jelinek
736a2afb61Jerry Jelinek	if (dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds) != 0)
737a2afb61Jerry Jelinek		return (ENFORCE_ALWAYS);
738a2afb61Jerry Jelinek
739a2afb61Jerry Jelinek	if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL) || zoned) {
740a2afb61Jerry Jelinek		/* Only root can access zoned fs's from the GZ */
741a2afb61Jerry Jelinek		enforce = ENFORCE_ALWAYS;
742a2afb61Jerry Jelinek	} else {
743a2afb61Jerry Jelinek		if (dsl_deleg_access_impl(ds, zfs_prop_to_name(prop), cr) == 0)
744a2afb61Jerry Jelinek			enforce = ENFORCE_ABOVE;
745a2afb61Jerry Jelinek	}
746a2afb61Jerry Jelinek
747a2afb61Jerry Jelinek	dsl_dataset_rele(ds, FTAG);
748a2afb61Jerry Jelinek	return (enforce);
749a2afb61Jerry Jelinek}
750a2afb61Jerry Jelinek
7515cabbc6Prashanth Sreenivasastatic void
7525cabbc6Prashanth Sreenivasadsl_dir_update_last_remap_txg_sync(void *varg, dmu_tx_t *tx)
7535cabbc6Prashanth Sreenivasa{
7545cabbc6Prashanth Sreenivasa	ddulrt_arg_t *arg = varg;
7555cabbc6Prashanth Sreenivasa	uint64_t last_remap_txg;
7565cabbc6Prashanth Sreenivasa	dsl_dir_t *dd = arg->ddulrta_dd;
7575cabbc6Prashanth Sreenivasa	objset_t *mos = dd->dd_pool->dp_meta_objset;
7585cabbc6Prashanth Sreenivasa
7595cabbc6Prashanth Sreenivasa	dsl_dir_zapify(dd, tx);
7605cabbc6Prashanth Sreenivasa	if (zap_lookup(mos, dd->dd_object, DD_FIELD_LAST_REMAP_TXG,
7615cabbc6Prashanth Sreenivasa	    sizeof (last_remap_txg), 1, &last_remap_txg) != 0 ||
7625cabbc6Prashanth Sreenivasa	    last_remap_txg < arg->ddlrta_txg) {
7635cabbc6Prashanth Sreenivasa		VERIFY0(zap_update(mos, dd->dd_object, DD_FIELD_LAST_REMAP_TXG,
7645cabbc6Prashanth Sreenivasa		    sizeof (arg->ddlrta_txg), 1, &arg->ddlrta_txg, tx));
7655cabbc6Prashanth Sreenivasa	}
7665cabbc6Prashanth Sreenivasa}
7675cabbc6Prashanth Sreenivasa
7685cabbc6Prashanth Sreenivasaint
7695cabbc6Prashanth Sreenivasadsl_dir_update_last_remap_txg(dsl_dir_t *dd, uint64_t txg)
7705cabbc6Prashanth Sreenivasa{
7715cabbc6Prashanth Sreenivasa	ddulrt_arg_t arg;
7725cabbc6Prashanth Sreenivasa	arg.ddulrta_dd = dd;
7735cabbc6Prashanth Sreenivasa	arg.ddlrta_txg = txg;
7745cabbc6Prashanth Sreenivasa
7755cabbc6Prashanth Sreenivasa	return (dsl_sync_task(spa_name(dd->dd_pool->dp_spa),
7765cabbc6Prashanth Sreenivasa	    NULL, dsl_dir_update_last_remap_txg_sync, &arg,
7775cabbc6Prashanth Sreenivasa	    1, ZFS_SPACE_CHECK_RESERVED));
7785cabbc6Prashanth Sreenivasa}
7795cabbc6Prashanth Sreenivasa
780a2afb61Jerry Jelinek/*
781a2afb61Jerry Jelinek * Check if adding additional child filesystem(s) would exceed any filesystem
782a2afb61Jerry Jelinek * limits or adding additional snapshot(s) would exceed any snapshot limits.
783a2afb61Jerry Jelinek * The prop argument indicates which limit to check.
784a2afb61Jerry Jelinek *
785a2afb61Jerry Jelinek * Note that all filesystem limits up to the root (or the highest
786a2afb61Jerry Jelinek * initialized) filesystem or the given ancestor must be satisfied.
787a2afb61Jerry Jelinek */
788a2afb61Jerry Jelinekint
789a2afb61Jerry Jelinekdsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop,
790a2afb61Jerry Jelinek    dsl_dir_t *ancestor, cred_t *cr)
791a2afb61Jerry Jelinek{
792a2afb61Jerry Jelinek	objset_t *os = dd->dd_pool->dp_meta_objset;
793a2afb61Jerry Jelinek	uint64_t limit, count;
794a2afb61Jerry Jelinek	char *count_prop;
795a2afb61Jerry Jelinek	enforce_res_t enforce;
796a2afb61Jerry Jelinek	int err = 0;
797a2afb61Jerry Jelinek
798a2afb61Jerry Jelinek	ASSERT(dsl_pool_config_held(dd->dd_pool));
799a2afb61Jerry Jelinek	ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT ||
800a2afb61Jerry Jelinek	    prop == ZFS_PROP_SNAPSHOT_LIMIT);
801a2afb61Jerry Jelinek
802a2afb61Jerry Jelinek	/*
803a2afb61Jerry Jelinek	 * If we're allowed to change the limit, don't enforce the limit
804a2afb61Jerry Jelinek	 * e.g. this can happen if a snapshot is taken by an administrative
805a2afb61Jerry Jelinek	 * user in the global zone (i.e. a recursive snapshot by root).
806a2afb61Jerry Jelinek	 * However, we must handle the case of delegated permissions where we
807a2afb61Jerry Jelinek	 * are allowed to change the limit on the current dataset, but there
808a2afb61Jerry Jelinek	 * is another limit in the tree above.
809a2afb61Jerry Jelinek	 */
810a2afb61Jerry Jelinek	enforce = dsl_enforce_ds_ss_limits(dd, prop, cr);
811a2afb61Jerry Jelinek	if (enforce == ENFORCE_NEVER)
812a2afb61Jerry Jelinek		return (0);
813a2afb61Jerry Jelinek
814a2afb61Jerry Jelinek	/*
815a2afb61Jerry Jelinek	 * e.g. if renaming a dataset with no snapshots, count adjustment
816a2afb61Jerry Jelinek	 * is 0.
817a2afb61Jerry Jelinek	 */
818a2afb61Jerry Jelinek	if (delta == 0)
819a2afb61Jerry Jelinek		return (0);
820a2afb61Jerry Jelinek
821a2afb61Jerry Jelinek	if (prop == ZFS_PROP_SNAPSHOT_LIMIT) {
822