xref: /illumos-gate/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision f67950b2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
25  * Portions Copyright 2011 Martin Matuska
26  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
27  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
28  * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
29  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
30  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
31  * Copyright (c) 2013 Steven Hartland. All rights reserved.
32  * Copyright (c) 2014 Integros [integros.com]
33  * Copyright 2016 Toomas Soome <tsoome@me.com>
34  * Copyright (c) 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
35  * Copyright 2017 RackTop Systems.
36  * Copyright (c) 2017, Datto, Inc. All rights reserved.
37  */
38 
39 /*
40  * ZFS ioctls.
41  *
42  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
43  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
44  *
45  * There are two ways that we handle ioctls: the legacy way where almost
46  * all of the logic is in the ioctl callback, and the new way where most
47  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
48  *
49  * Non-legacy ioctls should be registered by calling
50  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
51  * from userland by lzc_ioctl().
52  *
53  * The registration arguments are as follows:
54  *
55  * const char *name
56  *   The name of the ioctl.  This is used for history logging.  If the
57  *   ioctl returns successfully (the callback returns 0), and allow_log
58  *   is true, then a history log entry will be recorded with the input &
59  *   output nvlists.  The log entry can be printed with "zpool history -i".
60  *
61  * zfs_ioc_t ioc
62  *   The ioctl request number, which userland will pass to ioctl(2).
63  *   The ioctl numbers can change from release to release, because
64  *   the caller (libzfs) must be matched to the kernel.
65  *
66  * zfs_secpolicy_func_t *secpolicy
67  *   This function will be called before the zfs_ioc_func_t, to
68  *   determine if this operation is permitted.  It should return EPERM
69  *   on failure, and 0 on success.  Checks include determining if the
70  *   dataset is visible in this zone, and if the user has either all
71  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
72  *   to do this operation on this dataset with "zfs allow".
73  *
74  * zfs_ioc_namecheck_t namecheck
75  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
76  *   name, a dataset name, or nothing.  If the name is not well-formed,
77  *   the ioctl will fail and the callback will not be called.
78  *   Therefore, the callback can assume that the name is well-formed
79  *   (e.g. is null-terminated, doesn't have more than one '@' character,
80  *   doesn't have invalid characters).
81  *
82  * zfs_ioc_poolcheck_t pool_check
83  *   This specifies requirements on the pool state.  If the pool does
84  *   not meet them (is suspended or is readonly), the ioctl will fail
85  *   and the callback will not be called.  If any checks are specified
86  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
87  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
88  *   POOL_CHECK_READONLY).
89  *
90  * boolean_t smush_outnvlist
91  *   If smush_outnvlist is true, then the output is presumed to be a
92  *   list of errors, and it will be "smushed" down to fit into the
93  *   caller's buffer, by removing some entries and replacing them with a
94  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
95  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
96  *   outnvlist does not fit into the userland-provided buffer, then the
97  *   ioctl will fail with ENOMEM.
98  *
99  * zfs_ioc_func_t *func
100  *   The callback function that will perform the operation.
101  *
102  *   The callback should return 0 on success, or an error number on
103  *   failure.  If the function fails, the userland ioctl will return -1,
104  *   and errno will be set to the callback's return value.  The callback
105  *   will be called with the following arguments:
106  *
107  *   const char *name
108  *     The name of the pool or dataset to operate on, from
109  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
110  *     expected type (pool, dataset, or none).
111  *
112  *   nvlist_t *innvl
113  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
114  *     NULL if no input nvlist was provided.  Changes to this nvlist are
115  *     ignored.  If the input nvlist could not be deserialized, the
116  *     ioctl will fail and the callback will not be called.
117  *
118  *   nvlist_t *outnvl
119  *     The output nvlist, initially empty.  The callback can fill it in,
120  *     and it will be returned to userland by serializing it into
121  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
122  *     fails (e.g. because the caller didn't supply a large enough
123  *     buffer), then the overall ioctl will fail.  See the
124  *     'smush_outnvlist' argument above for additional behaviors.
125  *
126  *     There are two typical uses of the output nvlist:
127  *       - To return state, e.g. property values.  In this case,
128  *         smush_outnvlist should be false.  If the buffer was not large
129  *         enough, the caller will reallocate a larger buffer and try
130  *         the ioctl again.
131  *
132  *       - To return multiple errors from an ioctl which makes on-disk
133  *         changes.  In this case, smush_outnvlist should be true.
134  *         Ioctls which make on-disk modifications should generally not
135  *         use the outnvl if they succeed, because the caller can not
136  *         distinguish between the operation failing, and
137  *         deserialization failing.
138  */
139 
140 #include <sys/types.h>
141 #include <sys/param.h>
142 #include <sys/errno.h>
143 #include <sys/uio.h>
144 #include <sys/buf.h>
145 #include <sys/modctl.h>
146 #include <sys/open.h>
147 #include <sys/file.h>
148 #include <sys/kmem.h>
149 #include <sys/conf.h>
150 #include <sys/cmn_err.h>
151 #include <sys/stat.h>
152 #include <sys/zfs_ioctl.h>
153 #include <sys/zfs_vfsops.h>
154 #include <sys/zfs_znode.h>
155 #include <sys/zap.h>
156 #include <sys/spa.h>
157 #include <sys/spa_impl.h>
158 #include <sys/vdev.h>
159 #include <sys/priv_impl.h>
160 #include <sys/dmu.h>
161 #include <sys/dsl_dir.h>
162 #include <sys/dsl_dataset.h>
163 #include <sys/dsl_prop.h>
164 #include <sys/dsl_deleg.h>
165 #include <sys/dmu_objset.h>
166 #include <sys/dmu_impl.h>
167 #include <sys/dmu_tx.h>
168 #include <sys/ddi.h>
169 #include <sys/sunddi.h>
170 #include <sys/sunldi.h>
171 #include <sys/policy.h>
172 #include <sys/zone.h>
173 #include <sys/nvpair.h>
174 #include <sys/pathname.h>
175 #include <sys/mount.h>
176 #include <sys/sdt.h>
177 #include <sys/fs/zfs.h>
178 #include <sys/zfs_ctldir.h>
179 #include <sys/zfs_dir.h>
180 #include <sys/zfs_onexit.h>
181 #include <sys/zvol.h>
182 #include <sys/dsl_scan.h>
183 #include <sharefs/share.h>
184 #include <sys/dmu_objset.h>
185 #include <sys/dmu_recv.h>
186 #include <sys/dmu_send.h>
187 #include <sys/dsl_destroy.h>
188 #include <sys/dsl_bookmark.h>
189 #include <sys/dsl_userhold.h>
190 #include <sys/zfeature.h>
191 #include <sys/zcp.h>
192 #include <sys/zio_checksum.h>
193 #include <sys/vdev_removal.h>
194 #include <sys/vdev_impl.h>
195 #include <sys/vdev_initialize.h>
196 #include <sys/dsl_crypt.h>
197 
198 #include "zfs_namecheck.h"
199 #include "zfs_prop.h"
200 #include "zfs_deleg.h"
201 #include "zfs_comutil.h"
202 
203 #include "lua.h"
204 #include "lauxlib.h"
205 
206 extern struct modlfs zfs_modlfs;
207 
208 extern void zfs_init(void);
209 extern void zfs_fini(void);
210 
211 ldi_ident_t zfs_li = NULL;
212 dev_info_t *zfs_dip;
213 
214 uint_t zfs_fsyncer_key;
215 extern uint_t rrw_tsd_key;
216 static uint_t zfs_allow_log_key;
217 
218 typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
219 typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
220 typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
221 
/*
 * What kind of name an ioctl expects in zfs_cmd_t:zc_name -- nothing,
 * a pool name, or a dataset name.
 */
typedef enum {
	NO_NAME		= 0,
	POOL_NAME	= 1,
	DATASET_NAME	= 2
} zfs_ioc_namecheck_t;
227 
/*
 * Pool-state requirements for an ioctl.  Each value is a distinct bit so
 * that checks can be or-ed together.  Note that POOL_CHECK_NONE is itself
 * a non-zero bit, not 0.
 */
typedef enum {
	POOL_CHECK_NONE		= 0x1,
	POOL_CHECK_SUSPENDED	= 0x2,
	POOL_CHECK_READONLY	= 0x4
} zfs_ioc_poolcheck_t;
233 
234 typedef struct zfs_ioc_vec {
235 	zfs_ioc_legacy_func_t	*zvec_legacy_func;
236 	zfs_ioc_func_t		*zvec_func;
237 	zfs_secpolicy_func_t	*zvec_secpolicy;
238 	zfs_ioc_namecheck_t	zvec_namecheck;
239 	boolean_t		zvec_allow_log;
240 	zfs_ioc_poolcheck_t	zvec_pool_check;
241 	boolean_t		zvec_smush_outnvlist;
242 	const char		*zvec_name;
243 } zfs_ioc_vec_t;
244 
245 /* This array is indexed by zfs_userquota_prop_t */
246 static const char *userquota_perms[] = {
247 	ZFS_DELEG_PERM_USERUSED,
248 	ZFS_DELEG_PERM_USERQUOTA,
249 	ZFS_DELEG_PERM_GROUPUSED,
250 	ZFS_DELEG_PERM_GROUPQUOTA,
251 	ZFS_DELEG_PERM_USEROBJUSED,
252 	ZFS_DELEG_PERM_USEROBJQUOTA,
253 	ZFS_DELEG_PERM_GROUPOBJUSED,
254 	ZFS_DELEG_PERM_GROUPOBJQUOTA,
255 	ZFS_DELEG_PERM_PROJECTUSED,
256 	ZFS_DELEG_PERM_PROJECTQUOTA,
257 	ZFS_DELEG_PERM_PROJECTOBJUSED,
258 	ZFS_DELEG_PERM_PROJECTOBJQUOTA,
259 };
260 
261 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
262 static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
263 static int zfs_check_settable(const char *name, nvpair_t *property,
264     cred_t *cr);
265 static int zfs_check_clearable(char *dataset, nvlist_t *props,
266     nvlist_t **errors);
267 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
268     boolean_t *);
269 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
270 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
271 
272 static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
273 
274 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
275 void
276 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
277 {
278 	const char *newfile;
279 	char buf[512];
280 	va_list adx;
281 
282 	/*
283 	 * Get rid of annoying "../common/" prefix to filename.
284 	 */
285 	newfile = strrchr(file, '/');
286 	if (newfile != NULL) {
287 		newfile = newfile + 1; /* Get rid of leading / */
288 	} else {
289 		newfile = file;
290 	}
291 
292 	va_start(adx, fmt);
293 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
294 	va_end(adx);
295 
296 	/*
297 	 * To get this data, use the zfs-dprintf probe as so:
298 	 * dtrace -q -n 'zfs-dprintf \
299 	 *	/stringof(arg0) == "dbuf.c"/ \
300 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
301 	 * arg0 = file name
302 	 * arg1 = function name
303 	 * arg2 = line number
304 	 * arg3 = message
305 	 */
306 	DTRACE_PROBE4(zfs__dprintf,
307 	    char *, newfile, char *, func, int, line, char *, buf);
308 }
309 
310 static void
311 history_str_free(char *buf)
312 {
313 	kmem_free(buf, HIS_MAX_RECORD_LEN);
314 }
315 
316 static char *
317 history_str_get(zfs_cmd_t *zc)
318 {
319 	char *buf;
320 
321 	if (zc->zc_history == 0)
322 		return (NULL);
323 
324 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
325 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
326 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
327 		history_str_free(buf);
328 		return (NULL);
329 	}
330 
331 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
332 
333 	return (buf);
334 }
335 
336 /*
337  * Check to see if the named dataset is currently defined as bootable
338  */
339 static boolean_t
340 zfs_is_bootfs(const char *name)
341 {
342 	objset_t *os;
343 
344 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
345 		boolean_t ret;
346 		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
347 		dmu_objset_rele(os, FTAG);
348 		return (ret);
349 	}
350 	return (B_FALSE);
351 }
352 
353 /*
354  * Return non-zero if the spa version is less than requested version.
355  */
356 static int
357 zfs_earlier_version(const char *name, int version)
358 {
359 	spa_t *spa;
360 
361 	if (spa_open(name, &spa, FTAG) == 0) {
362 		if (spa_version(spa) < version) {
363 			spa_close(spa, FTAG);
364 			return (1);
365 		}
366 		spa_close(spa, FTAG);
367 	}
368 	return (0);
369 }
370 
371 /*
372  * Return TRUE if the ZPL version is less than requested version.
373  */
374 static boolean_t
375 zpl_earlier_version(const char *name, int version)
376 {
377 	objset_t *os;
378 	boolean_t rc = B_TRUE;
379 
380 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
381 		uint64_t zplversion;
382 
383 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
384 			dmu_objset_rele(os, FTAG);
385 			return (B_TRUE);
386 		}
387 		/* XXX reading from non-owned objset */
388 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
389 			rc = zplversion < version;
390 		dmu_objset_rele(os, FTAG);
391 	}
392 	return (rc);
393 }
394 
395 static void
396 zfs_log_history(zfs_cmd_t *zc)
397 {
398 	spa_t *spa;
399 	char *buf;
400 
401 	if ((buf = history_str_get(zc)) == NULL)
402 		return;
403 
404 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
405 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
406 			(void) spa_history_log(spa, buf);
407 		spa_close(spa, FTAG);
408 	}
409 	history_str_free(buf);
410 }
411 
412 /*
413  * Policy for top-level read operations (list pools).  Requires no privileges,
414  * and can be used in the local zone, as there is no associated dataset.
415  */
416 /* ARGSUSED */
417 static int
418 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
419 {
420 	return (0);
421 }
422 
423 /*
424  * Policy for dataset read operations (list children, get statistics).  Requires
425  * no privileges, but must be visible in the local zone.
426  */
427 /* ARGSUSED */
428 static int
429 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
430 {
431 	if (INGLOBALZONE(curproc) ||
432 	    zone_dataset_visible(zc->zc_name, NULL))
433 		return (0);
434 
435 	return (SET_ERROR(ENOENT));
436 }
437 
438 static int
439 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
440 {
441 	int writable = 1;
442 
443 	/*
444 	 * The dataset must be visible by this zone -- check this first
445 	 * so they don't see EPERM on something they shouldn't know about.
446 	 */
447 	if (!INGLOBALZONE(curproc) &&
448 	    !zone_dataset_visible(dataset, &writable))
449 		return (SET_ERROR(ENOENT));
450 
451 	if (INGLOBALZONE(curproc)) {
452 		/*
453 		 * If the fs is zoned, only root can access it from the
454 		 * global zone.
455 		 */
456 		if (secpolicy_zfs(cr) && zoned)
457 			return (SET_ERROR(EPERM));
458 	} else {
459 		/*
460 		 * If we are in a local zone, the 'zoned' property must be set.
461 		 */
462 		if (!zoned)
463 			return (SET_ERROR(EPERM));
464 
465 		/* must be writable by this zone */
466 		if (!writable)
467 			return (SET_ERROR(EPERM));
468 	}
469 	return (0);
470 }
471 
472 static int
473 zfs_dozonecheck(const char *dataset, cred_t *cr)
474 {
475 	uint64_t zoned;
476 
477 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
478 		return (SET_ERROR(ENOENT));
479 
480 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
481 }
482 
483 static int
484 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
485 {
486 	uint64_t zoned;
487 
488 	if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
489 		return (SET_ERROR(ENOENT));
490 
491 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
492 }
493 
494 static int
495 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
496     const char *perm, cred_t *cr)
497 {
498 	int error;
499 
500 	error = zfs_dozonecheck_ds(name, ds, cr);
501 	if (error == 0) {
502 		error = secpolicy_zfs(cr);
503 		if (error != 0)
504 			error = dsl_deleg_access_impl(ds, perm, cr);
505 	}
506 	return (error);
507 }
508 
509 static int
510 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
511 {
512 	int error;
513 	dsl_dataset_t *ds;
514 	dsl_pool_t *dp;
515 
516 	/*
517 	 * First do a quick check for root in the global zone, which
518 	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
519 	 * will get to handle nonexistent datasets.
520 	 */
521 	if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
522 		return (0);
523 
524 	error = dsl_pool_hold(name, FTAG, &dp);
525 	if (error != 0)
526 		return (error);
527 
528 	error = dsl_dataset_hold(dp, name, FTAG, &ds);
529 	if (error != 0) {
530 		dsl_pool_rele(dp, FTAG);
531 		return (error);
532 	}
533 
534 	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
535 
536 	dsl_dataset_rele(ds, FTAG);
537 	dsl_pool_rele(dp, FTAG);
538 	return (error);
539 }
540 
541 /*
542  * Policy for setting the security label property.
543  *
544  * Returns 0 for success, non-zero for access and other errors.
545  */
546 static int
547 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
548 {
549 	char		ds_hexsl[MAXNAMELEN];
550 	bslabel_t	ds_sl, new_sl;
551 	boolean_t	new_default = FALSE;
552 	uint64_t	zoned;
553 	int		needed_priv = -1;
554 	int		error;
555 
556 	/* First get the existing dataset label. */
557 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
558 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
559 	if (error != 0)
560 		return (SET_ERROR(EPERM));
561 
562 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
563 		new_default = TRUE;
564 
565 	/* The label must be translatable */
566 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
567 		return (SET_ERROR(EINVAL));
568 
569 	/*
570 	 * In a non-global zone, disallow attempts to set a label that
571 	 * doesn't match that of the zone; otherwise no other checks
572 	 * are needed.
573 	 */
574 	if (!INGLOBALZONE(curproc)) {
575 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
576 			return (SET_ERROR(EPERM));
577 		return (0);
578 	}
579 
580 	/*
581 	 * For global-zone datasets (i.e., those whose zoned property is
582 	 * "off", verify that the specified new label is valid for the
583 	 * global zone.
584 	 */
585 	if (dsl_prop_get_integer(name,
586 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
587 		return (SET_ERROR(EPERM));
588 	if (!zoned) {
589 		if (zfs_check_global_label(name, strval) != 0)
590 			return (SET_ERROR(EPERM));
591 	}
592 
593 	/*
594 	 * If the existing dataset label is nondefault, check if the
595 	 * dataset is mounted (label cannot be changed while mounted).
596 	 * Get the zfsvfs; if there isn't one, then the dataset isn't
597 	 * mounted (or isn't a dataset, doesn't exist, ...).
598 	 */
599 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
600 		objset_t *os;
601 		static char *setsl_tag = "setsl_tag";
602 
603 		/*
604 		 * Try to own the dataset; abort if there is any error,
605 		 * (e.g., already mounted, in use, or other error).
606 		 */
607 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
608 		    setsl_tag, &os);
609 		if (error != 0)
610 			return (SET_ERROR(EPERM));
611 
612 		dmu_objset_disown(os, B_TRUE, setsl_tag);
613 
614 		if (new_default) {
615 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
616 			goto out_check;
617 		}
618 
619 		if (hexstr_to_label(strval, &new_sl) != 0)
620 			return (SET_ERROR(EPERM));
621 
622 		if (blstrictdom(&ds_sl, &new_sl))
623 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
624 		else if (blstrictdom(&new_sl, &ds_sl))
625 			needed_priv = PRIV_FILE_UPGRADE_SL;
626 	} else {
627 		/* dataset currently has a default label */
628 		if (!new_default)
629 			needed_priv = PRIV_FILE_UPGRADE_SL;
630 	}
631 
632 out_check:
633 	if (needed_priv != -1)
634 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
635 	return (0);
636 }
637 
638 static int
639 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
640     cred_t *cr)
641 {
642 	char *strval;
643 
644 	/*
645 	 * Check permissions for special properties.
646 	 */
647 	switch (prop) {
648 	case ZFS_PROP_ZONED:
649 		/*
650 		 * Disallow setting of 'zoned' from within a local zone.
651 		 */
652 		if (!INGLOBALZONE(curproc))
653 			return (SET_ERROR(EPERM));
654 		break;
655 
656 	case ZFS_PROP_QUOTA:
657 	case ZFS_PROP_FILESYSTEM_LIMIT:
658 	case ZFS_PROP_SNAPSHOT_LIMIT:
659 		if (!INGLOBALZONE(curproc)) {
660 			uint64_t zoned;
661 			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
662 			/*
663 			 * Unprivileged users are allowed to modify the
664 			 * limit on things *under* (ie. contained by)
665 			 * the thing they own.
666 			 */
667 			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
668 			    setpoint))
669 				return (SET_ERROR(EPERM));
670 			if (!zoned || strlen(dsname) <= strlen(setpoint))
671 				return (SET_ERROR(EPERM));
672 		}
673 		break;
674 
675 	case ZFS_PROP_MLSLABEL:
676 		if (!is_system_labeled())
677 			return (SET_ERROR(EPERM));
678 
679 		if (nvpair_value_string(propval, &strval) == 0) {
680 			int err;
681 
682 			err = zfs_set_slabel_policy(dsname, strval, CRED());
683 			if (err != 0)
684 				return (err);
685 		}
686 		break;
687 	}
688 
689 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
690 }
691 
692 /* ARGSUSED */
693 static int
694 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
695 {
696 	int error;
697 
698 	error = zfs_dozonecheck(zc->zc_name, cr);
699 	if (error != 0)
700 		return (error);
701 
702 	/*
703 	 * permission to set permissions will be evaluated later in
704 	 * dsl_deleg_can_allow()
705 	 */
706 	return (0);
707 }
708 
709 /* ARGSUSED */
710 static int
711 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
712 {
713 	return (zfs_secpolicy_write_perms(zc->zc_name,
714 	    ZFS_DELEG_PERM_ROLLBACK, cr));
715 }
716 
717 /* ARGSUSED */
718 static int
719 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
720 {
721 	dsl_pool_t *dp;
722 	dsl_dataset_t *ds;
723 	char *cp;
724 	int error;
725 
726 	/*
727 	 * Generate the current snapshot name from the given objsetid, then
728 	 * use that name for the secpolicy/zone checks.
729 	 */
730 	cp = strchr(zc->zc_name, '@');
731 	if (cp == NULL)
732 		return (SET_ERROR(EINVAL));
733 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
734 	if (error != 0)
735 		return (error);
736 
737 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
738 	if (error != 0) {
739 		dsl_pool_rele(dp, FTAG);
740 		return (error);
741 	}
742 
743 	dsl_dataset_name(ds, zc->zc_name);
744 
745 	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
746 	    ZFS_DELEG_PERM_SEND, cr);
747 	dsl_dataset_rele(ds, FTAG);
748 	dsl_pool_rele(dp, FTAG);
749 
750 	return (error);
751 }
752 
753 /* ARGSUSED */
754 static int
755 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
756 {
757 	return (zfs_secpolicy_write_perms(zc->zc_name,
758 	    ZFS_DELEG_PERM_SEND, cr));
759 }
760 
761 /* ARGSUSED */
762 static int
763 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
764 {
765 	vnode_t *vp;
766 	int error;
767 
768 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
769 	    NO_FOLLOW, NULL, &vp)) != 0)
770 		return (error);
771 
772 	/* Now make sure mntpnt and dataset are ZFS */
773 
774 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
775 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
776 	    zc->zc_name) != 0)) {
777 		VN_RELE(vp);
778 		return (SET_ERROR(EPERM));
779 	}
780 
781 	VN_RELE(vp);
782 	return (dsl_deleg_access(zc->zc_name,
783 	    ZFS_DELEG_PERM_SHARE, cr));
784 }
785 
786 int
787 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
788 {
789 	if (!INGLOBALZONE(curproc))
790 		return (SET_ERROR(EPERM));
791 
792 	if (secpolicy_nfs(cr) == 0) {
793 		return (0);
794 	} else {
795 		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
796 	}
797 }
798 
799 int
800 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
801 {
802 	if (!INGLOBALZONE(curproc))
803 		return (SET_ERROR(EPERM));
804 
805 	if (secpolicy_smb(cr) == 0) {
806 		return (0);
807 	} else {
808 		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
809 	}
810 }
811 
/*
 * Compute the parent of 'datasetname' into 'parent' (a buffer of
 * 'parentsize' bytes) by removing the trailing "@bla" or "/bla".
 *
 * Returns 0 on success, ENOENT if the name contains neither '@' nor
 * '/', and ENAMETOOLONG if the name (plus its terminator) does not fit
 * in the buffer.  On ENAMETOOLONG 'parent' is left untouched.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	/*
	 * strncpy() does not NUL-terminate when the source fills the
	 * buffer, which would send strrchr() below past the end of
	 * 'parent'.  Reject such names up front; after this check the
	 * copy is always terminated.
	 */
	if (parentsize <= 0 || strlen(datasetname) >= (size_t)parentsize)
		return (SET_ERROR(ENAMETOOLONG));

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	cp = strrchr(parent, '@');
	if (cp != NULL) {
		cp[0] = '\0';
	} else {
		cp = strrchr(parent, '/');
		if (cp == NULL)
			return (SET_ERROR(ENOENT));
		cp[0] = '\0';
	}

	return (0);
}
833 
834 int
835 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
836 {
837 	int error;
838 
839 	if ((error = zfs_secpolicy_write_perms(name,
840 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
841 		return (error);
842 
843 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
844 }
845 
846 /* ARGSUSED */
847 static int
848 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
849 {
850 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
851 }
852 
853 /*
854  * Destroying snapshots with delegated permissions requires
855  * descendant mount and destroy permissions.
856  */
857 /* ARGSUSED */
858 static int
859 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
860 {
861 	nvlist_t *snaps;
862 	nvpair_t *pair, *nextpair;
863 	int error = 0;
864 
865 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
866 		return (SET_ERROR(EINVAL));
867 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
868 	    pair = nextpair) {
869 		nextpair = nvlist_next_nvpair(snaps, pair);
870 		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
871 		if (error == ENOENT) {
872 			/*
873 			 * Ignore any snapshots that don't exist (we consider
874 			 * them "already destroyed").  Remove the name from the
875 			 * nvl here in case the snapshot is created between
876 			 * now and when we try to destroy it (in which case
877 			 * we don't want to destroy it since we haven't
878 			 * checked for permission).
879 			 */
880 			fnvlist_remove_nvpair(snaps, pair);
881 			error = 0;
882 		}
883 		if (error != 0)
884 			break;
885 	}
886 
887 	return (error);
888 }
889 
890 int
891 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
892 {
893 	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
894 	int	error;
895 
896 	if ((error = zfs_secpolicy_write_perms(from,
897 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
898 		return (error);
899 
900 	if ((error = zfs_secpolicy_write_perms(from,
901 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
902 		return (error);
903 
904 	if ((error = zfs_get_parent(to, parentname,
905 	    sizeof (parentname))) != 0)
906 		return (error);
907 
908 	if ((error = zfs_secpolicy_write_perms(parentname,
909 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
910 		return (error);
911 
912 	if ((error = zfs_secpolicy_write_perms(parentname,
913 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
914 		return (error);
915 
916 	return (error);
917 }
918 
919 /* ARGSUSED */
920 static int
921 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
922 {
923 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
924 }
925 
926 /* ARGSUSED */
927 static int
928 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
929 {
930 	dsl_pool_t *dp;
931 	dsl_dataset_t *clone;
932 	int error;
933 
934 	error = zfs_secpolicy_write_perms(zc->zc_name,
935 	    ZFS_DELEG_PERM_PROMOTE, cr);
936 	if (error != 0)
937 		return (error);
938 
939 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
940 	if (error != 0)
941 		return (error);
942 
943 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
944 
945 	if (error == 0) {
946 		char parentname[ZFS_MAX_DATASET_NAME_LEN];
947 		dsl_dataset_t *origin = NULL;
948 		dsl_dir_t *dd;
949 		dd = clone->ds_dir;
950 
951 		error = dsl_dataset_hold_obj(dd->dd_pool,
952 		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
953 		if (error != 0) {
954 			dsl_dataset_rele(clone, FTAG);
955 			dsl_pool_rele(dp, FTAG);
956 			return (error);
957 		}
958 
959 		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
960 		    ZFS_DELEG_PERM_MOUNT, cr);
961 
962 		dsl_dataset_name(origin, parentname);
963 		if (error == 0) {
964 			error = zfs_secpolicy_write_perms_ds(parentname, origin,
965 			    ZFS_DELEG_PERM_PROMOTE, cr);
966 		}
967 		dsl_dataset_rele(clone, FTAG);
968 		dsl_dataset_rele(origin, FTAG);
969 	}
970 	dsl_pool_rele(dp, FTAG);
971 	return (error);
972 }
973 
974 /* ARGSUSED */
975 static int
976 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
977 {
978 	int error;
979 
980 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
981 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
982 		return (error);
983 
984 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
985 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
986 		return (error);
987 
988 	return (zfs_secpolicy_write_perms(zc->zc_name,
989 	    ZFS_DELEG_PERM_CREATE, cr));
990 }
991 
992 int
993 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
994 {
995 	return (zfs_secpolicy_write_perms(name,
996 	    ZFS_DELEG_PERM_SNAPSHOT, cr));
997 }
998 
999 /*
1000  * Check for permission to create each snapshot in the nvlist.
1001  */
1002 /* ARGSUSED */
1003 static int
1004 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1005 {
1006 	nvlist_t *snaps;
1007 	int error = 0;
1008 	nvpair_t *pair;
1009 
1010 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1011 		return (SET_ERROR(EINVAL));
1012 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1013 	    pair = nvlist_next_nvpair(snaps, pair)) {
1014 		char *name = nvpair_name(pair);
1015 		char *atp = strchr(name, '@');
1016 
1017 		if (atp == NULL) {
1018 			error = SET_ERROR(EINVAL);
1019 			break;
1020 		}
1021 		*atp = '\0';
1022 		error = zfs_secpolicy_snapshot_perms(name, cr);
1023 		*atp = '@';
1024 		if (error != 0)
1025 			break;
1026 	}
1027 	return (error);
1028 }
1029 
1030 /*
1031  * Check for permission to create each snapshot in the nvlist.
1032  */
1033 /* ARGSUSED */
1034 static int
1035 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1036 {
1037 	int error = 0;
1038 
1039 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1040 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1041 		char *name = nvpair_name(pair);
1042 		char *hashp = strchr(name, '#');
1043 
1044 		if (hashp == NULL) {
1045 			error = SET_ERROR(EINVAL);
1046 			break;
1047 		}
1048 		*hashp = '\0';
1049 		error = zfs_secpolicy_write_perms(name,
1050 		    ZFS_DELEG_PERM_BOOKMARK, cr);
1051 		*hashp = '#';
1052 		if (error != 0)
1053 			break;
1054 	}
1055 	return (error);
1056 }
1057 
1058 /* ARGSUSED */
1059 static int
1060 zfs_secpolicy_remap(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1061 {
1062 	return (zfs_secpolicy_write_perms(zc->zc_name,
1063 	    ZFS_DELEG_PERM_REMAP, cr));
1064 }
1065 
1066 /* ARGSUSED */
1067 static int
1068 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1069 {
1070 	nvpair_t *pair, *nextpair;
1071 	int error = 0;
1072 
1073 	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1074 	    pair = nextpair) {
1075 		char *name = nvpair_name(pair);
1076 		char *hashp = strchr(name, '#');
1077 		nextpair = nvlist_next_nvpair(innvl, pair);
1078 
1079 		if (hashp == NULL) {
1080 			error = SET_ERROR(EINVAL);
1081 			break;
1082 		}
1083 
1084 		*hashp = '\0';
1085 		error = zfs_secpolicy_write_perms(name,
1086 		    ZFS_DELEG_PERM_DESTROY, cr);
1087 		*hashp = '#';
1088 		if (error == ENOENT) {
1089 			/*
1090 			 * Ignore any filesystems that don't exist (we consider
1091 			 * their bookmarks "already destroyed").  Remove
1092 			 * the name from the nvl here in case the filesystem
1093 			 * is created between now and when we try to destroy
1094 			 * the bookmark (in which case we don't want to
1095 			 * destroy it since we haven't checked for permission).
1096 			 */
1097 			fnvlist_remove_nvpair(innvl, pair);
1098 			error = 0;
1099 		}
1100 		if (error != 0)
1101 			break;
1102 	}
1103 
1104 	return (error);
1105 }
1106 
1107 /* ARGSUSED */
1108 static int
1109 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1110 {
1111 	/*
1112 	 * Even root must have a proper TSD so that we know what pool
1113 	 * to log to.
1114 	 */
1115 	if (tsd_get(zfs_allow_log_key) == NULL)
1116 		return (SET_ERROR(EPERM));
1117 	return (0);
1118 }
1119 
1120 static int
1121 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1122 {
1123 	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
1124 	int	error;
1125 	char	*origin;
1126 
1127 	if ((error = zfs_get_parent(zc->zc_name, parentname,
1128 	    sizeof (parentname))) != 0)
1129 		return (error);
1130 
1131 	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1132 	    (error = zfs_secpolicy_write_perms(origin,
1133 	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1134 		return (error);
1135 
1136 	if ((error = zfs_secpolicy_write_perms(parentname,
1137 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1138 		return (error);
1139 
1140 	return (zfs_secpolicy_write_perms(parentname,
1141 	    ZFS_DELEG_PERM_MOUNT, cr));
1142 }
1143 
1144 /*
1145  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1146  * SYS_CONFIG privilege, which is not available in a local zone.
1147  */
1148 /* ARGSUSED */
1149 static int
1150 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1151 {
1152 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1153 		return (SET_ERROR(EPERM));
1154 
1155 	return (0);
1156 }
1157 
1158 /*
1159  * Policy for object to name lookups.
1160  */
1161 /* ARGSUSED */
1162 static int
1163 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1164 {
1165 	int error;
1166 
1167 	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1168 		return (0);
1169 
1170 	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1171 	return (error);
1172 }
1173 
1174 /*
1175  * Policy for fault injection.  Requires all privileges.
1176  */
1177 /* ARGSUSED */
1178 static int
1179 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1180 {
1181 	return (secpolicy_zinject(cr));
1182 }
1183 
1184 /* ARGSUSED */
1185 static int
1186 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1187 {
1188 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1189 
1190 	if (prop == ZPROP_INVAL) {
1191 		if (!zfs_prop_user(zc->zc_value))
1192 			return (SET_ERROR(EINVAL));
1193 		return (zfs_secpolicy_write_perms(zc->zc_name,
1194 		    ZFS_DELEG_PERM_USERPROP, cr));
1195 	} else {
1196 		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1197 		    NULL, cr));
1198 	}
1199 }
1200 
1201 static int
1202 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1203 {
1204 	int err = zfs_secpolicy_read(zc, innvl, cr);
1205 	if (err)
1206 		return (err);
1207 
1208 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1209 		return (SET_ERROR(EINVAL));
1210 
1211 	if (zc->zc_value[0] == 0) {
1212 		/*
1213 		 * They are asking about a posix uid/gid.  If it's
1214 		 * themself, allow it.
1215 		 */
1216 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1217 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1218 		    zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1219 		    zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1220 			if (zc->zc_guid == crgetuid(cr))
1221 				return (0);
1222 		} else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
1223 		    zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
1224 		    zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
1225 		    zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
1226 			if (groupmember(zc->zc_guid, cr))
1227 				return (0);
1228 		}
1229 		/* else is for project quota/used */
1230 	}
1231 
1232 	return (zfs_secpolicy_write_perms(zc->zc_name,
1233 	    userquota_perms[zc->zc_objset_type], cr));
1234 }
1235 
1236 static int
1237 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1238 {
1239 	int err = zfs_secpolicy_read(zc, innvl, cr);
1240 	if (err)
1241 		return (err);
1242 
1243 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1244 		return (SET_ERROR(EINVAL));
1245 
1246 	return (zfs_secpolicy_write_perms(zc->zc_name,
1247 	    userquota_perms[zc->zc_objset_type], cr));
1248 }
1249 
1250 /* ARGSUSED */
1251 static int
1252 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1253 {
1254 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1255 	    NULL, cr));
1256 }
1257 
1258 /* ARGSUSED */
1259 static int
1260 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1261 {
1262 	nvpair_t *pair;
1263 	nvlist_t *holds;
1264 	int error;
1265 
1266 	error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1267 	if (error != 0)
1268 		return (SET_ERROR(EINVAL));
1269 
1270 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1271 	    pair = nvlist_next_nvpair(holds, pair)) {
1272 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1273 		error = dmu_fsname(nvpair_name(pair), fsname);
1274 		if (error != 0)
1275 			return (error);
1276 		error = zfs_secpolicy_write_perms(fsname,
1277 		    ZFS_DELEG_PERM_HOLD, cr);
1278 		if (error != 0)
1279 			return (error);
1280 	}
1281 	return (0);
1282 }
1283 
1284 /* ARGSUSED */
1285 static int
1286 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1287 {
1288 	nvpair_t *pair;
1289 	int error;
1290 
1291 	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1292 	    pair = nvlist_next_nvpair(innvl, pair)) {
1293 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1294 		error = dmu_fsname(nvpair_name(pair), fsname);
1295 		if (error != 0)
1296 			return (error);
1297 		error = zfs_secpolicy_write_perms(fsname,
1298 		    ZFS_DELEG_PERM_RELEASE, cr);
1299 		if (error != 0)
1300 			return (error);
1301 	}
1302 	return (0);
1303 }
1304 
1305 /* ARGSUSED */
1306 static int
1307 zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1308 {
1309 	return (zfs_secpolicy_write_perms(zc->zc_name,
1310 	    ZFS_DELEG_PERM_LOAD_KEY, cr));
1311 }
1312 
1313 /* ARGSUSED */
1314 static int
1315 zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1316 {
1317 	return (zfs_secpolicy_write_perms(zc->zc_name,
1318 	    ZFS_DELEG_PERM_CHANGE_KEY, cr));
1319 }
1320 
1321 /*
1322  * Policy for allowing temporary snapshots to be taken or released
1323  */
1324 static int
1325 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1326 {
1327 	/*
1328 	 * A temporary snapshot is the same as a snapshot,
1329 	 * hold, destroy and release all rolled into one.
1330 	 * Delegated diff alone is sufficient that we allow this.
1331 	 */
1332 	int error;
1333 
1334 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1335 	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1336 		return (0);
1337 
1338 	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1339 	if (error == 0)
1340 		error = zfs_secpolicy_hold(zc, innvl, cr);
1341 	if (error == 0)
1342 		error = zfs_secpolicy_release(zc, innvl, cr);
1343 	if (error == 0)
1344 		error = zfs_secpolicy_destroy(zc, innvl, cr);
1345 	return (error);
1346 }
1347 
1348 /*
1349  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1350  */
1351 static int
1352 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1353 {
1354 	char *packed;
1355 	int error;
1356 	nvlist_t *list = NULL;
1357 
1358 	/*
1359 	 * Read in and unpack the user-supplied nvlist.
1360 	 */
1361 	if (size == 0)
1362 		return (SET_ERROR(EINVAL));
1363 
1364 	packed = kmem_alloc(size, KM_SLEEP);
1365 
1366 	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1367 	    iflag)) != 0) {
1368 		kmem_free(packed, size);
1369 		return (SET_ERROR(EFAULT));
1370 	}
1371 
1372 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1373 		kmem_free(packed, size);
1374 		return (error);
1375 	}
1376 
1377 	kmem_free(packed, size);
1378 
1379 	*nvp = list;
1380 	return (0);
1381 }
1382 
1383 /*
1384  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1385  * Entries will be removed from the end of the nvlist, and one int32 entry
1386  * named "N_MORE_ERRORS" will be added indicating how many entries were
1387  * removed.
1388  */
1389 static int
1390 nvlist_smush(nvlist_t *errors, size_t max)
1391 {
1392 	size_t size;
1393 
1394 	size = fnvlist_size(errors);
1395 
1396 	if (size > max) {
1397 		nvpair_t *more_errors;
1398 		int n = 0;
1399 
1400 		if (max < 1024)
1401 			return (SET_ERROR(ENOMEM));
1402 
1403 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1404 		more_errors = nvlist_prev_nvpair(errors, NULL);
1405 
1406 		do {
1407 			nvpair_t *pair = nvlist_prev_nvpair(errors,
1408 			    more_errors);
1409 			fnvlist_remove_nvpair(errors, pair);
1410 			n++;
1411 			size = fnvlist_size(errors);
1412 		} while (size > max);
1413 
1414 		fnvlist_remove_nvpair(errors, more_errors);
1415 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1416 		ASSERT3U(fnvlist_size(errors), <=, max);
1417 	}
1418 
1419 	return (0);
1420 }
1421 
1422 static int
1423 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1424 {
1425 	char *packed = NULL;
1426 	int error = 0;
1427 	size_t size;
1428 
1429 	size = fnvlist_size(nvl);
1430 
1431 	if (size > zc->zc_nvlist_dst_size) {
1432 		error = SET_ERROR(ENOMEM);
1433 	} else {
1434 		packed = fnvlist_pack(nvl, &size);
1435 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1436 		    size, zc->zc_iflags) != 0)
1437 			error = SET_ERROR(EFAULT);
1438 		fnvlist_pack_free(packed, size);
1439 	}
1440 
1441 	zc->zc_nvlist_dst_size = size;
1442 	zc->zc_nvlist_dst_filled = B_TRUE;
1443 	return (error);
1444 }
1445 
1446 int
1447 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1448 {
1449 	int error = 0;
1450 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1451 		return (SET_ERROR(EINVAL));
1452 	}
1453 
1454 	mutex_enter(&os->os_user_ptr_lock);
1455 	*zfvp = dmu_objset_get_user(os);
1456 	if (*zfvp) {
1457 		VFS_HOLD((*zfvp)->z_vfs);
1458 	} else {
1459 		error = SET_ERROR(ESRCH);
1460 	}
1461 	mutex_exit(&os->os_user_ptr_lock);
1462 	return (error);
1463 }
1464 
1465 int
1466 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1467 {
1468 	objset_t *os;
1469 	int error;
1470 
1471 	error = dmu_objset_hold(dsname, FTAG, &os);
1472 	if (error != 0)
1473 		return (error);
1474 
1475 	error = getzfsvfs_impl(os, zfvp);
1476 	dmu_objset_rele(os, FTAG);
1477 	return (error);
1478 }
1479 
1480 /*
1481  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1482  * case its z_vfs will be NULL, and it will be opened as the owner.
1483  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1484  * which prevents all vnode ops from running.
1485  */
1486 static int
1487 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1488 {
1489 	int error = 0;
1490 
1491 	if (getzfsvfs(name, zfvp) != 0)
1492 		error = zfsvfs_create(name, zfvp);
1493 	if (error == 0) {
1494 		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1495 		    RW_READER, tag);
1496 		if ((*zfvp)->z_unmounted) {
1497 			/*
1498 			 * XXX we could probably try again, since the unmounting
1499 			 * thread should be just about to disassociate the
1500 			 * objset from the zfsvfs.
1501 			 */
1502 			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1503 			return (SET_ERROR(EBUSY));
1504 		}
1505 	}
1506 	return (error);
1507 }
1508 
1509 static void
1510 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1511 {
1512 	rrm_exit(&zfsvfs->z_teardown_lock, tag);
1513 
1514 	if (zfsvfs->z_vfs) {
1515 		VFS_RELE(zfsvfs->z_vfs);
1516 	} else {
1517 		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1518 		zfsvfs_free(zfsvfs);
1519 	}
1520 }
1521 
1522 static int
1523 zfs_ioc_pool_create(zfs_cmd_t *zc)
1524 {
1525 	int error;
1526 	nvlist_t *config, *props = NULL;
1527 	nvlist_t *rootprops = NULL;
1528 	nvlist_t *zplprops = NULL;
1529 	char *spa_name = zc->zc_name;
1530 	dsl_crypto_params_t *dcp = NULL;
1531 
1532 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1533 	    zc->zc_iflags, &config))
1534 		return (error);
1535 
1536 	if (zc->zc_nvlist_src_size != 0 && (error =
1537 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1538 	    zc->zc_iflags, &props))) {
1539 		nvlist_free(config);
1540 		return (error);
1541 	}
1542 
1543 	if (props) {
1544 		nvlist_t *nvl = NULL;
1545 		nvlist_t *hidden_args = NULL;
1546 		uint64_t version = SPA_VERSION;
1547 		char *tname;
1548 
1549 		(void) nvlist_lookup_uint64(props,
1550 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1551 		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1552 			error = SET_ERROR(EINVAL);
1553 			goto pool_props_bad;
1554 		}
1555 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1556 		if (nvl) {
1557 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1558 			if (error != 0) {
1559 				nvlist_free(config);
1560 				nvlist_free(props);
1561 				return (error);
1562 			}
1563 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1564 		}
1565 
1566 		(void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
1567 		    &hidden_args);
1568 		error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
1569 		    rootprops, hidden_args, &dcp);
1570 		if (error != 0) {
1571 			nvlist_free(config);
1572 			nvlist_free(props);
1573 			return (error);
1574 		}
1575 		(void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
1576 
1577 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1578 		error = zfs_fill_zplprops_root(version, rootprops,
1579 		    zplprops, NULL);
1580 		if (error != 0)
1581 			goto pool_props_bad;
1582 
1583 		if (nvlist_lookup_string(props,
1584 		    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
1585 			spa_name = tname;
1586 	}
1587 
1588 	error = spa_create(zc->zc_name, config, props, zplprops, dcp);
1589 
1590 	/*
1591 	 * Set the remaining root properties
1592 	 */
1593 	if (!error && (error = zfs_set_prop_nvlist(spa_name,
1594 	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1595 		(void) spa_destroy(spa_name);
1596 
1597 pool_props_bad:
1598 	nvlist_free(rootprops);
1599 	nvlist_free(zplprops);
1600 	nvlist_free(config);
1601 	nvlist_free(props);
1602 	dsl_crypto_params_free(dcp, !!error);
1603 
1604 	return (error);
1605 }
1606 
1607 static int
1608 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1609 {
1610 	int error;
1611 	zfs_log_history(zc);
1612 	error = spa_destroy(zc->zc_name);
1613 	if (error == 0)
1614 		zvol_remove_minors(zc->zc_name);
1615 	return (error);
1616 }
1617 
1618 static int
1619 zfs_ioc_pool_import(zfs_cmd_t *zc)
1620 {
1621 	nvlist_t *config, *props = NULL;
1622 	uint64_t guid;
1623 	int error;
1624 
1625 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1626 	    zc->zc_iflags, &config)) != 0)
1627 		return (error);
1628 
1629 	if (zc->zc_nvlist_src_size != 0 && (error =
1630 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1631 	    zc->zc_iflags, &props))) {
1632 		nvlist_free(config);
1633 		return (error);
1634 	}
1635 
1636 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1637 	    guid != zc->zc_guid)
1638 		error = SET_ERROR(EINVAL);
1639 	else
1640 		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1641 
1642 	if (zc->zc_nvlist_dst != 0) {
1643 		int err;
1644 
1645 		if ((err = put_nvlist(zc, config)) != 0)
1646 			error = err;
1647 	}
1648 
1649 	nvlist_free(config);
1650 
1651 	nvlist_free(props);
1652 
1653 	return (error);
1654 }
1655 
1656 static int
1657 zfs_ioc_pool_export(zfs_cmd_t *zc)
1658 {
1659 	int error;
1660 	boolean_t force = (boolean_t)zc->zc_cookie;
1661 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1662 
1663 	zfs_log_history(zc);
1664 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1665 	if (error == 0)
1666 		zvol_remove_minors(zc->zc_name);
1667 	return (error);
1668 }
1669 
1670 static int
1671 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1672 {
1673 	nvlist_t *configs;
1674 	int error;
1675 
1676 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1677 		return (SET_ERROR(EEXIST));
1678 
1679 	error = put_nvlist(zc, configs);
1680 
1681 	nvlist_free(configs);
1682 
1683 	return (error);
1684 }
1685 
1686 /*
1687  * inputs:
1688  * zc_name		name of the pool
1689  *
1690  * outputs:
1691  * zc_cookie		real errno
1692  * zc_nvlist_dst	config nvlist
1693  * zc_nvlist_dst_size	size of config nvlist
1694  */
1695 static int
1696 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1697 {
1698 	nvlist_t *config;
1699 	int error;
1700 	int ret = 0;
1701 
1702 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1703 	    sizeof (zc->zc_value));
1704 
1705 	if (config != NULL) {
1706 		ret = put_nvlist(zc, config);
1707 		nvlist_free(config);
1708 
1709 		/*
1710 		 * The config may be present even if 'error' is non-zero.
1711 		 * In this case we return success, and preserve the real errno
1712 		 * in 'zc_cookie'.
1713 		 */
1714 		zc->zc_cookie = error;
1715 	} else {
1716 		ret = error;
1717 	}
1718 
1719 	return (ret);
1720 }
1721 
1722 /*
1723  * Try to import the given pool, returning pool stats as appropriate so that
1724  * user land knows which devices are available and overall pool health.
1725  */
1726 static int
1727 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1728 {
1729 	nvlist_t *tryconfig, *config;
1730 	int error;
1731 
1732 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1733 	    zc->zc_iflags, &tryconfig)) != 0)
1734 		return (error);
1735 
1736 	config = spa_tryimport(tryconfig);
1737 
1738 	nvlist_free(tryconfig);
1739 
1740 	if (config == NULL)
1741 		return (SET_ERROR(EINVAL));
1742 
1743 	error = put_nvlist(zc, config);
1744 	nvlist_free(config);
1745 
1746 	return (error);
1747 }
1748 
1749 /*
1750  * inputs:
1751  * zc_name              name of the pool
1752  * zc_cookie            scan func (pool_scan_func_t)
1753  * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1754  */
1755 static int
1756 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1757 {
1758 	spa_t *spa;
1759 	int error;
1760 
1761 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1762 		return (error);
1763 
1764 	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1765 		return (SET_ERROR(EINVAL));
1766 
1767 	if (zc->zc_flags == POOL_SCRUB_PAUSE)
1768 		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1769 	else if (zc->zc_cookie == POOL_SCAN_NONE)
1770 		error = spa_scan_stop(spa);
1771 	else
1772 		error = spa_scan(spa, zc->zc_cookie);
1773 
1774 	spa_close(spa, FTAG);
1775 
1776 	return (error);
1777 }
1778 
1779 static int
1780 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1781 {
1782 	spa_t *spa;
1783 	int error;
1784 
1785 	error = spa_open(zc->zc_name, &spa, FTAG);
1786 	if (error == 0) {
1787 		spa_freeze(spa);
1788 		spa_close(spa, FTAG);
1789 	}
1790 	return (error);
1791 }
1792 
1793 static int
1794 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1795 {
1796 	spa_t *spa;
1797 	int error;
1798 
1799 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1800 		return (error);
1801 
1802 	if (zc->zc_cookie < spa_version(spa) ||
1803 	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1804 		spa_close(spa, FTAG);
1805 		return (SET_ERROR(EINVAL));
1806 	}
1807 
1808 	spa_upgrade(spa, zc->zc_cookie);
1809 	spa_close(spa, FTAG);
1810 
1811 	return (error);
1812 }
1813 
1814 static int
1815 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1816 {
1817 	spa_t *spa;
1818 	char *hist_buf;
1819 	uint64_t size;
1820 	int error;
1821 
1822 	if ((size = zc->zc_history_len) == 0)
1823 		return (SET_ERROR(EINVAL));
1824 
1825 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1826 		return (error);
1827 
1828 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1829 		spa_close(spa, FTAG);
1830 		return (SET_ERROR(ENOTSUP));
1831 	}
1832 
1833 	hist_buf = kmem_alloc(size, KM_SLEEP);
1834 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1835 	    &zc->zc_history_len, hist_buf)) == 0) {
1836 		error = ddi_copyout(hist_buf,
1837 		    (void *)(uintptr_t)zc->zc_history,
1838 		    zc->zc_history_len, zc->zc_iflags);
1839 	}
1840 
1841 	spa_close(spa, FTAG);
1842 	kmem_free(hist_buf, size);
1843 	return (error);
1844 }
1845 
1846 static int
1847 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1848 {
1849 	spa_t *spa;
1850 	int error;
1851 
1852 	error = spa_open(zc->zc_name, &spa, FTAG);
1853 	if (error == 0) {
1854 		error = spa_change_guid(spa);
1855 		spa_close(spa, FTAG);
1856 	}
1857 	return (error);
1858 }
1859 
1860 static int
1861 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1862 {
1863 	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1864 }
1865 
1866 /*
1867  * inputs:
1868  * zc_name		name of filesystem
1869  * zc_obj		object to find
1870  *
1871  * outputs:
1872  * zc_value		name of object
1873  */
1874 static int
1875 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1876 {
1877 	objset_t *os;
1878 	int error;
1879 
1880 	/* XXX reading from objset not owned */
1881 	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1882 	    FTAG, &os)) != 0)
1883 		return (error);
1884 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1885 		dmu_objset_rele_flags(os, B_TRUE, FTAG);
1886 		return (SET_ERROR(EINVAL));
1887 	}
1888 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1889 	    sizeof (zc->zc_value));
1890 	dmu_objset_rele_flags(os, B_TRUE, FTAG);
1891 
1892 	return (error);
1893 }
1894 
1895 /*
1896  * inputs:
1897  * zc_name		name of filesystem
1898  * zc_obj		object to find
1899  *
1900  * outputs:
1901  * zc_stat		stats on object
1902  * zc_value		path to object
1903  */
1904 static int
1905 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1906 {
1907 	objset_t *os;
1908 	int error;
1909 
1910 	/* XXX reading from objset not owned */
1911 	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1912 	    FTAG, &os)) != 0)
1913 		return (error);
1914 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1915 		dmu_objset_rele_flags(os, B_TRUE, FTAG);
1916 		return (SET_ERROR(EINVAL));
1917 	}
1918 	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1919 	    sizeof (zc->zc_value));
1920 	dmu_objset_rele_flags(os, B_TRUE, FTAG);
1921 
1922 	return (error);
1923 }
1924 
1925 static int
1926 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1927 {
1928 	spa_t *spa;
1929 	int error;
1930 	nvlist_t *config, **l2cache, **spares;
1931 	uint_t nl2cache = 0, nspares = 0;
1932 
1933 	error = spa_open(zc->zc_name, &spa, FTAG);
1934 	if (error != 0)
1935 		return (error);
1936 
1937 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1938 	    zc->zc_iflags, &config);
1939 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1940 	    &l2cache, &nl2cache);
1941 
1942 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1943 	    &spares, &nspares);
1944 
1945 	/*
1946 	 * A root pool with concatenated devices is not supported.
1947 	 * Thus, can not add a device to a root pool.
1948 	 *
1949 	 * Intent log device can not be added to a rootpool because
1950 	 * during mountroot, zil is replayed, a seperated log device
1951 	 * can not be accessed during the mountroot time.
1952 	 *
1953 	 * l2cache and spare devices are ok to be added to a rootpool.
1954 	 */
1955 	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1956 		nvlist_free(config);
1957 		spa_close(spa, FTAG);
1958 		return (SET_ERROR(EDOM));
1959 	}
1960 
1961 	if (error == 0) {
1962 		error = spa_vdev_add(spa, config);
1963 		nvlist_free(config);
1964 	}
1965 	spa_close(spa, FTAG);
1966 	return (error);
1967 }
1968 
1969 /*
1970  * inputs:
1971  * zc_name		name of the pool
1972  * zc_guid		guid of vdev to remove
1973  * zc_cookie		cancel removal
1974  */
1975 static int
1976 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1977 {
1978 	spa_t *spa;
1979 	int error;
1980 
1981 	error = spa_open(zc->zc_name, &spa, FTAG);
1982 	if (error != 0)
1983 		return (error);
1984 	if (zc->zc_cookie != 0) {
1985 		error = spa_vdev_remove_cancel(spa);
1986 	} else {
1987 		error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1988 	}
1989 	spa_close(spa, FTAG);
1990 	return (error);
1991 }
1992 
1993 static int
1994 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1995 {
1996 	spa_t *spa;
1997 	int error;
1998 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1999 
2000 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2001 		return (error);
2002 	switch (zc->zc_cookie) {
2003 	case VDEV_STATE_ONLINE:
2004 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
2005 		break;
2006 
2007 	case VDEV_STATE_OFFLINE:
2008 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
2009 		break;
2010 
2011 	case VDEV_STATE_FAULTED:
2012 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2013 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
2014 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2015 
2016 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
2017 		break;
2018 
2019 	case VDEV_STATE_DEGRADED:
2020 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2021 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
2022 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2023 
2024 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
2025 		break;
2026 
2027 	default:
2028 		error = SET_ERROR(EINVAL);
2029 	}
2030 	zc->zc_cookie = newstate;
2031 	spa_close(spa, FTAG);
2032 	return (error);
2033 }
2034 
2035 static int
2036 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2037 {
2038 	spa_t *spa;
2039 	int replacing = zc->zc_cookie;
2040 	nvlist_t *config;
2041 	int error;
2042 
2043 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2044 		return (error);
2045 
2046 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2047 	    zc->zc_iflags, &config)) == 0) {
2048 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
2049 		nvlist_free(config);
2050 	}
2051 
2052 	spa_close(spa, FTAG);
2053 	return (error);
2054 }
2055 
2056 static int
2057 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2058 {
2059 	spa_t *spa;
2060 	int error;
2061 
2062 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2063 		return (error);
2064 
2065 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2066 
2067 	spa_close(spa, FTAG);
2068 	return (error);
2069 }
2070 
2071 static int
2072 zfs_ioc_vdev_split(zfs_cmd_t *zc)
2073 {
2074 	spa_t *spa;
2075 	nvlist_t *config, *props = NULL;
2076 	int error;
2077 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2078 
2079 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2080 		return (error);
2081 
2082 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2083 	    zc->zc_iflags, &config)) {
2084 		spa_close(spa, FTAG);
2085 		return (error);
2086 	}
2087 
2088 	if (zc->zc_nvlist_src_size != 0 && (error =
2089 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2090 	    zc->zc_iflags, &props))) {
2091 		spa_close(spa, FTAG);
2092 		nvlist_free(config);
2093 		return (error);
2094 	}
2095 
2096 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2097 
2098 	spa_close(spa, FTAG);
2099 
2100 	nvlist_free(config);
2101 	nvlist_free(props);
2102 
2103 	return (error);
2104 }
2105 
2106 static int
2107 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2108 {
2109 	spa_t *spa;
2110 	char *path = zc->zc_value;
2111 	uint64_t guid = zc->zc_guid;
2112 	int error;
2113 
2114 	error = spa_open(zc->zc_name, &spa, FTAG);
2115 	if (error != 0)
2116 		return (error);
2117 
2118 	error = spa_vdev_setpath(spa, guid, path);
2119 	spa_close(spa, FTAG);
2120 	return (error);
2121 }
2122 
2123 static int
2124 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2125 {
2126 	spa_t *spa;
2127 	char *fru = zc->zc_value;
2128 	uint64_t guid = zc->zc_guid;
2129 	int error;
2130 
2131 	error = spa_open(zc->zc_name, &spa, FTAG);
2132 	if (error != 0)
2133 		return (error);
2134 
2135 	error = spa_vdev_setfru(spa, guid, fru);
2136 	spa_close(spa, FTAG);
2137 	return (error);
2138 }
2139 
2140 static int
2141 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2142 {
2143 	int error = 0;
2144 	nvlist_t *nv;
2145 
2146 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2147 
2148 	if (zc->zc_nvlist_dst != 0 &&
2149 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2150 		dmu_objset_stats(os, nv);
2151 		/*
2152 		 * NB: zvol_get_stats() will read the objset contents,
2153 		 * which we aren't supposed to do with a
2154 		 * DS_MODE_USER hold, because it could be
2155 		 * inconsistent.  So this is a bit of a workaround...
2156 		 * XXX reading with out owning
2157 		 */
2158 		if (!zc->zc_objset_stats.dds_inconsistent &&
2159 		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2160 			error = zvol_get_stats(os, nv);
2161 			if (error == EIO)
2162 				return (error);
2163 			VERIFY0(error);
2164 		}
2165 		error = put_nvlist(zc, nv);
2166 		nvlist_free(nv);
2167 	}
2168 
2169 	return (error);
2170 }
2171 
2172 /*
2173  * inputs:
2174  * zc_name		name of filesystem
2175  * zc_nvlist_dst_size	size of buffer for property nvlist
2176  *
2177  * outputs:
2178  * zc_objset_stats	stats
2179  * zc_nvlist_dst	property nvlist
2180  * zc_nvlist_dst_size	size of property nvlist
2181  */
2182 static int
2183 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2184 {
2185 	objset_t *os;
2186 	int error;
2187 
2188 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2189 	if (error == 0) {
2190 		error = zfs_ioc_objset_stats_impl(zc, os);
2191 		dmu_objset_rele(os, FTAG);
2192 	}
2193 
2194 	return (error);
2195 }
2196 
2197 /*
2198  * inputs:
2199  * zc_name		name of filesystem
2200  * zc_nvlist_dst_size	size of buffer for property nvlist
2201  *
2202  * outputs:
2203  * zc_nvlist_dst	received property nvlist
2204  * zc_nvlist_dst_size	size of received property nvlist
2205  *
2206  * Gets received properties (distinct from local properties on or after
2207  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2208  * local property values.
2209  */
2210 static int
2211 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2212 {
2213 	int error = 0;
2214 	nvlist_t *nv;
2215 
2216 	/*
2217 	 * Without this check, we would return local property values if the
2218 	 * caller has not already received properties on or after
2219 	 * SPA_VERSION_RECVD_PROPS.
2220 	 */
2221 	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2222 		return (SET_ERROR(ENOTSUP));
2223 
2224 	if (zc->zc_nvlist_dst != 0 &&
2225 	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2226 		error = put_nvlist(zc, nv);
2227 		nvlist_free(nv);
2228 	}
2229 
2230 	return (error);
2231 }
2232 
2233 static int
2234 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2235 {
2236 	uint64_t value;
2237 	int error;
2238 
2239 	/*
2240 	 * zfs_get_zplprop() will either find a value or give us
2241 	 * the default value (if there is one).
2242 	 */
2243 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2244 		return (error);
2245 	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2246 	return (0);
2247 }
2248 
2249 /*
2250  * inputs:
2251  * zc_name		name of filesystem
2252  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2253  *
2254  * outputs:
2255  * zc_nvlist_dst	zpl property nvlist
2256  * zc_nvlist_dst_size	size of zpl property nvlist
2257  */
2258 static int
2259 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2260 {
2261 	objset_t *os;
2262 	int err;
2263 
2264 	/* XXX reading without owning */
2265 	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2266 		return (err);
2267 
2268 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2269 
2270 	/*
2271 	 * NB: nvl_add_zplprop() will read the objset contents,
2272 	 * which we aren't supposed to do with a DS_MODE_USER
2273 	 * hold, because it could be inconsistent.
2274 	 */
2275 	if (zc->zc_nvlist_dst != 0 &&
2276 	    !zc->zc_objset_stats.dds_inconsistent &&
2277 	    dmu_objset_type(os) == DMU_OST_ZFS) {
2278 		nvlist_t *nv;
2279 
2280 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2281 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2282 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2283 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2284 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2285 			err = put_nvlist(zc, nv);
2286 		nvlist_free(nv);
2287 	} else {
2288 		err = SET_ERROR(ENOENT);
2289 	}
2290 	dmu_objset_rele(os, FTAG);
2291 	return (err);
2292 }
2293 
2294 static boolean_t
2295 dataset_name_hidden(const char *name)
2296 {
2297 	/*
2298 	 * Skip over datasets that are not visible in this zone,
2299 	 * internal datasets (which have a $ in their name), and
2300 	 * temporary datasets (which have a % in their name).
2301 	 */
2302 	if (strchr(name, '$') != NULL)
2303 		return (B_TRUE);
2304 	if (strchr(name, '%') != NULL)
2305 		return (B_TRUE);
2306 	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
2307 		return (B_TRUE);
2308 	return (B_FALSE);
2309 }
2310 
2311 /*
2312  * inputs:
2313  * zc_name		name of filesystem
2314  * zc_cookie		zap cursor
2315  * zc_nvlist_dst_size	size of buffer for property nvlist
2316  *
2317  * outputs:
2318  * zc_name		name of next filesystem
2319  * zc_cookie		zap cursor
2320  * zc_objset_stats	stats
2321  * zc_nvlist_dst	property nvlist
2322  * zc_nvlist_dst_size	size of property nvlist
2323  */
2324 static int
2325 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2326 {
2327 	objset_t *os;
2328 	int error;
2329 	char *p;
2330 	size_t orig_len = strlen(zc->zc_name);
2331 
2332 top:
2333 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2334 		if (error == ENOENT)
2335 			error = SET_ERROR(ESRCH);
2336 		return (error);
2337 	}
2338 
2339 	p = strrchr(zc->zc_name, '/');
2340 	if (p == NULL || p[1] != '\0')
2341 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2342 	p = zc->zc_name + strlen(zc->zc_name);
2343 
2344 	do {
2345 		error = dmu_dir_list_next(os,
2346 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2347 		    NULL, &zc->zc_cookie);
2348 		if (error == ENOENT)
2349 			error = SET_ERROR(ESRCH);
2350 	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2351 	dmu_objset_rele(os, FTAG);
2352 
2353 	/*
2354 	 * If it's an internal dataset (ie. with a '$' in its name),
2355 	 * don't try to get stats for it, otherwise we'll return ENOENT.
2356 	 */
2357 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2358 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2359 		if (error == ENOENT) {
2360 			/* We lost a race with destroy, get the next one. */
2361 			zc->zc_name[orig_len] = '\0';
2362 			goto top;
2363 		}
2364 	}
2365 	return (error);
2366 }
2367 
2368 /*
2369  * inputs:
2370  * zc_name		name of filesystem
2371  * zc_cookie		zap cursor
2372  * zc_nvlist_dst_size	size of buffer for property nvlist
2373  * zc_simple		when set, only name is requested
2374  *
2375  * outputs:
2376  * zc_name		name of next snapshot
2377  * zc_objset_stats	stats
2378  * zc_nvlist_dst	property nvlist
2379  * zc_nvlist_dst_size	size of property nvlist
2380  */
2381 static int
2382 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2383 {
2384 	objset_t *os;
2385 	int error;
2386 
2387 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2388 	if (error != 0) {
2389 		return (error == ENOENT ? ESRCH : error);
2390 	}
2391 
2392 	/*
2393 	 * A dataset name of maximum length cannot have any snapshots,
2394 	 * so exit immediately.
2395 	 */
2396 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2397 	    ZFS_MAX_DATASET_NAME_LEN) {
2398 		dmu_objset_rele(os, FTAG);
2399 		return (SET_ERROR(ESRCH));
2400 	}
2401 
2402 	error = dmu_snapshot_list_next(os,
2403 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2404 	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2405 	    NULL);
2406 
2407 	if (error == 0 && !zc->zc_simple) {
2408 		dsl_dataset_t *ds;
2409 		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2410 
2411 		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2412 		if (error == 0) {
2413 			objset_t *ossnap;
2414 
2415 			error = dmu_objset_from_ds(ds, &ossnap);
2416 			if (error == 0)
2417 				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2418 			dsl_dataset_rele(ds, FTAG);
2419 		}
2420 	} else if (error == ENOENT) {
2421 		error = SET_ERROR(ESRCH);
2422 	}
2423 
2424 	dmu_objset_rele(os, FTAG);
2425 	/* if we failed, undo the @ that we tacked on to zc_name */
2426 	if (error != 0)
2427 		*strchr(zc->zc_name, '@') = '\0';
2428 	return (error);
2429 }
2430 
2431 static int
2432 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2433 {
2434 	const char *propname = nvpair_name(pair);
2435 	uint64_t *valary;
2436 	unsigned int vallen;
2437 	const char *domain;
2438 	char *dash;
2439 	zfs_userquota_prop_t type;
2440 	uint64_t rid;
2441 	uint64_t quota;
2442 	zfsvfs_t *zfsvfs;
2443 	int err;
2444 
2445 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2446 		nvlist_t *attrs;
2447 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2448 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2449 		    &pair) != 0)
2450 			return (SET_ERROR(EINVAL));
2451 	}
2452 
2453 	/*
2454 	 * A correctly constructed propname is encoded as
2455 	 * userquota@<rid>-<domain>.
2456 	 */
2457 	if ((dash = strchr(propname, '-')) == NULL ||
2458 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2459 	    vallen != 3)
2460 		return (SET_ERROR(EINVAL));
2461 
2462 	domain = dash + 1;
2463 	type = valary[0];
2464 	rid = valary[1];
2465 	quota = valary[2];
2466 
2467 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2468 	if (err == 0) {
2469 		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2470 		zfsvfs_rele(zfsvfs, FTAG);
2471 	}
2472 
2473 	return (err);
2474 }
2475 
2476 /*
2477  * If the named property is one that has a special function to set its value,
2478  * return 0 on success and a positive error code on failure; otherwise if it is
2479  * not one of the special properties handled by this function, return -1.
2480  *
2481  * XXX: It would be better for callers of the property interface if we handled
2482  * these special cases in dsl_prop.c (in the dsl layer).
2483  */
2484 static int
2485 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2486     nvpair_t *pair)
2487 {
2488 	const char *propname = nvpair_name(pair);
2489 	zfs_prop_t prop = zfs_name_to_prop(propname);
2490 	uint64_t intval = 0;
2491 	char *strval = NULL;
2492 	int err = -1;
2493 
2494 	if (prop == ZPROP_INVAL) {
2495 		if (zfs_prop_userquota(propname))
2496 			return (zfs_prop_set_userquota(dsname, pair));
2497 		return (-1);
2498 	}
2499 
2500 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2501 		nvlist_t *attrs;
2502 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2503 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2504 		    &pair) == 0);
2505 	}
2506 
2507 	/* all special properties are numeric except for keylocation */
2508 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
2509 		strval = fnvpair_value_string(pair);
2510 	} else {
2511 		intval = fnvpair_value_uint64(pair);
2512 	}
2513 
2514 	switch (prop) {
2515 	case ZFS_PROP_QUOTA:
2516 		err = dsl_dir_set_quota(dsname, source, intval);
2517 		break;
2518 	case ZFS_PROP_REFQUOTA:
2519 		err = dsl_dataset_set_refquota(dsname, source, intval);
2520 		break;
2521 	case ZFS_PROP_FILESYSTEM_LIMIT:
2522 	case ZFS_PROP_SNAPSHOT_LIMIT:
2523 		if (intval == UINT64_MAX) {
2524 			/* clearing the limit, just do it */
2525 			err = 0;
2526 		} else {
2527 			err = dsl_dir_activate_fs_ss_limit(dsname);
2528 		}
2529 		/*
2530 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2531 		 * default path to set the value in the nvlist.
2532 		 */
2533 		if (err == 0)
2534 			err = -1;
2535 		break;
2536 	case ZFS_PROP_KEYLOCATION:
2537 		err = dsl_crypto_can_set_keylocation(dsname, strval);
2538 
2539 		/*
2540 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2541 		 * default path to set the value in the nvlist.
2542 		 */
2543 		if (err == 0)
2544 			err = -1;
2545 		break;
2546 	case ZFS_PROP_RESERVATION:
2547 		err = dsl_dir_set_reservation(dsname, source, intval);
2548 		break;
2549 	case ZFS_PROP_REFRESERVATION:
2550 		err = dsl_dataset_set_refreservation(dsname, source, intval);
2551 		break;
2552 	case ZFS_PROP_VOLSIZE:
2553 		err = zvol_set_volsize(dsname, intval);
2554 		break;
2555 	case ZFS_PROP_VERSION:
2556 	{
2557 		zfsvfs_t *zfsvfs;
2558 
2559 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2560 			break;
2561 
2562 		err = zfs_set_version(zfsvfs, intval);
2563 		zfsvfs_rele(zfsvfs, FTAG);
2564 
2565 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2566 			zfs_cmd_t *zc;
2567 
2568 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2569 			(void) strcpy(zc->zc_name, dsname);
2570 			(void) zfs_ioc_userspace_upgrade(zc);
2571 			(void) zfs_ioc_id_quota_upgrade(zc);
2572 			kmem_free(zc, sizeof (zfs_cmd_t));
2573 		}
2574 		break;
2575 	}
2576 	default:
2577 		err = -1;
2578 	}
2579 
2580 	return (err);
2581 }
2582 
2583 /*
2584  * This function is best effort. If it fails to set any of the given properties,
2585  * it continues to set as many as it can and returns the last error
2586  * encountered. If the caller provides a non-NULL errlist, it will be filled in
2587  * with the list of names of all the properties that failed along with the
2588  * corresponding error numbers.
2589  *
2590  * If every property is set successfully, zero is returned and errlist is not
2591  * modified.
2592  */
2593 int
2594 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2595     nvlist_t *errlist)
2596 {
2597 	nvpair_t *pair;
2598 	nvpair_t *propval;
2599 	int rv = 0;
2600 	uint64_t intval;
2601 	char *strval;
2602 	nvlist_t *genericnvl = fnvlist_alloc();
2603 	nvlist_t *retrynvl = fnvlist_alloc();
2604 
2605 retry:
2606 	pair = NULL;
2607 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2608 		const char *propname = nvpair_name(pair);
2609 		zfs_prop_t prop = zfs_name_to_prop(propname);
2610 		int err = 0;
2611 
2612 		/* decode the property value */
2613 		propval = pair;
2614 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2615 			nvlist_t *attrs;
2616 			attrs = fnvpair_value_nvlist(pair);
2617 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2618 			    &propval) != 0)
2619 				err = SET_ERROR(EINVAL);
2620 		}
2621 
2622 		/* Validate value type */
2623 		if (err == 0 && source == ZPROP_SRC_INHERITED) {
2624 			/* inherited properties are expected to be booleans */
2625 			if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
2626 				err = SET_ERROR(EINVAL);
2627 		} else if (err == 0 && prop == ZPROP_INVAL) {
2628 			if (zfs_prop_user(propname)) {
2629 				if (nvpair_type(propval) != DATA_TYPE_STRING)
2630 					err = SET_ERROR(EINVAL);
2631 			} else if (zfs_prop_userquota(propname)) {
2632 				if (nvpair_type(propval) !=
2633 				    DATA_TYPE_UINT64_ARRAY)
2634 					err = SET_ERROR(EINVAL);
2635 			} else {
2636 				err = SET_ERROR(EINVAL);
2637 			}
2638 		} else if (err == 0) {
2639 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2640 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2641 					err = SET_ERROR(EINVAL);
2642 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2643 				const char *unused;
2644 
2645 				intval = fnvpair_value_uint64(propval);
2646 
2647 				switch (zfs_prop_get_type(prop)) {
2648 				case PROP_TYPE_NUMBER:
2649 					break;
2650 				case PROP_TYPE_STRING:
2651 					err = SET_ERROR(EINVAL);
2652 					break;
2653 				case PROP_TYPE_INDEX:
2654 					if (zfs_prop_index_to_string(prop,
2655 					    intval, &unused) != 0)
2656 						err = SET_ERROR(EINVAL);
2657 					break;
2658 				default:
2659 					cmn_err(CE_PANIC,
2660 					    "unknown property type");
2661 				}
2662 			} else {
2663 				err = SET_ERROR(EINVAL);
2664 			}
2665 		}
2666 
2667 		/* Validate permissions */
2668 		if (err == 0)
2669 			err = zfs_check_settable(dsname, pair, CRED());
2670 
2671 		if (err == 0) {
2672 			if (source == ZPROP_SRC_INHERITED)
2673 				err = -1; /* does not need special handling */
2674 			else
2675 				err = zfs_prop_set_special(dsname, source,
2676 				    pair);
2677 			if (err == -1) {
2678 				/*
2679 				 * For better performance we build up a list of
2680 				 * properties to set in a single transaction.
2681 				 */
2682 				err = nvlist_add_nvpair(genericnvl, pair);
2683 			} else if (err != 0 && nvl != retrynvl) {
2684 				/*
2685 				 * This may be a spurious error caused by
2686 				 * receiving quota and reservation out of order.
2687 				 * Try again in a second pass.
2688 				 */
2689 				err = nvlist_add_nvpair(retrynvl, pair);
2690 			}
2691 		}
2692 
2693 		if (err != 0) {
2694 			if (errlist != NULL)
2695 				fnvlist_add_int32(errlist, propname, err);
2696 			rv = err;
2697 		}
2698 	}
2699 
2700 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2701 		nvl = retrynvl;
2702 		goto retry;
2703 	}
2704 
2705 	if (!nvlist_empty(genericnvl) &&
2706 	    dsl_props_set(dsname, source, genericnvl) != 0) {
2707 		/*
2708 		 * If this fails, we still want to set as many properties as we
2709 		 * can, so try setting them individually.
2710 		 */
2711 		pair = NULL;
2712 		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2713 			const char *propname = nvpair_name(pair);
2714 			int err = 0;
2715 
2716 			propval = pair;
2717 			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2718 				nvlist_t *attrs;
2719 				attrs = fnvpair_value_nvlist(pair);
2720 				propval = fnvlist_lookup_nvpair(attrs,
2721 				    ZPROP_VALUE);
2722 			}
2723 
2724 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2725 				strval = fnvpair_value_string(propval);
2726 				err = dsl_prop_set_string(dsname, propname,
2727 				    source, strval);
2728 			} else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
2729 				err = dsl_prop_inherit(dsname, propname,
2730 				    source);
2731 			} else {
2732 				intval = fnvpair_value_uint64(propval);
2733 				err = dsl_prop_set_int(dsname, propname, source,
2734 				    intval);
2735 			}
2736 
2737 			if (err != 0) {
2738 				if (errlist != NULL) {
2739 					fnvlist_add_int32(errlist, propname,
2740 					    err);
2741 				}
2742 				rv = err;
2743 			}
2744 		}
2745 	}
2746 	nvlist_free(genericnvl);
2747 	nvlist_free(retrynvl);
2748 
2749 	return (rv);
2750 }
2751 
2752 /*
2753  * Check that all the properties are valid user properties.
2754  */
2755 static int
2756 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2757 {
2758 	nvpair_t *pair = NULL;
2759 	int error = 0;
2760 
2761 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2762 		const char *propname = nvpair_name(pair);
2763 
2764 		if (!zfs_prop_user(propname) ||
2765 		    nvpair_type(pair) != DATA_TYPE_STRING)
2766 			return (SET_ERROR(EINVAL));
2767 
2768 		if (error = zfs_secpolicy_write_perms(fsname,
2769 		    ZFS_DELEG_PERM_USERPROP, CRED()))
2770 			return (error);
2771 
2772 		if (strlen(propname) >= ZAP_MAXNAMELEN)
2773 			return (SET_ERROR(ENAMETOOLONG));
2774 
2775 		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2776 			return (E2BIG);
2777 	}
2778 	return (0);
2779 }
2780 
2781 static void
2782 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2783 {
2784 	nvpair_t *pair;
2785 
2786 	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2787 
2788 	pair = NULL;
2789 	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2790 		if (nvlist_exists(skipped, nvpair_name(pair)))
2791 			continue;
2792 
2793 		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2794 	}
2795 }
2796 
2797 static int
2798 clear_received_props(const char *dsname, nvlist_t *props,
2799     nvlist_t *skipped)
2800 {
2801 	int err = 0;
2802 	nvlist_t *cleared_props = NULL;
2803 	props_skip(props, skipped, &cleared_props);
2804 	if (!nvlist_empty(cleared_props)) {
2805 		/*
2806 		 * Acts on local properties until the dataset has received
2807 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2808 		 */
2809 		zprop_source_t flags = (ZPROP_SRC_NONE |
2810 		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2811 		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2812 	}
2813 	nvlist_free(cleared_props);
2814 	return (err);
2815 }
2816 
2817 /*
2818  * inputs:
2819  * zc_name		name of filesystem
2820  * zc_value		name of property to set
2821  * zc_nvlist_src{_size}	nvlist of properties to apply
2822  * zc_cookie		received properties flag
2823  *
2824  * outputs:
2825  * zc_nvlist_dst{_size} error for each unapplied received property
2826  */
2827 static int
2828 zfs_ioc_set_prop(zfs_cmd_t *zc)
2829 {
2830 	nvlist_t *nvl;
2831 	boolean_t received = zc->zc_cookie;
2832 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2833 	    ZPROP_SRC_LOCAL);
2834 	nvlist_t *errors;
2835 	int error;
2836 
2837 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2838 	    zc->zc_iflags, &nvl)) != 0)
2839 		return (error);
2840 
2841 	if (received) {
2842 		nvlist_t *origprops;
2843 
2844 		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2845 			(void) clear_received_props(zc->zc_name,
2846 			    origprops, nvl);
2847 			nvlist_free(origprops);
2848 		}
2849 
2850 		error = dsl_prop_set_hasrecvd(zc->zc_name);
2851 	}
2852 
2853 	errors = fnvlist_alloc();
2854 	if (error == 0)
2855 		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2856 
2857 	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2858 		(void) put_nvlist(zc, errors);
2859 	}
2860 
2861 	nvlist_free(errors);
2862 	nvlist_free(nvl);
2863 	return (error);
2864 }
2865 
2866 /*
2867  * inputs:
2868  * zc_name		name of filesystem
2869  * zc_value		name of property to inherit
2870  * zc_cookie		revert to received value if TRUE
2871  *
2872  * outputs:		none
2873  */
2874 static int
2875 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2876 {
2877 	const char *propname = zc->zc_value;
2878 	zfs_prop_t prop = zfs_name_to_prop(propname);
2879 	boolean_t received = zc->zc_cookie;
2880 	zprop_source_t source = (received
2881 	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2882 	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2883 
2884 	if (received) {
2885 		nvlist_t *dummy;
2886 		nvpair_t *pair;
2887 		zprop_type_t type;
2888 		int err;
2889 
2890 		/*
2891 		 * zfs_prop_set_special() expects properties in the form of an
2892 		 * nvpair with type info.
2893 		 */
2894 		if (prop == ZPROP_INVAL) {
2895 			if (!zfs_prop_user(propname))
2896 				return (SET_ERROR(EINVAL));
2897 
2898 			type = PROP_TYPE_STRING;
2899 		} else if (prop == ZFS_PROP_VOLSIZE ||
2900 		    prop == ZFS_PROP_VERSION) {
2901 			return (SET_ERROR(EINVAL));
2902 		} else {
2903 			type = zfs_prop_get_type(prop);
2904 		}
2905 
2906 		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2907 
2908 		switch (type) {
2909 		case PROP_TYPE_STRING:
2910 			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2911 			break;
2912 		case PROP_TYPE_NUMBER:
2913 		case PROP_TYPE_INDEX:
2914 			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2915 			break;
2916 		default:
2917 			nvlist_free(dummy);
2918 			return (SET_ERROR(EINVAL));
2919 		}
2920 
2921 		pair = nvlist_next_nvpair(dummy, NULL);
2922 		err = zfs_prop_set_special(zc->zc_name, source, pair);
2923 		nvlist_free(dummy);
2924 		if (err != -1)
2925 			return (err); /* special property already handled */
2926 	} else {
2927 		/*
2928 		 * Only check this in the non-received case. We want to allow
2929 		 * 'inherit -S' to revert non-inheritable properties like quota
2930 		 * and reservation to the received or default values even though
2931 		 * they are not considered inheritable.
2932 		 */
2933 		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2934 			return (SET_ERROR(EINVAL));
2935 	}
2936 
2937 	/* property name has been validated by zfs_secpolicy_inherit_prop() */
2938 	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2939 }
2940 
2941 static int
2942 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2943 {
2944 	nvlist_t *props;
2945 	spa_t *spa;
2946 	int error;
2947 	nvpair_t *pair;
2948 
2949 	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2950 	    zc->zc_iflags, &props))
2951 		return (error);
2952 
2953 	/*
2954 	 * If the only property is the configfile, then just do a spa_lookup()
2955 	 * to handle the faulted case.
2956 	 */
2957 	pair = nvlist_next_nvpair(props, NULL);
2958 	if (pair != NULL && strcmp(nvpair_name(pair),
2959 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2960 	    nvlist_next_nvpair(props, pair) == NULL) {
2961 		mutex_enter(&spa_namespace_lock);
2962 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2963 			spa_configfile_set(spa, props, B_FALSE);
2964 			spa_write_cachefile(spa, B_FALSE, B_TRUE);
2965 		}
2966 		mutex_exit(&spa_namespace_lock);
2967 		if (spa != NULL) {
2968 			nvlist_free(props);
2969 			return (0);
2970 		}
2971 	}
2972 
2973 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2974 		nvlist_free(props);
2975 		return (error);
2976 	}
2977 
2978 	error = spa_prop_set(spa, props);
2979 
2980 	nvlist_free(props);
2981 	spa_close(spa, FTAG);
2982 
2983 	return (error);
2984 }
2985 
2986 static int
2987 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2988 {
2989 	spa_t *spa;
2990 	int error;
2991 	nvlist_t *nvp = NULL;
2992 
2993 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2994 		/*
2995 		 * If the pool is faulted, there may be properties we can still
2996 		 * get (such as altroot and cachefile), so attempt to get them
2997 		 * anyway.
2998 		 */
2999 		mutex_enter(&spa_namespace_lock);
3000 		if ((spa = spa_lookup(zc->zc_name)) != NULL)
3001 			error = spa_prop_get(spa, &nvp);
3002 		mutex_exit(&spa_namespace_lock);
3003 	} else {
3004 		error = spa_prop_get(spa, &nvp);
3005 		spa_close(spa, FTAG);
3006 	}
3007 
3008 	if (error == 0 && zc->zc_nvlist_dst != 0)
3009 		error = put_nvlist(zc, nvp);
3010 	else
3011 		error = SET_ERROR(EFAULT);
3012 
3013 	nvlist_free(nvp);
3014 	return (error);
3015 }
3016 
3017 /*
3018  * inputs:
3019  * zc_name		name of filesystem
3020  * zc_nvlist_src{_size}	nvlist of delegated permissions
3021  * zc_perm_action	allow/unallow flag
3022  *
3023  * outputs:		none
3024  */
3025 static int
3026 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3027 {
3028 	int error;
3029 	nvlist_t *fsaclnv = NULL;
3030 
3031 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3032 	    zc->zc_iflags, &fsaclnv)) != 0)
3033 		return (error);
3034 
3035 	/*
3036 	 * Verify nvlist is constructed correctly
3037 	 */
3038 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
3039 		nvlist_free(fsaclnv);
3040 		return (SET_ERROR(EINVAL));
3041 	}
3042 
3043 	/*
3044 	 * If we don't have PRIV_SYS_MOUNT, then validate
3045 	 * that user is allowed to hand out each permission in
3046 	 * the nvlist(s)
3047 	 */
3048 
3049 	error = secpolicy_zfs(CRED());
3050 	if (error != 0) {
3051 		if (zc->zc_perm_action == B_FALSE) {
3052 			error = dsl_deleg_can_allow(zc->zc_name,
3053 			    fsaclnv, CRED());
3054 		} else {
3055 			error = dsl_deleg_can_unallow(zc->zc_name,
3056 			    fsaclnv, CRED());
3057 		}
3058 	}
3059 
3060 	if (error == 0)
3061 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3062 
3063 	nvlist_free(fsaclnv);
3064 	return (error);
3065 }
3066 
3067 /*
3068  * inputs:
3069  * zc_name		name of filesystem
3070  *
3071  * outputs:
3072  * zc_nvlist_src{_size}	nvlist of delegated permissions
3073  */
3074 static int
3075 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3076 {
3077 	nvlist_t *nvp;
3078 	int error;
3079 
3080 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3081 		error = put_nvlist(zc, nvp);
3082 		nvlist_free(nvp);
3083 	}
3084 
3085 	return (error);
3086 }
3087 
3088 /* ARGSUSED */
3089 static void
3090 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3091 {
3092 	zfs_creat_t *zct = arg;
3093 
3094 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3095 }
3096 
3097 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3098 
3099 /*
3100  * inputs:
3101  * os			parent objset pointer (NULL if root fs)
3102  * fuids_ok		fuids allowed in this version of the spa?
3103  * sa_ok		SAs allowed in this version of the spa?
3104  * createprops		list of properties requested by creator
3105  *
3106  * outputs:
3107  * zplprops	values for the zplprops we attach to the master node object
3108  * is_ci	true if requested file system will be purely case-insensitive
3109  *
3110  * Determine the settings for utf8only, normalization and
3111  * casesensitivity.  Specific values may have been requested by the
3112  * creator and/or we can inherit values from the parent dataset.  If
3113  * the file system is of too early a vintage, a creator can not
3114  * request settings for these properties, even if the requested
3115  * setting is the default value.  We don't actually want to create dsl
3116  * properties for these, so remove them from the source nvlist after
3117  * processing.
3118  */
3119 static int
3120 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3121     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3122     nvlist_t *zplprops, boolean_t *is_ci)
3123 {
3124 	uint64_t sense = ZFS_PROP_UNDEFINED;
3125 	uint64_t norm = ZFS_PROP_UNDEFINED;
3126 	uint64_t u8 = ZFS_PROP_UNDEFINED;
3127 
3128 	ASSERT(zplprops != NULL);
3129 
3130 	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3131 		return (SET_ERROR(EINVAL));
3132 
3133 	/*
3134 	 * Pull out creator prop choices, if any.
3135 	 */
3136 	if (createprops) {
3137 		(void) nvlist_lookup_uint64(createprops,
3138 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3139 		(void) nvlist_lookup_uint64(createprops,
3140 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3141 		(void) nvlist_remove_all(createprops,
3142 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3143 		(void) nvlist_lookup_uint64(createprops,
3144 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3145 		(void) nvlist_remove_all(createprops,
3146 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3147 		(void) nvlist_lookup_uint64(createprops,
3148 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3149 		(void) nvlist_remove_all(createprops,
3150 		    zfs_prop_to_name(ZFS_PROP_CASE));
3151 	}
3152 
3153 	/*
3154 	 * If the zpl version requested is whacky or the file system
3155 	 * or pool is version is too "young" to support normalization
3156 	 * and the creator tried to set a value for one of the props,
3157 	 * error out.
3158 	 */
3159 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3160 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3161 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3162 	    (zplver < ZPL_VERSION_NORMALIZATION &&
3163 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3164 	    sense != ZFS_PROP_UNDEFINED)))
3165 		return (SET_ERROR(ENOTSUP));
3166 
3167 	/*
3168 	 * Put the version in the zplprops
3169 	 */
3170 	VERIFY(nvlist_add_uint64(zplprops,
3171 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3172 
3173 	if (norm == ZFS_PROP_UNDEFINED)
3174 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3175 	VERIFY(nvlist_add_uint64(zplprops,
3176 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3177 
3178 	/*
3179 	 * If we're normalizing, names must always be valid UTF-8 strings.
3180 	 */
3181 	if (norm)
3182 		u8 = 1;
3183 	if (u8 == ZFS_PROP_UNDEFINED)
3184 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3185 	VERIFY(nvlist_add_uint64(zplprops,
3186 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3187 
3188 	if (sense == ZFS_PROP_UNDEFINED)
3189 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3190 	VERIFY(nvlist_add_uint64(zplprops,
3191 	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3192 
3193 	if (is_ci)
3194 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3195 
3196 	return (0);
3197 }
3198 
3199 static int
3200 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3201     nvlist_t *zplprops, boolean_t *is_ci)
3202 {
3203 	boolean_t fuids_ok, sa_ok;
3204 	uint64_t zplver = ZPL_VERSION;
3205 	objset_t *os = NULL;
3206 	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3207 	char *cp;
3208 	spa_t *spa;
3209 	uint64_t spa_vers;
3210 	int error;
3211 
3212 	(void) strlcpy(parentname, dataset, sizeof (parentname));
3213 	cp = strrchr(parentname, '/');
3214 	ASSERT(cp != NULL);
3215 	cp[0] = '\0';
3216 
3217 	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3218 		return (error);
3219 
3220 	spa_vers = spa_version(spa);
3221 	spa_close(spa, FTAG);
3222 
3223 	zplver = zfs_zpl_version_map(spa_vers);
3224 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3225 	sa_ok = (zplver >= ZPL_VERSION_SA);
3226 
3227 	/*
3228 	 * Open parent object set so we can inherit zplprop values.
3229 	 */
3230 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3231 		return (error);
3232 
3233 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3234 	    zplprops, is_ci);
3235 	dmu_objset_rele(os, FTAG);
3236 	return (error);
3237 }
3238 
3239 static int
3240 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3241     nvlist_t *zplprops, boolean_t *is_ci)
3242 {
3243 	boolean_t fuids_ok;
3244 	boolean_t sa_ok;
3245 	uint64_t zplver = ZPL_VERSION;
3246 	int error;
3247 
3248 	zplver = zfs_zpl_version_map(spa_vers);
3249 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3250 	sa_ok = (zplver >= ZPL_VERSION_SA);
3251 
3252 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3253 	    createprops, zplprops, is_ci);
3254 	return (error);
3255 }
3256 
3257 /*
3258  * innvl: {
3259  *     "type" -> dmu_objset_type_t (int32)
3260  *     (optional) "props" -> { prop -> value }
3261  *     (optional) "hidden_args" -> { "wkeydata" -> value }
3262  *         raw uint8_t array of encryption wrapping key data (32 bytes)
3263  * }
3264  *
3265  * outnvl: propname -> error code (int32)
3266  */
3267 static int
3268 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3269 {
3270 	int error = 0;
3271 	zfs_creat_t zct = { 0 };
3272 	nvlist_t *nvprops = NULL;
3273 	nvlist_t *hidden_args = NULL;
3274 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3275 	int32_t type32;
3276 	dmu_objset_type_t type;
3277 	boolean_t is_insensitive = B_FALSE;
3278 	dsl_crypto_params_t *dcp = NULL;
3279 
3280 	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3281 		return (SET_ERROR(EINVAL));
3282 	type = type32;
3283 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3284 	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
3285 
3286 	switch (type) {
3287 	case DMU_OST_ZFS:
3288 		cbfunc = zfs_create_cb;
3289 		break;
3290 
3291 	case DMU_OST_ZVOL:
3292 		cbfunc = zvol_create_cb;
3293 		break;
3294 
3295 	default:
3296 		cbfunc = NULL;
3297 		break;
3298 	}
3299 	if (strchr(fsname, '@') ||
3300 	    strchr(fsname, '%'))
3301 		return (SET_ERROR(EINVAL));
3302 
3303 	zct.zct_props = nvprops;
3304 
3305 	if (cbfunc == NULL)
3306 		return (SET_ERROR(EINVAL));
3307 
3308 	if (type == DMU_OST_ZVOL) {
3309 		uint64_t volsize, volblocksize;
3310 
3311 		if (nvprops == NULL)
3312 			return (SET_ERROR(EINVAL));
3313 		if (nvlist_lookup_uint64(nvprops,
3314 		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3315 			return (SET_ERROR(EINVAL));
3316 
3317 		if ((error = nvlist_lookup_uint64(nvprops,
3318 		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3319 		    &volblocksize)) != 0 && error != ENOENT)
3320 			return (SET_ERROR(EINVAL));
3321 
3322 		if (error != 0)
3323 			volblocksize = zfs_prop_default_numeric(
3324 			    ZFS_PROP_VOLBLOCKSIZE);
3325 
3326 		if ((error = zvol_check_volblocksize(
3327 		    volblocksize)) != 0 ||
3328 		    (error = zvol_check_volsize(volsize,
3329 		    volblocksize)) != 0)
3330 			return (error);
3331 	} else if (type == DMU_OST_ZFS) {
3332 		int error;
3333 
3334 		/*
3335 		 * We have to have normalization and
3336 		 * case-folding flags correct when we do the
3337 		 * file system creation, so go figure them out
3338 		 * now.
3339 		 */
3340 		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3341 		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3342 		error = zfs_fill_zplprops(fsname, nvprops,
3343 		    zct.zct_zplprops, &is_insensitive);
3344 		if (error != 0) {
3345 			nvlist_free(zct.zct_zplprops);
3346 			return (error);
3347 		}
3348 	}
3349 
3350 	error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
3351 	    hidden_args, &dcp);
3352 	if (error != 0) {
3353 		nvlist_free(zct.zct_zplprops);
3354 		return (error);
3355 	}
3356 
3357 	error = dmu_objset_create(fsname, type,
3358 	    is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
3359 
3360 	nvlist_free(zct.zct_zplprops);
3361 	dsl_crypto_params_free(dcp, !!error);
3362 
3363 	/*
3364 	 * It would be nice to do this atomically.
3365 	 */
3366 	if (error == 0) {
3367 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3368 		    nvprops, outnvl);
3369 		if (error != 0)
3370 			(void) dsl_destroy_head(fsname);
3371 	}
3372 	return (error);
3373 }
3374 
3375 /*
3376  * innvl: {
3377  *     "origin" -> name of origin snapshot
3378  *     (optional) "props" -> { prop -> value }
3379  *     (optional) "hidden_args" -> { "wkeydata" -> value }
3380  *         raw uint8_t array of encryption wrapping key data (32 bytes)
3381  * }
3382  *
3383  * outnvl: propname -> error code (int32)
3384  */
3385 static int
3386 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3387 {
3388 	int error = 0;
3389 	nvlist_t *nvprops = NULL;
3390 	char *origin_name;
3391 
3392 	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3393 		return (SET_ERROR(EINVAL));
3394 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3395 
3396 	if (strchr(fsname, '@') ||
3397 	    strchr(fsname, '%'))
3398 		return (SET_ERROR(EINVAL));
3399 
3400 	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3401 		return (SET_ERROR(EINVAL));
3402 
3403 	error = dmu_objset_clone(fsname, origin_name);
3404 
3405 	/*
3406 	 * It would be nice to do this atomically.
3407 	 */
3408 	if (error == 0) {
3409 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3410 		    nvprops, outnvl);
3411 		if (error != 0)
3412 			(void) dsl_destroy_head(fsname);
3413 	}
3414 	return (error);
3415 }
3416 
3417 /* ARGSUSED */
3418 static int
3419 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3420 {
3421 	if (strchr(fsname, '@') ||
3422 	    strchr(fsname, '%'))
3423 		return (SET_ERROR(EINVAL));
3424 
3425 	return (dmu_objset_remap_indirects(fsname));
3426 }
3427 
3428 /*
3429  * innvl: {
3430  *     "snaps" -> { snapshot1, snapshot2 }
3431  *     (optional) "props" -> { prop -> value (string) }
3432  * }
3433  *
3434  * outnvl: snapshot -> error code (int32)
3435  */
3436 static int
3437 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3438 {
3439 	nvlist_t *snaps;
3440 	nvlist_t *props = NULL;
3441 	int error, poollen;
3442 	nvpair_t *pair;
3443 
3444 	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3445 	if ((error = zfs_check_userprops(poolname, props)) != 0)
3446 		return (error);
3447 
3448 	if (!nvlist_empty(props) &&
3449 	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3450 		return (SET_ERROR(ENOTSUP));
3451 
3452 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3453 		return (SET_ERROR(EINVAL));
3454 	poollen = strlen(poolname);
3455 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3456 	    pair = nvlist_next_nvpair(snaps, pair)) {
3457 		const char *name = nvpair_name(pair);
3458 		const char *cp = strchr(name, '@');
3459 
3460 		/*
3461 		 * The snap name must contain an @, and the part after it must
3462 		 * contain only valid characters.
3463 		 */
3464 		if (cp == NULL ||
3465 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3466 			return (SET_ERROR(EINVAL));
3467 
3468 		/*
3469 		 * The snap must be in the specified pool.
3470 		 */
3471 		if (strncmp(name, poolname, poollen) != 0 ||
3472 		    (name[poollen] != '/' && name[poollen] != '@'))
3473 			return (SET_ERROR(EXDEV));
3474 
3475 		/* This must be the only snap of this fs. */
3476 		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3477 		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3478 			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3479 			    == 0) {
3480 				return (SET_ERROR(EXDEV));
3481 			}
3482 		}
3483 	}
3484 
3485 	error = dsl_dataset_snapshot(snaps, props, outnvl);
3486 	return (error);
3487 }
3488 
3489 /*
3490  * innvl: "message" -> string
3491  */
3492 /* ARGSUSED */
3493 static int
3494 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3495 {
3496 	char *message;
3497 	spa_t *spa;
3498 	int error;
3499 	char *poolname;
3500 
3501 	/*
3502 	 * The poolname in the ioctl is not set, we get it from the TSD,
3503 	 * which was set at the end of the last successful ioctl that allows
3504 	 * logging.  The secpolicy func already checked that it is set.
3505 	 * Only one log ioctl is allowed after each successful ioctl, so
3506 	 * we clear the TSD here.
3507 	 */
3508 	poolname = tsd_get(zfs_allow_log_key);
3509 	(void) tsd_set(zfs_allow_log_key, NULL);
3510 	error = spa_open(poolname, &spa, FTAG);
3511 	strfree(poolname);
3512 	if (error != 0)
3513 		return (error);
3514 
3515 	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3516 		spa_close(spa, FTAG);
3517 		return (SET_ERROR(EINVAL));
3518 	}
3519 
3520 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3521 		spa_close(spa, FTAG);
3522 		return (SET_ERROR(ENOTSUP));
3523 	}
3524 
3525 	error = spa_history_log(spa, message);
3526 	spa_close(spa, FTAG);
3527 	return (error);
3528 }
3529 
3530 /*
3531  * The dp_config_rwlock must not be held when calling this, because the
3532  * unmount may need to write out data.
3533  *
3534  * This function is best-effort.  Callers must deal gracefully if it
3535  * remains mounted (or is remounted after this call).
3536  *
3537  * Returns 0 if the argument is not a snapshot, or it is not currently a
3538  * filesystem, or we were able to unmount it.  Returns error code otherwise.
3539  */
3540 void
3541 zfs_unmount_snap(const char *snapname)
3542 {
3543 	vfs_t *vfsp = NULL;
3544 	zfsvfs_t *zfsvfs = NULL;
3545 
3546 	if (strchr(snapname, '@') == NULL)
3547 		return;
3548 
3549 	int err = getzfsvfs(snapname, &zfsvfs);
3550 	if (err != 0) {
3551 		ASSERT3P(zfsvfs, ==, NULL);
3552 		return;
3553 	}
3554 	vfsp = zfsvfs->z_vfs;
3555 
3556 	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3557 
3558 	err = vn_vfswlock(vfsp->vfs_vnodecovered);
3559 	VFS_RELE(vfsp);
3560 	if (err != 0)
3561 		return;
3562 
3563 	/*
3564 	 * Always force the unmount for snapshots.
3565 	 */
3566 	(void) dounmount(vfsp, MS_FORCE, kcred);
3567 }
3568 
/*
 * Callback-shaped wrapper around zfs_unmount_snap(): ignores arg and
 * always returns 0, since the unmount itself is best-effort.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	zfs_unmount_snap(snapname);

	return (0);
}
3576 
3577 /*
3578  * When a clone is destroyed, its origin may also need to be destroyed,
3579  * in which case it must be unmounted.  This routine will do that unmount
3580  * if necessary.
3581  */
3582 void
3583 zfs_destroy_unmount_origin(const char *fsname)
3584 {
3585 	int error;
3586 	objset_t *os;
3587 	dsl_dataset_t *ds;
3588 
3589 	error = dmu_objset_hold(fsname, FTAG, &os);
3590 	if (error != 0)
3591 		return;
3592 	ds = dmu_objset_ds(os);
3593 	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3594 		char originname[ZFS_MAX_DATASET_NAME_LEN];
3595 		dsl_dataset_name(ds->ds_prev, originname);
3596 		dmu_objset_rele(os, FTAG);
3597 		zfs_unmount_snap(originname);
3598 	} else {
3599 		dmu_objset_rele(os, FTAG);
3600 	}
3601 }
3602 
3603 /*
3604  * innvl: {
3605  *     "snaps" -> { snapshot1, snapshot2 }
3606  *     (optional boolean) "defer"
3607  * }
3608  *
3609  * outnvl: snapshot -> error code (int32)
3610  *
3611  */
3612 /* ARGSUSED */
3613 static int
3614 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3615 {
3616 	nvlist_t *snaps;
3617 	nvpair_t *pair;
3618 	boolean_t defer;
3619 
3620 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3621 		return (SET_ERROR(EINVAL));
3622 	defer = nvlist_exists(innvl, "defer");
3623 
3624 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3625 	    pair = nvlist_next_nvpair(snaps, pair)) {
3626 		zfs_unmount_snap(nvpair_name(pair));
3627 	}
3628 
3629 	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3630 }
3631 
3632 /*
3633  * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3634  * All bookmarks must be in the same pool.
3635  *
3636  * innvl: {
3637  *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3638  * }
3639  *
3640  * outnvl: bookmark -> error code (int32)
3641  *
3642  */
3643 /* ARGSUSED */
3644 static int
3645 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3646 {
3647 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3648 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3649 		char *snap_name;
3650 
3651 		/*
3652 		 * Verify the snapshot argument.
3653 		 */
3654 		if (nvpair_value_string(pair, &snap_name) != 0)
3655 			return (SET_ERROR(EINVAL));
3656 
3657 
3658 		/* Verify that the keys (bookmarks) are unique */
3659 		for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3660 		    pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3661 			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3662 				return (SET_ERROR(EINVAL));
3663 		}
3664 	}
3665 
3666 	return (dsl_bookmark_create(innvl, outnvl));
3667 }
3668 
3669 /*
3670  * innvl: {
3671  *     property 1, property 2, ...
3672  * }
3673  *
3674  * outnvl: {
3675  *     bookmark name 1 -> { property 1, property 2, ... },
3676  *     bookmark name 2 -> { property 1, property 2, ... }
3677  * }
3678  *
3679  */
3680 static int
3681 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3682 {
3683 	return (dsl_get_bookmarks(fsname, innvl, outnvl));
3684 }
3685 
3686 /*
3687  * innvl: {
3688  *     bookmark name 1, bookmark name 2
3689  * }
3690  *
3691  * outnvl: bookmark -> error code (int32)
3692  *
3693  */
3694 static int
3695 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3696     nvlist_t *outnvl)
3697 {
3698 	int error, poollen;
3699 
3700 	poollen = strlen(poolname);
3701 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3702 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3703 		const char *name = nvpair_name(pair);
3704 		const char *cp = strchr(name, '#');
3705 
3706 		/*
3707 		 * The bookmark name must contain an #, and the part after it
3708 		 * must contain only valid characters.
3709 		 */
3710 		if (cp == NULL ||
3711 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3712 			return (SET_ERROR(EINVAL));
3713 
3714 		/*
3715 		 * The bookmark must be in the specified pool.
3716 		 */
3717 		if (strncmp(name, poolname, poollen) != 0 ||
3718 		    (name[poollen] != '/' && name[poollen] != '#'))
3719 			return (SET_ERROR(EXDEV));
3720 	}
3721 
3722 	error = dsl_bookmark_destroy(innvl, outnvl);
3723 	return (error);
3724 }
3725 
3726 static int
3727 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3728     nvlist_t *outnvl)
3729 {
3730 	char *program;
3731 	uint64_t instrlimit, memlimit;
3732 	boolean_t sync_flag;
3733 	nvpair_t *nvarg = NULL;
3734 
3735 	if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) {
3736 		return (EINVAL);
3737 	}
3738 	if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3739 		sync_flag = B_TRUE;
3740 	}
3741 	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3742 		instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3743 	}
3744 	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3745 		memlimit = ZCP_DEFAULT_MEMLIMIT;
3746 	}
3747 	if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg)) {
3748 		return (EINVAL);
3749 	}
3750 
3751 	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3752 		return (EINVAL);
3753 	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3754 		return (EINVAL);
3755 
3756 	return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
3757 	    nvarg, outnvl));
3758 }
3759 
3760 /*
3761  * innvl: unused
3762  * outnvl: empty
3763  */
3764 /* ARGSUSED */
3765 static int
3766 zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3767 {
3768 	return (spa_checkpoint(poolname));
3769 }
3770 
3771 /*
3772  * innvl: unused
3773  * outnvl: empty
3774  */
3775 /* ARGSUSED */
3776 static int
3777 zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
3778     nvlist_t *outnvl)
3779 {
3780 	return (spa_checkpoint_discard(poolname));
3781 }
3782 
3783 /*
3784  * inputs:
3785  * zc_name		name of dataset to destroy
3786  * zc_defer_destroy	mark for deferred destroy
3787  *
3788  * outputs:		none
3789  */
3790 static int
3791 zfs_ioc_destroy(zfs_cmd_t *zc)
3792 {
3793 	objset_t *os;
3794 	dmu_objset_type_t ost;
3795 	int err;
3796 
3797 	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
3798 	if (err != 0)
3799 		return (err);
3800 	ost = dmu_objset_type(os);
3801 	dmu_objset_rele(os, FTAG);
3802 
3803 	if (ost == DMU_OST_ZFS)
3804 		zfs_unmount_snap(zc->zc_name);
3805 
3806 	if (strchr(zc->zc_name, '@')) {
3807 		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3808 	} else {
3809 		err = dsl_destroy_head(zc->zc_name);
3810 		if (err == EEXIST) {
3811 			/*
3812 			 * It is possible that the given DS may have
3813 			 * hidden child (%recv) datasets - "leftovers"
3814 			 * resulting from the previously interrupted
3815 			 * 'zfs receive'.
3816 			 *
3817 			 * 6 extra bytes for /%recv
3818 			 */
3819 			char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
3820 
3821 			if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
3822 			    zc->zc_name, recv_clone_name) >=
3823 			    sizeof (namebuf))
3824 				return (SET_ERROR(EINVAL));
3825 
3826 			/*
3827 			 * Try to remove the hidden child (%recv) and after
3828 			 * that try to remove the target dataset.
3829 			 * If the hidden child (%recv) does not exist
3830 			 * the original error (EEXIST) will be returned
3831 			 */
3832 			err = dsl_destroy_head(namebuf);
3833 			if (err == 0)
3834 				err = dsl_destroy_head(zc->zc_name);
3835 			else if (err == ENOENT)
3836 				err = SET_ERROR(EEXIST);
3837 		}
3838 	}
3839 	if (ost == DMU_OST_ZVOL && err == 0)
3840 		(void) zvol_remove_minor(zc->zc_name);
3841 	return (err);
3842 }
3843 
3844 /*
3845  * innvl: {
3846  *     vdevs: {
3847  *         guid 1, guid 2, ...
3848  *     },
3849  *     func: POOL_INITIALIZE_{CANCEL|DO|SUSPEND}
3850  * }
3851  *
3852  * outnvl: {
3853  *     [func: EINVAL (if provided command type didn't make sense)],
3854  *     [vdevs: {
3855  *         guid1: errno, (see function body for possible errnos)
3856  *         ...
3857  *     }]
3858  * }
3859  *
3860  */
3861 static int
3862 zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3863 {
3864 	spa_t *spa;
3865 	int error;
3866 
3867 	error = spa_open(poolname, &spa, FTAG);
3868 	if (error != 0)
3869 		return (error);
3870 
3871 	uint64_t cmd_type;
3872 	if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
3873 	    &cmd_type) != 0) {
3874 		spa_close(spa, FTAG);
3875 		return (SET_ERROR(EINVAL));
3876 	}
3877 	if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
3878 	    cmd_type == POOL_INITIALIZE_DO ||
3879 	    cmd_type == POOL_INITIALIZE_SUSPEND)) {
3880 		spa_close(spa, FTAG);
3881 		return (SET_ERROR(EINVAL));
3882 	}
3883 
3884 	nvlist_t *vdev_guids;
3885 	if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
3886 	    &vdev_guids) != 0) {
3887 		spa_close(spa, FTAG);
3888 		return (SET_ERROR(EINVAL));
3889 	}
3890 
3891 	nvlist_t *vdev_errlist = fnvlist_alloc();
3892 	int total_errors = 0;
3893 
3894 	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
3895 	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
3896 		uint64_t vdev_guid = fnvpair_value_uint64(pair);
3897 
3898 		error = spa_vdev_initialize(spa, vdev_guid, cmd_type);
3899 		if (error != 0) {
3900 			char guid_as_str[MAXNAMELEN];
3901 
3902 			(void) snprintf(guid_as_str, sizeof (guid_as_str),
3903 			    "%llu", (unsigned long long)vdev_guid);
3904 			fnvlist_add_int64(vdev_errlist, guid_as_str, error);
3905 			total_errors++;
3906 		}
3907 	}
3908 	if (fnvlist_size(vdev_errlist) > 0) {
3909 		fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
3910 		    vdev_errlist);
3911 	}
3912 	fnvlist_free(vdev_errlist);
3913 
3914 	spa_close(spa, FTAG);
3915 	return (total_errors > 0 ? EINVAL : 0);
3916 }
3917 
3918 /*
3919  * fsname is name of dataset to rollback (to most recent snapshot)
3920  *
3921  * innvl may contain name of expected target snapshot
3922  *
3923  * outnvl: "target" -> name of most recent snapshot
3924  * }
3925  */
3926 /* ARGSUSED */
3927 static int
3928 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3929 {
3930 	zfsvfs_t *zfsvfs;
3931 	char *target = NULL;
3932 	int error;
3933 
3934 	(void) nvlist_lookup_string(innvl, "target", &target);
3935 	if (target != NULL) {
3936 		const char *cp = strchr(target, '@');
3937 
3938 		/*
3939 		 * The snap name must contain an @, and the part after it must
3940 		 * contain only valid characters.
3941 		 */
3942 		if (cp == NULL ||
3943 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3944 			return (SET_ERROR(EINVAL));
3945 	}
3946 
3947 	if (getzfsvfs(fsname, &zfsvfs) == 0) {
3948 		dsl_dataset_t *ds;
3949 
3950 		ds = dmu_objset_ds(zfsvfs->z_os);
3951 		error = zfs_suspend_fs(zfsvfs);
3952 		if (error == 0) {
3953 			int resume_err;
3954 
3955 			error = dsl_dataset_rollback(fsname, target, zfsvfs,
3956 			    outnvl);
3957 			resume_err = zfs_resume_fs(zfsvfs, ds);
3958 			error = error ? error : resume_err;
3959 		}
3960 		VFS_RELE(zfsvfs->z_vfs);
3961 	} else {
3962 		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
3963 	}
3964 	return (error);
3965 }
3966 
3967 static int
3968 recursive_unmount(const char *fsname, void *arg)
3969 {
3970 	const char *snapname = arg;
3971 	char fullname[ZFS_MAX_DATASET_NAME_LEN];
3972 
3973 	(void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
3974 	zfs_unmount_snap(fullname);
3975 
3976 	return (0);
3977 }
3978 
3979 /*
3980  * inputs:
3981  * zc_name	old name of dataset
3982  * zc_value	new name of dataset
3983  * zc_cookie	recursive flag (only valid for snapshots)
3984  *
3985  * outputs:	none
3986  */
3987 static int
3988 zfs_ioc_rename(zfs_cmd_t *zc)
3989 {
3990 	objset_t *os;
3991 	dmu_objset_type_t ost;
3992 	boolean_t recursive = zc->zc_cookie & 1;
3993 	char *at;
3994 	int err;
3995 
3996 	/* "zfs rename" from and to ...%recv datasets should both fail */
3997 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
3998 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3999 	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
4000 	    dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4001 	    strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
4002 		return (SET_ERROR(EINVAL));
4003 
4004 	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4005 	if (err != 0)
4006 		return (err);
4007 	ost = dmu_objset_type(os);
4008 	dmu_objset_rele(os, FTAG);
4009 
4010 	at = strchr(zc->zc_name, '@');
4011 	if (at != NULL) {
4012 		/* snaps must be in same fs */
4013 		int error;
4014 
4015 		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
4016 			return (SET_ERROR(EXDEV));
4017 		*at = '\0';
4018 		if (ost == DMU_OST_ZFS) {
4019 			error = dmu_objset_find(zc->zc_name,
4020 			    recursive_unmount, at + 1,
4021 			    recursive ? DS_FIND_CHILDREN : 0);
4022 			if (error != 0) {
4023 				*at = '@';
4024 				return (error);
4025 			}
4026 		}
4027 		error = dsl_dataset_rename_snapshot(zc->zc_name,
4028 		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
4029 		*at = '@';
4030 
4031 		return (error);
4032 	} else {
4033 		if (ost == DMU_OST_ZVOL)
4034 			(void) zvol_remove_minor(zc->zc_name);
4035 		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
4036 	}
4037 }
4038 
4039 static int
4040 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
4041 {
4042 	const char *propname = nvpair_name(pair);
4043 	boolean_t issnap = (strchr(dsname, '@') != NULL);
4044 	zfs_prop_t prop = zfs_name_to_prop(propname);
4045 	uint64_t intval;
4046 	int err;
4047 
4048 	if (prop == ZPROP_INVAL) {
4049 		if (zfs_prop_user(propname)) {
4050 			if (err = zfs_secpolicy_write_perms(dsname,
4051 			    ZFS_DELEG_PERM_USERPROP, cr))
4052 				return (err);
4053 			return (0);
4054 		}
4055 
4056 		if (!issnap && zfs_prop_userquota(propname)) {
4057 			const char *perm = NULL;
4058 			const char *uq_prefix =
4059 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
4060 			const char *gq_prefix =
4061 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
4062 			const char *uiq_prefix =
4063 			    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
4064 			const char *giq_prefix =
4065 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
4066 			const char *pq_prefix =
4067 			    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
4068 			const char *piq_prefix = zfs_userquota_prop_prefixes[\
4069 			    ZFS_PROP_PROJECTOBJQUOTA];
4070 
4071 			if (strncmp(propname, uq_prefix,
4072 			    strlen(uq_prefix)) == 0) {
4073 				perm = ZFS_DELEG_PERM_USERQUOTA;
4074 			} else if (strncmp(propname, uiq_prefix,
4075 			    strlen(uiq_prefix)) == 0) {
4076 				perm = ZFS_DELEG_PERM_USEROBJQUOTA;
4077 			} else if (strncmp(propname, gq_prefix,
4078 			    strlen(gq_prefix)) == 0) {
4079 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
4080 			} else if (strncmp(propname, giq_prefix,
4081 			    strlen(giq_prefix)) == 0) {
4082 				perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
4083 			} else if (strncmp(propname, pq_prefix,
4084 			    strlen(pq_prefix)) == 0) {
4085 				perm = ZFS_DELEG_PERM_PROJECTQUOTA;
4086 			} else if (strncmp(propname, piq_prefix,
4087 			    strlen(piq_prefix)) == 0) {
4088 				perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
4089 			} else {
4090 				/* {USER|GROUP|PROJECT}USED are read-only */
4091 				return (SET_ERROR(EINVAL));
4092 			}
4093 
4094 			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
4095 				return (err);
4096 			return (0);
4097 		}
4098 
4099 		return (SET_ERROR(EINVAL));
4100 	}
4101 
4102 	if (issnap)
4103 		return (SET_ERROR(EINVAL));
4104 
4105 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4106 		/*
4107 		 * dsl_prop_get_all_impl() returns properties in this
4108 		 * format.
4109 		 */
4110 		nvlist_t *attrs;
4111 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4112 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4113 		    &pair) == 0);
4114 	}
4115 
4116 	/*
4117 	 * Check that this value is valid for this pool version
4118 	 */
4119 	switch (prop) {
4120 	case ZFS_PROP_COMPRESSION:
4121 		/*
4122 		 * If the user specified gzip compression, make sure
4123 		 * the SPA supports it. We ignore any errors here since
4124 		 * we'll catch them later.
4125 		 */
4126 		if (nvpair_value_uint64(pair, &intval) == 0) {
4127 			if (intval >= ZIO_COMPRESS_GZIP_1 &&
4128 			    intval <= ZIO_COMPRESS_GZIP_9 &&
4129 			    zfs_earlier_version(dsname,
4130 			    SPA_VERSION_GZIP_COMPRESSION)) {
4131 				return (SET_ERROR(ENOTSUP));
4132 			}
4133 
4134 			if (intval == ZIO_COMPRESS_ZLE &&
4135 			    zfs_earlier_version(dsname,
4136 			    SPA_VERSION_ZLE_COMPRESSION))
4137 				return (SET_ERROR(ENOTSUP));
4138 
4139 			if (intval == ZIO_COMPRESS_LZ4) {
4140 				spa_t *spa;
4141 
4142 				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4143 					return (err);
4144 
4145 				if (!spa_feature_is_enabled(spa,
4146 				    SPA_FEATURE_LZ4_COMPRESS)) {
4147 					spa_close(spa, FTAG);
4148 					return (SET_ERROR(ENOTSUP));
4149 				}
4150 				spa_close(spa, FTAG);
4151 			}
4152 
4153 			/*
4154 			 * If this is a bootable dataset then
4155 			 * verify that the compression algorithm
4156 			 * is supported for booting. We must return
4157 			 * something other than ENOTSUP since it
4158 			 * implies a downrev pool version.
4159 			 */
4160 			if (zfs_is_bootfs(dsname) &&
4161 			    !BOOTFS_COMPRESS_VALID(intval)) {
4162 				return (SET_ERROR(ERANGE));
4163 			}
4164 		}
4165 		break;
4166 
4167 	case ZFS_PROP_COPIES:
4168 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4169 			return (SET_ERROR(ENOTSUP));
4170 		break;
4171 
4172 	case ZFS_PROP_RECORDSIZE:
4173 		/* Record sizes above 128k need the feature to be enabled */
4174 		if (nvpair_value_uint64(pair, &intval) == 0 &&
4175 		    intval > SPA_OLD_MAXBLOCKSIZE) {
4176 			spa_t *spa;
4177 
4178 			/*
4179 			 * We don't allow setting the property above 1MB,
4180 			 * unless the tunable has been changed.
4181 			 */
4182 			if (intval > zfs_max_recordsize ||
4183 			    intval > SPA_MAXBLOCKSIZE)
4184 				return (SET_ERROR(ERANGE));
4185 
4186 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4187 				return (err);
4188 
4189 			if (!spa_feature_is_enabled(spa,
4190 			    SPA_FEATURE_LARGE_BLOCKS)) {
4191 				spa_close(spa, FTAG);
4192 				return (SET_ERROR(ENOTSUP));
4193 			}
4194 			spa_close(spa, FTAG);
4195 		}
4196 		break;
4197 
4198 	case ZFS_PROP_DNODESIZE:
4199 		/* Dnode sizes above 512 need the feature to be enabled */
4200 		if (nvpair_value_uint64(pair, &intval) == 0 &&
4201 		    intval != ZFS_DNSIZE_LEGACY) {
4202 			spa_t *spa;
4203 
4204 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4205 				return (err);
4206 
4207 			if (!spa_feature_is_enabled(spa,
4208 			    SPA_FEATURE_LARGE_DNODE)) {
4209 				spa_close(spa, FTAG);
4210 				return (SET_ERROR(ENOTSUP));
4211 			}
4212 			spa_close(spa, FTAG);
4213 		}
4214 		break;
4215 
4216 	case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
4217 		/*
4218 		 * This property could require the allocation classes
4219 		 * feature to be active for setting, however we allow
4220 		 * it so that tests of settable properties succeed.
4221 		 * The CLI will issue a warning in this case.
4222 		 */
4223 		break;
4224 
4225 	case ZFS_PROP_SHARESMB:
4226 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4227 			return (SET_ERROR(ENOTSUP));
4228 		break;
4229 
4230 	case ZFS_PROP_ACLINHERIT:
4231 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4232 		    nvpair_value_uint64(pair, &intval) == 0) {
4233 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
4234 			    zfs_earlier_version(dsname,
4235 			    SPA_VERSION_PASSTHROUGH_X))
4236 				return (SET_ERROR(ENOTSUP));
4237 		}
4238 		break;
4239 
4240 	case ZFS_PROP_CHECKSUM:
4241 	case ZFS_PROP_DEDUP:
4242 	{
4243 		spa_feature_t feature;
4244 		spa_t *spa;
4245 
4246 		/* dedup feature version checks */
4247 		if (prop == ZFS_PROP_DEDUP &&
4248 		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4249 			return (SET_ERROR(ENOTSUP));
4250 
4251 		if (nvpair_value_uint64(pair, &intval) != 0)
4252 			return (SET_ERROR(EINVAL));
4253 
4254 		/* check prop value is enabled in features */
4255 		feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4256 		if (feature == SPA_FEATURE_NONE)
4257 			break;
4258 
4259 		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4260 			return (err);
4261 
4262 		if (!spa_feature_is_enabled(spa, feature)) {
4263 			spa_close(spa, FTAG);
4264 			return (SET_ERROR(ENOTSUP));
4265 		}
4266 		spa_close(spa, FTAG);
4267 		break;
4268 	}
4269 	}
4270 
4271 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4272 }
4273 
4274 /*
4275  * Checks for a race condition to make sure we don't increment a feature flag
4276  * multiple times.
4277  */
4278 static int
4279 zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4280 {
4281 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4282 	spa_feature_t *featurep = arg;
4283 
4284 	if (!spa_feature_is_active(spa, *featurep))
4285 		return (0);
4286 	else
4287 		return (SET_ERROR(EBUSY));
4288 }
4289 
4290 /*
4291  * The callback invoked on feature activation in the sync task caused by
4292  * zfs_prop_activate_feature.
4293  */
4294 static void
4295 zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
4296 {
4297 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4298 	spa_feature_t *featurep = arg;
4299 
4300 	spa_feature_incr(spa, *featurep, tx);
4301 }
4302 
4303 /*
4304  * Activates a feature on a pool in response to a property setting. This
4305  * creates a new sync task which modifies the pool to reflect the feature
4306  * as being active.
4307  */
4308 static int
4309 zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4310 {
4311 	int err;
4312 
4313 	/* EBUSY here indicates that the feature is already active */
4314 	err = dsl_sync_task(spa_name(spa),
4315 	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4316 	    &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4317 
4318 	if (err != 0 && err != EBUSY)
4319 		return (err);
4320 	else
4321 		return (0);
4322 }
4323 
4324 /*
4325  * Removes properties from the given props list that fail permission checks
4326  * needed to clear them and to restore them in case of a receive error. For each
4327  * property, make sure we have both set and inherit permissions.
4328  *
4329  * Returns the first error encountered if any permission checks fail. If the
4330  * caller provides a non-NULL errlist, it also gives the complete list of names
4331  * of all the properties that failed a permission check along with the
4332  * corresponding error numbers. The caller is responsible for freeing the
4333  * returned errlist.
4334  *
4335  * If every property checks out successfully, zero is returned and the list
4336  * pointed at by errlist is NULL.
4337  */
4338 static int
4339 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4340 {
4341 	zfs_cmd_t *zc;
4342 	nvpair_t *pair, *next_pair;
4343 	nvlist_t *errors;
4344 	int err, rv = 0;
4345 
4346 	if (props == NULL)
4347 		return (0);
4348 
4349 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4350 
4351 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4352 	(void) strcpy(zc->zc_name, dataset);
4353 	pair = nvlist_next_nvpair(props, NULL);
4354 	while (pair != NULL) {
4355 		next_pair = nvlist_next_nvpair(props, pair);
4356 
4357 		(void) strcpy(zc->zc_value, nvpair_name(pair));
4358 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4359 		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4360 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4361 			VERIFY(nvlist_add_int32(errors,
4362 			    zc->zc_value, err) == 0);
4363 		}
4364 		pair = next_pair;
4365 	}
4366 	kmem_free(zc, sizeof (zfs_cmd_t));
4367 
4368 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4369 		nvlist_free(errors);
4370 		errors = NULL;
4371 	} else {
4372 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4373 	}
4374 
4375 	if (errlist == NULL)
4376 		nvlist_free(errors);
4377 	else
4378 		*errlist = errors;
4379 
4380 	return (rv);
4381 }
4382 
4383 static boolean_t
4384 propval_equals(nvpair_t *p1, nvpair_t *p2)
4385 {
4386 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4387 		/* dsl_prop_get_all_impl() format */
4388 		nvlist_t *attrs;
4389 		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4390 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4391 		    &p1) == 0);
4392 	}
4393 
4394 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4395 		nvlist_t *attrs;
4396 		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4397 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4398 		    &p2) == 0);
4399 	}
4400 
4401 	if (nvpair_type(p1) != nvpair_type(p2))
4402 		return (B_FALSE);
4403 
4404 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4405 		char *valstr1, *valstr2;
4406 
4407 		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4408 		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4409 		return (strcmp(valstr1, valstr2) == 0);
4410 	} else {
4411 		uint64_t intval1, intval2;
4412 
4413 		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4414 		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4415 		return (intval1 == intval2);
4416 	}
4417 }
4418 
4419 /*
4420  * Remove properties from props if they are not going to change (as determined
4421  * by comparison with origprops). Remove them from origprops as well, since we
4422  * do not need to clear or restore properties that won't change.
4423  */
4424 static void
4425 props_reduce(nvlist_t *props, nvlist_t *origprops)
4426 {
4427 	nvpair_t *pair, *next_pair;
4428 
4429 	if (origprops == NULL)
4430 		return; /* all props need to be received */
4431 
4432 	pair = nvlist_next_nvpair(props, NULL);
4433 	while (pair != NULL) {
4434 		const char *propname = nvpair_name(pair);
4435 		nvpair_t *match;
4436 
4437 		next_pair = nvlist_next_nvpair(props, pair);
4438 
4439 		if ((nvlist_lookup_nvpair(origprops, propname,
4440 		    &match) != 0) || !propval_equals(pair, match))
4441 			goto next; /* need to set received value */
4442 
4443 		/* don't clear the existing received value */
4444 		(void) nvlist_remove_nvpair(origprops, match);
4445 		/* don't bother receiving the property */
4446 		(void) nvlist_remove_nvpair(props, pair);
4447 next:
4448 		pair = next_pair;
4449 	}
4450 }
4451 
4452 /*
4453  * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4454  * For example, refquota cannot be set until after the receipt of a dataset,
4455  * because in replication streams, an older/earlier snapshot may exceed the
4456  * refquota.  We want to receive the older/earlier snapshot, but setting
4457  * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4458  * the older/earlier snapshot from being received (with EDQUOT).
4459  *
4460  * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4461  *
4462  * libzfs will need to be judicious handling errors encountered by props
4463  * extracted by this function.
4464  */
4465 static nvlist_t *
4466 extract_delay_props(nvlist_t *props)
4467 {
4468 	nvlist_t *delayprops;
4469 	nvpair_t *nvp, *tmp;
4470 	static const zfs_prop_t delayable[] = {
4471 		ZFS_PROP_REFQUOTA,
4472 		ZFS_PROP_KEYLOCATION,
4473 		0
4474 	};
4475 	int i;
4476 
4477 	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4478 
4479 	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4480 	    nvp = nvlist_next_nvpair(props, nvp)) {
4481 		/*
4482 		 * strcmp() is safe because zfs_prop_to_name() always returns
4483 		 * a bounded string.
4484 		 */
4485 		for (i = 0; delayable[i] != 0; i++) {
4486 			if (strcmp(zfs_prop_to_name(delayable[i]),
4487 			    nvpair_name(nvp)) == 0) {
4488 				break;
4489 			}
4490 		}
4491 		if (delayable[i] != 0) {
4492 			tmp = nvlist_prev_nvpair(props, nvp);
4493 			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4494 			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4495 			nvp = tmp;
4496 		}
4497 	}
4498 
4499 	if (nvlist_empty(delayprops)) {
4500 		nvlist_free(delayprops);
4501 		delayprops = NULL;
4502 	}
4503 	return (delayprops);
4504 }
4505 
/*
 * Fault-injection switch, present in DEBUG kernels only.  When it is set,
 * zfs_ioc_recv_impl() forces its result to a non-zero error after the stream
 * has been processed and resets the switch to B_FALSE, so it affects a single
 * receive.  Nothing visible in this part of the file sets it; presumably it
 * is flipped from a debugger or a test harness.
 */
4506 #ifdef	DEBUG
4507 static boolean_t zfs_ioc_recv_inject_err;
4508 #endif
4509 
4510 /*
4511  * nvlist 'errors' is always allocated. It will contain descriptions of
4512  * encountered errors, if any. It's the caller's responsibility to free.
 *
 * Receives the send stream readable from input_fd into tofs@tosnap by way of
 * dmu_recv_begin(), dmu_recv_stream() and dmu_recv_end().
 *
 * recvprops (may be NULL) is applied with ZPROP_SRC_RECEIVED.  The list is
 * modified in place: props_reduce() and extract_delay_props() both remove
 * entries from it.  localprops (may be NULL) is split by pair type:
 * DATA_TYPE_BOOLEAN pairs are "-x" requests applied with
 * ZPROP_SRC_INHERITED, all other pairs are "-o property=value" requests
 * applied with ZPROP_SRC_LOCAL.  Properties picked out by
 * extract_delay_props() are only set once dmu_recv_end() has succeeded.
 *
 * If the receive fails on a dataset that already existed (!drc.drc_newfs),
 * the previous received and local properties are put back on a best-effort
 * basis.  ZPROP_ERR_NOCLEAR and ZPROP_ERR_NORESTORE are or'ed into *errflags
 * when stashing/clearing, respectively restoring, them did not work.
 *
 * *read_bytes is set to the distance the stream offset advanced (it stays 0
 * if the receive never got past dmu_recv_begin()).
 *
 * Returns EBADF if getf() cannot resolve input_fd; otherwise the error from
 * the receive, or, when the receive itself succeeded, the result of
 * dsl_prop_set_hasrecvd().
4513  */
4514 static int
4515 zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
4516     nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force,
4517     boolean_t resumable, int input_fd, dmu_replay_record_t *begin_record,
4518     int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags,
4519     uint64_t *action_handle, nvlist_t **errors)
4520 {
4521 	dmu_recv_cookie_t drc;
4522 	int error = 0;
4523 	int props_error = 0;
4524 	offset_t off;
4525 	nvlist_t *local_delayprops = NULL;
4526 	nvlist_t *recv_delayprops = NULL;
4527 	nvlist_t *origprops = NULL; /* existing properties */
4528 	nvlist_t *origrecvd = NULL; /* existing received properties */
4529 	boolean_t first_recvd_props = B_FALSE;
4530 	file_t *input_fp;
4531 
	/* Outputs are initialized up front so every return path is defined. */
4532 	*read_bytes = 0;
4533 	*errflags = 0;
4534 	*errors = fnvlist_alloc();
4535 
	/*
	 * Once getf() succeeds, every exit goes through the out: label so
	 * that releasef() is called.
	 */
4536 	input_fp = getf(input_fd);
4537 	if (input_fp == NULL)
4538 		return (SET_ERROR(EBADF));
4539 
4540 	error = dmu_recv_begin(tofs, tosnap, begin_record, force,
4541 	    resumable, localprops, hidden_args, origin, &drc);
4542 	if (error != 0)
4543 		goto out;
4544 
4545 	/*
4546 	 * Set properties before we receive the stream so that they are applied
4547 	 * to the new data. Note that we must call dmu_recv_stream() if
4548 	 * dmu_recv_begin() succeeds.
4549 	 */
4550 	if (recvprops != NULL && !drc.drc_newfs) {
4551 		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
4552 		    SPA_VERSION_RECVD_PROPS &&
4553 		    !dsl_prop_get_hasrecvd(tofs))
4554 			first_recvd_props = B_TRUE;
4555 
4556 		/*
4557 		 * If new received properties are supplied, they are to
4558 		 * completely replace the existing received properties,
4559 		 * so stash away the existing ones.
4560 		 */
4561 		if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
4562 			nvlist_t *errlist = NULL;
4563 			/*
4564 			 * Don't bother writing a property if its value won't
4565 			 * change (and avoid the unnecessary security checks).
4566 			 *
4567 			 * The first receive after SPA_VERSION_RECVD_PROPS is a
4568 			 * special case where we blow away all local properties
4569 			 * regardless.
4570 			 */
4571 			if (!first_recvd_props)
4572 				props_reduce(recvprops, origrecvd);
			/*
			 * Properties we may not clear are dropped from
			 * origrecvd and reported to the caller via *errors.
			 */
4573 			if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
4574 				(void) nvlist_merge(*errors, errlist, 0);
4575 			nvlist_free(errlist);
4576 
4577 			if (clear_received_props(tofs, origrecvd,
4578 			    first_recvd_props ? NULL : recvprops) != 0)
4579 				*errflags |= ZPROP_ERR_NOCLEAR;
4580 		} else {
4581 			*errflags |= ZPROP_ERR_NOCLEAR;
4582 		}
4583 	}
4584 
4585 	/*
4586 	 * Stash away existing properties so we can restore them on error unless
4587 	 * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
4588 	 * case "origrecvd" will take care of that.
4589 	 */
4590 	if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
4591 		objset_t *os;
4592 		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
4593 			if (dsl_prop_get_all(os, &origprops) != 0) {
4594 				*errflags |= ZPROP_ERR_NOCLEAR;
4595 			}
4596 			dmu_objset_rele(os, FTAG);
4597 		} else {
4598 			*errflags |= ZPROP_ERR_NOCLEAR;
4599 		}
4600 	}
4601 
4602 	if (recvprops != NULL) {
4603 		props_error = dsl_prop_set_hasrecvd(tofs);
4604 
4605 		if (props_error == 0) {
			/* Delayed props are held back until after recv_end. */
4606 			recv_delayprops = extract_delay_props(recvprops);
4607 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4608 			    recvprops, *errors);
4609 		}
4610 	}
4611 
4612 	if (localprops != NULL) {
		/*
		 * Sort localprops into values to set (oprops) and names to
		 * inherit (xprops).  Both are scratch lists freed below;
		 * localprops itself is not modified by this loop.
		 */
4613 		nvlist_t *oprops = fnvlist_alloc();
4614 		nvlist_t *xprops = fnvlist_alloc();
4615 		nvpair_t *nvp = NULL;
4616 
4617 		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
4618 			if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
4619 				/* -x property */
4620 				const char *name = nvpair_name(nvp);
4621 				zfs_prop_t prop = zfs_name_to_prop(name);
				/*
				 * Skip names that are neither an inheritable
				 * native property nor a user property.
				 */
4622 				if (prop != ZPROP_INVAL) {
4623 					if (!zfs_prop_inheritable(prop))
4624 						continue;
4625 				} else if (!zfs_prop_user(name))
4626 					continue;
4627 				fnvlist_add_boolean(xprops, name);
4628 			} else {
4629 				/* -o property=value */
4630 				fnvlist_add_nvpair(oprops, nvp);
4631 			}
4632 		}
4633 
4634 		local_delayprops = extract_delay_props(oprops);
4635 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
4636 		    oprops, *errors);
4637 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
4638 		    xprops, *errors);
4639 
4640 		nvlist_free(oprops);
4641 		nvlist_free(xprops);
4642 	}
4643 
	/*
	 * off starts at the file's current offset and is handed to
	 * dmu_recv_stream() by reference; the difference is reported in
	 * *read_bytes further down.
	 */
4644 	off = input_fp->f_offset;
4645 	error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
4646 	    action_handle);
4647 
4648 	if (error == 0) {
4649 		zfsvfs_t *zfsvfs = NULL;
4650 
4651 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
4652 			/* online recv */
4653 			dsl_dataset_t *ds;
4654 			int end_err;
4655 
4656 			ds = dmu_objset_ds(zfsvfs->z_os);
4657 			error = zfs_suspend_fs(zfsvfs);
4658 			/*
4659 			 * If the suspend fails, then the recv_end will
4660 			 * likely also fail, and clean up after itself.
4661 			 */
4662 			end_err = dmu_recv_end(&drc, zfsvfs);
4663 			if (error == 0)
4664 				error = zfs_resume_fs(zfsvfs, ds);
			/* A suspend/resume failure takes precedence. */
4665 			error = error ? error : end_err;
4666 			VFS_RELE(zfsvfs->z_vfs);
4667 		} else {
4668 			error = dmu_recv_end(&drc, NULL);
4669 		}
4670 
4671 		/* Set delayed properties now, after we're done receiving. */
4672 		if (recv_delayprops != NULL && error == 0) {
4673 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4674 			    recv_delayprops, *errors);
4675 		}
4676 		if (local_delayprops != NULL && error == 0) {
4677 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
4678 			    local_delayprops, *errors);
4679 		}
4680 	}
4681 
4682 	/*
4683 	 * Merge delayed props back in with initial props, in case
4684 	 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
4685 	 * we have to make sure clear_received_props() includes
4686 	 * the delayed properties).
4687 	 *
4688 	 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
4689 	 * using ASSERT() will be just like a VERIFY.
4690 	 */
4691 	if (recv_delayprops != NULL) {
4692 		ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0);
4693 		nvlist_free(recv_delayprops);
4694 	}
4695 	if (local_delayprops != NULL) {
4696 		ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
4697 		nvlist_free(local_delayprops);
4698 	}
4699 
	/* Publish the advanced offset only if VOP_SEEK() accepts it. */
4700 	*read_bytes = off - input_fp->f_offset;
4701 	if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0)
4702 		input_fp->f_offset = off;
4703 
4704 #ifdef	DEBUG
	/* One-shot fault injection: fail this receive, then disarm. */
4705 	if (zfs_ioc_recv_inject_err) {
4706 		zfs_ioc_recv_inject_err = B_FALSE;
4707 		error = 1;
4708 	}
4709 #endif
4710 
4711 	/*
4712 	 * On error, restore the original props.
4713 	 */
4714 	if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
4715 		if (clear_received_props(tofs, recvprops, NULL) != 0) {
4716 			/*
4717 			 * We failed to clear the received properties.
4718 			 * Since we may have left a $recvd value on the
4719 			 * system, we can't clear the $hasrecvd flag.
4720 			 */
4721 			*errflags |= ZPROP_ERR_NORESTORE;
4722 		} else if (first_recvd_props) {
4723 			dsl_prop_unset_hasrecvd(tofs);
4724 		}
4725 
4726 		if (origrecvd == NULL && !drc.drc_newfs) {
4727 			/* We failed to stash the original properties. */
4728 			*errflags |= ZPROP_ERR_NORESTORE;
4729 		}
4730 
4731 		/*
4732 		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
4733 		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
4734 		 * explicitly if we're restoring local properties cleared in the
4735 		 * first new-style receive.
4736 		 */
4737 		if (origrecvd != NULL &&
4738 		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
4739 		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
4740 		    origrecvd, NULL) != 0) {
4741 			/*
4742 			 * We stashed the original properties but failed to
4743 			 * restore them.
4744 			 */
4745 			*errflags |= ZPROP_ERR_NORESTORE;
4746 		}
4747 	}
4748 	if (error != 0 && localprops != NULL && !drc.drc_newfs &&
4749 	    !first_recvd_props) {
4750 		nvlist_t *setprops;
4751 		nvlist_t *inheritprops;
4752 		nvpair_t *nvp;
4753 
4754 		if (origprops == NULL) {
4755 			/* We failed to stash the original properties. */
4756 			*errflags |= ZPROP_ERR_NORESTORE;
4757 			goto out;
4758 		}
4759 
4760 		/* Restore original props */
4761 		setprops = fnvlist_alloc();
4762 		inheritprops = fnvlist_alloc();
4763 		nvp = NULL;
4764 		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
4765 			const char *name = nvpair_name(nvp);
4766 			const char *source;
4767 			nvlist_t *attrs;
4768 
4769 			if (!nvlist_exists(origprops, name)) {
4770 				/*
4771 				 * Property was not present or was explicitly
4772 				 * inherited before the receive, restore this.
4773 				 */
4774 				fnvlist_add_boolean(inheritprops, name);
4775 				continue;
4776 			}
4777 			attrs = fnvlist_lookup_nvlist(origprops, name);
4778 			source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);
4779 
4780 			/* Skip received properties */
4781 			if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
4782 				continue;
4783 
4784 			if (strcmp(source, tofs) == 0) {
4785 				/* Property was locally set */
4786 				fnvlist_add_nvlist(setprops, name, attrs);
4787 			} else {
4788 				/* Property was implicitly inherited */
4789 				fnvlist_add_boolean(inheritprops, name);
4790 			}
4791 		}
4792 
4793 		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
4794 		    NULL) != 0)
4795 			*errflags |= ZPROP_ERR_NORESTORE;
4796 		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
4797 		    NULL) != 0)
4798 			*errflags |= ZPROP_ERR_NORESTORE;
4799 
4800 		nvlist_free(setprops);
4801 		nvlist_free(inheritprops);
4802 	}
4803 out:
4804 	releasef(input_fd);
4805 	nvlist_free(origrecvd);
4806 	nvlist_free(origprops);
4807 
	/* A property-setup failure is only reported if the receive worked. */
4808 	if (error == 0)
4809 		error = props_error;
4810 
4811 	return (error);
4812 }
4813 
4814 /*
4815  * inputs:
4816  * zc_name		name of containing filesystem
4817  * zc_nvlist_src{_size}	nvlist of received properties to apply
4818  * zc_nvlist_conf{_size} nvlist of local properties to apply
4819  * zc_history_offset{_len} nvlist of hidden args { "wkeydata" -> value }
4820  * zc_value		name of snapshot to create
4821  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
4822  * zc_cookie		file descriptor to recv from
4823  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
4824  * zc_guid		force flag
4825  * zc_cleanup_fd	cleanup-on-exit file descriptor
4826  * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
4827  * zc_resumable		if data is incomplete assume sender will resume
4828  *
4829  * outputs:
4830  * zc_cookie		number of bytes read
4831  * zc_nvlist_dst{_size} error for each unapplied received property
4832  * zc_obj		zprop_errflags_t
4833  * zc_action_handle	handle for this guid/ds mapping
4834  */
4835 static int
4836 zfs_ioc_recv(zfs_cmd_t *zc)
4837 {
4838 	dmu_replay_record_t begin_record;
4839 	nvlist_t *errors = NULL;
4840 	nvlist_t *recvdprops = NULL;
4841 	nvlist_t *localprops = NULL;
4842 	nvlist_t *hidden_args = NULL;
4843 	char *origin = NULL;
4844 	char *tosnap;
4845 	char tofs[ZFS_MAX_DATASET_NAME_LEN];
4846 	int error = 0;
4847 
4848 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4849 	    strchr(zc->zc_value, '@') == NULL ||
4850 	    strchr(zc->zc_value, '%'))
4851 		return (SET_ERROR(EINVAL));
4852 
4853 	(void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
4854 	tosnap = strchr(tofs, '@');
4855 	*tosnap++ = '\0';
4856 
4857 	if (zc->zc_nvlist_src != 0 &&
4858 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
4859 	    zc->zc_iflags, &recvdprops)) != 0)
4860 		return (error);
4861 
4862 	if (zc->zc_nvlist_conf != 0 &&
4863 	    (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
4864 	    zc->zc_iflags, &localprops)) != 0)
4865 		return (error);
4866 
4867 	if (zc->zc_history_offset != 0 &&
4868 	    (error = get_nvlist(zc->zc_history_offset, zc->zc_history_len,
4869 	    zc->zc_iflags, &hidden_args)) != 0)
4870 		return (error);
4871 
4872 	if (zc->zc_string[0])
4873 		origin = zc->zc_string;
4874 
4875 	begin_record.drr_type = DRR_BEGIN;
4876 	begin_record.drr_payloadlen = zc->zc_begin_record.drr_payloadlen;
4877 	begin_record.drr_u.drr_begin = zc->zc_begin_record.drr_u.drr_begin;
4878 
4879 	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
4880 	    hidden_args, zc->zc_guid, zc->zc_resumable, zc->zc_cookie,
4881 	    &begin_record, zc->zc_cleanup_fd, &zc->zc_cookie, &zc->zc_obj,
4882 	    &zc->zc_action_handle, &errors);
4883 	nvlist_free(recvdprops);
4884 	nvlist_free(localprops);
4885 
4886 	/*
4887 	 * Now that all props, initial and delayed, are set, report the prop
4888 	 * errors to the caller.
4889 	 */
4890 	if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
4891 	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
4892 	    put_nvlist(zc, errors) != 0)) {
4893 		/*
4894 		 * Caller made zc->zc_nvlist_dst less than the minimum expected
4895 		 * size or supplied an invalid address.
4896 		 */
4897 		error = SET_ERROR(EINVAL);
4898 	}
4899 
4900 	nvlist_free(errors);
4901 
4902 	return (error);
4903 }
4904 
4905 /*
4906  * inputs:
4907  * zc_name	name of snapshot to send
4908  * zc_cookie	file descriptor to send stream to
4909  * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
4910  * zc_sendobj	objsetid of snapshot to send
4911  * zc_fromobj	objsetid of incremental fromsnap (may be zero)
4912  * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
4913  *		output size in zc_objset_type.
4914  * zc_flags	lzc_send_flags
4915  *
4916  * outputs:
4917  * zc_objset_type	estimated size, if zc_guid is set
4918  */
4919 static int
4920 zfs_ioc_send(zfs_cmd_t *zc)
4921 {
4922 	int error;
4923 	offset_t off;
4924 	boolean_t estimate = (zc->zc_guid != 0);
4925 	boolean_t embedok = (zc->zc_flags & 0x1);
4926 	boolean_t large_block_ok = (zc->zc_flags & 0x2);
4927 	boolean_t compressok = (zc->zc_flags & 0x4);
4928 	boolean_t rawok = (zc->zc_flags & 0x8);
4929 
4930 	if (zc->zc_obj != 0) {
4931 		dsl_pool_t *dp;
4932 		dsl_dataset_t *tosnap;
4933 
4934 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4935 		if (error != 0)
4936 			return (error);
4937 
4938 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4939 		if (error != 0) {
4940 			dsl_pool_rele(dp, FTAG);
4941 			return (error);
4942 		}
4943 
4944 		if (dsl_dir_is_clone(tosnap->ds_dir))
4945 			zc->zc_fromobj =
4946 			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
4947 		dsl_dataset_rele(tosnap, FTAG);
4948 		dsl_pool_rele(dp, FTAG);
4949 	}
4950 
4951 	if (estimate) {
4952 		dsl_pool_t *dp;
4953 		dsl_dataset_t *tosnap;
4954 		dsl_dataset_t *fromsnap = NULL;
4955 
4956 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4957 		if (error != 0)
4958 			return (error);
4959 
4960 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
4961 		    FTAG, &tosnap);
4962 		if (error != 0) {
4963 			dsl_pool_rele(dp, FTAG);
4964 			return (error);
4965 		}
4966 
4967 		if (zc->zc_fromobj != 0) {
4968 			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
4969 			    FTAG, &fromsnap);
4970 			if (error != 0) {
4971 				dsl_dataset_rele(tosnap, FTAG);
4972 				dsl_pool_rele(dp, FTAG);
4973 				return (error);
4974 			}
4975 		}
4976 
4977 		error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok,
4978 		    &zc->zc_objset_type);
4979 
4980 		if (fromsnap != NULL)
4981 			dsl_dataset_rele(fromsnap, FTAG);
4982 		dsl_dataset_rele(tosnap, FTAG);
4983 		dsl_pool_rele(dp, FTAG);
4984 	} else {
4985 		file_t *fp = getf(zc->zc_cookie);
4986 		if (fp == NULL)
4987 			return (SET_ERROR(EBADF));
4988 
4989 		off = fp->f_offset;
4990 		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
4991 		    zc->zc_fromobj, embedok, large_block_ok, compressok, rawok,
4992 		    zc->zc_cookie, fp->f_vnode, &off);
4993 
4994 		if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4995 			fp->f_offset = off;
4996 		releasef(zc->zc_cookie);
4997 	}
4998 	return (error);
4999 }
5000 
5001 /*
5002  * inputs:
5003  * zc_name	name of snapshot on which to report progress
5004  * zc_cookie	file descriptor of send stream
5005  *
5006  * outputs:
5007  * zc_cookie	number of bytes written in send stream thus far
5008  */
5009 static int
5010 zfs_ioc_send_progress(zfs_cmd_t *zc)
5011 {
5012 	dsl_pool_t *dp;
5013 	dsl_dataset_t *ds;
5014 	dmu_sendarg_t *dsp = NULL;
5015 	int error;
5016 
5017 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5018 	if (error != 0)
5019 		return (error);
5020 
5021 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5022 	if (error != 0) {
5023 		dsl_pool_rele(dp, FTAG);
5024 		return (error);
5025 	}
5026 
5027 	mutex_enter(&ds->ds_sendstream_lock);
5028 
5029 	/*
5030 	 * Iterate over all the send streams currently active on this dataset.
5031 	 * If there's one which matches the specified file descriptor _and_ the
5032 	 * stream was started by the current process, return the progress of
5033 	 * that stream.
5034 	 */
5035 	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
5036 	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
5037 		if (dsp->dsa_outfd == zc->zc_cookie &&
5038 		    dsp->dsa_proc == curproc)
5039 			break;
5040 	}
5041 
5042 	if (dsp != NULL)
5043 		zc->zc_cookie = *(dsp->dsa_off);
5044 	else
5045 		error = SET_ERROR(ENOENT);
5046 
5047 	mutex_exit(&ds->ds_sendstream_lock);
5048 	dsl_dataset_rele(ds, FTAG);
5049 	dsl_pool_rele(dp, FTAG);
5050 	return (error);
5051 }
5052 
5053 static int
5054 zfs_ioc_inject_fault(zfs_cmd_t *zc)
5055 {
5056 	int id, error;
5057 
5058 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
5059 	    &zc->zc_inject_record);
5060 
5061 	if (error == 0)
5062 		zc->zc_guid = (uint64_t)id;
5063 
5064 	return (error);
5065 }
5066 
5067 static int
5068 zfs_ioc_clear_fault(zfs_cmd_t *zc)
5069 {
5070 	return (zio_clear_fault((int)zc->zc_guid));
5071 }
5072 
5073 static int
5074 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
5075 {
5076 	int id = (int)zc->zc_guid;
5077 	int error;
5078 
5079 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
5080 	    &zc->zc_inject_record);
5081 
5082 	zc->zc_guid = id;
5083 
5084 	return (error);
5085 }
5086 
5087 static int
5088 zfs_ioc_error_log(zfs_cmd_t *zc)
5089 {
5090 	spa_t *spa;
5091 	int error;
5092 	size_t count = (size_t)zc->zc_nvlist_dst_size;
5093 
5094 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
5095 		return (error);
5096 
5097 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
5098 	    &count);
5099 	if (error == 0)
5100 		zc->zc_nvlist_dst_size = count;
5101 	else
5102 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
5103 
5104 	spa_close(spa, FTAG);
5105 
5106 	return (error);
5107 }
5108 
5109 static int
5110 zfs_ioc_clear(zfs_cmd_t *zc)
5111 {
5112 	spa_t *spa;
5113 	vdev_t *vd;
5114 	int error;
5115 
5116 	/*
5117 	 * On zpool clear we also fix up missing slogs
5118 	 */
5119 	mutex_enter(&spa_namespace_lock);
5120 	spa = spa_lookup(zc->zc_name);
5121 	if (spa == NULL) {
5122 		mutex_exit(&spa_namespace_lock);
5123 		return (SET_ERROR(EIO));
5124 	}
5125 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
5126 		/* we need to let spa_open/spa_load clear the chains */
5127 		spa_set_log_state(spa, SPA_LOG_CLEAR);
5128 	}
5129 	spa->spa_last_open_failed = 0;
5130 	mutex_exit(&spa_namespace_lock);
5131 
5132 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
5133 		error = spa_open(zc->zc_name, &spa, FTAG);
5134 	} else {
5135 		nvlist_t *policy;
5136 		nvlist_t *config = NULL;
5137 
5138 		if (zc->zc_nvlist_src == 0)
5139 			return (SET_ERROR(EINVAL));
5140 
5141 		if ((error = get_nvlist(zc->zc_nvlist_src,
5142 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
5143 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
5144 			    policy, &config);
5145 			if (config != NULL) {
5146 				int err;
5147 
5148 				if ((err = put_nvlist(zc, config)) != 0)
5149 					error = err;
5150 				nvlist_free(config);
5151 			}
5152 			nvlist_free(policy);
5153 		}
5154 	}
5155 
5156 	if (error != 0)
5157 		return (error);
5158 
5159 	/*
5160 	 * If multihost is enabled, resuming I/O is unsafe as another
5161 	 * host may have imported the pool.
5162 	 */
5163 	if (spa_multihost(spa) && spa_suspended(spa))
5164 		return (SET_ERROR(EINVAL));
5165 
5166 	spa_vdev_state_enter(spa, SCL_NONE);
5167 
5168 	if (zc->zc_guid == 0) {
5169 		vd = NULL;
5170 	} else {
5171 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
5172 		if (vd == NULL) {
5173 			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
5174 			spa_close(spa, FTAG);
5175 			return (SET_ERROR(ENODEV));
5176 		}
5177 	}
5178 
5179 	vdev_clear(spa, vd);
5180 
5181 	(void) spa_vdev_state_exit(spa, NULL, 0);
5182 
5183 	/*
5184 	 * Resume any suspended I/Os.
5185 	 */
5186 	if (zio_resume(spa) != 0)
5187 		error = SET_ERROR(EIO);
5188 
5189 	spa_close(spa, FTAG);
5190 
5191 	return (error);
5192 }
5193 
5194 static int
5195 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
5196 {
5197 	spa_t *spa;
5198 	int error;
5199 
5200 	error = spa_open(zc->zc_name, &spa, FTAG);
5201 	if (error != 0)
5202 		return (error);
5203 
5204 	spa_vdev_state_enter(spa, SCL_NONE);
5205 
5206 	/*
5207 	 * If a resilver is already in progress then set the
5208 	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
5209 	 * the scan as a side effect of the reopen. Otherwise, let
5210 	 * vdev_open() decided if a resilver is required.
5211 	 */
5212 	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
5213 	vdev_reopen(spa->spa_root_vdev);
5214 	spa->spa_scrub_reopen = B_FALSE;
5215 
5216 	(void) spa_vdev_state_exit(spa, NULL, 0);
5217 	spa_close(spa, FTAG);
5218 	return (0);
5219 }
5220 /*
5221  * inputs:
5222  * zc_name	name of filesystem
5223  *
5224  * outputs:
5225  * zc_string	name of conflicting snapshot, if there is one
5226  */
5227 static int
5228 zfs_ioc_promote(zfs_cmd_t *zc)
5229 {
5230 	dsl_pool_t *dp;
5231 	dsl_dataset_t *ds, *ods;
5232 	char origin[ZFS_MAX_DATASET_NAME_LEN];
5233 	char *cp;
5234 	int error;
5235 
5236 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5237 	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5238 	    strchr(zc->zc_name, '%'))
5239 		return (SET_ERROR(EINVAL));
5240 
5241 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5242 	if (error != 0)
5243 		return (error);
5244 
5245 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5246 	if (error != 0) {
5247 		dsl_pool_rele(dp, FTAG);
5248 		return (error);
5249 	}
5250 
5251 	if (!dsl_dir_is_clone(ds->ds_dir)) {
5252 		dsl_dataset_rele(ds, FTAG);
5253 		dsl_pool_rele(dp, FTAG);
5254 		return (SET_ERROR(EINVAL));
5255 	}
5256 
5257 	error = dsl_dataset_hold_obj(dp,
5258 	    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5259 	if (error != 0) {
5260 		dsl_dataset_rele(ds, FTAG);
5261 		dsl_pool_rele(dp, FTAG);
5262 		return (error);
5263 	}
5264 
5265 	dsl_dataset_name(ods, origin);
5266 	dsl_dataset_rele(ods, FTAG);
5267 	dsl_dataset_rele(ds, FTAG);
5268 	dsl_pool_rele(dp, FTAG);
5269 
5270 	/*
5271 	 * We don't need to unmount *all* the origin fs's snapshots, but
5272 	 * it's easier.
5273 	 */
5274 	cp = strchr(origin, '@');
5275 	if (cp)
5276 		*cp = '\0';
5277 	(void) dmu_objset_find(origin,
5278 	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
5279 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
5280 }
5281 
5282 /*
5283  * Retrieve a single {user|group|project}{used|quota}@... property.
5284  *
5285  * inputs:
5286  * zc_name	name of filesystem
5287  * zc_objset_type zfs_userquota_prop_t
5288  * zc_value	domain name (eg. "S-1-234-567-89")
5289  * zc_guid	RID/UID/GID
5290  *
5291  * outputs:
5292  * zc_cookie	property value
5293  */
5294 static int
5295 zfs_ioc_userspace_one(zfs_cmd_t *zc)
5296 {
5297 	zfsvfs_t *zfsvfs;
5298 	int error;
5299 
5300 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5301 		return (SET_ERROR(EINVAL));
5302 
5303 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5304 	if (error != 0)
5305 		return (error);
5306 
5307 	error = zfs_userspace_one(zfsvfs,
5308 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5309 	zfsvfs_rele(zfsvfs, FTAG);
5310 
5311 	return (error);
5312 }
5313 
5314 /*
5315  * inputs:
5316  * zc_name		name of filesystem
5317  * zc_cookie		zap cursor
5318  * zc_objset_type	zfs_userquota_prop_t
5319  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5320  *
5321  * outputs:
5322  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
5323  * zc_cookie	zap cursor
5324  */
5325 static int
5326 zfs_ioc_userspace_many(zfs_cmd_t *zc)
5327 {
5328 	zfsvfs_t *zfsvfs;
5329 	int bufsize = zc->zc_nvlist_dst_size;
5330 
5331 	if (bufsize <= 0)
5332 		return (SET_ERROR(ENOMEM));
5333 
5334 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5335 	if (error != 0)
5336 		return (error);
5337 
5338 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
5339 
5340 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5341 	    buf, &zc->zc_nvlist_dst_size);
5342 
5343 	if (error == 0) {
5344 		error = xcopyout(buf,
5345 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
5346 		    zc->zc_nvlist_dst_size);
5347 	}
5348 	kmem_free(buf, bufsize);
5349 	zfsvfs_rele(zfsvfs, FTAG);
5350 
5351 	return (error);
5352 }
5353 
5354 /*
5355  * inputs:
5356  * zc_name		name of filesystem
5357  *
5358  * outputs:
5359  * none
5360  */
5361 static int
5362 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5363 {
5364 	objset_t *os;
5365 	int error = 0;
5366 	zfsvfs_t *zfsvfs;
5367 
5368 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5369 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5370 			/*
5371 			 * If userused is not enabled, it may be because the
5372 			 * objset needs to be closed & reopened (to grow the
5373 			 * objset_phys_t).  Suspend/resume the fs will do that.
5374 			 */
5375 			dsl_dataset_t *ds, *newds;
5376 
5377 			ds = dmu_objset_ds(zfsvfs->z_os);
5378 			error = zfs_suspend_fs(zfsvfs);
5379 			if (error == 0) {
5380 				dmu_objset_refresh_ownership(ds, &newds,
5381 				    B_TRUE, zfsvfs);
5382 				error = zfs_resume_fs(zfsvfs, newds);
5383 			}
5384 		}
5385 		if (error == 0)
5386 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5387 		VFS_RELE(zfsvfs->z_vfs);
5388 	} else {
5389 		/* XXX kind of reading contents without owning */
5390 		error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
5391 		if (error != 0)
5392 			return (error);
5393 
5394 		error = dmu_objset_userspace_upgrade(os);
5395 		dmu_objset_rele_flags(os, B_TRUE, FTAG);
5396 	}
5397 
5398 	return (error);
5399 }
5400 
5401 /*
5402  * inputs:
5403  * zc_name		name of filesystem
5404  *
5405  * outputs:
5406  * none
5407  */
5408 static int
5409 zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
5410 {
5411 	objset_t *os;
5412 	int error;
5413 
5414 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5415 	if (error != 0)
5416 		return (error);
5417 
5418 	dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
5419 	dsl_pool_rele(dmu_objset_pool(os), FTAG);
5420 
5421 	if (dmu_objset_userobjspace_upgradable(os) ||
5422 	    dmu_objset_projectquota_upgradable(os)) {
5423 		mutex_enter(&os->os_upgrade_lock);
5424 		if (os->os_upgrade_id == 0) {
5425 			/* clear potential error code and retry */
5426 			os->os_upgrade_status = 0;
5427 			mutex_exit(&os->os_upgrade_lock);
5428 
5429 			dmu_objset_id_quota_upgrade(os);
5430 		} else {
5431 			mutex_exit(&os->os_upgrade_lock);
5432 		}
5433 
5434 		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
5435 		error = os->os_upgrade_status;
5436 	}
5437 
5438 	dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
5439 	dsl_dataset_rele(dmu_objset_ds(os), FTAG);
5440 
5441 	return (error);
5442 }
5443 
/*
 * We don't want to have a hard dependency against some special symbols in
 * sharefs, nfs, and smbsrv.  Determine them if needed when the first file
 * system is shared.
 * None of sharefs, nfs, or smbsrv is an unloadable module.
 */
/* Entry points resolved by zfs_ioc_share() and zfs_init_sharefs(). */
int (*znfsexport_fs)(void *arg);	/* "nfs_export" from fs/nfs */
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
					/* "sharefs_impl" from fs/sharefs */
int (*zsmbexport_fs)(void *arg, boolean_t add_share);
					/* "smb_server_share" from drv/smbsrv */

/* Set to 1 once the corresponding protocol's symbols have been resolved. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Module handles returned by ddi_modopen(); NULL until first opened. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
/* Held while the handles and function pointers above are being set up. */
kmutex_t zfs_share_lock;
5462 
5463 static int
5464 zfs_init_sharefs()
5465 {
5466 	int error;
5467 
5468 	ASSERT(MUTEX_HELD(&zfs_share_lock));
5469 	/* Both NFS and SMB shares also require sharetab support. */
5470 	if (sharefs_mod == NULL && ((sharefs_mod =
5471 	    ddi_modopen("fs/sharefs",
5472 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
5473 		return (SET_ERROR(ENOSYS));
5474 	}
5475 	if (zshare_fs == NULL && ((zshare_fs =
5476 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
5477 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
5478 		return (SET_ERROR(ENOSYS));
5479 	}
5480 	return (0);
5481 }
5482 
5483 static int
5484 zfs_ioc_share(zfs_cmd_t *zc)
5485 {
5486 	int error;
5487 	int opcode;
5488 
5489 	switch (zc->zc_share.z_sharetype) {
5490 	case ZFS_SHARE_NFS:
5491 	case ZFS_UNSHARE_NFS:
5492 		if (zfs_nfsshare_inited == 0) {
5493 			mutex_enter(&zfs_share_lock);
5494 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
5495 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5496 				mutex_exit(&zfs_share_lock);
5497 				return (SET_ERROR(ENOSYS));
5498 			}
5499 			if (znfsexport_fs == NULL &&
5500 			    ((znfsexport_fs = (int (*)(void *))
5501 			    ddi_modsym(nfs_mod,
5502 			    "nfs_export", &error)) == NULL)) {
5503 				mutex_exit(&zfs_share_lock);
5504 				return (SET_ERROR(ENOSYS));
5505 			}
5506 			error = zfs_init_sharefs();
5507 			if (error != 0) {
5508 				mutex_exit(&zfs_share_lock);
5509 				return (SET_ERROR(ENOSYS));
5510 			}
5511 			zfs_nfsshare_inited = 1;
5512 			mutex_exit(&zfs_share_lock);
5513 		}
5514 		break;
5515 	case ZFS_SHARE_SMB:
5516 	case ZFS_UNSHARE_SMB:
5517 		if (zfs_smbshare_inited == 0) {
5518 			mutex_enter(&zfs_share_lock);
5519 			if (smbsrv_mod == NULL && ((smbsrv_mod =
5520 			    ddi_modopen("drv/smbsrv",
5521 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5522 				mutex_exit(&zfs_share_lock);
5523 				return (SET_ERROR(ENOSYS));
5524 			}
5525 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
5526 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
5527 			    "smb_server_share", &error)) == NULL)) {
5528 				mutex_exit(&zfs_share_lock);
5529 				return (SET_ERROR(ENOSYS));
5530 			}
5531 			error = zfs_init_sharefs();
5532 			if (error != 0) {
5533 				mutex_exit(&zfs_share_lock);
5534 				return (SET_ERROR(ENOSYS));
5535 			}
5536 			zfs_smbshare_inited = 1;
5537 			mutex_exit(&zfs_share_lock);
5538 		}
5539 		break;
5540 	default:
5541 		return (SET_ERROR(EINVAL));
5542 	}
5543 
5544 	switch (zc->zc_share.z_sharetype) {
5545 	case ZFS_SHARE_NFS:
5546 	case ZFS_UNSHARE_NFS:
5547 		if (error =
5548 		    znfsexport_fs((void *)
5549 		    (uintptr_t)zc->zc_share.z_exportdata))
5550 			return (error);
5551 		break;
5552 	case ZFS_SHARE_SMB:
5553 	case ZFS_UNSHARE_SMB:
5554 		if (error = zsmbexport_fs((void *)
5555 		    (uintptr_t)zc->zc_share.z_exportdata,
5556 		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
5557 		    B_TRUE: B_FALSE)) {
5558 			return (error);
5559 		}
5560 		break;
5561 	}
5562 
5563 	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
5564 	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
5565 	    SHAREFS_ADD : SHAREFS_REMOVE;
5566 
5567 	/*
5568 	 * Add or remove share from sharetab
5569 	 */
5570 	error = zshare_fs(opcode,
5571 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
5572 	    zc->zc_share.z_sharemax);
5573 
5574 	return (error);
5575 
5576 }
5577 
/*
 * Single-entry ACL that zfs_ioc_smb_acl() passes to VOP_CREATE() when it
 * creates a file in the shares directory.
 * NOTE(review): presumably this grants ACE_ALL_PERMS to everyone
 * (ACE_EVERYONE) -- confirm against the ace_t field order.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
5581 
5582 /*
5583  * inputs:
5584  * zc_name		name of containing filesystem
5585  * zc_obj		object # beyond which we want next in-use object #
5586  *
5587  * outputs:
5588  * zc_obj		next in-use object #
5589  */
5590 static int
5591 zfs_ioc_next_obj(zfs_cmd_t *zc)
5592 {
5593 	objset_t *os = NULL;
5594 	int error;
5595 
5596 	error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
5597 	if (error != 0)
5598 		return (error);
5599 
5600 	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5601 	    dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5602 
5603 	dmu_objset_rele(os, FTAG);
5604 	return (error);
5605 }
5606 
5607 /*
5608  * inputs:
5609  * zc_name		name of filesystem
5610  * zc_value		prefix name for snapshot
5611  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
5612  *
5613  * outputs:
5614  * zc_value		short name of new snapshot
5615  */
5616 static int
5617 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5618 {
5619 	char *snap_name;
5620 	char *hold_name;
5621 	int error;
5622 	minor_t minor;
5623 
5624 	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5625 	if (error != 0)
5626 		return (error);
5627 
5628 	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5629 	    (u_longlong_t)ddi_get_lbolt64());
5630 	hold_name = kmem_asprintf("%%%s", zc->zc_value);
5631 
5632 	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5633 	    hold_name);
5634 	if (error == 0)
5635 		(void) strcpy(zc->zc_value, snap_name);
5636 	strfree(snap_name);
5637 	strfree(hold_name);
5638 	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5639 	return (error);
5640 }
5641 
5642 /*
5643  * inputs:
5644  * zc_name		name of "to" snapshot
5645  * zc_value		name of "from" snapshot
5646  * zc_cookie		file descriptor to write diff data on
5647  *
5648  * outputs:
5649  * dmu_diff_record_t's to the file descriptor
5650  */
5651 static int
5652 zfs_ioc_diff(zfs_cmd_t *zc)
5653 {
5654 	file_t *fp;
5655 	offset_t off;
5656 	int error;
5657 
5658 	fp = getf(zc->zc_cookie);
5659 	if (fp == NULL)
5660 		return (SET_ERROR(EBADF));
5661 
5662 	off = fp->f_offset;
5663 
5664 	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5665 
5666 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5667 		fp->f_offset = off;
5668 	releasef(zc->zc_cookie);
5669 
5670 	return (error);
5671 }
5672 
5673 /*
5674  * Remove all ACL files in shares dir
5675  */
5676 static int
5677 zfs_smb_acl_purge(znode_t *dzp)
5678 {
5679 	zap_cursor_t	zc;
5680 	zap_attribute_t	zap;
5681 	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
5682 	int error;
5683 
5684 	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
5685 	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
5686 	    zap_cursor_advance(&zc)) {
5687 		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
5688 		    NULL, 0)) != 0)
5689 			break;
5690 	}
5691 	zap_cursor_fini(&zc);
5692 	return (error);
5693 }
5694 
5695 static int
5696 zfs_ioc_smb_acl(zfs_cmd_t *zc)
5697 {
5698 	vnode_t *vp;
5699 	znode_t *dzp;
5700 	vnode_t *resourcevp = NULL;
5701 	znode_t *sharedir;
5702 	zfsvfs_t *zfsvfs;
5703 	nvlist_t *nvlist;
5704 	char *src, *target;
5705 	vattr_t vattr;
5706 	vsecattr_t vsec;
5707 	int error = 0;
5708 
5709 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5710 	    NO_FOLLOW, NULL, &vp)) != 0)
5711 		return (error);
5712 
5713 	/* Now make sure mntpnt and dataset are ZFS */
5714 
5715 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
5716 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5717 	    zc->zc_name) != 0)) {
5718 		VN_RELE(vp);
5719 		return (SET_ERROR(EINVAL));
5720 	}
5721 
5722 	dzp = VTOZ(vp);
5723 	zfsvfs = dzp->z_zfsvfs;
5724 	ZFS_ENTER(zfsvfs);
5725 
5726 	/*
5727 	 * Create share dir if its missing.
5728 	 */
5729 	mutex_enter(&zfsvfs->z_lock);
5730 	if (zfsvfs->z_shares_dir == 0) {
5731 		dmu_tx_t *tx;
5732 
5733 		tx = dmu_tx_create(zfsvfs->z_os);
5734 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5735 		    ZFS_SHARES_DIR);
5736 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5737 		error = dmu_tx_assign(tx, TXG_WAIT);
5738 		if (error != 0) {
5739 			dmu_tx_abort(tx);
5740 		} else {
5741 			error = zfs_create_share_dir(zfsvfs, tx);
5742 			dmu_tx_commit(tx);
5743 		}
5744 		if (error != 0) {
5745 			mutex_exit(&zfsvfs->z_lock);
5746 			VN_RELE(vp);
5747 			ZFS_EXIT(zfsvfs);
5748 			return (error);
5749 		}
5750 	}
5751 	mutex_exit(&zfsvfs->z_lock);
5752 
5753 	ASSERT(zfsvfs->z_shares_dir);
5754 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5755 		VN_RELE(vp);
5756 		ZFS_EXIT(zfsvfs);
5757 		return (error);
5758 	}
5759 
5760 	switch (zc->zc_cookie) {
5761 	case ZFS_SMB_ACL_ADD:
5762 		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5763 		vattr.va_type = VREG;
5764 		vattr.va_mode = S_IFREG|0777;
5765 		vattr.va_uid = 0;
5766 		vattr.va_gid = 0;
5767 
5768 		vsec.vsa_mask = VSA_ACE;
5769 		vsec.vsa_aclentp = &full_access;
5770 		vsec.vsa_aclentsz = sizeof (full_access);
5771 		vsec.vsa_aclcnt = 1;
5772 
5773 		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5774 		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5775 		if (resourcevp)
5776 			VN_RELE(resourcevp);
5777 		break;
5778 
5779 	case ZFS_SMB_ACL_REMOVE:
5780 		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5781 		    NULL, 0);
5782 		break;
5783 
5784 	case ZFS_SMB_ACL_RENAME:
5785 		if ((error = get_nvlist(zc->zc_nvlist_src,
5786 		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5787 			VN_RELE(vp);
5788 			VN_RELE(ZTOV(sharedir));
5789 			ZFS_EXIT(zfsvfs);
5790 			return (error);
5791 		}
5792 		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5793 		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5794 		    &target)) {
5795 			VN_RELE(vp);
5796 			VN_RELE(ZTOV(sharedir));
5797 			ZFS_EXIT(zfsvfs);
5798 			nvlist_free(nvlist);
5799 			return (error);
5800 		}
5801 		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5802 		    kcred, NULL, 0);
5803 		nvlist_free(nvlist);
5804 		break;
5805 
5806 	case ZFS_SMB_ACL_PURGE:
5807 		error = zfs_smb_acl_purge(sharedir);
5808 		break;
5809 
5810 	default:
5811 		error = SET_ERROR(EINVAL);
5812 		break;
5813 	}
5814 
5815 	VN_RELE(vp);
5816 	VN_RELE(ZTOV(sharedir));
5817 
5818 	ZFS_EXIT(zfsvfs);
5819 
5820 	return (error);
5821 }
5822 
5823 /*
5824  * innvl: {
5825  *     "holds" -> { snapname -> holdname (string), ... }
5826  *     (optional) "cleanup_fd" -> fd (int32)
5827  * }
5828  *
5829  * outnvl: {
5830  *     snapname -> error value (int32)
5831  *     ...
5832  * }
5833  */
5834 /* ARGSUSED */
5835 static int
5836 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5837 {
5838 	nvpair_t *pair;
5839 	nvlist_t *holds;
5840 	int cleanup_fd = -1;
5841 	int error;
5842 	minor_t minor = 0;
5843 
5844 	error = nvlist_lookup_nvlist(args, "holds", &holds);
5845 	if (error != 0)
5846 		return (SET_ERROR(EINVAL));
5847 
5848 	/* make sure the user didn't pass us any invalid (empty) tags */
5849 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5850 	    pair = nvlist_next_nvpair(holds, pair)) {
5851 		char *htag;
5852 
5853 		error = nvpair_value_string(pair, &htag);
5854 		if (error != 0)
5855 			return (SET_ERROR(error));
5856 
5857 		if (strlen(htag) == 0)
5858 			return (SET_ERROR(EINVAL));
5859 	}
5860 
5861 	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5862 		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5863 		if (error != 0)
5864 			return (error);
5865 	}
5866 
5867 	error = dsl_dataset_user_hold(holds, minor, errlist);
5868 	if (minor != 0)
5869 		zfs_onexit_fd_rele(cleanup_fd);
5870 	return (error);
5871 }
5872 
5873 /*
5874  * innvl is not used.
5875  *
5876  * outnvl: {
5877  *    holdname -> time added (uint64 seconds since epoch)
5878  *    ...
5879  * }
5880  */
5881 /* ARGSUSED */
5882 static int
5883 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5884 {
5885 	ASSERT3P(args, ==, NULL);
5886 	return (dsl_dataset_get_holds(snapname, outnvl));
5887 }
5888 
5889 /*
5890  * innvl: {
5891  *     snapname -> { holdname, ... }
5892  *     ...
5893  * }
5894  *
5895  * outnvl: {
5896  *     snapname -> error value (int32)
5897  *     ...
5898  * }
5899  */
5900 /* ARGSUSED */
5901 static int
5902 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5903 {
5904 	return (dsl_dataset_user_release(holds, errlist));
5905 }
5906 
5907 /*
5908  * inputs:
5909  * zc_name		name of new filesystem or snapshot
5910  * zc_value		full name of old snapshot
5911  *
5912  * outputs:
5913  * zc_cookie		space in bytes
5914  * zc_objset_type	compressed space in bytes
5915  * zc_perm_action	uncompressed space in bytes
5916  */
5917 static int
5918 zfs_ioc_space_written(zfs_cmd_t *zc)
5919 {
5920 	int error;
5921 	dsl_pool_t *dp;
5922 	dsl_dataset_t *new, *old;
5923 
5924 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5925 	if (error != 0)
5926 		return (error);
5927 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5928 	if (error != 0) {
5929 		dsl_pool_rele(dp, FTAG);
5930 		return (error);
5931 	}
5932 	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5933 	if (error != 0) {
5934 		dsl_dataset_rele(new, FTAG);
5935 		dsl_pool_rele(dp, FTAG);
5936 		return (error);
5937 	}
5938 
5939 	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5940 	    &zc->zc_objset_type, &zc->zc_perm_action);
5941 	dsl_dataset_rele(old, FTAG);
5942 	dsl_dataset_rele(new, FTAG);
5943 	dsl_pool_rele(dp, FTAG);
5944 	return (error);
5945 }
5946 
5947 /*
5948  * innvl: {
5949  *     "firstsnap" -> snapshot name
5950  * }
5951  *
5952  * outnvl: {
5953  *     "used" -> space in bytes
5954  *     "compressed" -> compressed space in bytes
5955  *     "uncompressed" -> uncompressed space in bytes
5956  * }
5957  */
5958 static int
5959 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5960 {
5961 	int error;
5962 	dsl_pool_t *dp;
5963 	dsl_dataset_t *new, *old;
5964 	char *firstsnap;
5965 	uint64_t used, comp, uncomp;
5966 
5967 	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5968 		return (SET_ERROR(EINVAL));
5969 
5970 	error = dsl_pool_hold(lastsnap, FTAG, &dp);
5971 	if (error != 0)
5972 		return (error);
5973 
5974 	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5975 	if (error == 0 && !new->ds_is_snapshot) {
5976 		dsl_dataset_rele(new, FTAG);
5977 		error = SET_ERROR(EINVAL);
5978 	}
5979 	if (error != 0) {
5980 		dsl_pool_rele(dp, FTAG);
5981 		return (error);
5982 	}
5983 	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5984 	if (error == 0 && !old->ds_is_snapshot) {
5985 		dsl_dataset_rele(old, FTAG);
5986 		error = SET_ERROR(EINVAL);
5987 	}
5988 	if (error != 0) {
5989 		dsl_dataset_rele(new, FTAG);
5990 		dsl_pool_rele(dp, FTAG);
5991 		return (error);
5992 	}
5993 
5994 	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5995 	dsl_dataset_rele(old, FTAG);
5996 	dsl_dataset_rele(new, FTAG);
5997 	dsl_pool_rele(dp, FTAG);
5998 	fnvlist_add_uint64(outnvl, "used", used);
5999 	fnvlist_add_uint64(outnvl, "compressed", comp);
6000 	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
6001 	return (error);
6002 }
6003 
6004 /*
6005  * innvl: {
6006  *     "fd" -> file descriptor to write stream to (int32)
6007  *     (optional) "fromsnap" -> full snap name to send an incremental from
6008  *     (optional) "largeblockok" -> (value ignored)
6009  *         indicates that blocks > 128KB are permitted
6010  *     (optional) "embedok" -> (value ignored)
6011  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
6012  *     (optional) "compressok" -> (value ignored)
6013  *         presence indicates compressed DRR_WRITE records are permitted
6014  *     (optional) "rawok" -> (value ignored)
6015  *         presence indicates raw encrypted records should be used.
6016  *     (optional) "resume_object" and "resume_offset" -> (uint64)
6017  *         if present, resume send stream from specified object and offset.
6018  * }
6019  *
6020  * outnvl is unused
6021  */
6022 /* ARGSUSED */
6023 static int
6024 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6025 {
6026 	int error;
6027 	offset_t off;
6028 	char *fromname = NULL;
6029 	int fd;
6030 	boolean_t largeblockok;
6031 	boolean_t embedok;
6032 	boolean_t compressok;
6033 	boolean_t rawok;
6034 	uint64_t resumeobj = 0;
6035 	uint64_t resumeoff = 0;
6036 
6037 	error = nvlist_lookup_int32(innvl, "fd", &fd);
6038 	if (error != 0)
6039 		return (SET_ERROR(EINVAL));
6040 
6041 	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
6042 
6043 	largeblockok = nvlist_exists(innvl, "largeblockok");
6044 	embedok = nvlist_exists(innvl, "embedok");
6045 	compressok = nvlist_exists(innvl, "compressok");
6046 	rawok = nvlist_exists(innvl, "rawok");
6047 
6048 	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6049 	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6050 
6051 	file_t *fp = getf(fd);
6052 	if (fp == NULL)
6053 		return (SET_ERROR(EBADF));
6054 
6055 	off = fp->f_offset;
6056 	error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
6057 	    rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off);
6058 
6059 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
6060 		fp->f_offset = off;
6061 	releasef(fd);
6062 	return (error);
6063 }
6064 
6065 /*
6066  * Determine approximately how large a zfs send stream will be -- the number
6067  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
6068  *
6069  * innvl: {
6070  *     (optional) "from" -> full snap or bookmark name to send an incremental
6071  *                          from
6072  *     (optional) "largeblockok" -> (value ignored)
6073  *         indicates that blocks > 128KB are permitted
6074  *     (optional) "embedok" -> (value ignored)
6075  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
6076  *     (optional) "compressok" -> (value ignored)
6077  *         presence indicates compressed DRR_WRITE records are permitted
6078  * }
6079  *
6080  * outnvl: {
6081  *     "space" -> bytes of space (uint64)
6082  * }
6083  */
6084 static int
6085 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6086 {
6087 	dsl_pool_t *dp;
6088 	dsl_dataset_t *tosnap;
6089 	int error;
6090 	char *fromname;
6091 	boolean_t compressok;
6092 	boolean_t rawok;
6093 	uint64_t space;
6094 
6095 	error = dsl_pool_hold(snapname, FTAG, &dp);
6096 	if (error != 0)
6097 		return (error);
6098 
6099 	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
6100 	if (error != 0) {
6101 		dsl_pool_rele(dp, FTAG);
6102 		return (error);
6103 	}
6104 
6105 	compressok = nvlist_exists(innvl, "compressok");
6106 	rawok = nvlist_exists(innvl, "rawok");
6107 
6108 	error = nvlist_lookup_string(innvl, "from", &fromname);
6109 	if (error == 0) {
6110 		if (strchr(fromname, '@') != NULL) {
6111 			/*
6112 			 * If from is a snapshot, hold it and use the more
6113 			 * efficient dmu_send_estimate to estimate send space
6114 			 * size using deadlists.
6115 			 */
6116 			dsl_dataset_t *fromsnap;
6117 			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
6118 			if (error != 0)
6119 				goto out;
6120 			error = dmu_send_estimate(tosnap, fromsnap,
6121 			    compressok || rawok, &space);
6122 			dsl_dataset_rele(fromsnap, FTAG);
6123 		} else if (strchr(fromname, '#') != NULL) {
6124 			/*
6125 			 * If from is a bookmark, fetch the creation TXG of the
6126 			 * snapshot it was created from and use that to find
6127 			 * blocks that were born after it.
6128 			 */
6129 			zfs_bookmark_phys_t frombm;
6130 
6131 			error = dsl_bookmark_lookup(dp, fromname, tosnap,
6132 			    &frombm);
6133 			if (error != 0)
6134 				goto out;
6135 			error = dmu_send_estimate_from_txg(tosnap,
6136 			    frombm.zbm_creation_txg, compressok || rawok,
6137 			    &space);
6138 		} else {
6139 			/*
6140 			 * from is not properly formatted as a snapshot or
6141 			 * bookmark
6142 			 */
6143 			error = SET_ERROR(EINVAL);
6144 			goto out;
6145 		}
6146 	} else {
6147 		/*
6148 		 * If estimating the size of a full send, use dmu_send_estimate.
6149 		 */
6150 		error = dmu_send_estimate(tosnap, NULL, compressok || rawok,
6151 		    &space);
6152 	}
6153 
6154 	fnvlist_add_uint64(outnvl, "space", space);
6155 
6156 out:
6157 	dsl_dataset_rele(tosnap, FTAG);
6158 	dsl_pool_rele(dp, FTAG);
6159 	return (error);
6160 }
6161 
6162 /*
6163  * Sync the currently open TXG to disk for the specified pool.
6164  * This is somewhat similar to 'zfs_sync()'.
6165  * For cases that do not result in error this ioctl will wait for
6166  * the currently open TXG to commit before returning back to the caller.
6167  *
6168  * innvl: {
6169  *  "force" -> when true, force uberblock update even if there is no dirty data.
6170  *             In addition this will cause the vdev configuration to be written
6171  *             out including updating the zpool cache file. (boolean_t)
6172  * }
6173  *
6174  * onvl is unused
6175  */
6176 /* ARGSUSED */
6177 static int
6178 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
6179 {
6180 	int err;
6181 	boolean_t force;
6182 	spa_t *spa;
6183 
6184 	if ((err = spa_open(pool, &spa, FTAG)) != 0)
6185 		return (err);
6186 
6187 	force = fnvlist_lookup_boolean_value(innvl, "force");
6188 	if (force) {
6189 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
6190 		vdev_config_dirty(spa->spa_root_vdev);
6191 		spa_config_exit(spa, SCL_CONFIG, FTAG);
6192 	}
6193 	txg_wait_synced(spa_get_dsl(spa), 0);
6194 
6195 	spa_close(spa, FTAG);
6196 
6197 	return (err);
6198 }
6199 
6200 /*
6201  * Load a user's wrapping key into the kernel.
6202  * innvl: {
6203  *     "hidden_args" -> { "wkeydata" -> value }
6204  *         raw uint8_t array of encryption wrapping key data (32 bytes)
6205  *     (optional) "noop" -> (value ignored)
6206  *         presence indicated key should only be verified, not loaded
6207  * }
6208  */
6209 /* ARGSUSED */
6210 static int
6211 zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6212 {
6213 	int ret = 0;
6214 	dsl_crypto_params_t *dcp = NULL;
6215 	nvlist_t *hidden_args;
6216 	boolean_t noop = nvlist_exists(innvl, "noop");
6217 
6218 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6219 		ret = SET_ERROR(EINVAL);
6220 		goto error;
6221 	}
6222 
6223 	ret = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
6224 	if (ret != 0) {
6225 		ret = SET_ERROR(EINVAL);
6226 		goto error;
6227 	}
6228 
6229 	ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
6230 	    hidden_args, &dcp);
6231 	if (ret != 0)
6232 		goto error;
6233 
6234 	ret = spa_keystore_load_wkey(dsname, dcp, noop);
6235 	if (ret != 0)
6236 		goto error;
6237 
6238 	dsl_crypto_params_free(dcp, noop);
6239 
6240 	return (0);
6241 
6242 error:
6243 	dsl_crypto_params_free(dcp, B_TRUE);
6244 	return (ret);
6245 }
6246 
6247 /*
6248  * Unload a user's wrapping key from the kernel.
6249  * Both innvl and outnvl are unused.
6250  */
6251 /* ARGSUSED */
6252 static int
6253 zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6254 {
6255 	int ret = 0;
6256 
6257 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6258 		ret = (SET_ERROR(EINVAL));
6259 		goto out;
6260 	}
6261 
6262 	ret = spa_keystore_unload_wkey(dsname);
6263 	if (ret != 0)
6264 		goto out;
6265 
6266 out:
6267 	return (ret);
6268 }
6269 
6270 /*
6271  * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
6272  * keylocation, pbkdf2salt, and  pbkdf2iters properties can also be specified
6273  * here to change how the key is derived in userspace.
6274  *
6275  * innvl: {
6276  *    "hidden_args" (optional) -> { "wkeydata" -> value }
6277  *         raw uint8_t array of new encryption wrapping key data (32 bytes)
6278  *    "props" (optional) -> { prop -> value }
6279  * }
6280  *
6281  * outnvl is unused
6282  */
6283 /* ARGSUSED */
6284 static int
6285 zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
6286 {
6287 	int ret;
6288 	uint64_t cmd = DCP_CMD_NONE;
6289 	dsl_crypto_params_t *dcp = NULL;
6290 	nvlist_t *args = NULL, *hidden_args = NULL;
6291 
6292 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
6293 		ret = (SET_ERROR(EINVAL));
6294 		goto error;
6295 	}
6296 
6297 	(void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
6298 	(void) nvlist_lookup_nvlist(innvl, "props", &args);
6299 	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
6300 
6301 	ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
6302 	if (ret != 0)
6303 		goto error;
6304 
6305 	ret = spa_keystore_change_key(dsname, dcp);
6306 	if (ret != 0)
6307 		goto error;
6308 
6309 	dsl_crypto_params_free(dcp, B_FALSE);
6310 
6311 	return (0);
6312 
6313 error:
6314 	dsl_crypto_params_free(dcp, B_TRUE);
6315 	return (ret);
6316 }
6317 
/*
 * Table with one entry per ioctl, indexed by (ioc - ZFS_IOC_FIRST).  Entries
 * are filled in by zfs_ioctl_register() and zfs_ioctl_register_legacy().
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
6319 
6320 static void
6321 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6322     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6323     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
6324 {
6325 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6326 
6327 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6328 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
6329 	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6330 	ASSERT3P(vec->zvec_func, ==, NULL);
6331 
6332 	vec->zvec_legacy_func = func;
6333 	vec->zvec_secpolicy = secpolicy;
6334 	vec->zvec_namecheck = namecheck;
6335 	vec->zvec_allow_log = log_history;
6336 	vec->zvec_pool_check = pool_check;
6337 }
6338 
6339 /*
6340  * See the block comment at the beginning of this file for details on
6341  * each argument to this function.
6342  */
6343 static void
6344 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
6345     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6346     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
6347     boolean_t allow_log)
6348 {
6349 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6350 
6351 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6352 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
6353 	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6354 	ASSERT3P(vec->zvec_func, ==, NULL);
6355 
6356 	/* if we are logging, the name must be valid */
6357 	ASSERT(!allow_log || namecheck != NO_NAME);
6358 
6359 	vec->zvec_name = name;
6360 	vec->zvec_func = func;
6361 	vec->zvec_secpolicy = secpolicy;
6362 	vec->zvec_namecheck = namecheck;
6363 	vec->zvec_pool_check = pool_check;
6364 	vec->zvec_smush_outnvlist = smush_outnvlist;
6365 	vec->zvec_allow_log = allow_log;
6366 }
6367 
6368 static void
6369 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6370     zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
6371     zfs_ioc_poolcheck_t pool_check)
6372 {
6373 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6374 	    POOL_NAME, log_history, pool_check);
6375 }
6376 
6377 static void
6378 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6379     zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
6380 {
6381 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6382 	    DATASET_NAME, B_FALSE, pool_check);
6383 }
6384 
6385 static void
6386 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6387 {
6388 	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
6389 	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6390 }
6391 
6392 static void
6393 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6394     zfs_secpolicy_func_t *secpolicy)
6395 {
6396 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6397 	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
6398 }
6399 
6400 static void
6401 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
6402     zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
6403 {
6404 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6405 	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
6406 }
6407 
6408 static void
6409 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6410 {
6411 	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
6412 	    zfs_secpolicy_read);
6413 }
6414 
6415 static void
6416 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6417     zfs_secpolicy_func_t *secpolicy)
6418 {
6419 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
6420 	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6421 }
6422 
6423 static void
6424 zfs_ioctl_init(void)
6425 {
6426 	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
6427 	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
6428 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6429 
6430 	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
6431 	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
6432 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
6433 
6434 	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
6435 	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
6436 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6437 
6438 	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
6439 	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
6440 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6441 
6442 	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
6443 	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
6444 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6445 
6446 	zfs_ioctl_register("create", ZFS_IOC_CREATE,
6447 	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
6448 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6449 
6450 	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
6451 	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
6452 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6453 
6454 	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
6455 	    zfs_ioc_remap, zfs_secpolicy_remap, DATASET_NAME,
6456 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
6457 
6458 	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
6459 	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
6460 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6461 
6462 	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
6463 	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
6464 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6465 	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
6466 	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
6467 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6468 
6469 	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
6470 	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
6471 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6472 
6473 	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
6474 	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
6475 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
6476 
6477 	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
6478 	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
6479 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6480 
6481 	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
6482 	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
6483 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
6484 
6485 	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
6486 	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
6487 	    POOL_NAME,
6488 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6489 
6490 	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
6491 	    zfs_ioc_channel_program, zfs_secpolicy_config,
6492 	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
6493 	    B_TRUE);
6494 
6495 	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
6496 	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
6497 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6498 
6499 	zfs_ioctl_register("zpool_discard_checkpoint",
6500 	    ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
6501 	    zfs_secpolicy_config, POOL_NAME,
6502 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6503 
6504 	zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
6505 	    zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
6506 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6507 
6508 	zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
6509 	    zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
6510 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
6511 
6512 	zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
6513 	    zfs_ioc_load_key, zfs_secpolicy_load_key,
6514 	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
6515 	zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
6516 	    zfs_ioc_unload_key, zfs_secpolicy_load_key,
6517 	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
6518 	zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
6519 	    zfs_ioc_change_key, zfs_secpolicy_change_key,
6520 	    DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
6521 	    B_TRUE, B_TRUE);
6522 
6523 	/* IOCTLS that use the legacy function signature */
6524 
6525 	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
6526 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
6527 
6528 	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
6529 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
6530 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
6531 	    zfs_ioc_pool_scan);
6532 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
6533 	    zfs_ioc_pool_upgrade);
6534 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
6535 	    zfs_ioc_vdev_add);
6536 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
6537 	    zfs_ioc_vdev_remove);
6538 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
6539 	    zfs_ioc_vdev_set_state);
6540 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
6541 	    zfs_ioc_vdev_attach);
6542 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
6543 	    zfs_ioc_vdev_detach);
6544 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
6545 	    zfs_ioc_vdev_setpath);
6546 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
6547 	    zfs_ioc_vdev_setfru);
6548 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
6549 	    zfs_ioc_pool_set_props);
6550 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
6551 	    zfs_ioc_vdev_split);
6552 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
6553 	    zfs_ioc_pool_reguid);
6554 
6555 	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
6556 	    zfs_ioc_pool_configs, zfs_secpolicy_none);
6557 	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
6558 	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
6559 	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
6560 	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
6561 	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
6562 	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
6563 	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
6564 	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);
6565 
	/*
	 * Pool destroy and export don't log the history as part of
	 * zfsdev_ioctl; instead, zfs_ioc_pool_export does the logging
	 * of those commands.
	 */
6571 	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
6572 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
6573 	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
6574 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
6575 
6576 	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
6577 	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
6578 	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
6579 	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
6580 
6581 	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
6582 	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
6583 	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
6584 	    zfs_ioc_dsobj_to_dsname,
6585 	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
6586 	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
6587 	    zfs_ioc_pool_get_history,
6588 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
6589 
6590 	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
6591 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
6592 
6593 	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
6594 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
6595 	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
6596 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
6597 
6598 	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
6599 	    zfs_ioc_space_written);
6600 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
6601 	    zfs_ioc_objset_recvd_props);
6602 	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
6603 	    zfs_ioc_next_obj);
6604 	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
6605 	    zfs_ioc_get_fsacl);
6606 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
6607 	    zfs_ioc_objset_stats);
6608 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
6609 	    zfs_ioc_objset_zplprops);
6610 	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
6611 	    zfs_ioc_dataset_list_next);
6612 	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
6613 	    zfs_ioc_snapshot_list_next);
6614 	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
6615 	    zfs_ioc_send_progress);
6616 
6617 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
6618 	    zfs_ioc_diff, zfs_secpolicy_diff);
6619 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
6620 	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
6621 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
6622 	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
6623 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
6624 	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
6625 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
6626 	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
6627 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
6628 	    zfs_ioc_send, zfs_secpolicy_send);
6629 
6630 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
6631 	    zfs_secpolicy_none);
6632 	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
6633 	    zfs_secpolicy_destroy);
6634 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
6635 	    zfs_secpolicy_rename);
6636 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
6637 	    zfs_secpolicy_recv);
6638 	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
6639 	    zfs_secpolicy_promote);
6640 	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
6641 	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
6642 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
6643 	    zfs_secpolicy_set_fsacl);
6644 
6645 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
6646 	    zfs_secpolicy_share, POOL_CHECK_NONE);
6647 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
6648 	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
6649 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
6650 	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
6651 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6652 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
6653 	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
6654 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6655 }
6656 
6657 int
6658 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
6659     zfs_ioc_poolcheck_t check)
6660 {
6661 	spa_t *spa;
6662 	int error;
6663 
6664 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
6665 
6666 	if (check & POOL_CHECK_NONE)
6667 		return (0);
6668 
6669 	error = spa_open(name, &spa, FTAG);
6670 	if (error == 0) {
6671 		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
6672 			error = SET_ERROR(EAGAIN);
6673 		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
6674 			error = SET_ERROR(EROFS);
6675 		spa_close(spa, FTAG);
6676 	}
6677 	return (error);
6678 }
6679 
6680 /*
6681  * Find a free minor number.
6682  */
6683 minor_t
6684 zfsdev_minor_alloc(void)
6685 {
6686 	static minor_t last_minor;
6687 	minor_t m;
6688 
6689 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6690 
6691 	for (m = last_minor + 1; m != last_minor; m++) {
6692 		if (m > ZFSDEV_MAX_MINOR)
6693 			m = 1;
6694 		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
6695 			last_minor = m;
6696 			return (m);
6697 		}
6698 	}
6699 
6700 	return (0);
6701 }
6702 
6703 static int
6704 zfs_ctldev_init(dev_t *devp)
6705 {
6706 	minor_t minor;
6707 	zfs_soft_state_t *zs;
6708 
6709 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6710 	ASSERT(getminor(*devp) == 0);
6711 
6712 	minor = zfsdev_minor_alloc();
6713 	if (minor == 0)
6714 		return (SET_ERROR(ENXIO));
6715 
6716 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
6717 		return (SET_ERROR(EAGAIN));
6718 
6719 	*devp = makedevice(getemajor(*devp), minor);
6720 
6721 	zs = ddi_get_soft_state(zfsdev_state, minor);
6722 	zs->zss_type = ZSST_CTLDEV;
6723 	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
6724 
6725 	return (0);
6726 }
6727 
/*
 * Undo zfs_ctldev_init() for one control-device open: hand `zo' to
 * zfs_onexit_destroy() and then free the soft state slot for `minor'.
 *
 * The caller must hold zfsdev_state_lock.
 */
static void
zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
{
	ASSERT(MUTEX_HELD(&zfsdev_state_lock));

	zfs_onexit_destroy(zo);
	ddi_soft_state_free(zfsdev_state, minor);
}
6736 
6737 void *
6738 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
6739 {
6740 	zfs_soft_state_t *zp;
6741 
6742 	zp = ddi_get_soft_state(zfsdev_state, minor);
6743 	if (zp == NULL || zp->zss_type != which)
6744 		return (NULL);
6745 
6746 	return (zp->zss_data);
6747 }
6748 
6749 static int
6750 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
6751 {
6752 	int error = 0;
6753 
6754 	if (getminor(*devp) != 0)
6755 		return (zvol_open(devp, flag, otyp, cr));
6756 
6757 	/* This is the control device. Allocate a new minor if requested. */
6758 	if (flag & FEXCL) {
6759 		mutex_enter(&zfsdev_state_lock);
6760 		error = zfs_ctldev_init(devp);
6761 		mutex_exit(&zfsdev_state_lock);
6762 	}
6763 
6764 	return (error);
6765 }
6766 
6767 static int
6768 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
6769 {
6770 	zfs_onexit_t *zo;
6771 	minor_t minor = getminor(dev);
6772 
6773 	if (minor == 0)
6774 		return (0);
6775 
6776 	mutex_enter(&zfsdev_state_lock);
6777 	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
6778 	if (zo == NULL) {
6779 		mutex_exit(&zfsdev_state_lock);
6780 		return (zvol_close(dev, flag, otyp, cr));
6781 	}
6782 	zfs_ctldev_destroy(zo, minor);
6783 	mutex_exit(&zfsdev_state_lock);
6784 
6785 	return (0);
6786 }
6787 
6788 static int
6789 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
6790 {
6791 	zfs_cmd_t *zc;
6792 	uint_t vecnum;
6793 	int error, rc, len;
6794 	minor_t minor = getminor(dev);
6795 	const zfs_ioc_vec_t *vec;
6796 	char *saved_poolname = NULL;
6797 	nvlist_t *innvl = NULL;
6798 
6799 	if (minor != 0 &&
6800 	    zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
6801 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
6802 
6803 	vecnum = cmd - ZFS_IOC_FIRST;
6804 	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
6805 
6806 	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
6807 		return (SET_ERROR(EINVAL));
6808 	vec = &zfs_ioc_vec[vecnum];
6809 
6810 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
6811 
6812 	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
6813 	if (error != 0) {
6814 		error = SET_ERROR(EFAULT);
6815 		goto out;
6816 	}
6817 
6818 	zc->zc_iflags = flag & FKIOCTL;
6819 	if (zc->zc_nvlist_src_size != 0) {
6820 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6821 		    zc->zc_iflags, &innvl);
6822 		if (error != 0)
6823 			goto out;
6824 	}
6825 
6826 	/*
6827 	 * Ensure that all pool/dataset names are valid before we pass down to
6828 	 * the lower layers.
6829 	 */
6830 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6831 	switch (vec->zvec_namecheck) {
6832 	case POOL_NAME:
6833 		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
6834 			error = SET_ERROR(EINVAL);
6835 		else
6836 			error = pool_status_check(zc->zc_name,
6837 			    vec->zvec_namecheck, vec->zvec_pool_check);
6838 		break;
6839 
6840 	case DATASET_NAME:
6841 		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
6842 			error = SET_ERROR(EINVAL);
6843 		else
6844 			error = pool_status_check(zc->zc_name,
6845 			    vec->zvec_namecheck, vec->zvec_pool_check);
6846 		break;
6847 
6848 	case NO_NAME:
6849 		break;
6850 	}
6851 
6852 
6853 	if (error == 0)
6854 		error = vec->zvec_secpolicy(zc, innvl, cr);
6855 
6856 	if (error != 0)
6857 		goto out;
6858 
6859 	/* legacy ioctls can modify zc_name */
6860 	len = strcspn(zc->zc_name, "/@#") + 1;
6861 	saved_poolname = kmem_alloc(len, KM_SLEEP);
6862 	(void) strlcpy(saved_poolname, zc->zc_name, len);
6863 
6864 	if (vec->zvec_func != NULL) {
6865 		nvlist_t *outnvl;
6866 		int puterror = 0;
6867 		spa_t *spa;
6868 		nvlist_t *lognv = NULL;
6869 
6870 		ASSERT(vec->zvec_legacy_func == NULL);
6871 
6872 		/*
6873 		 * Add the innvl to the lognv before calling the func,
6874 		 * in case the func changes the innvl.
6875 		 */
6876 		if (vec->zvec_allow_log) {
6877 			lognv = fnvlist_alloc();
6878 			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
6879 			    vec->zvec_name);
6880 			if (!nvlist_empty(innvl)) {
6881 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
6882 				    innvl);
6883 			}
6884 		}
6885 
6886 		outnvl = fnvlist_alloc();
6887 		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
6888 
6889 		/*
6890 		 * Some commands can partially execute, modify state, and still
6891 		 * return an error.  In these cases, attempt to record what
6892 		 * was modified.
6893 		 */
6894 		if ((error == 0 ||
6895 		    (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
6896 		    vec->zvec_allow_log &&
6897 		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
6898 			if (!nvlist_empty(outnvl)) {
6899 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
6900 				    outnvl);
6901 			}
6902 			if (error != 0) {
6903 				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
6904 				    error);
6905 			}
6906 			(void) spa_history_log_nvl(spa, lognv);
6907 			spa_close(spa, FTAG);
6908 		}
6909 		fnvlist_free(lognv);
6910 
6911 		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
6912 			int smusherror = 0;
6913 			if (vec->zvec_smush_outnvlist) {
6914 				smusherror = nvlist_smush(outnvl,
6915 				    zc->zc_nvlist_dst_size);
6916 			}
6917 			if (smusherror == 0)
6918 				puterror = put_nvlist(zc, outnvl);
6919 		}
6920 
6921 		if (puterror != 0)
6922 			error = puterror;
6923 
6924 		nvlist_free(outnvl);
6925 	} else {
6926 		error = vec->zvec_legacy_func(zc);
6927 	}
6928 
6929 out:
6930 	nvlist_free(innvl);
6931 	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
6932 	if (error == 0 && rc != 0)
6933 		error = SET_ERROR(EFAULT);
6934 	if (error == 0 && vec->zvec_allow_log) {
6935 		char *s = tsd_get(zfs_allow_log_key);
6936 		if (s != NULL)
6937 			strfree(s);
6938 		(void) tsd_set(zfs_allow_log_key, saved_poolname);
6939 	} else {
6940 		if (saved_poolname != NULL)
6941 			strfree(saved_poolname);
6942 	}
6943 
6944 	kmem_free(zc, sizeof (zfs_cmd_t));
6945 	return (error);
6946 }
6947 
6948 static int
6949 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
6950 {
6951 	if (cmd != DDI_ATTACH)
6952 		return (DDI_FAILURE);
6953 
6954 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
6955 	    DDI_PSEUDO, 0) == DDI_FAILURE)
6956 		return (DDI_FAILURE);
6957 
6958 	zfs_dip = dip;
6959 
6960 	ddi_report_dev(dip);
6961 
6962 	return (DDI_SUCCESS);
6963 }
6964 
6965 static int
6966 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6967 {
6968 	if (spa_busy() || zfs_busy() || zvol_busy())
6969 		return (DDI_FAILURE);
6970 
6971 	if (cmd != DDI_DETACH)
6972 		return (DDI_FAILURE);
6973 
6974 	zfs_dip = NULL;
6975 
6976 	ddi_prop_remove_all(dip);
6977 	ddi_remove_minor_node(dip, NULL);
6978 
6979 	return (DDI_SUCCESS);
6980 }
6981 
6982 /*ARGSUSED*/
6983 static int
6984 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
6985 {
6986 	switch (infocmd) {
6987 	case DDI_INFO_DEVT2DEVINFO:
6988 		*result = zfs_dip;
6989 		return (DDI_SUCCESS);
6990 
6991 	case DDI_INFO_DEVT2INSTANCE:
6992 		*result = (void *)0;
6993 		return (DDI_SUCCESS);
6994 	}
6995 
6996 	return (DDI_FAILURE);
6997 }
6998 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * Note that an exclusive (FEXCL) open of /dev/zfs is also moved to a
 * private minor > 0 by zfsdev_open(); zfsdev_close() and zfsdev_ioctl()
 * tell such minors apart from zvols by their ZSST_CTLDEV soft state.
 */
static struct cb_ops zfs_cb_ops = {
	zfsdev_open,	/* open */
	zfsdev_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
7028 
/*
 * Device operations for the zfs driver: autoconfiguration goes through
 * zfs_info/zfs_attach/zfs_detach, and the character/block entry points
 * are supplied by zfs_cb_ops.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
7043 
/*
 * Driver linkage record: ties the "ZFS storage pool" description and
 * zfs_dev_ops to the generic driver module operations.
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
7049 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info().
 * It lists two linkage records, zfs_modlfs (defined outside this part of
 * the file) and the driver record zfs_modldrv, terminated by NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
7056 
/*
 * Destructor registered for zfs_allow_log_key in _init(): the stored value
 * is a pool name string, which is released with strfree().
 */
static void
zfs_allow_log_destroy(void *arg)
{
	strfree((char *)arg);
}
7063 
7064 int
7065 _init(void)
7066 {
7067 	int error;
7068 
7069 	spa_init(FREAD | FWRITE);
7070 	zfs_init();
7071 	zvol_init();
7072 	zfs_ioctl_init();
7073 
7074 	if ((error = mod_install(&modlinkage)) != 0) {
7075 		zvol_fini();
7076 		zfs_fini();
7077 		spa_fini();
7078 		return (error);
7079 	}
7080 
7081 	tsd_create(&zfs_fsyncer_key, NULL);
7082 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
7083 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
7084 
7085 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
7086 	ASSERT(error == 0);
7087 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
7088 
7089 	return (0);
7090 }
7091 
7092 int
7093 _fini(void)
7094 {
7095 	int error;
7096 
7097 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
7098 		return (SET_ERROR(EBUSY));
7099 
7100 	if ((error = mod_remove(&modlinkage)) != 0)
7101 		return (error);
7102 
7103 	zvol_fini();
7104 	zfs_fini();
7105 	spa_fini();
7106 	if (zfs_nfsshare_inited)
7107 		(void) ddi_modclose(nfs_mod);
7108 	if (zfs_smbshare_inited)
7109 		(void) ddi_modclose(smbsrv_mod);
7110 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
7111 		(void) ddi_modclose(sharefs_mod);
7112 
7113 	tsd_destroy(&zfs_fsyncer_key);
7114 	ldi_ident_release(zfs_li);
7115 	zfs_li = NULL;
7116 	mutex_destroy(&zfs_share_lock);
7117 
7118 	return (error);
7119 }
7120 
/*
 * Module information entry point: returns whatever mod_info() reports
 * for this module's linkage.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
7126