1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26/*
27 * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
28 * Portions Copyright 2011 Martin Matuska
29 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
30 * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
31 * Copyright 2019 Joyent, Inc.
32 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
33 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
34 * Copyright (c) 2013 Steven Hartland. All rights reserved.
35 * Copyright (c) 2014 Integros [integros.com]
36 * Copyright 2016 Toomas Soome <tsoome@me.com>
37 * Copyright (c) 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
38 * Copyright 2017 RackTop Systems.
39 * Copyright (c) 2017, Datto, Inc. All rights reserved.
40 */
41
42/*
43 * ZFS ioctls.
44 *
45 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
46 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
47 *
48 * There are two ways that we handle ioctls: the legacy way where almost
49 * all of the logic is in the ioctl callback, and the new way where most
50 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
51 *
52 * Non-legacy ioctls should be registered by calling
53 * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
54 * from userland by lzc_ioctl().
55 *
56 * The registration arguments are as follows:
57 *
58 * const char *name
59 *   The name of the ioctl.  This is used for history logging.  If the
60 *   ioctl returns successfully (the callback returns 0), and allow_log
61 *   is true, then a history log entry will be recorded with the input &
62 *   output nvlists.  The log entry can be printed with "zpool history -i".
63 *
64 * zfs_ioc_t ioc
65 *   The ioctl request number, which userland will pass to ioctl(2).
66 *   We want newer versions of libzfs and libzfs_core to run against
67 *   existing zfs kernel modules (i.e. a deferred reboot after an update).
68 *   Therefore the ioctl numbers cannot change from release to release.
69 *
70 * zfs_secpolicy_func_t *secpolicy
71 *   This function will be called before the zfs_ioc_func_t, to
72 *   determine if this operation is permitted.  It should return EPERM
73 *   on failure, and 0 on success.  Checks include determining if the
74 *   dataset is visible in this zone, and if the user has either all
75 *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
76 *   to do this operation on this dataset with "zfs allow".
77 *
78 * zfs_ioc_namecheck_t namecheck
79 *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
80 *   name, a dataset name, or nothing.  If the name is not well-formed,
81 *   the ioctl will fail and the callback will not be called.
82 *   Therefore, the callback can assume that the name is well-formed
83 *   (e.g. is null-terminated, doesn't have more than one '@' character,
84 *   doesn't have invalid characters).
85 *
86 * zfs_ioc_poolcheck_t pool_check
87 *   This specifies requirements on the pool state.  If the pool does
88 *   not meet them (is suspended or is readonly), the ioctl will fail
89 *   and the callback will not be called.  If any checks are specified
90 *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
91 *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
92 *   POOL_CHECK_READONLY).
93 *
 * zfs_ioc_key_t *nvl_keys
 *   The list of expected/allowable innvl input keys.  This list is used
 *   to validate the nvlist input to the ioctl.
97 *
98 * boolean_t smush_outnvlist
99 *   If smush_outnvlist is true, then the output is presumed to be a
100 *   list of errors, and it will be "smushed" down to fit into the
101 *   caller's buffer, by removing some entries and replacing them with a
102 *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
103 *   nvlist_smush() for details.  If smush_outnvlist is false, and the
104 *   outnvlist does not fit into the userland-provided buffer, then the
105 *   ioctl will fail with ENOMEM.
106 *
107 * zfs_ioc_func_t *func
108 *   The callback function that will perform the operation.
109 *
110 *   The callback should return 0 on success, or an error number on
111 *   failure.  If the function fails, the userland ioctl will return -1,
112 *   and errno will be set to the callback's return value.  The callback
113 *   will be called with the following arguments:
114 *
115 *   const char *name
116 *     The name of the pool or dataset to operate on, from
117 *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
118 *     expected type (pool, dataset, or none).
119 *
120 *   nvlist_t *innvl
121 *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
122 *     NULL if no input nvlist was provided.  Changes to this nvlist are
123 *     ignored.  If the input nvlist could not be deserialized, the
124 *     ioctl will fail and the callback will not be called.
125 *
126 *   nvlist_t *outnvl
127 *     The output nvlist, initially empty.  The callback can fill it in,
128 *     and it will be returned to userland by serializing it into
129 *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
130 *     fails (e.g. because the caller didn't supply a large enough
131 *     buffer), then the overall ioctl will fail.  See the
132 *     'smush_nvlist' argument above for additional behaviors.
133 *
134 *     There are two typical uses of the output nvlist:
135 *       - To return state, e.g. property values.  In this case,
136 *         smush_outnvlist should be false.  If the buffer was not large
137 *         enough, the caller will reallocate a larger buffer and try
138 *         the ioctl again.
139 *
140 *       - To return multiple errors from an ioctl which makes on-disk
141 *         changes.  In this case, smush_outnvlist should be true.
142 *         Ioctls which make on-disk modifications should generally not
143 *         use the outnvl if they succeed, because the caller can not
144 *         distinguish between the operation failing, and
145 *         deserialization failing.
146 *
147 * IOCTL Interface Errors
148 *
149 * The following ioctl input errors can be returned:
150 *   ZFS_ERR_IOC_CMD_UNAVAIL	the ioctl number is not supported by kernel
151 *   ZFS_ERR_IOC_ARG_UNAVAIL	an input argument is not supported by kernel
152 *   ZFS_ERR_IOC_ARG_REQUIRED	a required input argument is missing
153 *   ZFS_ERR_IOC_ARG_BADTYPE	an input argument has an invalid type
154 */
155
156#include <sys/types.h>
157#include <sys/param.h>
158#include <sys/errno.h>
159#include <sys/uio.h>
160#include <sys/buf.h>
161#include <sys/modctl.h>
162#include <sys/open.h>
163#include <sys/file.h>
164#include <sys/kmem.h>
165#include <sys/conf.h>
166#include <sys/cmn_err.h>
167#include <sys/stat.h>
168#include <sys/zfs_ioctl.h>
169#include <sys/zfs_vfsops.h>
170#include <sys/zfs_znode.h>
171#include <sys/zap.h>
172#include <sys/spa.h>
173#include <sys/spa_impl.h>
174#include <sys/vdev.h>
175#include <sys/priv_impl.h>
176#include <sys/dmu.h>
177#include <sys/dsl_dir.h>
178#include <sys/dsl_dataset.h>
179#include <sys/dsl_prop.h>
180#include <sys/dsl_deleg.h>
181#include <sys/dmu_objset.h>
182#include <sys/dmu_impl.h>
183#include <sys/dmu_tx.h>
184#include <sys/ddi.h>
185#include <sys/sunddi.h>
186#include <sys/sunldi.h>
187#include <sys/policy.h>
188#include <sys/zone.h>
189#include <sys/nvpair.h>
190#include <sys/pathname.h>
191#include <sys/mount.h>
192#include <sys/sdt.h>
193#include <sys/fs/zfs.h>
194#include <sys/zfs_ctldir.h>
195#include <sys/zfs_dir.h>
196#include <sys/zfs_onexit.h>
197#include <sys/zvol.h>
198#include <sys/dsl_scan.h>
199#include <sharefs/share.h>
200#include <sys/dmu_objset.h>
201#include <sys/dmu_recv.h>
202#include <sys/dmu_send.h>
203#include <sys/dsl_destroy.h>
204#include <sys/dsl_bookmark.h>
205#include <sys/dsl_userhold.h>
206#include <sys/zfeature.h>
207#include <sys/zcp.h>
208#include <sys/zio_checksum.h>
209#include <sys/vdev_removal.h>
210#include <sys/vdev_impl.h>
211#include <sys/vdev_initialize.h>
212#include <sys/vdev_trim.h>
213#include <sys/dsl_crypt.h>
214
215#include "zfs_namecheck.h"
216#include "zfs_prop.h"
217#include "zfs_deleg.h"
218#include "zfs_comutil.h"
219
220#include "lua.h"
221#include "lauxlib.h"
222
223extern struct modlfs zfs_modlfs;
224
225extern void zfs_init(void);
226extern void zfs_fini(void);
227
228ldi_ident_t zfs_li = NULL;
229dev_info_t *zfs_dip;
230
231uint_t zfs_fsyncer_key;
232extern uint_t rrw_tsd_key;
233static uint_t zfs_allow_log_key;
234
235typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
236typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
237typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
238
239/*
240 * IOC Keys are used to document and validate user->kernel interface inputs.
241 * See zfs_keys_recv_new for an example declaration. Any key name that is not
242 * listed will be rejected as input.
243 *
244 * The keyname 'optional' is always allowed, and must be an nvlist if present.
245 * Arguments which older kernels can safely ignore can be placed under the
246 * "optional" key.
247 *
248 * When adding new keys to an existing ioc for new functionality, consider:
249 *	- adding an entry into zfs_sysfs.c zfs_features[] list
250 *	- updating the libzfs_input_check.c test utility
251 *
252 * Note: in the ZK_WILDCARDLIST case, the name serves as documentation
253 * for the expected name (bookmark, snapshot, property, etc) but there
254 * is no validation in the preflight zfs_check_input_nvpairs() check.
255 */
/* Per-key flags; the values are distinct bits. */
typedef enum {
	ZK_OPTIONAL = 0x01,	/* pair is optional */
	ZK_WILDCARDLIST = 0x02,	/* one or more unspecified key names */
} ioc_key_flag_t;
260
261/* DATA_TYPE_ANY is used when zkey_type can vary. */
262#define	DATA_TYPE_ANY	DATA_TYPE_UNKNOWN
263
264typedef struct zfs_ioc_key {
265	const char	*zkey_name;
266	data_type_t	zkey_type;
267	ioc_key_flag_t	zkey_flags;
268} zfs_ioc_key_t;
269
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name.
 */
typedef enum {
	NO_NAME = 0,		/* nothing */
	POOL_NAME = 1,		/* a pool name */
	DATASET_NAME = 2	/* a dataset name */
} zfs_ioc_namecheck_t;
275
/*
 * Pool state requirements for an ioctl.  The values are distinct bits so
 * that checks can be or-ed together.  Note that POOL_CHECK_NONE is itself
 * a bit (value 1), not zero.
 */
typedef enum {
	POOL_CHECK_NONE		= 0x01,
	POOL_CHECK_SUSPENDED	= 0x02,
	POOL_CHECK_READONLY	= 0x04,
} zfs_ioc_poolcheck_t;
281
282typedef struct zfs_ioc_vec {
283	zfs_ioc_legacy_func_t	*zvec_legacy_func;
284	zfs_ioc_func_t		*zvec_func;
285	zfs_secpolicy_func_t	*zvec_secpolicy;
286	zfs_ioc_namecheck_t	zvec_namecheck;
287	boolean_t		zvec_allow_log;
288	zfs_ioc_poolcheck_t	zvec_pool_check;
289	boolean_t		zvec_smush_outnvlist;
290	const char		*zvec_name;
291	const zfs_ioc_key_t	*zvec_nvl_keys;
292	size_t			zvec_nvl_key_count;
293} zfs_ioc_vec_t;
294
295/* This array is indexed by zfs_userquota_prop_t */
296static const char *userquota_perms[] = {
297	ZFS_DELEG_PERM_USERUSED,
298	ZFS_DELEG_PERM_USERQUOTA,
299	ZFS_DELEG_PERM_GROUPUSED,
300	ZFS_DELEG_PERM_GROUPQUOTA,
301	ZFS_DELEG_PERM_USEROBJUSED,
302	ZFS_DELEG_PERM_USEROBJQUOTA,
303	ZFS_DELEG_PERM_GROUPOBJUSED,
304	ZFS_DELEG_PERM_GROUPOBJQUOTA,
305	ZFS_DELEG_PERM_PROJECTUSED,
306	ZFS_DELEG_PERM_PROJECTQUOTA,
307	ZFS_DELEG_PERM_PROJECTOBJUSED,
308	ZFS_DELEG_PERM_PROJECTOBJQUOTA,
309};
310
311static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
312static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
313static int zfs_check_settable(const char *name, nvpair_t *property,
314    cred_t *cr);
315static int zfs_check_clearable(char *dataset, nvlist_t *props,
316    nvlist_t **errors);
317static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
318    boolean_t *);
319int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
320static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
321
322static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
323
324/* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
325void
326__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
327{
328	const char *newfile;
329	char buf[512];
330	va_list adx;
331
332	/*
333	 * Get rid of annoying "../common/" prefix to filename.
334	 */
335	newfile = strrchr(file, '/');
336	if (newfile != NULL) {
337		newfile = newfile + 1; /* Get rid of leading / */
338	} else {
339		newfile = file;
340	}
341
342	va_start(adx, fmt);
343	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
344	va_end(adx);
345
346	/*
347	 * To get this data, use the zfs-dprintf probe as so:
348	 * dtrace -q -n 'zfs-dprintf \
349	 *	/stringof(arg0) == "dbuf.c"/ \
350	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
351	 * arg0 = file name
352	 * arg1 = function name
353	 * arg2 = line number
354	 * arg3 = message
355	 */
356	DTRACE_PROBE4(zfs__dprintf,
357	    char *, newfile, char *, func, int, line, char *, buf);
358}
359
360static void
361history_str_free(char *buf)
362{
363	kmem_free(buf, HIS_MAX_RECORD_LEN);
364}
365
366static char *
367history_str_get(zfs_cmd_t *zc)
368{
369	char *buf;
370
371	if (zc->zc_history == 0)
372		return (NULL);
373
374	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
375	if (copyinstr((void *)(uintptr_t)zc->zc_history,
376	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
377		history_str_free(buf);
378		return (NULL);
379	}
380
381	buf[HIS_MAX_RECORD_LEN -1] = '\0';
382
383	return (buf);
384}
385
386/*
387 * Check to see if the named dataset is currently defined as bootable
388 */
389static boolean_t
390zfs_is_bootfs(const char *name)
391{
392	objset_t *os;
393
394	if (dmu_objset_hold(name, FTAG, &os) == 0) {
395		boolean_t ret;
396		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
397		dmu_objset_rele(os, FTAG);
398		return (ret);
399	}
400	return (B_FALSE);
401}
402
403/*
404 * Return non-zero if the spa version is less than requested version.
405 */
406static int
407zfs_earlier_version(const char *name, int version)
408{
409	spa_t *spa;
410
411	if (spa_open(name, &spa, FTAG) == 0) {
412		if (spa_version(spa) < version) {
413			spa_close(spa, FTAG);
414			return (1);
415		}
416		spa_close(spa, FTAG);
417	}
418	return (0);
419}
420
421/*
422 * Return TRUE if the ZPL version is less than requested version.
423 */
424static boolean_t
425zpl_earlier_version(const char *name, int version)
426{
427	objset_t *os;
428	boolean_t rc = B_TRUE;
429
430	if (dmu_objset_hold(name, FTAG, &os) == 0) {
431		uint64_t zplversion;
432
433		if (dmu_objset_type(os) != DMU_OST_ZFS) {
434			dmu_objset_rele(os, FTAG);
435			return (B_TRUE);
436		}
437		/* XXX reading from non-owned objset */
438		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
439			rc = zplversion < version;
440		dmu_objset_rele(os, FTAG);
441	}
442	return (rc);
443}
444
445static void
446zfs_log_history(zfs_cmd_t *zc)
447{
448	spa_t *spa;
449	char *buf;
450
451	if ((buf = history_str_get(zc)) == NULL)
452		return;
453
454	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
455		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
456			(void) spa_history_log(spa, buf);
457		spa_close(spa, FTAG);
458	}
459	history_str_free(buf);
460}
461
462/*
463 * Policy for top-level read operations (list pools).  Requires no privileges,
464 * and can be used in the local zone, as there is no associated dataset.
465 */
466/* ARGSUSED */
467static int
468zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
469{
470	return (0);
471}
472
473/*
474 * Policy for dataset read operations (list children, get statistics).  Requires
475 * no privileges, but must be visible in the local zone.
476 */
477/* ARGSUSED */
478static int
479zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
480{
481	if (INGLOBALZONE(curproc) ||
482	    zone_dataset_visible(zc->zc_name, NULL))
483		return (0);
484
485	return (SET_ERROR(ENOENT));
486}
487
488static int
489zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
490{
491	int writable = 1;
492
493	/*
494	 * The dataset must be visible by this zone -- check this first
495	 * so they don't see EPERM on something they shouldn't know about.
496	 */
497	if (!INGLOBALZONE(curproc) &&
498	    !zone_dataset_visible(dataset, &writable))
499		return (SET_ERROR(ENOENT));
500
501	if (INGLOBALZONE(curproc)) {
502		/*
503		 * If the fs is zoned, only root can access it from the
504		 * global zone.
505		 */
506		if (secpolicy_zfs(cr) && zoned)
507			return (SET_ERROR(EPERM));
508	} else {
509		/*
510		 * If we are in a local zone, the 'zoned' property must be set.
511		 */
512		if (!zoned)
513			return (SET_ERROR(EPERM));
514
515		/* must be writable by this zone */
516		if (!writable)
517			return (SET_ERROR(EPERM));
518	}
519	return (0);
520}
521
522static int
523zfs_dozonecheck(const char *dataset, cred_t *cr)
524{
525	uint64_t zoned;
526
527	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
528		return (SET_ERROR(ENOENT));
529
530	return (zfs_dozonecheck_impl(dataset, zoned, cr));
531}
532
533static int
534zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
535{
536	uint64_t zoned;
537
538	if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
539		return (SET_ERROR(ENOENT));
540
541	return (zfs_dozonecheck_impl(dataset, zoned, cr));
542}
543
544static int
545zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
546    const char *perm, cred_t *cr)
547{
548	int error;
549
550	error = zfs_dozonecheck_ds(name, ds, cr);
551	if (error == 0) {
552		error = secpolicy_zfs(cr);
553		if (error != 0)
554			error = dsl_deleg_access_impl(ds, perm, cr);
555	}
556	return (error);
557}
558
559static int
560zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
561{
562	int error;
563	dsl_dataset_t *ds;
564	dsl_pool_t *dp;
565
566	/*
567	 * First do a quick check for root in the global zone, which
568	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
569	 * will get to handle nonexistent datasets.
570	 */
571	if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
572		return (0);
573
574	error = dsl_pool_hold(name, FTAG, &dp);
575	if (error != 0)
576		return (error);
577
578	error = dsl_dataset_hold(dp, name, FTAG, &ds);
579	if (error != 0) {
580		dsl_pool_rele(dp, FTAG);
581		return (error);
582	}
583
584	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
585
586	dsl_dataset_rele(ds, FTAG);
587	dsl_pool_rele(dp, FTAG);
588	return (error);
589}
590
591/*
592 * Policy for setting the security label property.
593 *
594 * Returns 0 for success, non-zero for access and other errors.
595 */
596static int
597zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
598{
599	char		ds_hexsl[MAXNAMELEN];
600	bslabel_t	ds_sl, new_sl;
601	boolean_t	new_default = FALSE;
602	uint64_t	zoned;
603	int		needed_priv = -1;
604	int		error;
605
606	/* First get the existing dataset label. */
607	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
608	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
609	if (error != 0)
610		return (SET_ERROR(EPERM));
611
612	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
613		new_default = TRUE;
614
615	/* The label must be translatable */
616	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
617		return (SET_ERROR(EINVAL));
618
619	/*
620	 * In a non-global zone, disallow attempts to set a label that
621	 * doesn't match that of the zone; otherwise no other checks
622	 * are needed.
623	 */
624	if (!INGLOBALZONE(curproc)) {
625		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
626			return (SET_ERROR(EPERM));
627		return (0);
628	}
629
630	/*
631	 * For global-zone datasets (i.e., those whose zoned property is
632	 * "off", verify that the specified new label is valid for the
633	 * global zone.
634	 */
635	if (dsl_prop_get_integer(name,
636	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
637		return (SET_ERROR(EPERM));
638	if (!zoned) {
639		if (zfs_check_global_label(name, strval) != 0)
640			return (SET_ERROR(EPERM));
641	}
642
643	/*
644	 * If the existing dataset label is nondefault, check if the
645	 * dataset is mounted (label cannot be changed while mounted).
646	 * Get the zfsvfs; if there isn't one, then the dataset isn't
647	 * mounted (or isn't a dataset, doesn't exist, ...).
648	 */
649	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
650		objset_t *os;
651		static char *setsl_tag = "setsl_tag";
652
653		/*
654		 * Try to own the dataset; abort if there is any error,
655		 * (e.g., already mounted, in use, or other error).
656		 */
657		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
658		    setsl_tag, &os);
659		if (error != 0)
660			return (SET_ERROR(EPERM));
661
662		dmu_objset_disown(os, B_TRUE, setsl_tag);
663
664		if (new_default) {
665			needed_priv = PRIV_FILE_DOWNGRADE_SL;
666			goto out_check;
667		}
668
669		if (hexstr_to_label(strval, &new_sl) != 0)
670			return (SET_ERROR(EPERM));
671
672		if (blstrictdom(&ds_sl, &new_sl))
673			needed_priv = PRIV_FILE_DOWNGRADE_SL;
674		else if (blstrictdom(&new_sl, &ds_sl))
675			needed_priv = PRIV_FILE_UPGRADE_SL;
676	} else {
677		/* dataset currently has a default label */
678		if (!new_default)
679			needed_priv = PRIV_FILE_UPGRADE_SL;
680	}
681
682out_check:
683	if (needed_priv != -1)
684		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
685	return (0);
686}
687
688static int
689zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
690    cred_t *cr)
691{
692	char *strval;
693
694	/*
695	 * Check permissions for special properties.
696	 */
697	switch (prop) {
698	case ZFS_PROP_ZONED:
699		/*
700		 * Disallow setting of 'zoned' from within a local zone.
701		 */
702		if (!INGLOBALZONE(curproc))
703			return (SET_ERROR(EPERM));
704		break;
705
706	case ZFS_PROP_QUOTA:
707	case ZFS_PROP_FILESYSTEM_LIMIT:
708	case ZFS_PROP_SNAPSHOT_LIMIT:
709		if (!INGLOBALZONE(curproc)) {
710			uint64_t zoned;
711			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
712			/*
713			 * Unprivileged users are allowed to modify the
714			 * limit on things *under* (ie. contained by)
715			 * the thing they own.
716			 */
717			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
718			    setpoint))
719				return (SET_ERROR(EPERM));
720			if (!zoned || strlen(dsname) <= strlen(setpoint))
721				return (SET_ERROR(EPERM));
722		}
723		break;
724
725	case ZFS_PROP_MLSLABEL:
726		if (!is_system_labeled())
727			return (SET_ERROR(EPERM));
728
729		if (nvpair_value_string(propval, &strval) == 0) {
730			int err;
731
732			err = zfs_set_slabel_policy(dsname, strval, CRED());
733			if (err != 0)
734				return (err);
735		}
736		break;
737	}
738
739	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
740}
741
742/* ARGSUSED */
743static int
744zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
745{
746	int error;
747
748	error = zfs_dozonecheck(zc->zc_name, cr);
749	if (error != 0)
750		return (error);
751
752	/*
753	 * permission to set permissions will be evaluated later in
754	 * dsl_deleg_can_allow()
755	 */
756	return (0);
757}
758
759/* ARGSUSED */
760static int
761zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
762{
763	return (zfs_secpolicy_write_perms(zc->zc_name,
764	    ZFS_DELEG_PERM_ROLLBACK, cr));
765}
766
767/* ARGSUSED */
768static int
769zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
770{
771	dsl_pool_t *dp;
772	dsl_dataset_t *ds;
773	char *cp;
774	int error;
775
776	/*
777	 * Generate the current snapshot name from the given objsetid, then
778	 * use that name for the secpolicy/zone checks.
779	 */
780	cp = strchr(zc->zc_name, '@');
781	if (cp == NULL)
782		return (SET_ERROR(EINVAL));
783	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
784	if (error != 0)
785		return (error);
786
787	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
788	if (error != 0) {
789		dsl_pool_rele(dp, FTAG);
790		return (error);
791	}
792
793	dsl_dataset_name(ds, zc->zc_name);
794
795	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
796	    ZFS_DELEG_PERM_SEND, cr);
797	dsl_dataset_rele(ds, FTAG);
798	dsl_pool_rele(dp, FTAG);
799
800	return (error);
801}
802
803/* ARGSUSED */
804static int
805zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
806{
807	return (zfs_secpolicy_write_perms(zc->zc_name,
808	    ZFS_DELEG_PERM_SEND, cr));
809}
810
811/* ARGSUSED */
812static int
813zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
814{
815	vnode_t *vp;
816	int error;
817
818	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
819	    NO_FOLLOW, NULL, &vp)) != 0)
820		return (error);
821
822	/* Now make sure mntpnt and dataset are ZFS */
823
824	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
825	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
826	    zc->zc_name) != 0)) {
827		VN_RELE(vp);
828		return (SET_ERROR(EPERM));
829	}
830
831	VN_RELE(vp);
832	return (dsl_deleg_access(zc->zc_name,
833	    ZFS_DELEG_PERM_SHARE, cr));
834}
835
836int
837zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
838{
839	if (secpolicy_nfs(cr) == 0) {
840		return (0);
841	} else {
842		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
843	}
844}
845
846int
847zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
848{
849	if (secpolicy_smb(cr) == 0) {
850		return (0);
851	} else {
852		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
853	}
854}
855
/*
 * Compute the parent of a dataset name by stripping the trailing "@snap"
 * component or, if there is no '@', the trailing "/child" component.
 *
 *   datasetname	name to examine
 *   parent		caller-supplied buffer that receives the parent name
 *   parentsize		size of 'parent' in bytes
 *
 * Returns 0 on success, ENOENT if the name contains neither '@' nor '/',
 * and ENAMETOOLONG if the name and its terminator do not fit in 'parent'.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	/*
	 * strncpy() does not NUL-terminate the destination when the source
	 * fills it, which would send the strrchr() calls below past the end
	 * of 'parent'.  Reject names that do not fit instead of checking
	 * permissions against a silently truncated parent.
	 */
	if (parentsize <= 0 || strlen(datasetname) >= (size_t)parentsize)
		return (SET_ERROR(ENAMETOOLONG));

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	cp = strrchr(parent, '@');
	if (cp != NULL) {
		cp[0] = '\0';
	} else {
		cp = strrchr(parent, '/');
		if (cp == NULL)
			return (SET_ERROR(ENOENT));
		cp[0] = '\0';
	}

	return (0);
}
877
878int
879zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
880{
881	int error;
882
883	if ((error = zfs_secpolicy_write_perms(name,
884	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
885		return (error);
886
887	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
888}
889
890/* ARGSUSED */
891static int
892zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
893{
894	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
895}
896
897/*
898 * Destroying snapshots with delegated permissions requires
899 * descendant mount and destroy permissions.
900 */
901/* ARGSUSED */
902static int
903zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
904{
905	nvlist_t *snaps;
906	nvpair_t *pair, *nextpair;
907	int error = 0;
908
909	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
910
911	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
912	    pair = nextpair) {
913		nextpair = nvlist_next_nvpair(snaps, pair);
914		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
915		if (error == ENOENT) {
916			/*
917			 * Ignore any snapshots that don't exist (we consider
918			 * them "already destroyed").  Remove the name from the
919			 * nvl here in case the snapshot is created between
920			 * now and when we try to destroy it (in which case
921			 * we don't want to destroy it since we haven't
922			 * checked for permission).
923			 */
924			fnvlist_remove_nvpair(snaps, pair);
925			error = 0;
926		}
927		if (error != 0)
928			break;
929	}
930
931	return (error);
932}
933
934int
935zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
936{
937	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
938	int	error;
939
940	if ((error = zfs_secpolicy_write_perms(from,
941	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
942		return (error);
943
944	if ((error = zfs_secpolicy_write_perms(from,
945	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
946		return (error);
947
948	if ((error = zfs_get_parent(to, parentname,
949	    sizeof (parentname))) != 0)
950		return (error);
951
952	if ((error = zfs_secpolicy_write_perms(parentname,
953	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
954		return (error);
955
956	if ((error = zfs_secpolicy_write_perms(parentname,
957	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
958		return (error);
959
960	return (error);
961}
962
963/* ARGSUSED */
964static int
965zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
966{
967	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
968}
969
970/* ARGSUSED */
971static int
972zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
973{
974	dsl_pool_t *dp;
975	dsl_dataset_t *clone;
976	int error;
977
978	error = zfs_secpolicy_write_perms(zc->zc_name,
979	    ZFS_DELEG_PERM_PROMOTE, cr);
980	if (error != 0)
981		return (error);
982
983	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
984	if (error != 0)
985		return (error);
986
987	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
988
989	if (error == 0) {
990		char parentname[ZFS_MAX_DATASET_NAME_LEN];
991		dsl_dataset_t *origin = NULL;
992		dsl_dir_t *dd;
993		dd = clone->ds_dir;
994
995		error = dsl_dataset_hold_obj(dd->dd_pool,
996		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
997		if (error != 0) {
998			dsl_dataset_rele(clone, FTAG);
999			dsl_pool_rele(dp, FTAG);
1000			return (error);
1001		}
1002
1003		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
1004		    ZFS_DELEG_PERM_MOUNT, cr);
1005
1006		dsl_dataset_name(origin, parentname);
1007		if (error == 0) {
1008			error = zfs_secpolicy_write_perms_ds(parentname, origin,
1009			    ZFS_DELEG_PERM_PROMOTE, cr);
1010		}
1011		dsl_dataset_rele(clone, FTAG);
1012		dsl_dataset_rele(origin, FTAG);
1013	}
1014	dsl_pool_rele(dp, FTAG);
1015	return (error);
1016}
1017
1018/* ARGSUSED */
1019static int
1020zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1021{
1022	int error;
1023
1024	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1025	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
1026		return (error);
1027
1028	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1029	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1030		return (error);
1031
1032	return (zfs_secpolicy_write_perms(zc->zc_name,
1033	    ZFS_DELEG_PERM_CREATE, cr));
1034}
1035
1036int
1037zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1038{
1039	return (zfs_secpolicy_write_perms(name,
1040	    ZFS_DELEG_PERM_SNAPSHOT, cr));
1041}
1042
1043/*
1044 * Check for permission to create each snapshot in the nvlist.
1045 */
1046/* ARGSUSED */
1047static int
1048zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1049{
1050	nvlist_t *snaps;
1051	int error = 0;
1052	nvpair_t *pair;
1053
1054	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
1055
1056	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1057	    pair = nvlist_next_nvpair(snaps, pair)) {
1058		char *name = nvpair_name(pair);
1059		char *atp = strchr(name, '@');
1060
1061		if (atp == NULL) {
1062			error = SET_ERROR(EINVAL);
1063			break;
1064		}
1065		*atp = '\0';
1066		error = zfs_secpolicy_snapshot_perms(name, cr);
1067		*atp = '@';
1068		if (error != 0)
1069			break;
1070	}
1071	return (error);
1072}
1073
1074/*
1075 * Check for permission to create each bookmark in the nvlist.
1076 */
1077/* ARGSUSED */
1078static int
1079zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1080{
1081	int error = 0;
1082
1083	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1084	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1085		char *name = nvpair_name(pair);
1086		char *hashp = strchr(name, '#');
1087
1088		if (hashp == NULL) {
1089			error = SET_ERROR(EINVAL);
1090			break;
1091		}
1092		*hashp = '\0';
1093		error = zfs_secpolicy_write_perms(name,
1094		    ZFS_DELEG_PERM_BOOKMARK, cr);
1095		*hashp = '#';
1096		if (error != 0)
1097			break;
1098	}
1099	return (error);
1100}
1101
1102/* ARGSUSED */
1103static int
1104zfs_secpolicy_remap(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1105{
1106	return (zfs_secpolicy_write_perms(zc->zc_name,
1107	    ZFS_DELEG_PERM_REMAP, cr));
1108}
1109
1110/* ARGSUSED */
1111static int
1112zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1113{
1114	nvpair_t *pair, *nextpair;
1115	int error = 0;
1116
1117	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1118	    pair = nextpair) {
1119		char *name = nvpair_name(pair);
1120		char *hashp = strchr(name, '#');
1121		nextpair = nvlist_next_nvpair(innvl, pair);
1122
1123		if (hashp == NULL) {
1124			error = SET_ERROR(EINVAL);
1125			break;
1126		}
1127
1128		*hashp = '\0';
1129		error = zfs_secpolicy_write_perms(name,
1130		    ZFS_DELEG_PERM_DESTROY, cr);
1131		*hashp = '#';
1132		if (error == ENOENT) {
1133			/*
1134			 * Ignore any filesystems that don't exist (we consider
1135			 * their bookmarks "already destroyed").  Remove
1136			 * the name from the nvl here in case the filesystem
1137			 * is created between now and when we try to destroy
1138			 * the bookmark (in which case we don't want to
1139			 * destroy it since we haven't checked for permission).
1140			 */
1141			fnvlist_remove_nvpair(innvl, pair);
1142			error = 0;
1143		}
1144		if (error != 0)
1145			break;
1146	}
1147
1148	return (error);
1149}
1150
1151/* ARGSUSED */
1152static int
1153zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1154{
1155	/*
1156	 * Even root must have a proper TSD so that we know what pool
1157	 * to log to.
1158	 */
1159	if (tsd_get(zfs_allow_log_key) == NULL)
1160		return (SET_ERROR(EPERM));
1161	return (0);
1162}
1163
1164static int
1165zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1166{
1167	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
1168	int	error;
1169	char	*origin;
1170
1171	if ((error = zfs_get_parent(zc->zc_name, parentname,
1172	    sizeof (parentname))) != 0)
1173		return (error);
1174
1175	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1176	    (error = zfs_secpolicy_write_perms(origin,
1177	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1178		return (error);
1179
1180	if ((error = zfs_secpolicy_write_perms(parentname,
1181	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1182		return (error);
1183
1184	return (zfs_secpolicy_write_perms(parentname,
1185	    ZFS_DELEG_PERM_MOUNT, cr));
1186}
1187
1188/*
1189 * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1190 * SYS_CONFIG privilege, which is not available in a local zone.
1191 */
1192/* ARGSUSED */
1193static int
1194zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1195{
1196	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1197		return (SET_ERROR(EPERM));
1198
1199	return (0);
1200}
1201
1202/*
1203 * Policy for object to name lookups.
1204 */
1205/* ARGSUSED */
1206static int
1207zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1208{
1209	int error;
1210
1211	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1212		return (0);
1213
1214	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1215	return (error);
1216}
1217
1218/*
1219 * Policy for fault injection.  Requires all privileges.
1220 */
1221/* ARGSUSED */
1222static int
1223zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1224{
1225	return (secpolicy_zinject(cr));
1226}
1227
1228/* ARGSUSED */
1229static int
1230zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1231{
1232	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1233
1234	if (prop == ZPROP_INVAL) {
1235		if (!zfs_prop_user(zc->zc_value))
1236			return (SET_ERROR(EINVAL));
1237		return (zfs_secpolicy_write_perms(zc->zc_name,
1238		    ZFS_DELEG_PERM_USERPROP, cr));
1239	} else {
1240		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1241		    NULL, cr));
1242	}
1243}
1244
1245static int
1246zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1247{
1248	int err = zfs_secpolicy_read(zc, innvl, cr);
1249	if (err)
1250		return (err);
1251
1252	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1253		return (SET_ERROR(EINVAL));
1254
1255	if (zc->zc_value[0] == 0) {
1256		/*
1257		 * They are asking about a posix uid/gid.  If it's
1258		 * themself, allow it.
1259		 */
1260		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1261		    zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1262		    zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1263		    zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1264			if (zc->zc_guid == crgetuid(cr))
1265				return (0);
1266		} else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
1267		    zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
1268		    zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
1269		    zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
1270			if (groupmember(zc->zc_guid, cr))
1271				return (0);
1272		}
1273		/* else is for project quota/used */
1274	}
1275
1276	return (zfs_secpolicy_write_perms(zc->zc_name,
1277	    userquota_perms[zc->zc_objset_type], cr));
1278}
1279
1280static int
1281zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1282{
1283	int err = zfs_secpolicy_read(zc, innvl, cr);
1284	if (err)
1285		return (err);
1286
1287	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1288		return (SET_ERROR(EINVAL));
1289
1290	return (zfs_secpolicy_write_perms(zc->zc_name,
1291	    userquota_perms[zc->zc_objset_type], cr));
1292}
1293
1294/* ARGSUSED */
1295static int
1296zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1297{
1298	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1299	    NULL, cr));
1300}
1301
1302/* ARGSUSED */
1303static int
1304zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1305{
1306	nvpair_t *pair;
1307	nvlist_t *holds;
1308	int error;
1309
1310	holds = fnvlist_lookup_nvlist(innvl, "holds");
1311
1312	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1313	    pair = nvlist_next_nvpair(holds, pair)) {
1314		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1315		error = dmu_fsname(nvpair_name(pair), fsname);
1316		if (error != 0)
1317			return (error);
1318		error = zfs_secpolicy_write_perms(fsname,
1319		    ZFS_DELEG_PERM_HOLD, cr);
1320		if (error != 0)
1321			return (error);
1322	}
1323	return (0);
1324}
1325
1326/* ARGSUSED */
1327static int
1328zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1329{
1330	nvpair_t *pair;
1331	int error;
1332
1333	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1334	    pair = nvlist_next_nvpair(innvl, pair)) {
1335		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1336		error = dmu_fsname(nvpair_name(pair), fsname);
1337		if (error != 0)
1338			return (error);
1339		error = zfs_secpolicy_write_perms(fsname,
1340		    ZFS_DELEG_PERM_RELEASE, cr);
1341		if (error != 0)
1342			return (error);
1343	}
1344	return (0);
1345}
1346
1347/* ARGSUSED */
1348static int
1349zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1350{
1351	return (zfs_secpolicy_write_perms(zc->zc_name,
1352	    ZFS_DELEG_PERM_LOAD_KEY, cr));
1353}
1354
1355/* ARGSUSED */
1356static int
1357zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1358{
1359	return (zfs_secpolicy_write_perms(zc->zc_name,
1360	    ZFS_DELEG_PERM_CHANGE_KEY, cr));
1361}
1362
1363/*
1364 * Policy for allowing temporary snapshots to be taken or released
1365 */
1366static int
1367zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1368{
1369	/*
1370	 * A temporary snapshot is the same as a snapshot,
1371	 * hold, destroy and release all rolled into one.
1372	 * Delegated diff alone is sufficient that we allow this.
1373	 */
1374	int error;
1375
1376	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1377	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1378		return (0);
1379
1380	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1381
1382	if (innvl != NULL) {
1383		if (error == 0)
1384			error = zfs_secpolicy_hold(zc, innvl, cr);
1385		if (error == 0)
1386			error = zfs_secpolicy_release(zc, innvl, cr);
1387		if (error == 0)
1388			error = zfs_secpolicy_destroy(zc, innvl, cr);
1389	}
1390	return (error);
1391}
1392
1393/*
1394 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1395 */
1396static int
1397get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1398{
1399	char *packed;
1400	int error;
1401	nvlist_t *list = NULL;
1402
1403	/*
1404	 * Read in and unpack the user-supplied nvlist.
1405	 */
1406	if (size == 0)
1407		return (SET_ERROR(EINVAL));
1408
1409	packed = kmem_alloc(size, KM_SLEEP);
1410
1411	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1412	    iflag)) != 0) {
1413		kmem_free(packed, size);
1414		return (SET_ERROR(EFAULT));
1415	}
1416
1417	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1418		kmem_free(packed, size);
1419		return (error);
1420	}
1421
1422	kmem_free(packed, size);
1423
1424	*nvp = list;
1425	return (0);
1426}
1427
1428/*
1429 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1430 * Entries will be removed from the end of the nvlist, and one int32 entry
1431 * named "N_MORE_ERRORS" will be added indicating how many entries were
1432 * removed.
1433 */
1434static int
1435nvlist_smush(nvlist_t *errors, size_t max)
1436{
1437	size_t size;
1438
1439	size = fnvlist_size(errors);
1440
1441	if (size > max) {
1442		nvpair_t *more_errors;
1443		int n = 0;
1444
1445		if (max < 1024)
1446			return (SET_ERROR(ENOMEM));
1447
1448		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1449		more_errors = nvlist_prev_nvpair(errors, NULL);
1450
1451		do {
1452			nvpair_t *pair = nvlist_prev_nvpair(errors,
1453			    more_errors);
1454			fnvlist_remove_nvpair(errors, pair);
1455			n++;
1456			size = fnvlist_size(errors);
1457		} while (size > max);
1458
1459		fnvlist_remove_nvpair(errors, more_errors);
1460		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1461		ASSERT3U(fnvlist_size(errors), <=, max);
1462	}
1463
1464	return (0);
1465}
1466
1467static int
1468put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1469{
1470	char *packed = NULL;
1471	int error = 0;
1472	size_t size;
1473
1474	size = fnvlist_size(nvl);
1475
1476	if (size > zc->zc_nvlist_dst_size) {
1477		error = SET_ERROR(ENOMEM);
1478	} else {
1479		packed = fnvlist_pack(nvl, &size);
1480		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1481		    size, zc->zc_iflags) != 0)
1482			error = SET_ERROR(EFAULT);
1483		fnvlist_pack_free(packed, size);
1484	}
1485
1486	zc->zc_nvlist_dst_size = size;
1487	zc->zc_nvlist_dst_filled = B_TRUE;
1488	return (error);
1489}
1490
1491int
1492getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1493{
1494	int error = 0;
1495	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1496		return (SET_ERROR(EINVAL));
1497	}
1498
1499	mutex_enter(&os->os_user_ptr_lock);
1500	*zfvp = dmu_objset_get_user(os);
1501	if (*zfvp) {
1502		VFS_HOLD((*zfvp)->z_vfs);
1503	} else {
1504		error = SET_ERROR(ESRCH);
1505	}
1506	mutex_exit(&os->os_user_ptr_lock);
1507	return (error);
1508}
1509
1510int
1511getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1512{
1513	objset_t *os;
1514	int error;
1515
1516	error = dmu_objset_hold(dsname, FTAG, &os);
1517	if (error != 0)
1518		return (error);
1519
1520	error = getzfsvfs_impl(os, zfvp);
1521	dmu_objset_rele(os, FTAG);
1522	return (error);
1523}
1524
1525/*
1526 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1527 * case its z_vfs will be NULL, and it will be opened as the owner.
1528 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1529 * which prevents all vnode ops from running.
1530 */
1531static int
1532zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1533{
1534	int error = 0;
1535
1536	if (getzfsvfs(name, zfvp) != 0)
1537		error = zfsvfs_create(name, B_FALSE, zfvp);
1538	if (error == 0) {
1539		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1540		    RW_READER, tag);
1541		if ((*zfvp)->z_unmounted) {
1542			/*
1543			 * XXX we could probably try again, since the unmounting
1544			 * thread should be just about to disassociate the
1545			 * objset from the zfsvfs.
1546			 */
1547			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1548			return (SET_ERROR(EBUSY));
1549		}
1550	}
1551	return (error);
1552}
1553
1554static void
1555zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1556{
1557	rrm_exit(&zfsvfs->z_teardown_lock, tag);
1558
1559	if (zfsvfs->z_vfs) {
1560		VFS_RELE(zfsvfs->z_vfs);
1561	} else {
1562		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1563		zfsvfs_free(zfsvfs);
1564	}
1565}
1566
1567static int
1568zfs_ioc_pool_create(zfs_cmd_t *zc)
1569{
1570	int error;
1571	nvlist_t *config, *props = NULL;
1572	nvlist_t *rootprops = NULL;
1573	nvlist_t *zplprops = NULL;
1574	char *spa_name = zc->zc_name;
1575	dsl_crypto_params_t *dcp = NULL;
1576
1577	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1578	    zc->zc_iflags, &config))
1579		return (error);
1580
1581	if (zc->zc_nvlist_src_size != 0 && (error =
1582	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1583	    zc->zc_iflags, &props))) {
1584		nvlist_free(config);
1585		return (error);
1586	}
1587
1588	if (props) {
1589		nvlist_t *nvl = NULL;
1590		nvlist_t *hidden_args = NULL;
1591		uint64_t version = SPA_VERSION;
1592		char *tname;
1593
1594		(void) nvlist_lookup_uint64(props,
1595		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1596		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1597			error = SET_ERROR(EINVAL);
1598			goto pool_props_bad;
1599		}
1600		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1601		if (nvl) {
1602			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1603			if (error != 0) {
1604				nvlist_free(config);
1605				nvlist_free(props);
1606				return (error);
1607			}
1608			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1609		}
1610
1611		(void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
1612		    &hidden_args);
1613		error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
1614		    rootprops, hidden_args, &dcp);
1615		if (error != 0) {
1616			nvlist_free(config);
1617			nvlist_free(props);
1618			return (error);
1619		}
1620		(void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
1621
1622		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1623		error = zfs_fill_zplprops_root(version, rootprops,
1624		    zplprops, NULL);
1625		if (error != 0)
1626			goto pool_props_bad;
1627
1628		if (nvlist_lookup_string(props,
1629		    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
1630			spa_name = tname;
1631	}
1632
1633	error = spa_create(zc->zc_name, config, props, zplprops, dcp);
1634
1635	/*
1636	 * Set the remaining root properties
1637	 */
1638	if (!error && (error = zfs_set_prop_nvlist(spa_name,
1639	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1640		(void) spa_destroy(spa_name);
1641
1642pool_props_bad:
1643	nvlist_free(rootprops);
1644	nvlist_free(zplprops);
1645	nvlist_free(config);
1646	nvlist_free(props);
1647	dsl_crypto_params_free(dcp, !!error);
1648
1649	return (error);
1650}
1651
1652static int
1653zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1654{
1655	int error;
1656	zfs_log_history(zc);
1657	error = spa_destroy(zc->zc_name);
1658	if (error == 0)
1659		zvol_remove_minors(zc->zc_name);
1660	return (error);
1661}
1662
1663static int
1664zfs_ioc_pool_import(zfs_cmd_t *zc)
1665{
1666	nvlist_t *config, *props = NULL;
1667	uint64_t guid;
1668	int error;
1669
1670	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1671	    zc->zc_iflags, &config)) != 0)
1672		return (error);
1673
1674	if (zc->zc_nvlist_src_size != 0 && (error =
1675	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1676	    zc->zc_iflags, &props))) {
1677		nvlist_free(config);
1678		return (error);
1679	}
1680
1681	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1682	    guid != zc->zc_guid)
1683		error = SET_ERROR(EINVAL);
1684	else
1685		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1686
1687	if (zc->zc_nvlist_dst != 0) {
1688		int err;
1689
1690		if ((err = put_nvlist(zc, config)) != 0)
1691			error = err;
1692	}
1693
1694	nvlist_free(config);
1695
1696	nvlist_free(props);
1697
1698	return (error);
1699}
1700
1701static int
1702zfs_ioc_pool_export(zfs_cmd_t *zc)
1703{
1704	int error;
1705	boolean_t force = (boolean_t)zc->zc_cookie;
1706	boolean_t hardforce = (boolean_t)zc->zc_guid;
1707
1708	zfs_log_history(zc);
1709	error = spa_export(zc->zc_name, NULL, force, hardforce);
1710	if (error == 0)
1711		zvol_remove_minors(zc->zc_name);
1712	return (error);
1713}
1714
1715static int
1716zfs_ioc_pool_configs(zfs_cmd_t *zc)
1717{
1718	nvlist_t *configs;
1719	int error;
1720
1721	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1722		return (SET_ERROR(EEXIST));
1723
1724	error = put_nvlist(zc, configs);
1725
1726	nvlist_free(configs);
1727
1728	return (error);
1729}
1730
1731/*
1732 * inputs:
1733 * zc_name		name of the pool
1734 *
1735 * outputs:
1736 * zc_cookie		real errno
1737 * zc_nvlist_dst	config nvlist
1738 * zc_nvlist_dst_size	size of config nvlist
1739 */
1740static int
1741zfs_ioc_pool_stats(zfs_cmd_t *zc)
1742{
1743	nvlist_t *config;
1744	int error;
1745	int ret = 0;
1746
1747	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1748	    sizeof (zc->zc_value));
1749
1750	if (config != NULL) {
1751		ret = put_nvlist(zc, config);
1752		nvlist_free(config);
1753
1754		/*
1755		 * The config may be present even if 'error' is non-zero.
1756		 * In this case we return success, and preserve the real errno
1757		 * in 'zc_cookie'.
1758		 */
1759		zc->zc_cookie = error;
1760	} else {
1761		ret = error;
1762	}
1763
1764	return (ret);
1765}
1766
1767/*
1768 * Try to import the given pool, returning pool stats as appropriate so that
1769 * user land knows which devices are available and overall pool health.
1770 */
1771static int
1772zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1773{
1774	nvlist_t *tryconfig, *config;
1775	int error;
1776
1777	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1778	    zc->zc_iflags, &tryconfig)) != 0)
1779		return (error);
1780
1781	config = spa_tryimport(tryconfig);
1782
1783	nvlist_free(tryconfig);
1784
1785	if (config == NULL)
1786		return (SET_ERROR(EINVAL));
1787
1788	error = put_nvlist(zc, config);
1789	nvlist_free(config);
1790
1791	return (error);
1792}
1793
1794/*
1795 * inputs:
1796 * zc_name              name of the pool
1797 * zc_cookie            scan func (pool_scan_func_t)
1798 * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1799 */
1800static int
1801zfs_ioc_pool_scan(zfs_cmd_t *zc)
1802{
1803	spa_t *spa;
1804	int error;
1805
1806	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1807		return (error);
1808
1809	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1810		return (SET_ERROR(EINVAL));
1811
1812	if (zc->zc_flags == POOL_SCRUB_PAUSE)
1813		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1814	else if (zc->zc_cookie == POOL_SCAN_NONE)
1815		error = spa_scan_stop(spa);
1816	else
1817		error = spa_scan(spa, zc->zc_cookie);
1818
1819	spa_close(spa, FTAG);
1820
1821	return (error);
1822}
1823
1824static int
1825zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1826{
1827	spa_t *spa;
1828	int error;
1829
1830	error = spa_open(zc->zc_name, &spa, FTAG);
1831	if (error == 0) {
1832		spa_freeze(spa);
1833		spa_close(spa, FTAG);
1834	}
1835	return (error);
1836}
1837
1838static int
1839zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1840{
1841	spa_t *spa;
1842	int error;
1843
1844	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1845		return (error);
1846
1847	if (zc->zc_cookie < spa_version(spa) ||
1848	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1849		spa_close(spa, FTAG);
1850		return (SET_ERROR(EINVAL));
1851	}
1852
1853	spa_upgrade(spa, zc->zc_cookie);
1854	spa_close(spa, FTAG);
1855
1856	return (error);
1857}
1858
1859static int
1860zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1861{
1862	spa_t *spa;
1863	char *hist_buf;
1864	uint64_t size;
1865	int error;
1866
1867	if ((size = zc->zc_history_len) == 0)
1868		return (SET_ERROR(EINVAL));
1869
1870	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1871		return (error);
1872
1873	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1874		spa_close(spa, FTAG);
1875		return (SET_ERROR(ENOTSUP));
1876	}
1877
1878	hist_buf = kmem_alloc(size, KM_SLEEP);
1879	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1880	    &zc->zc_history_len, hist_buf)) == 0) {
1881		error = ddi_copyout(hist_buf,
1882		    (void *)(uintptr_t)zc->zc_history,
1883		    zc->zc_history_len, zc->zc_iflags);
1884	}
1885
1886	spa_close(spa, FTAG);
1887	kmem_free(hist_buf, size);
1888	return (error);
1889}
1890
1891static int
1892zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1893{
1894	spa_t *spa;
1895	int error;
1896
1897	error = spa_open(zc->zc_name, &spa, FTAG);
1898	if (error == 0) {
1899		error = spa_change_guid(spa);
1900		spa_close(spa, FTAG);
1901	}
1902	return (error);
1903}
1904
1905static int
1906zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1907{
1908	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1909}
1910
1911/*
1912 * inputs:
1913 * zc_name		name of filesystem
1914 * zc_obj		object to find
1915 *
1916 * outputs:
1917 * zc_value		name of object
1918 */
1919static int
1920zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1921{
1922	objset_t *os;
1923	int error;
1924
1925	/* XXX reading from objset not owned */
1926	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1927	    FTAG, &os)) != 0)
1928		return (error);
1929	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1930		dmu_objset_rele_flags(os, B_TRUE, FTAG);
1931		return (SET_ERROR(EINVAL));
1932	}
1933	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1934	    sizeof (zc->zc_value));
1935	dmu_objset_rele_flags(os, B_TRUE, FTAG);
1936
1937	return (error);
1938}
1939
1940/*
1941 * inputs:
1942 * zc_name		name of filesystem
1943 * zc_obj		object to find
1944 *
1945 * outputs:
1946 * zc_stat		stats on object
1947 * zc_value		path to object
1948 */
1949static int
1950zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1951{
1952	objset_t *os;
1953	int error;
1954
1955	/* XXX reading from objset not owned */
1956	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
1957	    FTAG, &os)) != 0)
1958		return (error);
1959	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1960		dmu_objset_rele_flags(os, B_TRUE, FTAG);
1961		return (SET_ERROR(EINVAL));
1962	}
1963	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1964	    sizeof (zc->zc_value));
1965	dmu_objset_rele_flags(os, B_TRUE, FTAG);
1966
1967	return (error);
1968}
1969
1970static int
1971zfs_ioc_vdev_add(zfs_cmd_t *zc)
1972{
1973	spa_t *spa;
1974	int error;
1975	nvlist_t *config;
1976
1977	error = spa_open(zc->zc_name, &spa, FTAG);
1978	if (error != 0)
1979		return (error);
1980
1981	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1982	    zc->zc_iflags, &config);
1983
1984	if (error == 0) {
1985		error = spa_vdev_add(spa, config);
1986		nvlist_free(config);
1987	}
1988	spa_close(spa, FTAG);
1989	return (error);
1990}
1991
1992/*
1993 * inputs:
1994 * zc_name		name of the pool
1995 * zc_guid		guid of vdev to remove
1996 * zc_cookie		cancel removal
1997 */
1998static int
1999zfs_ioc_vdev_remove(zfs_cmd_t *zc)
2000{
2001	spa_t *spa;
2002	int error;
2003
2004	error = spa_open(zc->zc_name, &spa, FTAG);
2005	if (error != 0)
2006		return (error);
2007	if (zc->zc_cookie != 0) {
2008		error = spa_vdev_remove_cancel(spa);
2009	} else {
2010		error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
2011	}
2012	spa_close(spa, FTAG);
2013	return (error);
2014}
2015
2016static int
2017zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
2018{
2019	spa_t *spa;
2020	int error;
2021	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
2022
2023	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2024		return (error);
2025	switch (zc->zc_cookie) {
2026	case VDEV_STATE_ONLINE:
2027		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
2028		break;
2029
2030	case VDEV_STATE_OFFLINE:
2031		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
2032		break;
2033
2034	case VDEV_STATE_FAULTED:
2035		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2036		    zc->zc_obj != VDEV_AUX_EXTERNAL)
2037			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2038
2039		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
2040		break;
2041
2042	case VDEV_STATE_DEGRADED:
2043		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2044		    zc->zc_obj != VDEV_AUX_EXTERNAL)
2045			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2046
2047		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
2048		break;
2049
2050	default:
2051		error = SET_ERROR(EINVAL);
2052	}
2053	zc->zc_cookie = newstate;
2054	spa_close(spa, FTAG);
2055	return (error);
2056}
2057
2058static int
2059zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2060{
2061	spa_t *spa;
2062	int replacing = zc->zc_cookie;
2063	nvlist_t *config;
2064	int error;
2065
2066	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2067		return (error);
2068
2069	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2070	    zc->zc_iflags, &config)) == 0) {
2071		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
2072		nvlist_free(config);
2073	}
2074
2075	spa_close(spa, FTAG);
2076	return (error);
2077}
2078
2079static int
2080zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2081{
2082	spa_t *spa;
2083	int error;
2084
2085	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2086		return (error);
2087
2088	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2089
2090	spa_close(spa, FTAG);
2091	return (error);
2092}
2093
2094static int
2095zfs_ioc_vdev_split(zfs_cmd_t *zc)
2096{
2097	spa_t *spa;
2098	nvlist_t *config, *props = NULL;
2099	int error;
2100	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2101
2102	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2103		return (error);
2104
2105	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2106	    zc->zc_iflags, &config)) {
2107		spa_close(spa, FTAG);
2108		return (error);
2109	}
2110
2111	if (zc->zc_nvlist_src_size != 0 && (error =
2112	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2113	    zc->zc_iflags, &props))) {
2114		spa_close(spa, FTAG);
2115		nvlist_free(config);
2116		return (error);
2117	}
2118
2119	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2120
2121	spa_close(spa, FTAG);
2122
2123	nvlist_free(config);
2124	nvlist_free(props);
2125
2126	return (error);
2127}
2128
2129static int
2130zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2131{
2132	spa_t *spa;
2133	char *path = zc->zc_value;
2134	uint64_t guid = zc->zc_guid;
2135	int error;
2136
2137	error = spa_open(zc->zc_name, &spa, FTAG);
2138	if (error != 0)
2139		return (error);
2140
2141	error = spa_vdev_setpath(spa, guid, path);
2142	spa_close(spa, FTAG);
2143	return (error);
2144}
2145
2146static int
2147zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2148{
2149	spa_t *spa;
2150	char *fru = zc->zc_value;
2151	uint64_t guid = zc->zc_guid;
2152	int error;
2153
2154	error = spa_open(zc->zc_name, &spa, FTAG);
2155	if (error != 0)
2156		return (error);
2157
2158	error = spa_vdev_setfru(spa, guid, fru);
2159	spa_close(spa, FTAG);
2160	return (error);
2161}
2162
2163static int
2164zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2165{
2166	int error = 0;
2167	nvlist_t *nv;
2168
2169	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2170
2171	if (zc->zc_nvlist_dst != 0 &&
2172	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2173		dmu_objset_stats(os, nv);
2174		/*
2175		 * NB: zvol_get_stats() will read the objset contents,
2176		 * which we aren't supposed to do with a
2177		 * DS_MODE_USER hold, because it could be
2178		 * inconsistent.  So this is a bit of a workaround...
2179		 * XXX reading with out owning
2180		 */
2181		if (!zc->zc_objset_stats.dds_inconsistent &&
2182		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2183			error = zvol_get_stats(os, nv);
2184			if (error == EIO)
2185				return (error);
2186			VERIFY0(error);
2187		}
2188		error = put_nvlist(zc, nv);
2189		nvlist_free(nv);
2190	}
2191
2192	return (error);
2193}
2194
2195/*
2196 * inputs:
2197 * zc_name		name of filesystem
2198 * zc_nvlist_dst_size	size of buffer for property nvlist
2199 *
2200 * outputs:
2201 * zc_objset_stats	stats
2202 * zc_nvlist_dst	property nvlist
2203 * zc_nvlist_dst_size	size of property nvlist
2204 */
2205static int
2206zfs_ioc_objset_stats(zfs_cmd_t *zc)
2207{
2208	objset_t *os;
2209	int error;
2210
2211	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2212	if (error == 0) {
2213		error = zfs_ioc_objset_stats_impl(zc, os);
2214		dmu_objset_rele(os, FTAG);
2215	}
2216
2217	return (error);
2218}
2219
2220/*
2221 * inputs:
2222 * zc_name		name of filesystem
2223 * zc_nvlist_dst_size	size of buffer for property nvlist
2224 *
2225 * outputs:
2226 * zc_nvlist_dst	received property nvlist
2227 * zc_nvlist_dst_size	size of received property nvlist
2228 *
2229 * Gets received properties (distinct from local properties on or after
2230 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2231 * local property values.
2232 */
2233static int
2234zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2235{
2236	int error = 0;
2237	nvlist_t *nv;
2238
2239	/*
2240	 * Without this check, we would return local property values if the
2241	 * caller has not already received properties on or after
2242	 * SPA_VERSION_RECVD_PROPS.
2243	 */
2244	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2245		return (SET_ERROR(ENOTSUP));
2246
2247	if (zc->zc_nvlist_dst != 0 &&
2248	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2249		error = put_nvlist(zc, nv);
2250		nvlist_free(nv);
2251	}
2252
2253	return (error);
2254}
2255
2256static int
2257nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2258{
2259	uint64_t value;
2260	int error;
2261
2262	/*
2263	 * zfs_get_zplprop() will either find a value or give us
2264	 * the default value (if there is one).
2265	 */
2266	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2267		return (error);
2268	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2269	return (0);
2270}
2271
2272/*
2273 * inputs:
2274 * zc_name		name of filesystem
2275 * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2276 *
2277 * outputs:
2278 * zc_nvlist_dst	zpl property nvlist
2279 * zc_nvlist_dst_size	size of zpl property nvlist
2280 */
2281static int
2282zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2283{
2284	objset_t *os;
2285	int err;
2286
2287	/* XXX reading without owning */
2288	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2289		return (err);
2290
2291	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2292
2293	/*
2294	 * NB: nvl_add_zplprop() will read the objset contents,
2295	 * which we aren't supposed to do with a DS_MODE_USER
2296	 * hold, because it could be inconsistent.
2297	 */
2298	if (zc->zc_nvlist_dst != 0 &&
2299	    !zc->zc_objset_stats.dds_inconsistent &&
2300	    dmu_objset_type(os) == DMU_OST_ZFS) {
2301		nvlist_t *nv;
2302
2303		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2304		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2305		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2306		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2307		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2308			err = put_nvlist(zc, nv);
2309		nvlist_free(nv);
2310	} else {
2311		err = SET_ERROR(ENOENT);
2312	}
2313	dmu_objset_rele(os, FTAG);
2314	return (err);
2315}
2316
2317static boolean_t
2318dataset_name_hidden(const char *name)
2319{
2320	/*
2321	 * Skip over datasets that are not visible in this zone,
2322	 * internal datasets (which have a $ in their name), and
2323	 * temporary datasets (which have a % in their name).
2324	 */
2325	if (strchr(name, '$') != NULL)
2326		return (B_TRUE);
2327	if (strchr(name, '%') != NULL)
2328		return (B_TRUE);
2329	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
2330		return (B_TRUE);
2331	return (B_FALSE);
2332}
2333
2334/*
2335 * inputs:
2336 * zc_name		name of filesystem
2337 * zc_cookie		zap cursor
2338 * zc_nvlist_dst_size	size of buffer for property nvlist
2339 *
2340 * outputs:
2341 * zc_name		name of next filesystem
2342 * zc_cookie		zap cursor
2343 * zc_objset_stats	stats
2344 * zc_nvlist_dst	property nvlist
2345 * zc_nvlist_dst_size	size of property nvlist
2346 */
2347static int
2348zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2349{
2350	objset_t *os;
2351	int error;
2352	char *p;
2353	size_t orig_len = strlen(zc->zc_name);
2354
2355top:
2356	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2357		if (error == ENOENT)
2358			error = SET_ERROR(ESRCH);
2359		return (error);
2360	}
2361
2362	p = strrchr(zc->zc_name, '/');
2363	if (p == NULL || p[1] != '\0')
2364		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2365	p = zc->zc_name + strlen(zc->zc_name);
2366
2367	do {
2368		error = dmu_dir_list_next(os,
2369		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2370		    NULL, &zc->zc_cookie);
2371		if (error == ENOENT)
2372			error = SET_ERROR(ESRCH);
2373	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2374	dmu_objset_rele(os, FTAG);
2375
2376	/*
2377	 * If it's an internal dataset (ie. with a '$' in its name),
2378	 * don't try to get stats for it, otherwise we'll return ENOENT.
2379	 */
2380	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2381		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2382		if (error == ENOENT) {
2383			/* We lost a race with destroy, get the next one. */
2384			zc->zc_name[orig_len] = '\0';
2385			goto top;
2386		}
2387	}
2388	return (error);
2389}
2390
2391/*
2392 * inputs:
2393 * zc_name		name of filesystem
2394 * zc_cookie		zap cursor
2395 * zc_nvlist_dst_size	size of buffer for property nvlist
2396 * zc_simple		when set, only name is requested
2397 *
2398 * outputs:
2399 * zc_name		name of next snapshot
2400 * zc_objset_stats	stats
2401 * zc_nvlist_dst	property nvlist
2402 * zc_nvlist_dst_size	size of property nvlist
2403 */
2404static int
2405zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2406{
2407	objset_t *os;
2408	int error;
2409
2410	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2411	if (error != 0) {
2412		return (error == ENOENT ? ESRCH : error);
2413	}
2414
2415	/*
2416	 * A dataset name of maximum length cannot have any snapshots,
2417	 * so exit immediately.
2418	 */
2419	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2420	    ZFS_MAX_DATASET_NAME_LEN) {
2421		dmu_objset_rele(os, FTAG);
2422		return (SET_ERROR(ESRCH));
2423	}
2424
2425	error = dmu_snapshot_list_next(os,
2426	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2427	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2428	    NULL);
2429
2430	if (error == 0 && !zc->zc_simple) {
2431		dsl_dataset_t *ds;
2432		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2433
2434		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2435		if (error == 0) {
2436			objset_t *ossnap;
2437
2438			error = dmu_objset_from_ds(ds, &ossnap);
2439			if (error == 0)
2440				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2441			dsl_dataset_rele(ds, FTAG);
2442		}
2443	} else if (error == ENOENT) {
2444		error = SET_ERROR(ESRCH);
2445	}
2446
2447	dmu_objset_rele(os, FTAG);
2448	/* if we failed, undo the @ that we tacked on to zc_name */
2449	if (error != 0)
2450		*strchr(zc->zc_name, '@') = '\0';
2451	return (error);
2452}
2453
2454static int
2455zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2456{
2457	const char *propname = nvpair_name(pair);
2458	uint64_t *valary;
2459	unsigned int vallen;
2460	const char *domain;
2461	char *dash;
2462	zfs_userquota_prop_t type;
2463	uint64_t rid;
2464	uint64_t quota;
2465	zfsvfs_t *zfsvfs;
2466	int err;
2467
2468	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2469		nvlist_t *attrs;
2470		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2471		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2472		    &pair) != 0)
2473			return (SET_ERROR(EINVAL));
2474	}
2475
2476	/*
2477	 * A correctly constructed propname is encoded as
2478	 * userquota@<rid>-<domain>.
2479	 */
2480	if ((dash = strchr(propname, '-')) == NULL ||
2481	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2482	    vallen != 3)
2483		return (SET_ERROR(EINVAL));
2484
2485	domain = dash + 1;
2486	type = valary[0];
2487	rid = valary[1];
2488	quota = valary[2];
2489
2490	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2491	if (err == 0) {
2492		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2493		zfsvfs_rele(zfsvfs, FTAG);
2494	}
2495
2496	return (err);
2497}
2498
2499/*
2500 * If the named property is one that has a special function to set its value,
2501 * return 0 on success and a positive error code on failure; otherwise if it is
2502 * not one of the special properties handled by this function, return -1.
2503 *
2504 * XXX: It would be better for callers of the property interface if we handled
2505 * these special cases in dsl_prop.c (in the dsl layer).
2506 */
2507static int
2508zfs_prop_set_special(const char *dsname, zprop_source_t source,
2509    nvpair_t *pair)
2510{
2511	const char *propname = nvpair_name(pair);
2512	zfs_prop_t prop = zfs_name_to_prop(propname);
2513	uint64_t intval = 0;
2514	char *strval = NULL;
2515	int err = -1;
2516
2517	if (prop == ZPROP_INVAL) {
2518		if (zfs_prop_userquota(propname))
2519			return (zfs_prop_set_userquota(dsname, pair));
2520		return (-1);
2521	}
2522
2523	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2524		nvlist_t *attrs;
2525		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2526		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2527		    &pair) == 0);
2528	}
2529
2530	/* all special properties are numeric except for keylocation */
2531	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
2532		strval = fnvpair_value_string(pair);
2533	} else {
2534		intval = fnvpair_value_uint64(pair);
2535	}
2536
2537	switch (prop) {
2538	case ZFS_PROP_QUOTA:
2539		err = dsl_dir_set_quota(dsname, source, intval);
2540		break;
2541	case ZFS_PROP_REFQUOTA:
2542		err = dsl_dataset_set_refquota(dsname, source, intval);
2543		break;
2544	case ZFS_PROP_FILESYSTEM_LIMIT:
2545	case ZFS_PROP_SNAPSHOT_LIMIT:
2546		if (intval == UINT64_MAX) {
2547			/* clearing the limit, just do it */
2548			err = 0;
2549		} else {
2550			err = dsl_dir_activate_fs_ss_limit(dsname);
2551		}
2552		/*
2553		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2554		 * default path to set the value in the nvlist.
2555		 */
2556		if (err == 0)
2557			err = -1;
2558		break;
2559	case ZFS_PROP_KEYLOCATION:
2560		err = dsl_crypto_can_set_keylocation(dsname, strval);
2561
2562		/*
2563		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2564		 * default path to set the value in the nvlist.
2565		 */
2566		if (err == 0)
2567			err = -1;
2568		break;
2569	case ZFS_PROP_RESERVATION:
2570		err = dsl_dir_set_reservation(dsname, source, intval);
2571		break;
2572	case ZFS_PROP_REFRESERVATION:
2573		err = dsl_dataset_set_refreservation(dsname, source, intval);
2574		break;
2575	case ZFS_PROP_VOLSIZE:
2576		err = zvol_set_volsize(dsname, intval);
2577		break;
2578	case ZFS_PROP_VERSION:
2579	{
2580		zfsvfs_t *zfsvfs;
2581
2582		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2583			break;
2584
2585		err = zfs_set_version(zfsvfs, intval);
2586		zfsvfs_rele(zfsvfs, FTAG);
2587
2588		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2589			zfs_cmd_t *zc;
2590
2591			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2592			(void) strcpy(zc->zc_name, dsname);
2593			(void) zfs_ioc_userspace_upgrade(zc);
2594			(void) zfs_ioc_id_quota_upgrade(zc);
2595			kmem_free(zc, sizeof (zfs_cmd_t));
2596		}
2597		break;
2598	}
2599	default:
2600		err = -1;
2601	}
2602
2603	return (err);
2604}
2605
2606/*
2607 * This function is best effort. If it fails to set any of the given properties,
2608 * it continues to set as many as it can and returns the last error
2609 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2610 * with the list of names of all the properties that failed along with the
2611 * corresponding error numbers.
2612 *
2613 * If every property is set successfully, zero is returned and errlist is not
2614 * modified.
2615 */
int
zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
    nvlist_t *errlist)
{
	nvpair_t *pair;
	nvpair_t *propval;
	int rv = 0;
	uint64_t intval;
	char *strval;
	/*
	 * genericnvl collects the properties that need no special handling so
	 * they can be handed to dsl_props_set() in one call; retrynvl
	 * collects the properties whose special setter failed during the
	 * first pass so they can be attempted once more.
	 */
	nvlist_t *genericnvl = fnvlist_alloc();
	nvlist_t *retrynvl = fnvlist_alloc();

	/*
	 * The loop below runs over the caller's list first and, if anything
	 * was deferred, a second time over retrynvl (nvl == retrynvl).
	 */
retry:
	pair = NULL;
	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
		const char *propname = nvpair_name(pair);
		zfs_prop_t prop = zfs_name_to_prop(propname);
		int err = 0;

		/*
		 * decode the property value: propval is the pair itself, or
		 * the ZPROP_VALUE entry when the pair is an nvlist of
		 * attributes
		 */
		propval = pair;
		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
			nvlist_t *attrs;
			attrs = fnvpair_value_nvlist(pair);
			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
			    &propval) != 0)
				err = SET_ERROR(EINVAL);
		}

		/* Validate value type */
		if (err == 0 && source == ZPROP_SRC_INHERITED) {
			/* inherited properties are expected to be booleans */
			if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
				err = SET_ERROR(EINVAL);
		} else if (err == 0 && prop == ZPROP_INVAL) {
			/*
			 * Not a native property: only user properties
			 * (strings) and userquota properties (uint64 arrays)
			 * are accepted.
			 */
			if (zfs_prop_user(propname)) {
				if (nvpair_type(propval) != DATA_TYPE_STRING)
					err = SET_ERROR(EINVAL);
			} else if (zfs_prop_userquota(propname)) {
				if (nvpair_type(propval) !=
				    DATA_TYPE_UINT64_ARRAY)
					err = SET_ERROR(EINVAL);
			} else {
				err = SET_ERROR(EINVAL);
			}
		} else if (err == 0) {
			/*
			 * Native property: the nvpair type must agree with
			 * the property's declared type.
			 */
			if (nvpair_type(propval) == DATA_TYPE_STRING) {
				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
					err = SET_ERROR(EINVAL);
			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
				const char *unused;

				intval = fnvpair_value_uint64(propval);

				switch (zfs_prop_get_type(prop)) {
				case PROP_TYPE_NUMBER:
					break;
				case PROP_TYPE_STRING:
					err = SET_ERROR(EINVAL);
					break;
				case PROP_TYPE_INDEX:
					/* the index must map to a string */
					if (zfs_prop_index_to_string(prop,
					    intval, &unused) != 0)
						err = SET_ERROR(EINVAL);
					break;
				default:
					cmn_err(CE_PANIC,
					    "unknown property type");
				}
			} else {
				err = SET_ERROR(EINVAL);
			}
		}

		/* Validate permissions */
		if (err == 0)
			err = zfs_check_settable(dsname, pair, CRED());

		if (err == 0) {
			if (source == ZPROP_SRC_INHERITED)
				err = -1; /* does not need special handling */
			else
				err = zfs_prop_set_special(dsname, source,
				    pair);
			if (err == -1) {
				/*
				 * For better performance we build up a list of
				 * properties to set in a single transaction.
				 */
				err = nvlist_add_nvpair(genericnvl, pair);
			} else if (err != 0 && nvl != retrynvl) {
				/*
				 * This may be a spurious error caused by
				 * receiving quota and reservation out of order.
				 * Try again in a second pass.
				 */
				err = nvlist_add_nvpair(retrynvl, pair);
			}
		}

		/*
		 * Record whatever error is left at this point; a special
		 * setter failure that was successfully deferred to retrynvl
		 * has been reset to 0 above and is not reported yet.
		 */
		if (err != 0) {
			if (errlist != NULL)
				fnvlist_add_int32(errlist, propname, err);
			rv = err;
		}
	}

	/* Run the second pass once, over the deferred properties only. */
	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
		nvl = retrynvl;
		goto retry;
	}

	if (!nvlist_empty(genericnvl) &&
	    dsl_props_set(dsname, source, genericnvl) != 0) {
		/*
		 * If this fails, we still want to set as many properties as we
		 * can, so try setting them individually.
		 */
		pair = NULL;
		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
			const char *propname = nvpair_name(pair);
			int err = 0;

			propval = pair;
			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
				nvlist_t *attrs;
				attrs = fnvpair_value_nvlist(pair);
				propval = fnvlist_lookup_nvpair(attrs,
				    ZPROP_VALUE);
			}

			/*
			 * Pick the setter by value type: strings are set as
			 * strings, booleans mean "inherit", and everything
			 * else is set as a uint64.
			 */
			if (nvpair_type(propval) == DATA_TYPE_STRING) {
				strval = fnvpair_value_string(propval);
				err = dsl_prop_set_string(dsname, propname,
				    source, strval);
			} else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
				err = dsl_prop_inherit(dsname, propname,
				    source);
			} else {
				intval = fnvpair_value_uint64(propval);
				err = dsl_prop_set_int(dsname, propname, source,
				    intval);
			}

			if (err != 0) {
				if (errlist != NULL) {
					fnvlist_add_int32(errlist, propname,
					    err);
				}
				rv = err;
			}
		}
	}
	nvlist_free(genericnvl);
	nvlist_free(retrynvl);

	/* rv is the last error recorded, or 0 if none was */
	return (rv);
}
2774
2775/*
2776 * Check that all the properties are valid user properties.
2777 */
2778static int
2779zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2780{
2781	nvpair_t *pair = NULL;
2782	int error = 0;
2783
2784	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2785		const char *propname = nvpair_name(pair);
2786
2787		if (!zfs_prop_user(propname) ||
2788		    nvpair_type(pair) != DATA_TYPE_STRING)
2789			return (SET_ERROR(EINVAL));
2790
2791		if (error = zfs_secpolicy_write_perms(fsname,
2792		    ZFS_DELEG_PERM_USERPROP, CRED()))
2793			return (error);
2794
2795		if (strlen(propname) >= ZAP_MAXNAMELEN)
2796			return (SET_ERROR(ENAMETOOLONG));
2797
2798		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2799			return (E2BIG);
2800	}
2801	return (0);
2802}
2803
2804static void
2805props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2806{
2807	nvpair_t *pair;
2808
2809	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2810
2811	pair = NULL;
2812	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2813		if (nvlist_exists(skipped, nvpair_name(pair)))
2814			continue;
2815
2816		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2817	}
2818}
2819
2820static int
2821clear_received_props(const char *dsname, nvlist_t *props,
2822    nvlist_t *skipped)
2823{
2824	int err = 0;
2825	nvlist_t *cleared_props = NULL;
2826	props_skip(props, skipped, &cleared_props);
2827	if (!nvlist_empty(cleared_props)) {
2828		/*
2829		 * Acts on local properties until the dataset has received
2830		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2831		 */
2832		zprop_source_t flags = (ZPROP_SRC_NONE |
2833		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2834		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2835	}
2836	nvlist_free(cleared_props);
2837	return (err);
2838}
2839
2840/*
2841 * inputs:
2842 * zc_name		name of filesystem
2843 * zc_value		name of property to set
2844 * zc_nvlist_src{_size}	nvlist of properties to apply
2845 * zc_cookie		received properties flag
2846 *
2847 * outputs:
2848 * zc_nvlist_dst{_size} error for each unapplied received property
2849 */
2850static int
2851zfs_ioc_set_prop(zfs_cmd_t *zc)
2852{
2853	nvlist_t *nvl;
2854	boolean_t received = zc->zc_cookie;
2855	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2856	    ZPROP_SRC_LOCAL);
2857	nvlist_t *errors;
2858	int error;
2859
2860	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2861	    zc->zc_iflags, &nvl)) != 0)
2862		return (error);
2863
2864	if (received) {
2865		nvlist_t *origprops;
2866
2867		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2868			(void) clear_received_props(zc->zc_name,
2869			    origprops, nvl);
2870			nvlist_free(origprops);
2871		}
2872
2873		error = dsl_prop_set_hasrecvd(zc->zc_name);
2874	}
2875
2876	errors = fnvlist_alloc();
2877	if (error == 0)
2878		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2879
2880	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2881		(void) put_nvlist(zc, errors);
2882	}
2883
2884	nvlist_free(errors);
2885	nvlist_free(nvl);
2886	return (error);
2887}
2888
2889/*
2890 * inputs:
2891 * zc_name		name of filesystem
2892 * zc_value		name of property to inherit
2893 * zc_cookie		revert to received value if TRUE
2894 *
2895 * outputs:		none
2896 */
2897static int
2898zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2899{
2900	const char *propname = zc->zc_value;
2901	zfs_prop_t prop = zfs_name_to_prop(propname);
2902	boolean_t received = zc->zc_cookie;
2903	zprop_source_t source = (received
2904	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2905	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2906
2907	if (received) {
2908		nvlist_t *dummy;
2909		nvpair_t *pair;
2910		zprop_type_t type;
2911		int err;
2912
2913		/*
2914		 * zfs_prop_set_special() expects properties in the form of an
2915		 * nvpair with type info.
2916		 */
2917		if (prop == ZPROP_INVAL) {
2918			if (!zfs_prop_user(propname))
2919				return (SET_ERROR(EINVAL));
2920
2921			type = PROP_TYPE_STRING;
2922		} else if (prop == ZFS_PROP_VOLSIZE ||
2923		    prop == ZFS_PROP_VERSION) {
2924			return (SET_ERROR(EINVAL));
2925		} else {
2926			type = zfs_prop_get_type(prop);
2927		}
2928
2929		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2930
2931		switch (type) {
2932		case PROP_TYPE_STRING:
2933			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2934			break;
2935		case PROP_TYPE_NUMBER:
2936		case PROP_TYPE_INDEX:
2937			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2938			break;
2939		default:
2940			nvlist_free(dummy);
2941			return (SET_ERROR(EINVAL));
2942		}
2943
2944		pair = nvlist_next_nvpair(dummy, NULL);
2945		err = zfs_prop_set_special(zc->zc_name, source, pair);
2946		nvlist_free(dummy);
2947		if (err != -1)
2948			return (err); /* special property already handled */
2949	} else {
2950		/*
2951		 * Only check this in the non-received case. We want to allow
2952		 * 'inherit -S' to revert non-inheritable properties like quota
2953		 * and reservation to the received or default values even though
2954		 * they are not considered inheritable.
2955		 */
2956		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2957			return (SET_ERROR(EINVAL));
2958	}
2959
2960	/* property name has been validated by zfs_secpolicy_inherit_prop() */
2961	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2962}
2963
2964static int
2965zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2966{
2967	nvlist_t *props;
2968	spa_t *spa;
2969	int error;
2970	nvpair_t *pair;
2971
2972	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2973	    zc->zc_iflags, &props))
2974		return (error);
2975
2976	/*
2977	 * If the only property is the configfile, then just do a spa_lookup()
2978	 * to handle the faulted case.
2979	 */
2980	pair = nvlist_next_nvpair(props, NULL);
2981	if (pair != NULL && strcmp(nvpair_name(pair),
2982	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2983	    nvlist_next_nvpair(props, pair) == NULL) {
2984		mutex_enter(&spa_namespace_lock);
2985		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2986			spa_configfile_set(spa, props, B_FALSE);
2987			spa_write_cachefile(spa, B_FALSE, B_TRUE);
2988		}
2989		mutex_exit(&spa_namespace_lock);
2990		if (spa != NULL) {
2991			nvlist_free(props);
2992			return (0);
2993		}
2994	}
2995
2996	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2997		nvlist_free(props);
2998		return (error);
2999	}
3000
3001	error = spa_prop_set(spa, props);
3002
3003	nvlist_free(props);
3004	spa_close(spa, FTAG);
3005
3006	return (error);
3007}
3008
3009static int
3010zfs_ioc_pool_get_props(zfs_cmd_t *zc)
3011{
3012	spa_t *spa;
3013	int error;
3014	nvlist_t *nvp = NULL;
3015
3016	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
3017		/*
3018		 * If the pool is faulted, there may be properties we can still
3019		 * get (such as altroot and cachefile), so attempt to get them
3020		 * anyway.
3021		 */
3022		mutex_enter(&spa_namespace_lock);
3023		if ((spa = spa_lookup(zc->zc_name)) != NULL)
3024			error = spa_prop_get(spa, &nvp);
3025		mutex_exit(&spa_namespace_lock);
3026	} else {
3027		error = spa_prop_get(spa, &nvp);
3028		spa_close(spa, FTAG);
3029	}
3030
3031	if (error == 0 && zc->zc_nvlist_dst != 0)
3032		error = put_nvlist(zc, nvp);
3033	else
3034		error = SET_ERROR(EFAULT);
3035
3036	nvlist_free(nvp);
3037	return (error);
3038}
3039
3040/*
3041 * inputs:
3042 * zc_name		name of filesystem
3043 * zc_nvlist_src{_size}	nvlist of delegated permissions
3044 * zc_perm_action	allow/unallow flag
3045 *
3046 * outputs:		none
3047 */
3048static int
3049zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3050{
3051	int error;
3052	nvlist_t *fsaclnv = NULL;
3053
3054	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3055	    zc->zc_iflags, &fsaclnv)) != 0)
3056		return (error);
3057
3058	/*
3059	 * Verify nvlist is constructed correctly
3060	 */
3061	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
3062		nvlist_free(fsaclnv);
3063		return (SET_ERROR(EINVAL));
3064	}
3065
3066	/*
3067	 * If we don't have PRIV_SYS_MOUNT, then validate
3068	 * that user is allowed to hand out each permission in
3069	 * the nvlist(s)
3070	 */
3071
3072	error = secpolicy_zfs(CRED());
3073	if (error != 0) {
3074		if (zc->zc_perm_action == B_FALSE) {
3075			error = dsl_deleg_can_allow(zc->zc_name,
3076			    fsaclnv, CRED());
3077		} else {
3078			error = dsl_deleg_can_unallow(zc->zc_name,
3079			    fsaclnv, CRED());
3080		}
3081	}
3082
3083	if (error == 0)
3084		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3085
3086	nvlist_free(fsaclnv);
3087	return (error);
3088}
3089
3090/*
3091 * inputs:
3092 * zc_name		name of filesystem
3093 *
3094 * outputs:
 * zc_nvlist_dst{_size}	nvlist of delegated permissions
3096 */
3097static int
3098zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3099{
3100	nvlist_t *nvp;
3101	int error;
3102
3103	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3104		error = put_nvlist(zc, nvp);
3105		nvlist_free(nvp);
3106	}
3107
3108	return (error);
3109}
3110
3111/* ARGSUSED */
3112static void
3113zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3114{
3115	zfs_creat_t *zct = arg;
3116
3117	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3118}
3119
3120#define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3121
3122/*
3123 * inputs:
3124 * os			parent objset pointer (NULL if root fs)
3125 * fuids_ok		fuids allowed in this version of the spa?
3126 * sa_ok		SAs allowed in this version of the spa?
3127 * createprops		list of properties requested by creator
3128 *
3129 * outputs:
3130 * zplprops	values for the zplprops we attach to the master node object
3131 * is_ci	true if requested file system will be purely case-insensitive
3132 *
3133 * Determine the settings for utf8only, normalization and
3134 * casesensitivity.  Specific values may have been requested by the
3135 * creator and/or we can inherit values from the parent dataset.  If
3136 * the file system is of too early a vintage, a creator can not
3137 * request settings for these properties, even if the requested
3138 * setting is the default value.  We don't actually want to create dsl
3139 * properties for these, so remove them from the source nvlist after
3140 * processing.
3141 */
3142static int
3143zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3144    boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3145    nvlist_t *zplprops, boolean_t *is_ci)
3146{
3147	uint64_t sense = ZFS_PROP_UNDEFINED;
3148	uint64_t norm = ZFS_PROP_UNDEFINED;
3149	uint64_t u8 = ZFS_PROP_UNDEFINED;
3150
3151	ASSERT(zplprops != NULL);
3152
3153	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3154		return (SET_ERROR(EINVAL));
3155
3156	/*
3157	 * Pull out creator prop choices, if any.
3158	 */
3159	if (createprops) {
3160		(void) nvlist_lookup_uint64(createprops,
3161		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3162		(void) nvlist_lookup_uint64(createprops,
3163		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3164		(void) nvlist_remove_all(createprops,
3165		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3166		(void) nvlist_lookup_uint64(createprops,
3167		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3168		(void) nvlist_remove_all(createprops,
3169		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3170		(void) nvlist_lookup_uint64(createprops,
3171		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3172		(void) nvlist_remove_all(createprops,
3173		    zfs_prop_to_name(ZFS_PROP_CASE));
3174	}
3175
3176	/*
3177	 * If the zpl version requested is whacky or the file system
3178	 * or pool is version is too "young" to support normalization
3179	 * and the creator tried to set a value for one of the props,
3180	 * error out.
3181	 */
3182	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3183	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3184	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3185	    (zplver < ZPL_VERSION_NORMALIZATION &&
3186	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3187	    sense != ZFS_PROP_UNDEFINED)))
3188		return (SET_ERROR(ENOTSUP));
3189
3190	/*
3191	 * Put the version in the zplprops
3192	 */
3193	VERIFY(nvlist_add_uint64(zplprops,
3194	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3195
3196	if (norm == ZFS_PROP_UNDEFINED)
3197		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3198	VERIFY(nvlist_add_uint64(zplprops,
3199	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3200
3201	/*
3202	 * If we're normalizing, names must always be valid UTF-8 strings.
3203	 */
3204	if (norm)
3205		u8 = 1;
3206	if (u8 == ZFS_PROP_UNDEFINED)
3207		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3208	VERIFY(nvlist_add_uint64(zplprops,
3209	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3210
3211	if (sense == ZFS_PROP_UNDEFINED)
3212		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3213	VERIFY(nvlist_add_uint64(zplprops,
3214	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3215
3216	if (is_ci)
3217		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3218
3219	return (0);
3220}
3221
3222static int
3223zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3224    nvlist_t *zplprops, boolean_t *is_ci)
3225{
3226	boolean_t fuids_ok, sa_ok;
3227	uint64_t zplver = ZPL_VERSION;
3228	objset_t *os = NULL;
3229	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3230	char *cp;
3231	spa_t *spa;
3232	uint64_t spa_vers;
3233	int error;
3234
3235	(void) strlcpy(parentname, dataset, sizeof (parentname));
3236	cp = strrchr(parentname, '/');
3237	ASSERT(cp != NULL);
3238	cp[0] = '\0';
3239
3240	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3241		return (error);
3242
3243	spa_vers = spa_version(spa);
3244	spa_close(spa, FTAG);
3245
3246	zplver = zfs_zpl_version_map(spa_vers);
3247	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3248	sa_ok = (zplver >= ZPL_VERSION_SA);
3249
3250	/*
3251	 * Open parent object set so we can inherit zplprop values.
3252	 */
3253	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3254		return (error);
3255
3256	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3257	    zplprops, is_ci);
3258	dmu_objset_rele(os, FTAG);
3259	return (error);
3260}
3261
3262static int
3263zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3264    nvlist_t *zplprops, boolean_t *is_ci)
3265{
3266	boolean_t fuids_ok;
3267	boolean_t sa_ok;
3268	uint64_t zplver = ZPL_VERSION;
3269	int error;
3270
3271	zplver = zfs_zpl_version_map(spa_vers);
3272	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3273	sa_ok = (zplver >= ZPL_VERSION_SA);
3274
3275	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3276	    createprops, zplprops, is_ci);
3277	return (error);
3278}
3279
3280/*
3281 * innvl: {
3282 *     "type" -> dmu_objset_type_t (int32)
3283 *     (optional) "props" -> { prop -> value }
3284 *     (optional) "hidden_args" -> { "wkeydata" -> value }
3285 *         raw uint8_t array of encryption wrapping key data (32 bytes)
3286 * }
3287 *
3288 * outnvl: propname -> error code (int32)
3289 */
3290
/*
 * Key table matching the innvl layout of zfs_ioc_create(): each entry gives
 * a key name, its nvpair data type and flags.  "type" carries no flags; the
 * other two are marked ZK_OPTIONAL.
 * NOTE(review): presumably consumed by the ioctl registration/validation
 * code outside this section -- confirm.
 */
static const zfs_ioc_key_t zfs_keys_create[] = {
	{"type",	DATA_TYPE_INT32,	0},
	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
};
3296
3297static int
3298zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3299{
3300	int error = 0;
3301	zfs_creat_t zct = { 0 };
3302	nvlist_t *nvprops = NULL;
3303	nvlist_t *hidden_args = NULL;
3304	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3305	dmu_objset_type_t type;
3306	boolean_t is_insensitive = B_FALSE;
3307	dsl_crypto_params_t *dcp = NULL;
3308
3309	type = (dmu_objset_type_t)fnvlist_lookup_int32(innvl, "type");
3310	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3311	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
3312
3313	switch (type) {
3314	case DMU_OST_ZFS:
3315		cbfunc = zfs_create_cb;
3316		break;
3317
3318	case DMU_OST_ZVOL:
3319		cbfunc = zvol_create_cb;
3320		break;
3321
3322	default:
3323		cbfunc = NULL;
3324		break;
3325	}
3326	if (strchr(fsname, '@') ||
3327	    strchr(fsname, '%'))
3328		return (SET_ERROR(EINVAL));
3329
3330	zct.zct_props = nvprops;
3331
3332	if (cbfunc == NULL)
3333		return (SET_ERROR(EINVAL));
3334
3335	if (type == DMU_OST_ZVOL) {
3336		uint64_t volsize, volblocksize;
3337
3338		if (nvprops == NULL)
3339			return (SET_ERROR(EINVAL));
3340		if (nvlist_lookup_uint64(nvprops,
3341		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3342			return (SET_ERROR(EINVAL));
3343
3344		if ((error = nvlist_lookup_uint64(nvprops,
3345		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3346		    &volblocksize)) != 0 && error != ENOENT)
3347			return (SET_ERROR(EINVAL));
3348
3349		if (error != 0)
3350			volblocksize = zfs_prop_default_numeric(
3351			    ZFS_PROP_VOLBLOCKSIZE);
3352
3353		if ((error = zvol_check_volblocksize(
3354		    volblocksize)) != 0 ||
3355		    (error = zvol_check_volsize(volsize,
3356		    volblocksize)) != 0)
3357			return (error);
3358	} else if (type == DMU_OST_ZFS) {
3359		int error;
3360
3361		/*
3362		 * We have to have normalization and
3363		 * case-folding flags correct when we do the
3364		 * file system creation, so go figure them out
3365		 * now.
3366		 */
3367		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3368		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3369		error = zfs_fill_zplprops(fsname, nvprops,
3370		    zct.zct_zplprops, &is_insensitive);
3371		if (error != 0) {
3372			nvlist_free(zct.zct_zplprops);
3373			return (error);
3374		}
3375	}
3376
3377	error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
3378	    hidden_args, &dcp);
3379	if (error != 0) {
3380		nvlist_free(zct.zct_zplprops);
3381		return (error);
3382	}
3383
3384	error = dmu_objset_create(fsname, type,
3385	    is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
3386
3387	nvlist_free(zct.zct_zplprops);
3388	dsl_crypto_params_free(dcp, !!error);
3389
3390	/*
3391	 * It would be nice to do this atomically.
3392	 */
3393	if (error == 0) {
3394		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3395		    nvprops, outnvl);
3396		if (error != 0)
3397			(void) dsl_destroy_head(fsname);
3398	}
3399	return (error);
3400}
3401
3402/*
3403 * innvl: {
3404 *     "origin" -> name of origin snapshot
3405 *     (optional) "props" -> { prop -> value }
3406 *     (optional) "hidden_args" -> { "wkeydata" -> value }
3407 *         raw uint8_t array of encryption wrapping key data (32 bytes)
3408 * }
3409 *
3410 * outnvl: propname -> error code (int32)
3411 */
static const zfs_ioc_key_t zfs_keys_clone[] = {
	/* key name, nvpair data type, flags (ZK_OPTIONAL: may be omitted) */
	{"origin",	DATA_TYPE_STRING,	0},
	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
};
3417
3418static int
3419zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3420{
3421	int error = 0;
3422	nvlist_t *nvprops = NULL;
3423	char *origin_name;
3424
3425	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3426		return (SET_ERROR(EINVAL));
3427	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3428
3429	if (strchr(fsname, '@') ||
3430	    strchr(fsname, '%'))
3431		return (SET_ERROR(EINVAL));
3432
3433	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3434		return (SET_ERROR(EINVAL));
3435
3436	error = dmu_objset_clone(fsname, origin_name);
3437
3438	/*
3439	 * It would be nice to do this atomically.
3440	 */
3441	if (error == 0) {
3442		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3443		    nvprops, outnvl);
3444		if (error != 0)
3445			(void) dsl_destroy_head(fsname);
3446	}
3447	return (error);
3448}
3449
/* Key table for the remap ioctl: intentionally empty, no innvl keys. */
static const zfs_ioc_key_t zfs_keys_remap[] = {
	/* no nvl keys */
};
3453
3454/* ARGSUSED */
3455static int
3456zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3457{
3458	if (strchr(fsname, '@') ||
3459	    strchr(fsname, '%'))
3460		return (SET_ERROR(EINVAL));
3461
3462	return (dmu_objset_remap_indirects(fsname));
3463}
3464
3465/*
3466 * innvl: {
3467 *     "snaps" -> { snapshot1, snapshot2 }
3468 *     (optional) "props" -> { prop -> value (string) }
3469 * }
3470 *
3471 * outnvl: snapshot -> error code (int32)
3472 */
static const zfs_ioc_key_t zfs_keys_snapshot[] = {
	/* key name, nvpair data type, flags (ZK_OPTIONAL: may be omitted) */
	{"snaps",	DATA_TYPE_NVLIST,	0},
	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
};
3477
3478static int
3479zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3480{
3481	nvlist_t *snaps;
3482	nvlist_t *props = NULL;
3483	int error, poollen;
3484	nvpair_t *pair;
3485
3486	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3487	if ((error = zfs_check_userprops(poolname, props)) != 0)
3488		return (error);
3489
3490	if (!nvlist_empty(props) &&
3491	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3492		return (SET_ERROR(ENOTSUP));
3493
3494	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
3495	poollen = strlen(poolname);
3496	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3497	    pair = nvlist_next_nvpair(snaps, pair)) {
3498		const char *name = nvpair_name(pair);
3499		const char *cp = strchr(name, '@');
3500
3501		/*
3502		 * The snap name must contain an @, and the part after it must
3503		 * contain only valid characters.
3504		 */
3505		if (cp == NULL ||
3506		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3507			return (SET_ERROR(EINVAL));
3508
3509		/*
3510		 * The snap must be in the specified pool.
3511		 */
3512		if (strncmp(name, poolname, poollen) != 0 ||
3513		    (name[poollen] != '/' && name[poollen] != '@'))
3514			return (SET_ERROR(EXDEV));
3515
3516		/* This must be the only snap of this fs. */
3517		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3518		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3519			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3520			    == 0) {
3521				return (SET_ERROR(EXDEV));
3522			}
3523		}
3524	}
3525
3526	error = dsl_dataset_snapshot(snaps, props, outnvl);
3527	return (error);
3528}
3529
3530/*
3531 * innvl: "message" -> string
3532 */
static const zfs_ioc_key_t zfs_keys_log_history[] = {
	/* key name, nvpair data type, flags */
	{"message",	DATA_TYPE_STRING,	0},
};
3536
3537/* ARGSUSED */
3538static int
3539zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3540{
3541	char *message;
3542	spa_t *spa;
3543	int error;
3544	char *poolname;
3545
3546	/*
3547	 * The poolname in the ioctl is not set, we get it from the TSD,
3548	 * which was set at the end of the last successful ioctl that allows
3549	 * logging.  The secpolicy func already checked that it is set.
3550	 * Only one log ioctl is allowed after each successful ioctl, so
3551	 * we clear the TSD here.
3552	 */
3553	poolname = tsd_get(zfs_allow_log_key);
3554	(void) tsd_set(zfs_allow_log_key, NULL);
3555	error = spa_open(poolname, &spa, FTAG);
3556	strfree(poolname);
3557	if (error != 0)
3558		return (error);
3559
3560	message = fnvlist_lookup_string(innvl, "message");
3561
3562	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3563		spa_close(spa, FTAG);
3564		return (SET_ERROR(ENOTSUP));
3565	}
3566
3567	error = spa_history_log(spa, message);
3568	spa_close(spa, FTAG);
3569	return (error);
3570}
3571
3572/*
3573 * The dp_config_rwlock must not be held when calling this, because the
3574 * unmount may need to write out data.
3575 *
3576 * This function is best-effort.  Callers must deal gracefully if it
3577 * remains mounted (or is remounted after this call).
3578 *
3579 * Returns 0 if the argument is not a snapshot, or it is not currently a
3580 * filesystem, or we were able to unmount it.  Returns error code otherwise.
3581 */
3582void
3583zfs_unmount_snap(const char *snapname)
3584{
3585	vfs_t *vfsp = NULL;
3586	zfsvfs_t *zfsvfs = NULL;
3587
3588	if (strchr(snapname, '@') == NULL)
3589		return;
3590
3591	int err = getzfsvfs(snapname, &zfsvfs);
3592	if (err != 0) {
3593		ASSERT3P(zfsvfs, ==, NULL);
3594		return;
3595	}
3596	vfsp = zfsvfs->z_vfs;
3597
3598	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3599
3600	err = vn_vfswlock(vfsp->vfs_vnodecovered);
3601	VFS_RELE(vfsp);
3602	if (err != 0)
3603		return;
3604
3605	/*
3606	 * Always force the unmount for snapshots.
3607	 */
3608	(void) dounmount(vfsp, MS_FORCE, kcred);
3609}
3610
/*
 * Callback-shaped wrapper around zfs_unmount_snap(): unmounts the named
 * snapshot, ignores 'arg', and always returns 0.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	zfs_unmount_snap(snapname);
	return (0);
}
3618
3619/*
3620 * When a clone is destroyed, its origin may also need to be destroyed,
3621 * in which case it must be unmounted.  This routine will do that unmount
3622 * if necessary.
3623 */
3624void
3625zfs_destroy_unmount_origin(const char *fsname)
3626{
3627	int error;
3628	objset_t *os;
3629	dsl_dataset_t *ds;
3630
3631	error = dmu_objset_hold(fsname, FTAG, &os);
3632	if (error != 0)
3633		return;
3634	ds = dmu_objset_ds(os);
3635	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3636		char originname[ZFS_MAX_DATASET_NAME_LEN];
3637		dsl_dataset_name(ds->ds_prev, originname);
3638		dmu_objset_rele(os, FTAG);
3639		zfs_unmount_snap(originname);
3640	} else {
3641		dmu_objset_rele(os, FTAG);
3642	}
3643}
3644
3645/*
3646 * innvl: {
3647 *     "snaps" -> { snapshot1, snapshot2 }
3648 *     (optional boolean) "defer"
3649 * }
3650 *
3651 * outnvl: snapshot -> error code (int32)
3652 *
3653 */
static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = {
	/* key name, nvpair data type, flags (ZK_OPTIONAL: may be omitted) */
	{"snaps",	DATA_TYPE_NVLIST,	0},
	{"defer",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
};
3658
3659/* ARGSUSED */
3660static int
3661zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3662{
3663	nvlist_t *snaps;
3664	nvpair_t *pair;
3665	boolean_t defer;
3666
3667	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3668		return (SET_ERROR(EINVAL));
3669	defer = nvlist_exists(innvl, "defer");
3670
3671	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3672	    pair = nvlist_next_nvpair(snaps, pair)) {
3673		zfs_unmount_snap(nvpair_name(pair));
3674	}
3675
3676	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3677}
3678
3679/*
3680 * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3681 * All bookmarks must be in the same pool.
3682 *
3683 * innvl: {
3684 *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3685 * }
3686 *
3687 * outnvl: bookmark -> error code (int32)
3688 *
3689 */
static const zfs_ioc_key_t zfs_keys_bookmark[] = {
	/*
	 * Each innvl entry maps a bookmark name to a snapshot name (string).
	 * NOTE(review): ZK_WILDCARDLIST presumably allows any number of
	 * arbitrarily named entries -- confirm against the key validator.
	 */
	{"<bookmark>...",	DATA_TYPE_STRING,	ZK_WILDCARDLIST},
};
3693
3694/* ARGSUSED */
3695static int
3696zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3697{
3698	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3699	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3700		char *snap_name;
3701
3702		/*
3703		 * Verify the snapshot argument.
3704		 */
3705		if (nvpair_value_string(pair, &snap_name) != 0)
3706			return (SET_ERROR(EINVAL));
3707
3708
3709		/* Verify that the keys (bookmarks) are unique */
3710		for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3711		    pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3712			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3713				return (SET_ERROR(EINVAL));
3714		}
3715	}
3716
3717	return (dsl_bookmark_create(innvl, outnvl));
3718}
3719
3720/*
3721 * innvl: {
3722 *     property 1, property 2, ...
3723 * }
3724 *
3725 * outnvl: {
3726 *     bookmark name 1 -> { property 1, property 2, ... },
3727 *     bookmark name 2 -> { property 1, property 2, ... }
3728 * }
3729 *
3730 */
static const zfs_ioc_key_t zfs_keys_get_bookmarks[] = {
	/*
	 * Property names are passed as boolean-typed entries; the list is
	 * flagged both wildcard and optional.
	 */
	{"<property>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST | ZK_OPTIONAL},
};
3734
/*
 * Thin wrapper: passes the dataset name and both nvlists straight through
 * to dsl_get_bookmarks() and returns its result.
 */
static int
zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
{
	return (dsl_get_bookmarks(fsname, innvl, outnvl));
}
3740
3741/*
3742 * innvl: {
3743 *     bookmark name 1, bookmark name 2
3744 * }
3745 *
3746 * outnvl: bookmark -> error code (int32)
3747 *
3748 */
static const zfs_ioc_key_t zfs_keys_destroy_bookmarks[] = {
	/* Bookmark names are passed as boolean-typed wildcard entries. */
	{"<bookmark>...",	DATA_TYPE_BOOLEAN,	ZK_WILDCARDLIST},
};
3752
3753static int
3754zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3755    nvlist_t *outnvl)
3756{
3757	int error, poollen;
3758
3759	poollen = strlen(poolname);
3760	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3761	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3762		const char *name = nvpair_name(pair);
3763		const char *cp = strchr(name, '#');
3764
3765		/*
3766		 * The bookmark name must contain an #, and the part after it
3767		 * must contain only valid characters.
3768		 */
3769		if (cp == NULL ||
3770		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3771			return (SET_ERROR(EINVAL));
3772
3773		/*
3774		 * The bookmark must be in the specified pool.
3775		 */
3776		if (strncmp(name, poolname, poollen) != 0 ||
3777		    (name[poollen] != '/' && name[poollen] != '#'))
3778			return (SET_ERROR(EXDEV));
3779	}
3780
3781	error = dsl_bookmark_destroy(innvl, outnvl);
3782	return (error);
3783}
3784
/*
 * Input keys for the channel program ioctl: the program string and its
 * argument (any data type) carry no flags; the sync flag and the
 * instruction and memory limits are flagged ZK_OPTIONAL.
 * NOTE(review): the handler looks these up via the ZCP_ARG_* macros, which
 * presumably expand to the same strings -- confirm.
 */
static const zfs_ioc_key_t zfs_keys_channel_program[] = {
	{"program",	DATA_TYPE_STRING,		0},
	{"arg",		DATA_TYPE_ANY,			0},
	{"sync",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
	{"instrlimit",	DATA_TYPE_UINT64,		ZK_OPTIONAL},
	{"memlimit",	DATA_TYPE_UINT64,		ZK_OPTIONAL},
};
3792
3793static int
3794zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3795    nvlist_t *outnvl)
3796{
3797	char *program;
3798	uint64_t instrlimit, memlimit;
3799	boolean_t sync_flag;
3800	nvpair_t *nvarg = NULL;
3801
3802	program = fnvlist_lookup_string(innvl, ZCP_ARG_PROGRAM);
3803	if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3804		sync_flag = B_TRUE;
3805	}
3806	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3807		instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3808	}
3809	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3810		memlimit = ZCP_DEFAULT_MEMLIMIT;
3811	}
3812	nvarg = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST);
3813
3814	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3815		return (EINVAL);
3816	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3817		return (EINVAL);
3818
3819	return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
3820	    nvarg, outnvl));
3821}
3822
3823/*
3824 * innvl: unused
3825 * outnvl: empty
3826 */
/* Key table for the pool checkpoint ioctl: intentionally empty. */
static const zfs_ioc_key_t zfs_keys_pool_checkpoint[] = {
	/* no nvl keys */
};
3830
/*
 * Thin wrapper: returns the result of spa_checkpoint() on the named pool.
 * Neither nvlist is touched.
 */
/* ARGSUSED */
static int
zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
{
	return (spa_checkpoint(poolname));
}
3837
3838/*
3839 * innvl: unused
3840 * outnvl: empty
3841 */
/* Key table for the discard-checkpoint ioctl: intentionally empty. */
static const zfs_ioc_key_t zfs_keys_pool_discard_checkpoint[] = {
	/* no nvl keys */
};
3845
/*
 * Thin wrapper: returns the result of spa_checkpoint_discard() on the
 * named pool.  Neither nvlist is touched.
 */
/* ARGSUSED */
static int
zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
    nvlist_t *outnvl)
{
	return (spa_checkpoint_discard(poolname));
}
3853
3854/*
3855 * inputs:
3856 * zc_name		name of dataset to destroy
3857 * zc_defer_destroy	mark for deferred destroy
3858 *
3859 * outputs:		none
3860 */
3861static int
3862zfs_ioc_destroy(zfs_cmd_t *zc)
3863{
3864	objset_t *os;
3865	dmu_objset_type_t ost;
3866	int err;
3867
3868	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
3869	if (err != 0)
3870		return (err);
3871	ost = dmu_objset_type(os);
3872	dmu_objset_rele(os, FTAG);
3873
3874	if (ost == DMU_OST_ZFS)
3875		zfs_unmount_snap(zc->zc_name);
3876
3877	if (strchr(zc->zc_name, '@')) {
3878		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3879	} else {
3880		err = dsl_destroy_head(zc->zc_name);
3881		if (err == EEXIST) {
3882			/*
3883			 * It is possible that the given DS may have
3884			 * hidden child (%recv) datasets - "leftovers"
3885			 * resulting from the previously interrupted
3886			 * 'zfs receive'.
3887			 *
3888			 * 6 extra bytes for /%recv
3889			 */
3890			char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
3891
3892			if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
3893			    zc->zc_name, recv_clone_name) >=
3894			    sizeof (namebuf))
3895				return (SET_ERROR(EINVAL));
3896
3897			/*
3898			 * Try to remove the hidden child (%recv) and after
3899			 * that try to remove the target dataset.
3900			 * If the hidden child (%recv) does not exist
3901			 * the original error (EEXIST) will be returned
3902			 */
3903			err = dsl_destroy_head(namebuf);
3904			if (err == 0)
3905				err = dsl_destroy_head(zc->zc_name);
3906			else if (err == ENOENT)
3907				err = SET_ERROR(EEXIST);
3908		}
3909	}
3910	if (ost == DMU_OST_ZVOL && err == 0)
3911		(void) zvol_remove_minor(zc->zc_name);
3912	return (err);
3913}
3914
/*
 * innvl: {
 *     "initialize_command" -> POOL_INITIALIZE_{CANCEL|START|SUSPEND} (uint64)
 *     "initialize_vdevs": { -> guids to initialize (nvlist)
 *         "vdev_path_1": vdev_guid_1, (uint64),
 *         "vdev_path_2": vdev_guid_2, (uint64),
 *         ...
 *     },
 * }
 *
 * outnvl: {
 *     "initialize_vdevs": { -> initialization errors (nvlist)
 *         "vdev_path_1": errno, see function body for possible errnos (uint64)
 *         "vdev_path_2": errno, ... (uint64)
 *         ...
 *     }
 * }
 *
 * EINVAL is returned for an unknown command or if any of the provided vdev
 * guids have been specified with a type other than uint64.
 */
static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
	/* key name, nvpair data type, flags */
	{ZPOOL_INITIALIZE_COMMAND,	DATA_TYPE_UINT64,	0},
	{ZPOOL_INITIALIZE_VDEVS,	DATA_TYPE_NVLIST,	0}
};
3940
3941static int
3942zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3943{
3944	uint64_t cmd_type;
3945	if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
3946	    &cmd_type) != 0) {
3947		return (SET_ERROR(EINVAL));
3948	}
3949
3950	if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
3951	    cmd_type == POOL_INITIALIZE_START ||
3952	    cmd_type == POOL_INITIALIZE_SUSPEND)) {
3953		return (SET_ERROR(EINVAL));
3954	}
3955
3956	nvlist_t *vdev_guids;
3957	if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
3958	    &vdev_guids) != 0) {
3959		return (SET_ERROR(EINVAL));
3960	}
3961
3962	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
3963	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
3964		uint64_t vdev_guid;
3965		if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
3966			return (SET_ERROR(EINVAL));
3967		}
3968	}
3969
3970	spa_t *spa;
3971	int error = spa_open(poolname, &spa, FTAG);
3972	if (error != 0)
3973		return (error);
3974
3975	nvlist_t *vdev_errlist = fnvlist_alloc();
3976	int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
3977	    vdev_errlist);
3978
3979	if (fnvlist_size(vdev_errlist) > 0) {
3980		fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
3981		    vdev_errlist);
3982	}
3983	fnvlist_free(vdev_errlist);
3984
3985	spa_close(spa, FTAG);
3986	return (total_errors > 0 ? EINVAL : 0);
3987}
3988
/*
 * innvl: {
 *     "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
 *     "trim_vdevs": { -> guids to TRIM (nvlist)
 *         "vdev_path_1": vdev_guid_1, (uint64),
 *         "vdev_path_2": vdev_guid_2, (uint64),
 *         ...
 *     },
 *     "trim_rate" -> Target TRIM rate in bytes/sec.
 *     "trim_secure" -> Set to request a secure TRIM.
 * }
 *
 * outnvl: {
 *     "trim_vdevs": { -> TRIM errors (nvlist)
 *         "vdev_path_1": errno, see function body for possible errnos (uint64)
 *         "vdev_path_2": errno, ... (uint64)
 *         ...
 *     }
 * }
 *
 * EINVAL is returned for an unknown command or if any of the provided vdev
 * guids have been specified with a type other than uint64.
 */
static const zfs_ioc_key_t zfs_keys_pool_trim[] = {
	/* key name, nvpair data type, flags (ZK_OPTIONAL: may be omitted) */
	{ZPOOL_TRIM_COMMAND,	DATA_TYPE_UINT64,		0},
	{ZPOOL_TRIM_VDEVS,	DATA_TYPE_NVLIST,		0},
	{ZPOOL_TRIM_RATE,	DATA_TYPE_UINT64,		ZK_OPTIONAL},
	{ZPOOL_TRIM_SECURE,	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
};
4018
4019static int
4020zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4021{
4022	uint64_t cmd_type;
4023	if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd_type) != 0)
4024		return (SET_ERROR(EINVAL));
4025
4026	if (!(cmd_type == POOL_TRIM_CANCEL ||
4027	    cmd_type == POOL_TRIM_START ||
4028	    cmd_type == POOL_TRIM_SUSPEND)) {
4029		return (SET_ERROR(EINVAL));
4030	}
4031
4032	nvlist_t *vdev_guids;
4033	if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &vdev_guids) != 0)
4034		return (SET_ERROR(EINVAL));
4035
4036	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4037	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4038		uint64_t vdev_guid;
4039		if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4040			return (SET_ERROR(EINVAL));
4041		}
4042	}
4043
4044	/* Optional, defaults to maximum rate when not provided */
4045	uint64_t rate;
4046	if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &rate) != 0)
4047		rate = 0;
4048
4049	/* Optional, defaults to standard TRIM when not provided */
4050	boolean_t secure;
4051	if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
4052	    &secure) != 0) {
4053		secure = B_FALSE;
4054	}
4055
4056	spa_t *spa;
4057	int error = spa_open(poolname, &spa, FTAG);
4058	if (error != 0)
4059		return (error);
4060
4061	nvlist_t *vdev_errlist = fnvlist_alloc();
4062	int total_errors = spa_vdev_trim(spa, vdev_guids, cmd_type,
4063	    rate, !!zfs_trim_metaslab_skip, secure, vdev_errlist);
4064
4065	if (fnvlist_size(vdev_errlist) > 0)
4066		fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, vdev_errlist);
4067
4068	fnvlist_free(vdev_errlist);
4069
4070	spa_close(spa, FTAG);
4071	return (total_errors > 0 ? EINVAL : 0);
4072}
4073
/*
 * fsname is name of dataset to rollback (to most recent snapshot)
 *
 * innvl: {
 *     (optional) "target" -> name of expected target snapshot (string)
 * }
 *
 * outnvl: "target" -> name of most recent snapshot
 */
static const zfs_ioc_key_t zfs_keys_rollback[] = {
	{"target",	DATA_TYPE_STRING,	ZK_OPTIONAL},
};
4085
4086/* ARGSUSED */
4087static int
4088zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4089{
4090	zfsvfs_t *zfsvfs;
4091	char *target = NULL;
4092	int error;
4093
4094	(void) nvlist_lookup_string(innvl, "target", &target);
4095	if (target != NULL) {
4096		const char *cp = strchr(target, '@');
4097
4098		/*
4099		 * The snap name must contain an @, and the part after it must
4100		 * contain only valid characters.
4101		 */
4102		if (cp == NULL ||
4103		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4104			return (SET_ERROR(EINVAL));
4105	}
4106
4107	if (getzfsvfs(fsname, &zfsvfs) == 0) {
4108		dsl_dataset_t *ds;
4109
4110		ds = dmu_objset_ds(zfsvfs->z_os);
4111		error = zfs_suspend_fs(zfsvfs);
4112		if (error == 0) {
4113			int resume_err;
4114
4115			error = dsl_dataset_rollback(fsname, target, zfsvfs,
4116			    outnvl);
4117			resume_err = zfs_resume_fs(zfsvfs, ds);
4118			error = error ? error : resume_err;
4119		}
4120		VFS_RELE(zfsvfs->z_vfs);
4121	} else {
4122		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
4123	}
4124	return (error);
4125}
4126
4127static int
4128recursive_unmount(const char *fsname, void *arg)
4129{
4130	const char *snapname = arg;
4131	char fullname[ZFS_MAX_DATASET_NAME_LEN];
4132
4133	(void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
4134	zfs_unmount_snap(fullname);
4135
4136	return (0);
4137}
4138
4139/*
4140 * inputs:
4141 * zc_name	old name of dataset
4142 * zc_value	new name of dataset
4143 * zc_cookie	recursive flag (only valid for snapshots)
4144 *
4145 * outputs:	none
4146 */
4147static int
4148zfs_ioc_rename(zfs_cmd_t *zc)
4149{
4150	objset_t *os;
4151	dmu_objset_type_t ost;
4152	boolean_t recursive = zc->zc_cookie & 1;
4153	char *at;
4154	int err;
4155
4156	/* "zfs rename" from and to ...%recv datasets should both fail */
4157	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4158	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
4159	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
4160	    dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4161	    strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
4162		return (SET_ERROR(EINVAL));
4163
4164	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4165	if (err != 0)
4166		return (err);
4167	ost = dmu_objset_type(os);
4168	dmu_objset_rele(os, FTAG);
4169
4170	at = strchr(zc->zc_name, '@');
4171	if (at != NULL) {
4172		/* snaps must be in same fs */
4173		int error;
4174
4175		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
4176			return (SET_ERROR(EXDEV));
4177		*at = '\0';
4178		if (ost == DMU_OST_ZFS) {
4179			error = dmu_objset_find(zc->zc_name,
4180			    recursive_unmount, at + 1,
4181			    recursive ? DS_FIND_CHILDREN : 0);
4182			if (error != 0) {
4183				*at = '@';
4184				return (error);
4185			}
4186		}
4187		error = dsl_dataset_rename_snapshot(zc->zc_name,
4188		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
4189		*at = '@';
4190
4191		return (error);
4192	} else {
4193		if (ost == DMU_OST_ZVOL)
4194			(void) zvol_remove_minor(zc->zc_name);
4195		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
4196	}
4197}
4198
4199static int
4200zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
4201{
4202	const char *propname = nvpair_name(pair);
4203	boolean_t issnap = (strchr(dsname, '@') != NULL);
4204	zfs_prop_t prop = zfs_name_to_prop(propname);
4205	uint64_t intval;
4206	int err;
4207
4208	if (prop == ZPROP_INVAL) {
4209		if (zfs_prop_user(propname)) {
4210			if (err = zfs_secpolicy_write_perms(dsname,
4211			    ZFS_DELEG_PERM_USERPROP, cr))
4212				return (err);
4213			return (0);
4214		}
4215
4216		if (!issnap && zfs_prop_userquota(propname)) {
4217			const char *perm = NULL;
4218			const char *uq_prefix =
4219			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
4220			const char *gq_prefix =
4221			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
4222			const char *uiq_prefix =
4223			    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
4224			const char *giq_prefix =
4225			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
4226			const char *pq_prefix =
4227			    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
4228			const char *piq_prefix = zfs_userquota_prop_prefixes[\
4229			    ZFS_PROP_PROJECTOBJQUOTA];
4230
4231			if (strncmp(propname, uq_prefix,
4232			    strlen(uq_prefix)) == 0) {
4233				perm = ZFS_DELEG_PERM_USERQUOTA;
4234			} else if (strncmp(propname, uiq_prefix,
4235			    strlen(uiq_prefix)) == 0) {
4236				perm = ZFS_DELEG_PERM_USEROBJQUOTA;
4237			} else if (strncmp(propname, gq_prefix,
4238			    strlen(gq_prefix)) == 0) {
4239				perm = ZFS_DELEG_PERM_GROUPQUOTA;
4240			} else if (strncmp(propname, giq_prefix,
4241			    strlen(giq_prefix)) == 0) {
4242				perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
4243			} else if (strncmp(propname, pq_prefix,
4244			    strlen(pq_prefix)) == 0) {
4245				perm = ZFS_DELEG_PERM_PROJECTQUOTA;
4246			} else if (strncmp(propname, piq_prefix,
4247			    strlen(piq_prefix)) == 0) {
4248				perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
4249			} else {
4250				/* {USER|GROUP|PROJECT}USED are read-only */
4251				return (SET_ERROR(EINVAL));
4252			}
4253
4254			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
4255				return (err);
4256			return (0);
4257		}
4258
4259		return (SET_ERROR(EINVAL));
4260	}
4261
4262	if (issnap)
4263		return (SET_ERROR(EINVAL));
4264
4265	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4266		/*
4267		 * dsl_prop_get_all_impl() returns properties in this
4268		 * format.
4269		 */
4270		nvlist_t *attrs;
4271		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4272		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4273		    &pair) == 0);
4274	}
4275
4276	/*
4277	 * Check that this value is valid for this pool version
4278	 */
4279	switch (prop) {
4280	case ZFS_PROP_COMPRESSION:
4281		/*
4282		 * If the user specified gzip compression, make sure
4283		 * the SPA supports it. We ignore any errors here since
4284		 * we'll catch them later.
4285		 */
4286		if (nvpair_value_uint64(pair, &intval) == 0) {
4287			if (intval >= ZIO_COMPRESS_GZIP_1 &&
4288			    intval <= ZIO_COMPRESS_GZIP_9 &&
4289			    zfs_earlier_version(dsname,
4290			    SPA_VERSION_GZIP_COMPRESSION)) {
4291				return (SET_ERROR(ENOTSUP));
4292			}
4293
4294			if (intval == ZIO_COMPRESS_ZLE &&
4295			    zfs_earlier_version(dsname,
4296			    SPA_VERSION_ZLE_COMPRESSION))
4297				return (SET_ERROR(ENOTSUP));
4298
4299			if (intval == ZIO_COMPRESS_LZ4) {
4300				spa_t *spa;
4301
4302				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4303					return (err);
4304
4305				if (!spa_feature_is_enabled(spa,
4306				    SPA_FEATURE_LZ4_COMPRESS)) {
4307					spa_close(spa, FTAG);
4308					return (SET_ERROR(ENOTSUP));
4309				}
4310				spa_close(spa, FTAG);
4311			}
4312
4313			/*
4314			 * If this is a bootable dataset then
4315			 * verify that the compression algorithm
4316			 * is supported for booting. We must return
4317			 * something other than ENOTSUP since it
4318			 * implies a downrev pool version.
4319			 */
4320			if (zfs_is_bootfs(dsname) &&
4321			    !BOOTFS_COMPRESS_VALID(intval)) {
4322				return (SET_ERROR(ERANGE));
4323			}
4324		}
4325		break;
4326
4327	case ZFS_PROP_COPIES:
4328		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4329			return (SET_ERROR(ENOTSUP));
4330		break;
4331
4332	case ZFS_PROP_RECORDSIZE:
4333		/* Record sizes above 128k need the feature to be enabled */
4334		if (nvpair_value_uint64(pair, &intval) == 0 &&
4335		    intval > SPA_OLD_MAXBLOCKSIZE) {
4336			spa_t *spa;
4337
4338			/*
4339			 * We don't allow setting the property above 1MB,
4340			 * unless the tunable has been changed.
4341			 */
4342			if (intval > zfs_max_recordsize ||
4343			    intval > SPA_MAXBLOCKSIZE)
4344				return (SET_ERROR(ERANGE));
4345
4346			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4347				return (err);
4348
4349			if (!spa_feature_is_enabled(spa,
4350			    SPA_FEATURE_LARGE_BLOCKS)) {
4351				spa_close(spa, FTAG);
4352				return (SET_ERROR(ENOTSUP));
4353			}
4354			spa_close(spa, FTAG);
4355		}
4356		break;
4357
4358	case ZFS_PROP_DNODESIZE:
4359		/* Dnode sizes above 512 need the feature to be enabled */
4360		if (nvpair_value_uint64(pair, &intval) == 0 &&
4361		    intval != ZFS_DNSIZE_LEGACY) {
4362			spa_t *spa;
4363
4364			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4365				return (err);
4366
4367			if (!spa_feature_is_enabled(spa,
4368			    SPA_FEATURE_LARGE_DNODE)) {
4369				spa_close(spa, FTAG);
4370				return (SET_ERROR(ENOTSUP));
4371			}
4372			spa_close(spa, FTAG);
4373		}
4374		break;
4375
4376	case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
4377		/*
4378		 * This property could require the allocation classes
4379		 * feature to be active for setting, however we allow
4380		 * it so that tests of settable properties succeed.
4381		 * The CLI will issue a warning in this case.
4382		 */
4383		break;
4384
4385	case ZFS_PROP_SHARESMB:
4386		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4387			return (SET_ERROR(ENOTSUP));
4388		break;
4389
4390	case ZFS_PROP_ACLINHERIT:
4391		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4392		    nvpair_value_uint64(pair, &intval) == 0) {
4393			if (intval == ZFS_ACL_PASSTHROUGH_X &&
4394			    zfs_earlier_version(dsname,
4395			    SPA_VERSION_PASSTHROUGH_X))
4396				return (SET_ERROR(ENOTSUP));
4397		}
4398		break;
4399
4400	case ZFS_PROP_CHECKSUM:
4401	case ZFS_PROP_DEDUP:
4402	{
4403		spa_feature_t feature;
4404		spa_t *spa;
4405
4406		/* dedup feature version checks */
4407		if (prop == ZFS_PROP_DEDUP &&
4408		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4409			return (SET_ERROR(ENOTSUP));
4410
4411		if (nvpair_value_uint64(pair, &intval) != 0)
4412			return (SET_ERROR(EINVAL));
4413
4414		/* check prop value is enabled in features */
4415		feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4416		if (feature == SPA_FEATURE_NONE)
4417			break;
4418
4419		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4420			return (err);
4421
4422		if (!spa_feature_is_enabled(spa, feature)) {
4423			spa_close(spa, FTAG);
4424			return (SET_ERROR(ENOTSUP));
4425		}
4426		spa_close(spa, FTAG);
4427		break;
4428	}
4429	}
4430
4431	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4432}
4433
4434/*
4435 * Checks for a race condition to make sure we don't increment a feature flag
4436 * multiple times.
4437 */
4438static int
4439zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4440{
4441	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4442	spa_feature_t *featurep = arg;
4443
4444	if (!spa_feature_is_active(spa, *featurep))
4445		return (0);
4446	else
4447		return (SET_ERROR(EBUSY));
4448}
4449
4450/*
4451 * The callback invoked on feature activation in the sync task caused by
4452 * zfs_prop_activate_feature.
4453 */
static void
zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
{
	/* The pool is taken from the transaction; arg points at the feature. */
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	spa_feature_t *featurep = arg;

	/* Bump the feature's count as part of this transaction. */
	spa_feature_incr(spa, *featurep, tx);
}
4462
4463/*
4464 * Activates a feature on a pool in response to a property setting. This
4465 * creates a new sync task which modifies the pool to reflect the feature
4466 * as being active.
4467 */
4468static int
4469zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4470{
4471	int err;
4472
4473	/* EBUSY here indicates that the feature is already active */
4474	err = dsl_sync_task(spa_name(spa),
4475	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4476	    &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4477
4478	if (err != 0 && err != EBUSY)
4479		return (err);
4480	else
4481		return (0);
4482}
4483
4484/*
4485 * Removes properties from the given props list that fail permission checks
4486 * needed to clear them and to restore them in case of a receive error. For each
4487 * property, make sure we have both set and inherit permissions.
4488 *
4489 * Returns the first error encountered if any permission checks fail. If the
4490 * caller provides a non-NULL errlist, it also gives the complete list of names
4491 * of all the properties that failed a permission check along with the
4492 * corresponding error numbers. The caller is responsible for freeing the
4493 * returned errlist.
4494 *
4495 * If every property checks out successfully, zero is returned and the list
4496 * pointed at by errlist is NULL.
4497 */
4498static int
4499zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4500{
4501	zfs_cmd_t *zc;
4502	nvpair_t *pair, *next_pair;
4503	nvlist_t *errors;
4504	int err, rv = 0;
4505
4506	if (props == NULL)
4507		return (0);
4508
4509	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4510
4511	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4512	(void) strcpy(zc->zc_name, dataset);
4513	pair = nvlist_next_nvpair(props, NULL);
4514	while (pair != NULL) {
4515		next_pair = nvlist_next_nvpair(props, pair);
4516
4517		(void) strcpy(zc->zc_value, nvpair_name(pair));
4518		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4519		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4520			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4521			VERIFY(nvlist_add_int32(errors,
4522			    zc->zc_value, err) == 0);
4523		}
4524		pair = next_pair;
4525	}
4526	kmem_free(zc, sizeof (zfs_cmd_t));
4527
4528	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4529		nvlist_free(errors);
4530		errors = NULL;
4531	} else {
4532		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4533	}
4534
4535	if (errlist == NULL)
4536		nvlist_free(errors);
4537	else
4538		*errlist = errors;
4539
4540	return (rv);
4541}
4542
4543static boolean_t
4544propval_equals(nvpair_t *p1, nvpair_t *p2)
4545{
4546	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4547		/* dsl_prop_get_all_impl() format */
4548		nvlist_t *attrs;
4549		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4550		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4551		    &p1) == 0);
4552	}
4553
4554	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4555		nvlist_t *attrs;
4556		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4557		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4558		    &p2) == 0);
4559	}
4560
4561	if (nvpair_type(p1) != nvpair_type(p2))
4562		return (B_FALSE);
4563
4564	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4565		char *valstr1, *valstr2;
4566
4567		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4568		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4569		return (strcmp(valstr1, valstr2) == 0);
4570	} else {
4571		uint64_t intval1, intval2;
4572
4573		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4574		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4575		return (intval1 == intval2);
4576	}
4577}
4578
4579/*
4580 * Remove properties from props if they are not going to change (as determined
4581 * by comparison with origprops). Remove them from origprops as well, since we
4582 * do not need to clear or restore properties that won't change.
4583 */
4584static void
4585props_reduce(nvlist_t *props, nvlist_t *origprops)
4586{
4587	nvpair_t *pair, *next_pair;
4588
4589	if (origprops == NULL)
4590		return; /* all props need to be received */
4591
4592	pair = nvlist_next_nvpair(props, NULL);
4593	while (pair != NULL) {
4594		const char *propname = nvpair_name(pair);
4595		nvpair_t *match;
4596
4597		next_pair = nvlist_next_nvpair(props, pair);
4598
4599		if ((nvlist_lookup_nvpair(origprops, propname,
4600		    &match) != 0) || !propval_equals(pair, match))
4601			goto next; /* need to set received value */
4602
4603		/* don't clear the existing received value */
4604		(void) nvlist_remove_nvpair(origprops, match);
4605		/* don't bother receiving the property */
4606		(void) nvlist_remove_nvpair(props, pair);
4607next:
4608		pair = next_pair;
4609	}
4610}
4611
4612/*
4613 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4614 * For example, refquota cannot be set until after the receipt of a dataset,
4615 * because in replication streams, an older/earlier snapshot may exceed the
4616 * refquota.  We want to receive the older/earlier snapshot, but setting
4617 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4618 * the older/earlier snapshot from being received (with EDQUOT).
4619 *
4620 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4621 *
4622 * libzfs will need to be judicious handling errors encountered by props
4623 * extracted by this function.
4624 */
4625static nvlist_t *
4626extract_delay_props(nvlist_t *props)
4627{
4628	nvlist_t *delayprops;
4629	nvpair_t *nvp, *tmp;
4630	static const zfs_prop_t delayable[] = {
4631		ZFS_PROP_REFQUOTA,
4632		ZFS_PROP_KEYLOCATION,
4633		0
4634	};
4635	int i;
4636
4637	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4638
4639	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4640	    nvp = nvlist_next_nvpair(props, nvp)) {
4641		/*
4642		 * strcmp() is safe because zfs_prop_to_name() always returns
4643		 * a bounded string.
4644		 */
4645		for (i = 0; delayable[i] != 0; i++) {
4646			if (strcmp(zfs_prop_to_name(delayable[i]),
4647			    nvpair_name(nvp)) == 0) {
4648				break;
4649			}
4650		}
4651		if (delayable[i] != 0) {
4652			tmp = nvlist_prev_nvpair(props, nvp);
4653			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4654			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4655			nvp = tmp;
4656		}
4657	}
4658
4659	if (nvlist_empty(delayprops)) {
4660		nvlist_free(delayprops);
4661		delayprops = NULL;
4662	}
4663	return (delayprops);
4664}
4665
#ifdef	DEBUG
/*
 * DEBUG-only error injection switch.  When set, zfs_ioc_recv_impl() clears
 * it and forces its result to an error after the receive has run, which
 * drives the "restore the original props" path of that function.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
4669
4670/*
4671 * nvlist 'errors' is always allocated. It will contain descriptions of
4672 * encountered errors, if any. It's the callers responsibility to free.
4673 */
4674static int
4675zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
4676    nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force,
4677    boolean_t resumable, int input_fd, dmu_replay_record_t *begin_record,
4678    int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags,
4679    uint64_t *action_handle, nvlist_t **errors)
4680{
4681	dmu_recv_cookie_t drc;
4682	int error = 0;
4683	int props_error = 0;
4684	offset_t off;
4685	nvlist_t *local_delayprops = NULL;
4686	nvlist_t *recv_delayprops = NULL;
4687	nvlist_t *origprops = NULL; /* existing properties */
4688	nvlist_t *origrecvd = NULL; /* existing received properties */
4689	boolean_t first_recvd_props = B_FALSE;
4690	file_t *input_fp;
4691
4692	*read_bytes = 0;
4693	*errflags = 0;
4694	*errors = fnvlist_alloc();
4695
4696	input_fp = getf(input_fd);
4697	if (input_fp == NULL)
4698		return (SET_ERROR(EBADF));
4699
4700	error = dmu_recv_begin(tofs, tosnap, begin_record, force,
4701	    resumable, localprops, hidden_args, origin, &drc);
4702	if (error != 0)
4703		goto out;
4704
4705	/*
4706	 * Set properties before we receive the stream so that they are applied
4707	 * to the new data. Note that we must call dmu_recv_stream() if
4708	 * dmu_recv_begin() succeeds.
4709	 */
4710	if (recvprops != NULL && !drc.drc_newfs) {
4711		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
4712		    SPA_VERSION_RECVD_PROPS &&
4713		    !dsl_prop_get_hasrecvd(tofs))
4714			first_recvd_props = B_TRUE;
4715
4716		/*
4717		 * If new received properties are supplied, they are to
4718		 * completely replace the existing received properties,
4719		 * so stash away the existing ones.
4720		 */
4721		if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
4722			nvlist_t *errlist = NULL;
4723			/*
4724			 * Don't bother writing a property if its value won't
4725			 * change (and avoid the unnecessary security checks).
4726			 *
4727			 * The first receive after SPA_VERSION_RECVD_PROPS is a
4728			 * special case where we blow away all local properties
4729			 * regardless.
4730			 */
4731			if (!first_recvd_props)
4732				props_reduce(recvprops, origrecvd);
4733			if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
4734				(void) nvlist_merge(*errors, errlist, 0);
4735			nvlist_free(errlist);
4736
4737			if (clear_received_props(tofs, origrecvd,
4738			    first_recvd_props ? NULL : recvprops) != 0)
4739				*errflags |= ZPROP_ERR_NOCLEAR;
4740		} else {
4741			*errflags |= ZPROP_ERR_NOCLEAR;
4742		}
4743	}
4744
4745	/*
4746	 * Stash away existing properties so we can restore them on error unless
4747	 * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
4748	 * case "origrecvd" will take care of that.
4749	 */
4750	if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
4751		objset_t *os;
4752		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
4753			if (dsl_prop_get_all(os, &origprops) != 0) {
4754				*errflags |= ZPROP_ERR_NOCLEAR;
4755			}
4756			dmu_objset_rele(os, FTAG);
4757		} else {
4758			*errflags |= ZPROP_ERR_NOCLEAR;
4759		}
4760	}
4761
4762	if (recvprops != NULL) {
4763		props_error = dsl_prop_set_hasrecvd(tofs);
4764
4765		if (props_error == 0) {
4766			recv_delayprops = extract_delay_props(recvprops);
4767			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4768			    recvprops, *errors);
4769		}
4770	}
4771
4772	if (localprops != NULL) {
4773		nvlist_t *oprops = fnvlist_alloc();
4774		nvlist_t *xprops = fnvlist_alloc();
4775		nvpair_t *nvp = NULL;
4776
4777		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
4778			if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
4779				/* -x property */
4780				const char *name = nvpair_name(nvp);
4781				zfs_prop_t prop = zfs_name_to_prop(name);
4782				if (prop != ZPROP_INVAL) {
4783					if (!zfs_prop_inheritable(prop))
4784						continue;
4785				} else if (!zfs_prop_user(name))
4786					continue;
4787				fnvlist_add_boolean(xprops, name);
4788			} else {
4789				/* -o property=value */
4790				fnvlist_add_nvpair(oprops, nvp);
4791			}
4792		}
4793
4794		local_delayprops = extract_delay_props(oprops);
4795		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
4796		    oprops, *errors);
4797		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
4798		    xprops, *errors);
4799
4800		nvlist_free(oprops);
4801		nvlist_free(xprops);
4802	}
4803
4804	off = input_fp->f_offset;
4805	error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
4806	    action_handle);
4807
4808	if (error == 0) {
4809		zfsvfs_t *zfsvfs = NULL;
4810
4811		if (getzfsvfs(tofs, &zfsvfs) == 0) {
4812			/* online recv */
4813			dsl_dataset_t *ds;
4814			int end_err;
4815
4816			ds = dmu_objset_ds(zfsvfs->z_os);
4817			error = zfs_suspend_fs(zfsvfs);
4818			/*
4819			 * If the suspend fails, then the recv_end will
4820			 * likely also fail, and clean up after itself.
4821			 */
4822			end_err = dmu_recv_end(&drc, zfsvfs);
4823			if (error == 0)
4824				error = zfs_resume_fs(zfsvfs, ds);
4825			error = error ? error : end_err;
4826			VFS_RELE(zfsvfs->z_vfs);
4827		} else {
4828			error = dmu_recv_end(&drc, NULL);
4829		}
4830
4831		/* Set delayed properties now, after we're done receiving. */
4832		if (recv_delayprops != NULL && error == 0) {
4833			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4834			    recv_delayprops, *errors);
4835		}
4836		if (local_delayprops != NULL && error == 0) {
4837			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
4838			    local_delayprops, *errors);
4839		}
4840	}
4841
4842	/*
4843	 * Merge delayed props back in with initial props, in case
4844	 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
4845	 * we have to make sure clear_received_props() includes
4846	 * the delayed properties).
4847	 *
4848	 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
4849	 * using ASSERT() will be just like a VERIFY.
4850	 */
4851	if (recv_delayprops != NULL) {
4852		ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0);
4853		nvlist_free(recv_delayprops);
4854	}
4855	if (local_delayprops != NULL) {
4856		ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
4857		nvlist_free(local_delayprops);
4858	}
4859
4860	*read_bytes = off - input_fp->f_offset;
4861	if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0)
4862		input_fp->f_offset = off;
4863
4864#ifdef	DEBUG
4865	if (zfs_ioc_recv_inject_err) {
4866		zfs_ioc_recv_inject_err = B_FALSE;
4867		error = 1;
4868	}
4869#endif
4870
4871	/*
4872	 * On error, restore the original props.
4873	 */
4874	if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
4875		if (clear_received_props(tofs, recvprops, NULL) != 0) {
4876			/*
4877			 * We failed to clear the received properties.
4878			 * Since we may have left a $recvd value on the
4879			 * system, we can't clear the $hasrecvd flag.
4880			 */
4881			*errflags |= ZPROP_ERR_NORESTORE;
4882		} else if (first_recvd_props) {
4883			dsl_prop_unset_hasrecvd(tofs);
4884		}
4885
4886		if (origrecvd == NULL && !drc.drc_newfs) {
4887			/* We failed to stash the original properties. */
4888			*errflags |= ZPROP_ERR_NORESTORE;
4889		}
4890
4891		/*
4892		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
4893		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
4894		 * explicitly if we're restoring local properties cleared in the
4895		 * first new-style receive.
4896		 */
4897		if (origrecvd != NULL &&
4898		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
4899		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
4900		    origrecvd, NULL) != 0) {
4901			/*
4902			 * We stashed the original properties but failed to
4903			 * restore them.
4904			 */
4905			*errflags |= ZPROP_ERR_NORESTORE;
4906		}
4907	}
4908	if (error != 0 && localprops != NULL && !drc.drc_newfs &&
4909	    !first_recvd_props) {
4910		nvlist_t *setprops;
4911		nvlist_t *inheritprops;
4912		nvpair_t *nvp;
4913
4914		if (origprops == NULL) {
4915			/* We failed to stash the original properties. */
4916			*errflags |= ZPROP_ERR_NORESTORE;
4917			goto out;
4918		}
4919
4920		/* Restore original props */
4921		setprops = fnvlist_alloc();
4922		inheritprops = fnvlist_alloc();
4923		nvp = NULL;
4924		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
4925			const char *name = nvpair_name(nvp);
4926			const char *source;
4927			nvlist_t *attrs;
4928
4929			if (!nvlist_exists(origprops, name)) {
4930				/*
4931				 * Property was not present or was explicitly
4932				 * inherited before the receive, restore this.
4933				 */
4934				fnvlist_add_boolean(inheritprops, name);
4935				continue;
4936			}
4937			attrs = fnvlist_lookup_nvlist(origprops, name);
4938			source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);
4939
4940			/* Skip received properties */
4941			if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
4942				continue;
4943
4944			if (strcmp(source, tofs) == 0) {
4945				/* Property was locally set */
4946				fnvlist_add_nvlist(setprops, name, attrs);
4947			} else {
4948				/* Property was implicitly inherited */
4949				fnvlist_add_boolean(inheritprops, name);
4950			}
4951		}
4952
4953		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
4954		    NULL) != 0)
4955			*errflags |= ZPROP_ERR_NORESTORE;
4956		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
4957		    NULL) != 0)
4958			*errflags |= ZPROP_ERR_NORESTORE;
4959
4960		nvlist_free(setprops);
4961		nvlist_free(inheritprops);
4962	}
4963out:
4964	releasef(input_fd);
4965	nvlist_free(origrecvd);
4966	nvlist_free(origprops);
4967
4968	if (error == 0)
4969		error = props_error;
4970
4971	return (error);
4972}
4973
4974/*
4975 * inputs:
4976 * zc_name		name of containing filesystem
4977 * zc_nvlist_src{_size}	nvlist of received properties to apply
4978 * zc_nvlist_conf{_size} nvlist of local properties to apply
4979 * zc_history_offset{_len} nvlist of hidden args { "wkeydata" -> value }
4980 * zc_value		name of snapshot to create
4981 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
4982 * zc_cookie		file descriptor to recv from
4983 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
4984 * zc_guid		force flag
4985 * zc_cleanup_fd	cleanup-on-exit file descriptor
4986 * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
4987 * zc_resumable		if data is incomplete assume sender will resume
4988 *
4989 * outputs:
4990 * zc_cookie		number of bytes read
4991 * zc_nvlist_dst{_size} error for each unapplied received property
4992 * zc_obj		zprop_errflags_t
4993 * zc_action_handle	handle for this guid/ds mapping
4994 */
4995static int
4996zfs_ioc_recv(zfs_cmd_t *zc)
4997{
4998	dmu_replay_record_t begin_record;
4999	nvlist_t *errors = NULL;
5000	nvlist_t *recvdprops = NULL;
5001	nvlist_t *localprops = NULL;
5002	nvlist_t *hidden_args = NULL;
5003	char *origin = NULL;
5004	char *tosnap;
5005	char tofs[ZFS_MAX_DATASET_NAME_LEN];
5006	int error = 0;
5007
5008	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5009	    strchr(zc->zc_value, '@') == NULL ||
5010	    strchr(zc->zc_value, '%'))
5011		return (SET_ERROR(EINVAL));
5012
5013	(void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
5014	tosnap = strchr(tofs, '@');
5015	*tosnap++ = '\0';
5016
5017	if (zc->zc_nvlist_src != 0 &&
5018	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
5019	    zc->zc_iflags, &recvdprops)) != 0)
5020		return (error);
5021
5022	if (zc->zc_nvlist_conf != 0 &&
5023	    (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
5024	    zc->zc_iflags, &localprops)) != 0)
5025		return (error);
5026
5027	if (zc->zc_history_offset != 0 &&
5028	    (error = get_nvlist(zc->zc_history_offset, zc->zc_history_len,
5029	    zc->zc_iflags, &hidden_args)) != 0)
5030		return (error);
5031
5032	if (zc->zc_string[0])
5033		origin = zc->zc_string;
5034
5035	begin_record.drr_type = DRR_BEGIN;
5036	begin_record.drr_payloadlen = zc->zc_begin_record.drr_payloadlen;
5037	begin_record.drr_u.drr_begin = zc->zc_begin_record.drr_u.drr_begin;
5038
5039	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
5040	    hidden_args, zc->zc_guid, zc->zc_resumable, zc->zc_cookie,
5041	    &begin_record, zc->zc_cleanup_fd, &zc->zc_cookie, &zc->zc_obj,
5042	    &zc->zc_action_handle, &errors);
5043	nvlist_free(recvdprops);
5044	nvlist_free(localprops);
5045
5046	/*
5047	 * Now that all props, initial and delayed, are set, report the prop
5048	 * errors to the caller.
5049	 */
5050	if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
5051	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
5052	    put_nvlist(zc, errors) != 0)) {
5053		/*
5054		 * Caller made zc->zc_nvlist_dst less than the minimum expected
5055		 * size or supplied an invalid address.
5056		 */
5057		error = SET_ERROR(EINVAL);
5058	}
5059
5060	nvlist_free(errors);
5061
5062	return (error);
5063}
5064
5065/*
5066 * inputs:
5067 * zc_name	name of snapshot to send
5068 * zc_cookie	file descriptor to send stream to
5069 * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
5070 * zc_sendobj	objsetid of snapshot to send
5071 * zc_fromobj	objsetid of incremental fromsnap (may be zero)
5072 * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
5073 *		output size in zc_objset_type.
5074 * zc_flags	lzc_send_flags
5075 *
5076 * outputs:
5077 * zc_objset_type	estimated size, if zc_guid is set
5078 */
5079static int
5080zfs_ioc_send(zfs_cmd_t *zc)
5081{
5082	int error;
5083	offset_t off;
5084	boolean_t estimate = (zc->zc_guid != 0);
5085	boolean_t embedok = (zc->zc_flags & 0x1);
5086	boolean_t large_block_ok = (zc->zc_flags & 0x2);
5087	boolean_t compressok = (zc->zc_flags & 0x4);
5088	boolean_t rawok = (zc->zc_flags & 0x8);
5089
5090	if (zc->zc_obj != 0) {
5091		dsl_pool_t *dp;
5092		dsl_dataset_t *tosnap;
5093
5094		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5095		if (error != 0)
5096			return (error);
5097
5098		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
5099		if (error != 0) {
5100			dsl_pool_rele(dp, FTAG);
5101			return (error);
5102		}
5103
5104		if (dsl_dir_is_clone(tosnap->ds_dir))
5105			zc->zc_fromobj =
5106			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
5107		dsl_dataset_rele(tosnap, FTAG);
5108		dsl_pool_rele(dp, FTAG);
5109	}
5110
5111	if (estimate) {
5112		dsl_pool_t *dp;
5113		dsl_dataset_t *tosnap;
5114		dsl_dataset_t *fromsnap = NULL;
5115
5116		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5117		if (error != 0)
5118			return (error);
5119
5120		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
5121		    FTAG, &tosnap);
5122		if (error != 0) {
5123			dsl_pool_rele(dp, FTAG);
5124			return (error);
5125		}
5126
5127		if (zc->zc_fromobj != 0) {
5128			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
5129			    FTAG, &fromsnap);
5130			if (error != 0) {
5131				dsl_dataset_rele(tosnap, FTAG);
5132				dsl_pool_rele(dp, FTAG);
5133				return (error);
5134			}
5135		}
5136
5137		error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok,
5138		    &zc->zc_objset_type);
5139
5140		if (fromsnap != NULL)
5141			dsl_dataset_rele(fromsnap, FTAG);
5142		dsl_dataset_rele(tosnap, FTAG);
5143		dsl_pool_rele(dp, FTAG);
5144	} else {
5145		file_t *fp = getf(zc->zc_cookie);
5146		if (fp == NULL)
5147			return (SET_ERROR(EBADF));
5148
5149		off = fp->f_offset;
5150		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
5151		    zc->zc_fromobj, embedok, large_block_ok, compressok, rawok,
5152		    zc->zc_cookie, fp->f_vnode, &off);
5153
5154		if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5155			fp->f_offset = off;
5156		releasef(zc->zc_cookie);
5157	}
5158	return (error);
5159}
5160
5161/*
5162 * inputs:
5163 * zc_name	name of snapshot on which to report progress
5164 * zc_cookie	file descriptor of send stream
5165 *
5166 * outputs:
5167 * zc_cookie	number of bytes written in send stream thus far
5168 */
5169static int
5170zfs_ioc_send_progress(zfs_cmd_t *zc)
5171{
5172	dsl_pool_t *dp;
5173	dsl_dataset_t *ds;
5174	dmu_sendarg_t *dsp = NULL;
5175	int error;
5176
5177	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5178	if (error != 0)
5179		return (error);
5180
5181	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5182	if (error != 0) {
5183		dsl_pool_rele(dp, FTAG);
5184		return (error);
5185	}
5186
5187	mutex_enter(&ds->ds_sendstream_lock);
5188
5189	/*
5190	 * Iterate over all the send streams currently active on this dataset.
5191	 * If there's one which matches the specified file descriptor _and_ the
5192	 * stream was started by the current process, return the progress of
5193	 * that stream.
5194	 */
5195	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
5196	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
5197		if (dsp->dsa_outfd == zc->zc_cookie &&
5198		    dsp->dsa_proc == curproc)
5199			break;
5200	}
5201
5202	if (dsp != NULL)
5203		zc->zc_cookie = *(dsp->dsa_off);
5204	else
5205		error = SET_ERROR(ENOENT);
5206
5207	mutex_exit(&ds->ds_sendstream_lock);
5208	dsl_dataset_rele(ds, FTAG);
5209	dsl_pool_rele(dp, FTAG);
5210	return (error);
5211}
5212
5213static int
5214zfs_ioc_inject_fault(zfs_cmd_t *zc)
5215{
5216	int id, error;
5217
5218	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
5219	    &zc->zc_inject_record);
5220
5221	if (error == 0)
5222		zc->zc_guid = (uint64_t)id;
5223
5224	return (error);
5225}
5226
5227static int
5228zfs_ioc_clear_fault(zfs_cmd_t *zc)
5229{
5230	return (zio_clear_fault((int)zc->zc_guid));
5231}
5232
5233static int
5234zfs_ioc_inject_list_next(zfs_cmd_t *zc)
5235{
5236	int id = (int)zc->zc_guid;
5237	int error;
5238
5239	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
5240	    &zc->zc_inject_record);
5241
5242	zc->zc_guid = id;
5243
5244	return (error);
5245}
5246
5247static int
5248zfs_ioc_error_log(zfs_cmd_t *zc)
5249{
5250	spa_t *spa;
5251	int error;
5252	size_t count = (size_t)zc->zc_nvlist_dst_size;
5253
5254	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
5255		return (error);
5256
5257	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
5258	    &count);
5259	if (error == 0)
5260		zc->zc_nvlist_dst_size = count;
5261	else
5262		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
5263
5264	spa_close(spa, FTAG);
5265
5266	return (error);
5267}
5268
5269static int
5270zfs_ioc_clear(zfs_cmd_t *zc)
5271{
5272	spa_t *spa;
5273	vdev_t *vd;
5274	int error;
5275
5276	/*
5277	 * On zpool clear we also fix up missing slogs
5278	 */
5279	mutex_enter(&spa_namespace_lock);
5280	spa = spa_lookup(zc->zc_name);
5281	if (spa == NULL) {
5282		mutex_exit(&spa_namespace_lock);
5283		return (SET_ERROR(EIO));
5284	}
5285	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
5286		/* we need to let spa_open/spa_load clear the chains */
5287		spa_set_log_state(spa, SPA_LOG_CLEAR);
5288	}
5289	spa->spa_last_open_failed = 0;
5290	mutex_exit(&spa_namespace_lock);
5291
5292	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
5293		error = spa_open(zc->zc_name, &spa, FTAG);
5294	} else {
5295		nvlist_t *policy;
5296		nvlist_t *config = NULL;
5297
5298		if (zc->zc_nvlist_src == 0)
5299			return (SET_ERROR(EINVAL));
5300
5301		if ((error = get_nvlist(zc->zc_nvlist_src,
5302		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
5303			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
5304			    policy, &config);
5305			if (config != NULL) {
5306				int err;
5307
5308				if ((err = put_nvlist(zc, config)) != 0)
5309					error = err;
5310				nvlist_free(config);
5311			}
5312			nvlist_free(policy);
5313		}
5314	}
5315
5316	if (error != 0)
5317		return (error);
5318
5319	/*
5320	 * If multihost is enabled, resuming I/O is unsafe as another
5321	 * host may have imported the pool.
5322	 */
5323	if (spa_multihost(spa) && spa_suspended(spa))
5324		return (SET_ERROR(EINVAL));
5325
5326	spa_vdev_state_enter(spa, SCL_NONE);
5327
5328	if (zc->zc_guid == 0) {
5329		vd = NULL;
5330	} else {
5331		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
5332		if (vd == NULL) {
5333			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
5334			spa_close(spa, FTAG);
5335			return (SET_ERROR(ENODEV));
5336		}
5337	}
5338
5339	vdev_clear(spa, vd);
5340
5341	(void) spa_vdev_state_exit(spa, NULL, 0);
5342
5343	/*
5344	 * Resume any suspended I/Os.
5345	 */
5346	if (zio_resume(spa) != 0)
5347		error = SET_ERROR(EIO);
5348
5349	spa_close(spa, FTAG);
5350
5351	return (error);
5352}
5353
5354static int
5355zfs_ioc_pool_reopen(zfs_cmd_t *zc)
5356{
5357	spa_t *spa;
5358	int error;
5359
5360	error = spa_open(zc->zc_name, &spa, FTAG);
5361	if (error != 0)
5362		return (error);
5363
5364	spa_vdev_state_enter(spa, SCL_NONE);
5365
5366	/*
5367	 * If a resilver is already in progress then set the
5368	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
5369	 * the scan as a side effect of the reopen. Otherwise, let
5370	 * vdev_open() decided if a resilver is required.
5371	 */
5372	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
5373	vdev_reopen(spa->spa_root_vdev);
5374	spa->spa_scrub_reopen = B_FALSE;
5375
5376	(void) spa_vdev_state_exit(spa, NULL, 0);
5377	spa_close(spa, FTAG);
5378	return (0);
5379}
5380/*
5381 * inputs:
5382 * zc_name	name of filesystem
5383 *
5384 * outputs:
5385 * zc_string	name of conflicting snapshot, if there is one
5386 */
5387static int
5388zfs_ioc_promote(zfs_cmd_t *zc)
5389{
5390	dsl_pool_t *dp;
5391	dsl_dataset_t *ds, *ods;
5392	char origin[ZFS_MAX_DATASET_NAME_LEN];
5393	char *cp;
5394	int error;
5395
5396	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5397	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5398	    strchr(zc->zc_name, '%'))
5399		return (SET_ERROR(EINVAL));
5400
5401	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5402	if (error != 0)
5403		return (error);
5404
5405	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5406	if (error != 0) {
5407		dsl_pool_rele(dp, FTAG);
5408		return (error);
5409	}
5410
5411	if (!dsl_dir_is_clone(ds->ds_dir)) {
5412		dsl_dataset_rele(ds, FTAG);
5413		dsl_pool_rele(dp, FTAG);
5414		return (SET_ERROR(EINVAL));
5415	}
5416
5417	error = dsl_dataset_hold_obj(dp,
5418	    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5419	if (error != 0) {
5420		dsl_dataset_rele(ds, FTAG);
5421		dsl_pool_rele(dp, FTAG);
5422		return (error);
5423	}
5424
5425	dsl_dataset_name(ods, origin);
5426	dsl_dataset_rele(ods, FTAG);
5427	dsl_dataset_rele(ds, FTAG);
5428	dsl_pool_rele(dp, FTAG);
5429
5430	/*
5431	 * We don't need to unmount *all* the origin fs's snapshots, but
5432	 * it's easier.
5433	 */
5434	cp = strchr(origin, '@');
5435	if (cp)
5436		*cp = '\0';
5437	(void) dmu_objset_find(origin,
5438	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
5439	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
5440}
5441
5442/*
5443 * Retrieve a single {user|group|project}{used|quota}@... property.
5444 *
5445 * inputs:
5446 * zc_name	name of filesystem
5447 * zc_objset_type zfs_userquota_prop_t
5448 * zc_value	domain name (eg. "S-1-234-567-89")
5449 * zc_guid	RID/UID/GID
5450 *
5451 * outputs:
5452 * zc_cookie	property value
5453 */
5454static int
5455zfs_ioc_userspace_one(zfs_cmd_t *zc)
5456{
5457	zfsvfs_t *zfsvfs;
5458	int error;
5459
5460	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5461		return (SET_ERROR(EINVAL));
5462
5463	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5464	if (error != 0)
5465		return (error);
5466
5467	error = zfs_userspace_one(zfsvfs,
5468	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5469	zfsvfs_rele(zfsvfs, FTAG);
5470
5471	return (error);
5472}
5473
5474/*
5475 * inputs:
5476 * zc_name		name of filesystem
5477 * zc_cookie		zap cursor
5478 * zc_objset_type	zfs_userquota_prop_t
5479 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5480 *
5481 * outputs:
5482 * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
5483 * zc_cookie	zap cursor
5484 */
5485static int
5486zfs_ioc_userspace_many(zfs_cmd_t *zc)
5487{
5488	zfsvfs_t *zfsvfs;
5489	int bufsize = zc->zc_nvlist_dst_size;
5490
5491	if (bufsize <= 0)
5492		return (SET_ERROR(ENOMEM));
5493
5494	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5495	if (error != 0)
5496		return (error);
5497
5498	void *buf = kmem_alloc(bufsize, KM_SLEEP);
5499
5500	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5501	    buf, &zc->zc_nvlist_dst_size);
5502
5503	if (error == 0) {
5504		error = xcopyout(buf,
5505		    (void *)(uintptr_t)zc->zc_nvlist_dst,
5506		    zc->zc_nvlist_dst_size);
5507	}
5508	kmem_free(buf, bufsize);
5509	zfsvfs_rele(zfsvfs, FTAG);
5510
5511	return (error);
5512}
5513
5514/*
5515 * inputs:
5516 * zc_name		name of filesystem
5517 *
5518 * outputs:
5519 * none
5520 */
5521static int
5522zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5523{
5524	objset_t *os;
5525	int error = 0;
5526	zfsvfs_t *zfsvfs;
5527
5528	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5529		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5530			/*
5531			 * If userused is not enabled, it may be because the
5532			 * objset needs to be closed & reopened (to grow the
5533			 * objset_phys_t).  Suspend/resume the fs will do that.
5534			 */
5535			dsl_dataset_t *ds, *newds;
5536
5537			ds = dmu_objset_ds(zfsvfs->z_os);
5538			error = zfs_suspend_fs(zfsvfs);
5539			if (error == 0) {
5540				dmu_objset_refresh_ownership(ds, &newds,
5541				    B_TRUE, zfsvfs);
5542				error = zfs_resume_fs(zfsvfs, newds);
5543			}
5544		}
5545		if (error == 0)
5546			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5547		VFS_RELE(zfsvfs->z_vfs);
5548	} else {
5549		/* XXX kind of reading contents without owning */
5550		error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
5551		if (error != 0)
5552			return (error);
5553
5554		error = dmu_objset_userspace_upgrade(os);
5555		dmu_objset_rele_flags(os, B_TRUE, FTAG);
5556	}
5557
5558	return (error);
5559}
5560
5561/*
5562 * inputs:
5563 * zc_name		name of filesystem
5564 *
5565 * outputs:
5566 * none
5567 */
5568static int
5569zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
5570{
5571	objset_t *os;
5572	int error;
5573
5574	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5575	if (error != 0)
5576		return (error);
5577
5578	dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
5579	dsl_pool_rele(dmu_objset_pool(os), FTAG);
5580
5581	if (dmu_objset_userobjspace_upgradable(os) ||
5582	    dmu_objset_projectquota_upgradable(os)) {
5583		mutex_enter(&os->os_upgrade_lock);
5584		if (os->os_upgrade_id == 0) {
5585			/* clear potential error code and retry */
5586			os->os_upgrade_status = 0;
5587			mutex_exit(&os->os_upgrade_lock);
5588
5589			dmu_objset_id_quota_upgrade(os);
5590		} else {
5591			mutex_exit(&os->os_upgrade_lock);
5592		}
5593
5594		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
5595		error = os->os_upgrade_status;
5596	}
5597
5598	dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
5599	dsl_dataset_rele(dmu_objset_ds(os), FTAG);
5600
5601	return (error);
5602}
5603
/*
 * We don't want to have a hard dependency against some special symbols
 * in sharefs, nfs, and smbsrv.  Determine them if needed when the first
 * file system is shared.
 * None of sharefs, nfs, or smbsrv are unloadable modules.
 */
/* Entry points looked up with ddi_modsym(); NULL until resolved. */
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 by zfs_ioc_share() once the respective protocol is set up. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Module handles returned by ddi_modopen(); NULL until opened. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
/* Held while the handles and entry points above are being initialized. */
kmutex_t zfs_share_lock;
5622
5623static int
5624zfs_init_sharefs()
5625{
5626	int error;
5627
5628	ASSERT(MUTEX_HELD(&zfs_share_lock));
5629	/* Both NFS and SMB shares also require sharetab support. */
5630	if (sharefs_mod == NULL && ((sharefs_mod =
5631	    ddi_modopen("fs/sharefs",
5632	    KRTLD_MODE_FIRST, &error)) == NULL)) {
5633		return (SET_ERROR(ENOSYS));
5634	}
5635	if (zshare_fs == NULL && ((zshare_fs =
5636	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
5637	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
5638		return (SET_ERROR(ENOSYS));
5639	}
5640	return (0);
5641}
5642
5643static int
5644zfs_ioc_share(zfs_cmd_t *zc)
5645{
5646	int error;
5647	int opcode;
5648
5649	switch (zc->zc_share.z_sharetype) {
5650	case ZFS_SHARE_NFS:
5651	case ZFS_UNSHARE_NFS:
5652		if (zfs_nfsshare_inited == 0) {
5653			mutex_enter(&zfs_share_lock);
5654			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
5655			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5656				mutex_exit(&zfs_share_lock);
5657				return (SET_ERROR(ENOSYS));
5658			}
5659			if (znfsexport_fs == NULL &&
5660			    ((znfsexport_fs = (int (*)(void *))
5661			    ddi_modsym(nfs_mod,
5662			    "nfs_export", &error)) == NULL)) {
5663				mutex_exit(&zfs_share_lock);
5664				return (SET_ERROR(ENOSYS));
5665			}
5666			error = zfs_init_sharefs();
5667			if (error != 0) {
5668				mutex_exit(&zfs_share_lock);
5669				return (SET_ERROR(ENOSYS));
5670			}
5671			zfs_nfsshare_inited = 1;
5672			mutex_exit(&zfs_share_lock);
5673		}
5674		break;
5675	case ZFS_SHARE_SMB:
5676	case ZFS_UNSHARE_SMB:
5677		if (zfs_smbshare_inited == 0) {
5678			mutex_enter(&zfs_share_lock);
5679			if (smbsrv_mod == NULL && ((smbsrv_mod =
5680			    ddi_modopen("drv/smbsrv",
5681			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5682				mutex_exit(&zfs_share_lock);
5683				return (SET_ERROR(ENOSYS));
5684			}
5685			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
5686			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
5687			    "smb_server_share", &error)) == NULL)) {
5688				mutex_exit(&zfs_share_lock);
5689				return (SET_ERROR(ENOSYS));
5690			}
5691			error = zfs_init_sharefs();
5692			if (error != 0) {
5693				mutex_exit(&zfs_share_lock);
5694				return (SET_ERROR(ENOSYS));
5695			}
5696			zfs_smbshare_inited = 1;
5697			mutex_exit(&zfs_share_lock);
5698		}
5699		break;
5700	default:
5701		return (SET_ERROR(EINVAL));
5702	}
5703
5704	switch (zc->zc_share.z_sharetype) {
5705	case ZFS_SHARE_NFS:
5706	case ZFS_UNSHARE_NFS:
5707		if (error =
5708		    znfsexport_fs((void *)
5709		    (uintptr_t)zc->zc_share.z_exportdata))
5710			return (error);
5711		break;
5712	case ZFS_SHARE_SMB:
5713	case ZFS_UNSHARE_SMB:
5714		if (error = zsmbexport_fs((void *)
5715		    (uintptr_t)zc->zc_share.z_exportdata,
5716		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
5717		    B_TRUE: B_FALSE)) {
5718			return (error);
5719		}
5720		break;
5721	}
5722
5723	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
5724	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
5725	    SHAREFS_ADD : SHAREFS_REMOVE;
5726
5727	/*
5728	 * Add or remove share from sharetab
5729	 */
5730	error = zshare_fs(opcode,
5731	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
5732	    zc->zc_share.z_sharemax);
5733
5734	return (error);
5735
5736}
5737
/*
 * Single-entry ACE list built from ACE_ALL_PERMS and ACE_EVERYONE.
 * zfs_ioc_smb_acl() passes it as the ACL of the files it creates for
 * ZFS_SMB_ACL_ADD.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
5741
5742/*
5743 * inputs:
5744 * zc_name		name of containing filesystem
5745 * zc_obj		object # beyond which we want next in-use object #
5746 *
5747 * outputs:
5748 * zc_obj		next in-use object #
5749 */
5750static int
5751zfs_ioc_next_obj(zfs_cmd_t *zc)
5752{
5753	objset_t *os = NULL;
5754	int error;
5755
5756	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5757	if (error != 0)
5758		return (error);
5759
5760	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5761	    dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5762
5763	dmu_objset_rele(os, FTAG);
5764	return (error);
5765}
5766
5767/*
5768 * inputs:
5769 * zc_name		name of filesystem
5770 * zc_value		prefix name for snapshot
5771 * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
5772 *
5773 * outputs:
5774 * zc_value		short name of new snapshot
5775 */
5776static int
5777zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5778{
5779	char *snap_name;
5780	char *hold_name;
5781	int error;
5782	minor_t minor;
5783
5784	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5785	if (error != 0)
5786		return (error);
5787
5788	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5789	    (u_longlong_t)ddi_get_lbolt64());
5790	hold_name = kmem_asprintf("%%%s", zc->zc_value);
5791
5792	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5793	    hold_name);
5794	if (error == 0)
5795		(void) strcpy(zc->zc_value, snap_name);
5796	strfree(snap_name);
5797	strfree(hold_name);
5798	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5799	return (error);
5800}
5801
5802/*
5803 * inputs:
5804 * zc_name		name of "to" snapshot
5805 * zc_value		name of "from" snapshot
5806 * zc_cookie		file descriptor to write diff data on
5807 *
5808 * outputs:
5809 * dmu_diff_record_t's to the file descriptor
5810 */
5811static int
5812zfs_ioc_diff(zfs_cmd_t *zc)
5813{
5814	file_t *fp;
5815	offset_t off;
5816	int error;
5817
5818	fp = getf(zc->zc_cookie);
5819	if (fp == NULL)
5820		return (SET_ERROR(EBADF));
5821
5822	off = fp->f_offset;
5823
5824	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5825
5826	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5827		fp->f_offset = off;
5828	releasef(zc->zc_cookie);
5829
5830	return (error);
5831}
5832
5833/*
5834 * Remove all ACL files in shares dir
5835 */
5836static int
5837zfs_smb_acl_purge(znode_t *dzp)
5838{
5839	zap_cursor_t	zc;
5840	zap_attribute_t	zap;
5841	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
5842	int error;
5843
5844	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
5845	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
5846	    zap_cursor_advance(&zc)) {
5847		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
5848		    NULL, 0)) != 0)
5849			break;
5850	}
5851	zap_cursor_fini(&zc);
5852	return (error);
5853}
5854
5855static int
5856zfs_ioc_smb_acl(zfs_cmd_t *zc)
5857{
5858	vnode_t *vp;
5859	znode_t *dzp;
5860	vnode_t *resourcevp = NULL;
5861	znode_t *sharedir;
5862	zfsvfs_t *zfsvfs;
5863	nvlist_t *nvlist;
5864	char *src, *target;
5865	vattr_t vattr;
5866	vsecattr_t vsec;
5867	int error = 0;
5868
5869	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5870	    NO_FOLLOW, NULL, &vp)) != 0)
5871		return (error);
5872
5873	/* Now make sure mntpnt and dataset are ZFS */
5874
5875	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
5876	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5877	    zc->zc_name) != 0)) {
5878		VN_RELE(vp);
5879		return (SET_ERROR(EINVAL));
5880	}
5881
5882	dzp = VTOZ(vp);
5883	zfsvfs = dzp->z_zfsvfs;
5884	ZFS_ENTER(zfsvfs);
5885
5886	/*
5887	 * Create share dir if its missing.
5888	 */
5889	mutex_enter(&zfsvfs->z_lock);
5890	if (zfsvfs->z_shares_dir == 0) {
5891		dmu_tx_t *tx;
5892
5893		tx = dmu_tx_create(zfsvfs->z_os);
5894		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5895		    ZFS_SHARES_DIR);
5896		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5897		error = dmu_tx_assign(tx, TXG_WAIT);
5898		if (error != 0) {
5899			dmu_tx_abort(tx);
5900		} else {
5901			error = zfs_create_share_dir(zfsvfs, tx);
5902			dmu_tx_commit(tx);
5903		}
5904		if (error != 0) {
5905			mutex_exit(&zfsvfs->z_lock);
5906			VN_RELE(vp);
5907			ZFS_EXIT(zfsvfs);
5908			return (error);
5909		}
5910	}
5911	mutex_exit(&zfsvfs->z_lock);
5912
5913	ASSERT(zfsvfs->z_shares_dir);
5914	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5915		VN_RELE(vp);
5916		ZFS_EXIT(zfsvfs);
5917		return (error);
5918	}
5919
5920	switch (zc->zc_cookie) {
5921	case ZFS_SMB_ACL_ADD:
5922		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5923		vattr.va_type = VREG;
5924		vattr.va_mode = S_IFREG|0777;
5925		vattr.va_uid = 0;
5926		vattr.va_gid = 0;
5927
5928		vsec.vsa_mask = VSA_ACE;
5929		vsec.vsa_aclentp = &full_access;
5930		vsec.vsa_aclentsz = sizeof (full_access);
5931		vsec.vsa_aclcnt = 1;
5932
5933		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5934		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5935		if (resourcevp)
5936			VN_RELE(resourcevp);
5937		break;
5938
5939	case ZFS_SMB_ACL_REMOVE:
5940		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5941		    NULL, 0);
5942		break;
5943
5944	case ZFS_SMB_ACL_RENAME:
5945		if ((error = get_nvlist(zc->zc_nvlist_src,
5946		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5947			VN_RELE(vp);
5948			VN_RELE(ZTOV(sharedir));
5949			ZFS_EXIT(zfsvfs);
5950			return (error);
5951		}
5952		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5953		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5954		    &target)) {
5955			VN_RELE(vp);
5956			VN_RELE(ZTOV(sharedir));
5957			ZFS_EXIT(zfsvfs);
5958			nvlist_free(nvlist);
5959			return (error);
5960		}
5961		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5962		    kcred, NULL, 0);
5963		nvlist_free(nvlist);
5964		break;
5965
5966	case ZFS_SMB_ACL_PURGE:
5967		error = zfs_smb_acl_purge(sharedir);
5968		break;
5969
5970	default:
5971		error = SET_ERROR(EINVAL);
5972		break;
5973	}
5974
5975	VN_RELE(vp);
5976	VN_RELE(ZTOV(sharedir));
5977
5978	ZFS_EXIT(zfsvfs);
5979
5980	return (error);
5981}
5982
/*
 * Input keys read by zfs_ioc_hold().
 *
 * innvl: {
 *     "holds" -> { snapname -> holdname (string), ... }
 *     (optional) "cleanup_fd" -> fd (int32)
 * }
 *
 * outnvl: {
 *     snapname -> error value (int32)
 *     ...
 * }
 */
static const zfs_ioc_key_t zfs_keys_hold[] = {
	{"holds",		DATA_TYPE_NVLIST,	0},
	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
};
5998
5999/* ARGSUSED */
6000static int
6001zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
6002{
6003	nvpair_t *pair;
6004	nvlist_t *holds;
6005	int cleanup_fd = -1;
6006	int error;
6007	minor_t minor = 0;
6008
6009	holds = fnvlist_lookup_nvlist(args, "holds");
6010
6011	/* make sure the user didn't pass us any invalid (empty) tags */
6012	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
6013	    pair = nvlist_next_nvpair(holds, pair)) {
6014		char *htag;
6015
6016		error = nvpair_value_string(pair, &htag);
6017		if (error != 0)
6018			return (SET_ERROR(error));
6019
6020		if (strlen(htag) == 0)
6021			return (SET_ERROR(EINVAL));
6022	}
6023
6024	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
6025		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
6026		if (error != 0)
6027			return (error);
6028	}
6029
6030	error = dsl_dataset_user_hold(holds, minor, errlist);
6031	if (minor != 0)
6032		zfs_onexit_fd_rele(cleanup_fd);
6033	return (error);
6034}
6035
/*
 * Input keys for zfs_ioc_get_holds(); the table is intentionally empty.
 *
 * innvl is not used.
 *
 * outnvl: {
 *    holdname -> time added (uint64 seconds since epoch)
 *    ...
 * }
 */
static const zfs_ioc_key_t zfs_keys_get_holds[] = {
	/* no nvl keys */
};
6047
6048/* ARGSUSED */
6049static int
6050zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
6051{
6052	return (dsl_dataset_get_holds(snapname, outnvl));
6053}
6054
/*
 * Input keys for zfs_ioc_release(): a wildcard list of nvlists, one per
 * snapshot name.
 *
 * innvl: {
 *     snapname -> { holdname, ... }
 *     ...
 * }
 *
 * outnvl: {
 *     snapname -> error value (int32)
 *     ...
 * }
 */
static const zfs_ioc_key_t zfs_keys_release[] = {
	{"<snapname>...",	DATA_TYPE_NVLIST,	ZK_WILDCARDLIST},
};
6069
6070/* ARGSUSED */
6071static int
6072zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
6073{
6074	return (dsl_dataset_user_release(holds, errlist));
6075}
6076
6077/*
6078 * inputs:
6079 * zc_name		name of new filesystem or snapshot
6080 * zc_value		full name of old snapshot
6081 *
6082 * outputs:
6083 * zc_cookie		space in bytes
6084 * zc_objset_type	compressed space in bytes
6085 * zc_perm_action	uncompressed space in bytes
6086 */
6087static int
6088zfs_ioc_space_written(zfs_cmd_t *zc)
6089{
6090	int error;
6091	dsl_pool_t *dp;
6092	dsl_dataset_t *new, *old;
6093
6094	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6095	if (error != 0)
6096		return (error);
6097	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
6098	if (error != 0) {
6099		dsl_pool_rele(dp, FTAG);
6100		return (error);
6101	}
6102	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
6103	if (error != 0) {
6104		dsl_dataset_rele(new, FTAG);
6105		dsl_pool_rele(dp, FTAG);
6106		return (error);
6107	}
6108
6109	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
6110	    &zc->zc_objset_type, &zc->zc_perm_action);
6111	dsl_dataset_rele(old, FTAG);
6112	dsl_dataset_rele(new, FTAG);
6113	dsl_pool_rele(dp, FTAG);
6114	return (error);
6115}
6116
/*
 * Input keys read by zfs_ioc_space_snaps().
 *
 * innvl: {
 *     "firstsnap" -> snapshot name
 * }
 *
 * outnvl: {
 *     "used" -> space in bytes
 *     "compressed" -> compressed space in bytes
 *     "uncompressed" -> uncompressed space in bytes
 * }
 */
static const zfs_ioc_key_t zfs_keys_space_snaps[] = {
	{"firstsnap",	DATA_TYPE_STRING,	0},
};
6131
6132static int
6133zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
6134{
6135	int error;
6136	dsl_pool_t *dp;
6137	dsl_dataset_t *new, *old;
6138	char *firstsnap;
6139	uint64_t used, comp, uncomp;
6140
6141	firstsnap = fnvlist_lookup_string(innvl, "firstsnap");
6142
6143	error = dsl_pool_hold(lastsnap, FTAG, &dp);
6144	if (error != 0)
6145		return (error);
6146
6147	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
6148	if (error == 0 && !new->ds_is_snapshot) {
6149		dsl_dataset_rele(new, FTAG);
6150		error = SET_ERROR(EINVAL);
6151	}
6152	if (error != 0) {
6153		dsl_pool_rele(dp, FTAG);
6154		return (error);
6155	}
6156	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
6157	if (error == 0 && !old->ds_is_snapshot) {
6158		dsl_dataset_rele(old, FTAG);
6159		error = SET_ERROR(EINVAL);
6160	}
6161	if (error != 0) {
6162		dsl_dataset_rele(new, FTAG);
6163		dsl_pool_rele(dp, FTAG);
6164		return (error);
6165	}
6166
6167	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
6168	dsl_dataset_rele(old, FTAG);
6169	dsl_dataset_rele(new, FTAG);
6170	dsl_pool_rele(dp, FTAG);
6171	fnvlist_add_uint64(outnvl, "used", used);
6172	fnvlist_add_uint64(outnvl, "compressed", comp);
6173	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
6174	return (error);
6175}
6176
/*
 * Input keys read by zfs_ioc_send_new().
 *
 * innvl: {
 *     "fd" -> file descriptor to write stream to (int32)
 *     (optional) "fromsnap" -> full snap name to send an incremental from
 *     (optional) "largeblockok" -> (value ignored)
 *         indicates that blocks > 128KB are permitted
 *     (optional) "embedok" -> (value ignored)
 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
 *     (optional) "compressok" -> (value ignored)
 *         presence indicates compressed DRR_WRITE records are permitted
 *     (optional) "rawok" -> (value ignored)
 *         presence indicates raw encrypted records should be used.
 *     (optional) "resume_object" and "resume_offset" -> (uint64)
 *         if present, resume send stream from specified object and offset.
 * }
 *
 * outnvl is unused
 */
static const zfs_ioc_key_t zfs_keys_send_new[] = {
	{"fd",			DATA_TYPE_INT32,	0},
	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
};
6205
6206/* ARGSUSED */
6207static int
6208zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6209{
6210	int error;
6211	offset_t off;
6212	char *fromname = NULL;
6213	int fd;
6214	boolean_t largeblockok;
6215	boolean_t embedok;
6216	boolean_t compressok;
6217	boolean_t rawok;
6218	uint64_t resumeobj = 0;
6219	uint64_t resumeoff = 0;
6220
6221	fd = fnvlist_lookup_int32(innvl, "fd");
6222
6223	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
6224
6225	largeblockok = nvlist_exists(innvl, "largeblockok");
6226	embedok = nvlist_exists(innvl, "embedok");
6227	compressok = nvlist_exists(innvl, "compressok");
6228	rawok = nvlist_exists(innvl, "rawok");
6229
6230	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6231	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6232
6233	file_t *fp = getf(fd);
6234	if (fp == NULL)
6235		return (SET_ERROR(EBADF));
6236
6237	off = fp->f_offset;
6238	error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
6239	    rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off);
6240
6241	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
6242		fp->f_offset = off;
6243	releasef(fd);
6244	return (error);
6245}
6246
/*
 * Determine approximately how large a zfs send stream will be -- the number
 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
 *
 * innvl: {
 *     (optional) "from" -> full snap or bookmark name to send an incremental
 *                          from
 *     (optional) "fromsnap" -> (string)
 *         listed in the key table below; NOTE(review): its use is not
 *         described here -- confirm against zfs_ioc_send_space()
 *     (optional) "largeblockok" -> (value ignored)
 *         indicates that blocks > 128KB are permitted
 *     (optional) "embedok" -> (value ignored)
 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
 *     (optional) "compressok" -> (value ignored)
 *         presence indicates compressed DRR_WRITE records are permitted
 *     (optional) "rawok" -> (value ignored)
 *         presence is checked alongside "compressok" by zfs_ioc_send_space()
 * }
 *
 * outnvl: {
 *     "space" -> bytes of space (uint64)
 * }
 */
static const zfs_ioc_key_t zfs_keys_send_space[] = {
	{"from",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
};
6274
6275static int
6276zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6277{
6278	dsl_pool_t *dp;
6279	dsl_dataset_t *tosnap;
6280	int error;
6281	char *fromname;
6282	boolean_t compressok;
6283	boolean_t rawok;
6284	uint64_t space;
6285
6286	error = dsl_pool_hold(snapname, FTAG, &dp);
6287	if (error != 0)
6288		return (error);
6289
6290	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
6291	if (error != 0) {
6292		dsl_pool_rele(dp, FTAG);
6293		return (error);
6294	}
6295
6296	compressok = nvlist_exists(innvl, "compressok");
6297	rawok = nvlist_exists(innvl, "rawok");
6298
6299	error = nvlist_lookup_string(innvl, "from", &fromname);
6300	if (error == 0) {
6301		if (strchr(fromname, '@') != NULL) {
6302			/*
6303			 * If from is a snapshot, hold it and use the more
6304			 * efficient dmu_send_estimate to estimate send space
6305			 * size using deadlists.
6306			 */
6307			dsl_dataset_t *fromsnap;
6308			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
6309			if (error != 0)
6310				goto out;
6311			error = dmu_send_estimate(tosnap, fromsnap,
6312			    compressok || rawok, &space);
6313			dsl_dataset_rele(fromsnap, FTAG);
6314		} else if (strchr(fromname, '#') != NULL) {
6315			/*
6316			 * If from is a bookmark, fetch the creation TXG of the
6317			 * snapshot it was created from and use that to find
6318			 * blocks that were born after it.
6319			 */
6320			zfs_bookmark_phys_t frombm;
6321
6322			error = dsl_bookmark_lookup(dp, fromname, tosnap,
6323			    &frombm);
6324			if (error != 0)
6325				goto out;