xref: /illumos-gate/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision 7f1f55ea3af0e6153a8ee9316c7f0b8b4f1ba773)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/errno.h>
31 #include <sys/uio.h>
32 #include <sys/buf.h>
33 #include <sys/modctl.h>
34 #include <sys/open.h>
35 #include <sys/file.h>
36 #include <sys/kmem.h>
37 #include <sys/conf.h>
38 #include <sys/cmn_err.h>
39 #include <sys/stat.h>
40 #include <sys/zfs_ioctl.h>
41 #include <sys/zap.h>
42 #include <sys/spa.h>
43 #include <sys/spa_impl.h>
44 #include <sys/vdev.h>
45 #include <sys/vdev_impl.h>
46 #include <sys/dmu.h>
47 #include <sys/dsl_dir.h>
48 #include <sys/dsl_dataset.h>
49 #include <sys/dsl_prop.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/sunldi.h>
53 #include <sys/policy.h>
54 #include <sys/zone.h>
55 #include <sys/nvpair.h>
56 #include <sys/pathname.h>
57 #include <sys/mount.h>
58 #include <sys/sdt.h>
59 #include <sys/fs/zfs.h>
60 #include <sys/zfs_ctldir.h>
61 #include <sys/zvol.h>
62 
63 #include "zfs_namecheck.h"
64 #include "zfs_prop.h"
65 
66 extern struct modlfs zfs_modlfs;
67 
68 extern void zfs_init(void);
69 extern void zfs_fini(void);
70 
71 ldi_ident_t zfs_li = NULL;
72 dev_info_t *zfs_dip;
73 
74 typedef int zfs_ioc_func_t(zfs_cmd_t *);
75 typedef int zfs_secpolicy_func_t(const char *, cred_t *);
76 
77 typedef struct zfs_ioc_vec {
78 	zfs_ioc_func_t		*zvec_func;
79 	zfs_secpolicy_func_t	*zvec_secpolicy;
80 	enum {
81 		no_name,
82 		pool_name,
83 		dataset_name
84 	}			zvec_namecheck;
85 } zfs_ioc_vec_t;
86 
87 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
88 void
89 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
90 {
91 	const char *newfile;
92 	char buf[256];
93 	va_list adx;
94 
95 	/*
96 	 * Get rid of annoying "../common/" prefix to filename.
97 	 */
98 	newfile = strrchr(file, '/');
99 	if (newfile != NULL) {
100 		newfile = newfile + 1; /* Get rid of leading / */
101 	} else {
102 		newfile = file;
103 	}
104 
105 	va_start(adx, fmt);
106 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
107 	va_end(adx);
108 
109 	/*
110 	 * To get this data, use the zfs-dprintf probe as so:
111 	 * dtrace -q -n 'zfs-dprintf \
112 	 *	/stringof(arg0) == "dbuf.c"/ \
113 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
114 	 * arg0 = file name
115 	 * arg1 = function name
116 	 * arg2 = line number
117 	 * arg3 = message
118 	 */
119 	DTRACE_PROBE4(zfs__dprintf,
120 	    char *, newfile, char *, func, int, line, char *, buf);
121 }
122 
123 /*
124  * Policy for top-level read operations (list pools).  Requires no privileges,
125  * and can be used in the local zone, as there is no associated dataset.
126  */
127 /* ARGSUSED */
128 static int
129 zfs_secpolicy_none(const char *unused1, cred_t *cr)
130 {
131 	return (0);
132 }
133 
134 /*
135  * Policy for dataset read operations (list children, get statistics).  Requires
136  * no privileges, but must be visible in the local zone.
137  */
138 /* ARGSUSED */
139 static int
140 zfs_secpolicy_read(const char *dataset, cred_t *cr)
141 {
142 	if (INGLOBALZONE(curproc) ||
143 	    zone_dataset_visible(dataset, NULL))
144 		return (0);
145 
146 	return (ENOENT);
147 }
148 
149 static int
150 zfs_dozonecheck(const char *dataset, cred_t *cr)
151 {
152 	uint64_t zoned;
153 	int writable = 1;
154 
155 	/*
156 	 * The dataset must be visible by this zone -- check this first
157 	 * so they don't see EPERM on something they shouldn't know about.
158 	 */
159 	if (!INGLOBALZONE(curproc) &&
160 	    !zone_dataset_visible(dataset, &writable))
161 		return (ENOENT);
162 
163 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
164 		return (ENOENT);
165 
166 	if (INGLOBALZONE(curproc)) {
167 		/*
168 		 * If the fs is zoned, only root can access it from the
169 		 * global zone.
170 		 */
171 		if (secpolicy_zfs(cr) && zoned)
172 			return (EPERM);
173 	} else {
174 		/*
175 		 * If we are in a local zone, the 'zoned' property must be set.
176 		 */
177 		if (!zoned)
178 			return (EPERM);
179 
180 		/* must be writable by this zone */
181 		if (!writable)
182 			return (EPERM);
183 	}
184 	return (0);
185 }
186 
187 /*
188  * Policy for dataset write operations (create children, set properties, etc).
189  * Requires SYS_MOUNT privilege, and must be writable in the local zone.
190  */
191 int
192 zfs_secpolicy_write(const char *dataset, cred_t *cr)
193 {
194 	int error;
195 
196 	if (error = zfs_dozonecheck(dataset, cr))
197 		return (error);
198 
199 	return (secpolicy_zfs(cr));
200 }
201 
202 /*
203  * Policy for operations that want to write a dataset's parent:
204  * create, destroy, snapshot, clone, restore.
205  */
206 static int
207 zfs_secpolicy_parent(const char *dataset, cred_t *cr)
208 {
209 	char parentname[MAXNAMELEN];
210 	char *cp;
211 
212 	/*
213 	 * Remove the @bla or /bla from the end of the name to get the parent.
214 	 */
215 	(void) strncpy(parentname, dataset, sizeof (parentname));
216 	cp = strrchr(parentname, '@');
217 	if (cp != NULL) {
218 		cp[0] = '\0';
219 	} else {
220 		cp = strrchr(parentname, '/');
221 		if (cp == NULL)
222 			return (ENOENT);
223 		cp[0] = '\0';
224 
225 	}
226 
227 	return (zfs_secpolicy_write(parentname, cr));
228 }
229 
230 /*
231  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
232  * SYS_CONFIG privilege, which is not available in a local zone.
233  */
234 /* ARGSUSED */
235 static int
236 zfs_secpolicy_config(const char *unused, cred_t *cr)
237 {
238 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
239 		return (EPERM);
240 
241 	return (0);
242 }
243 
244 /*
245  * Policy for fault injection.  Requires all privileges.
246  */
247 /* ARGSUSED */
248 static int
249 zfs_secpolicy_inject(const char *unused, cred_t *cr)
250 {
251 	return (secpolicy_zinject(cr));
252 }
253 
254 /*
255  * Returns the nvlist as specified by the user in the zfs_cmd_t.
256  */
257 static int
258 get_nvlist(zfs_cmd_t *zc, nvlist_t **nvp)
259 {
260 	char *packed;
261 	size_t size;
262 	int error;
263 	nvlist_t *config = NULL;
264 
265 	/*
266 	 * Read in and unpack the user-supplied nvlist.
267 	 */
268 	if ((size = zc->zc_nvlist_src_size) == 0)
269 		return (EINVAL);
270 
271 	packed = kmem_alloc(size, KM_SLEEP);
272 
273 	if ((error = xcopyin((void *)(uintptr_t)zc->zc_nvlist_src, packed,
274 	    size)) != 0) {
275 		kmem_free(packed, size);
276 		return (error);
277 	}
278 
279 	if ((error = nvlist_unpack(packed, size, &config, 0)) != 0) {
280 		kmem_free(packed, size);
281 		return (error);
282 	}
283 
284 	kmem_free(packed, size);
285 
286 	*nvp = config;
287 	return (0);
288 }
289 
290 static int
291 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
292 {
293 	char *packed = NULL;
294 	size_t size;
295 	int error;
296 
297 	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
298 
299 	if (size > zc->zc_nvlist_dst_size) {
300 		error = ENOMEM;
301 	} else {
302 		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
303 		    KM_SLEEP) == 0);
304 		error = xcopyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
305 		    size);
306 		kmem_free(packed, size);
307 	}
308 
309 	zc->zc_nvlist_dst_size = size;
310 	return (error);
311 }
312 
313 static int
314 zfs_ioc_pool_create(zfs_cmd_t *zc)
315 {
316 	int error;
317 	nvlist_t *config;
318 
319 	if ((error = get_nvlist(zc, &config)) != 0)
320 		return (error);
321 
322 	error = spa_create(zc->zc_name, config, zc->zc_value[0] == '\0' ?
323 	    NULL : zc->zc_value);
324 
325 	nvlist_free(config);
326 
327 	return (error);
328 }
329 
330 static int
331 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
332 {
333 	return (spa_destroy(zc->zc_name));
334 }
335 
336 static int
337 zfs_ioc_pool_import(zfs_cmd_t *zc)
338 {
339 	int error;
340 	nvlist_t *config;
341 	uint64_t guid;
342 
343 	if ((error = get_nvlist(zc, &config)) != 0)
344 		return (error);
345 
346 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
347 	    guid != zc->zc_guid)
348 		error = EINVAL;
349 	else
350 		error = spa_import(zc->zc_name, config,
351 		    zc->zc_value[0] == '\0' ? NULL : zc->zc_value);
352 
353 	nvlist_free(config);
354 
355 	return (error);
356 }
357 
358 static int
359 zfs_ioc_pool_export(zfs_cmd_t *zc)
360 {
361 	return (spa_export(zc->zc_name, NULL));
362 }
363 
364 static int
365 zfs_ioc_pool_configs(zfs_cmd_t *zc)
366 {
367 	nvlist_t *configs;
368 	int error;
369 
370 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
371 		return (EEXIST);
372 
373 	error = put_nvlist(zc, configs);
374 
375 	nvlist_free(configs);
376 
377 	return (error);
378 }
379 
380 static int
381 zfs_ioc_pool_stats(zfs_cmd_t *zc)
382 {
383 	nvlist_t *config;
384 	int error;
385 	int ret = 0;
386 
387 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
388 	    sizeof (zc->zc_value));
389 
390 	if (config != NULL) {
391 		ret = put_nvlist(zc, config);
392 		nvlist_free(config);
393 
394 		/*
395 		 * The config may be present even if 'error' is non-zero.
396 		 * In this case we return success, and preserve the real errno
397 		 * in 'zc_cookie'.
398 		 */
399 		zc->zc_cookie = error;
400 	} else {
401 		ret = error;
402 	}
403 
404 	return (ret);
405 }
406 
407 /*
408  * Try to import the given pool, returning pool stats as appropriate so that
409  * user land knows which devices are available and overall pool health.
410  */
411 static int
412 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
413 {
414 	nvlist_t *tryconfig, *config;
415 	int error;
416 
417 	if ((error = get_nvlist(zc, &tryconfig)) != 0)
418 		return (error);
419 
420 	config = spa_tryimport(tryconfig);
421 
422 	nvlist_free(tryconfig);
423 
424 	if (config == NULL)
425 		return (EINVAL);
426 
427 	error = put_nvlist(zc, config);
428 	nvlist_free(config);
429 
430 	return (error);
431 }
432 
433 static int
434 zfs_ioc_pool_scrub(zfs_cmd_t *zc)
435 {
436 	spa_t *spa;
437 	int error;
438 
439 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
440 		return (error);
441 
442 	spa_config_enter(spa, RW_READER, FTAG);
443 	error = spa_scrub(spa, zc->zc_cookie, B_FALSE);
444 	spa_config_exit(spa, FTAG);
445 
446 	spa_close(spa, FTAG);
447 
448 	return (error);
449 }
450 
451 static int
452 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
453 {
454 	spa_t *spa;
455 	int error;
456 
457 	error = spa_open(zc->zc_name, &spa, FTAG);
458 	if (error == 0) {
459 		spa_freeze(spa);
460 		spa_close(spa, FTAG);
461 	}
462 	return (error);
463 }
464 
465 static int
466 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
467 {
468 	spa_t *spa;
469 	int error;
470 
471 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
472 		return (error);
473 
474 	spa_upgrade(spa);
475 
476 	spa_close(spa, FTAG);
477 
478 	return (error);
479 }
480 
481 static int
482 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
483 {
484 	spa_t *spa;
485 	char *hist_buf;
486 	uint64_t size;
487 	int error;
488 
489 	if ((size = zc->zc_history_len) == 0)
490 		return (EINVAL);
491 
492 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
493 		return (error);
494 
495 	if (spa_version(spa) < ZFS_VERSION_ZPOOL_HISTORY) {
496 		spa_close(spa, FTAG);
497 		return (ENOTSUP);
498 	}
499 
500 	hist_buf = kmem_alloc(size, KM_SLEEP);
501 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
502 	    &zc->zc_history_len, hist_buf)) == 0) {
503 		error = xcopyout(hist_buf, (char *)(uintptr_t)zc->zc_history,
504 		    zc->zc_history_len);
505 	}
506 
507 	spa_close(spa, FTAG);
508 	kmem_free(hist_buf, size);
509 	return (error);
510 }
511 
512 static int
513 zfs_ioc_pool_log_history(zfs_cmd_t *zc)
514 {
515 	spa_t *spa;
516 	char *history_str = NULL;
517 	size_t size;
518 	int error;
519 
520 	size = zc->zc_history_len;
521 	if (size == 0 || size > HIS_MAX_RECORD_LEN)
522 		return (EINVAL);
523 
524 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
525 		return (error);
526 
527 	if (spa_version(spa) < ZFS_VERSION_ZPOOL_HISTORY) {
528 		spa_close(spa, FTAG);
529 		return (ENOTSUP);
530 	}
531 
532 	/* add one for the NULL delimiter */
533 	size++;
534 	history_str = kmem_alloc(size, KM_SLEEP);
535 	if ((error = xcopyin((void *)(uintptr_t)zc->zc_history, history_str,
536 	    size)) != 0) {
537 		spa_close(spa, FTAG);
538 		kmem_free(history_str, size);
539 		return (error);
540 	}
541 	history_str[size - 1] = '\0';
542 
543 	error = spa_history_log(spa, history_str, zc->zc_history_offset);
544 
545 	spa_close(spa, FTAG);
546 	kmem_free(history_str, size);
547 
548 	return (error);
549 }
550 
551 static int
552 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
553 {
554 	int error;
555 
556 	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
557 		return (error);
558 
559 	return (0);
560 }
561 
562 static int
563 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
564 {
565 	objset_t *osp;
566 	int error;
567 
568 	if ((error = dmu_objset_open(zc->zc_name, DMU_OST_ZFS,
569 	    DS_MODE_NONE | DS_MODE_READONLY, &osp)) != 0)
570 		return (error);
571 
572 	error = zfs_obj_to_path(osp, zc->zc_obj, zc->zc_value,
573 	    sizeof (zc->zc_value));
574 	dmu_objset_close(osp);
575 
576 	return (error);
577 }
578 
579 static int
580 zfs_ioc_vdev_add(zfs_cmd_t *zc)
581 {
582 	spa_t *spa;
583 	int error;
584 	nvlist_t *config;
585 
586 	error = spa_open(zc->zc_name, &spa, FTAG);
587 	if (error != 0)
588 		return (error);
589 
590 	/*
591 	 * A root pool with concatenated devices is not supported.
592 	 * Thus, can not add a device to a root pool with one device.
593 	 */
594 	if (spa->spa_root_vdev->vdev_children == 1 && spa->spa_bootfs != 0) {
595 		spa_close(spa, FTAG);
596 		return (EDOM);
597 	}
598 
599 	if ((error = get_nvlist(zc, &config)) == 0) {
600 		error = spa_vdev_add(spa, config);
601 		nvlist_free(config);
602 	}
603 
604 	spa_close(spa, FTAG);
605 	return (error);
606 }
607 
608 static int
609 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
610 {
611 	spa_t *spa;
612 	int error;
613 
614 	error = spa_open(zc->zc_name, &spa, FTAG);
615 	if (error != 0)
616 		return (error);
617 	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
618 	spa_close(spa, FTAG);
619 	return (error);
620 }
621 
622 static int
623 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
624 {
625 	spa_t *spa;
626 	int error;
627 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
628 
629 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
630 		return (error);
631 	switch (zc->zc_cookie) {
632 	case VDEV_STATE_ONLINE:
633 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
634 		break;
635 
636 	case VDEV_STATE_OFFLINE:
637 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
638 		break;
639 
640 	case VDEV_STATE_FAULTED:
641 		error = vdev_fault(spa, zc->zc_guid);
642 		break;
643 
644 	case VDEV_STATE_DEGRADED:
645 		error = vdev_degrade(spa, zc->zc_guid);
646 		break;
647 
648 	default:
649 		error = EINVAL;
650 	}
651 	zc->zc_cookie = newstate;
652 	spa_close(spa, FTAG);
653 	return (error);
654 }
655 
656 static int
657 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
658 {
659 	spa_t *spa;
660 	int replacing = zc->zc_cookie;
661 	nvlist_t *config;
662 	int error;
663 
664 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
665 		return (error);
666 
667 	if ((error = get_nvlist(zc, &config)) == 0) {
668 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
669 		nvlist_free(config);
670 	}
671 
672 	spa_close(spa, FTAG);
673 	return (error);
674 }
675 
676 static int
677 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
678 {
679 	spa_t *spa;
680 	int error;
681 
682 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
683 		return (error);
684 
685 	error = spa_vdev_detach(spa, zc->zc_guid, B_FALSE);
686 
687 	spa_close(spa, FTAG);
688 	return (error);
689 }
690 
691 static int
692 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
693 {
694 	spa_t *spa;
695 	char *path = zc->zc_value;
696 	uint64_t guid = zc->zc_guid;
697 	int error;
698 
699 	error = spa_open(zc->zc_name, &spa, FTAG);
700 	if (error != 0)
701 		return (error);
702 
703 	error = spa_vdev_setpath(spa, guid, path);
704 	spa_close(spa, FTAG);
705 	return (error);
706 }
707 
708 static int
709 zfs_ioc_objset_stats(zfs_cmd_t *zc)
710 {
711 	objset_t *os = NULL;
712 	int error;
713 	nvlist_t *nv;
714 
715 retry:
716 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
717 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
718 	if (error != 0) {
719 		/*
720 		 * This is ugly: dmu_objset_open() can return EBUSY if
721 		 * the objset is held exclusively. Fortunately this hold is
722 		 * only for a short while, so we retry here.
723 		 * This avoids user code having to handle EBUSY,
724 		 * for example for a "zfs list".
725 		 */
726 		if (error == EBUSY) {
727 			delay(1);
728 			goto retry;
729 		}
730 		return (error);
731 	}
732 
733 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
734 
735 	if (zc->zc_nvlist_dst != 0 &&
736 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
737 		dmu_objset_stats(os, nv);
738 		/*
739 		 * NB: zvol_get_stats() will read the objset contents,
740 		 * which we aren't supposed to do with a
741 		 * DS_MODE_STANDARD open, because it could be
742 		 * inconsistent.  So this is a bit of a workaround...
743 		 */
744 		if (!zc->zc_objset_stats.dds_inconsistent &&
745 		    dmu_objset_type(os) == DMU_OST_ZVOL)
746 			VERIFY(zvol_get_stats(os, nv) == 0);
747 		error = put_nvlist(zc, nv);
748 		nvlist_free(nv);
749 	}
750 
751 	spa_altroot(dmu_objset_spa(os), zc->zc_value, sizeof (zc->zc_value));
752 
753 	dmu_objset_close(os);
754 	return (error);
755 }
756 
757 static int
758 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
759 {
760 	objset_t *os;
761 	int error;
762 	char *p;
763 
764 retry:
765 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
766 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
767 	if (error != 0) {
768 		/*
769 		 * This is ugly: dmu_objset_open() can return EBUSY if
770 		 * the objset is held exclusively. Fortunately this hold is
771 		 * only for a short while, so we retry here.
772 		 * This avoids user code having to handle EBUSY,
773 		 * for example for a "zfs list".
774 		 */
775 		if (error == EBUSY) {
776 			delay(1);
777 			goto retry;
778 		}
779 		if (error == ENOENT)
780 			error = ESRCH;
781 		return (error);
782 	}
783 
784 	p = strrchr(zc->zc_name, '/');
785 	if (p == NULL || p[1] != '\0')
786 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
787 	p = zc->zc_name + strlen(zc->zc_name);
788 
789 	do {
790 		error = dmu_dir_list_next(os,
791 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
792 		    NULL, &zc->zc_cookie);
793 		if (error == ENOENT)
794 			error = ESRCH;
795 	} while (error == 0 && !INGLOBALZONE(curproc) &&
796 	    !zone_dataset_visible(zc->zc_name, NULL));
797 
798 	/*
799 	 * If it's a hidden dataset (ie. with a '$' in its name), don't
800 	 * try to get stats for it.  Userland will skip over it.
801 	 */
802 	if (error == 0 && strchr(zc->zc_name, '$') == NULL)
803 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
804 
805 	dmu_objset_close(os);
806 	return (error);
807 }
808 
809 static int
810 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
811 {
812 	objset_t *os;
813 	int error;
814 
815 retry:
816 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
817 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
818 	if (error != 0) {
819 		/*
820 		 * This is ugly: dmu_objset_open() can return EBUSY if
821 		 * the objset is held exclusively. Fortunately this hold is
822 		 * only for a short while, so we retry here.
823 		 * This avoids user code having to handle EBUSY,
824 		 * for example for a "zfs list".
825 		 */
826 		if (error == EBUSY) {
827 			delay(1);
828 			goto retry;
829 		}
830 		if (error == ENOENT)
831 			error = ESRCH;
832 		return (error);
833 	}
834 
835 	/*
836 	 * A dataset name of maximum length cannot have any snapshots,
837 	 * so exit immediately.
838 	 */
839 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
840 		dmu_objset_close(os);
841 		return (ESRCH);
842 	}
843 
844 	error = dmu_snapshot_list_next(os,
845 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
846 	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie);
847 	if (error == ENOENT)
848 		error = ESRCH;
849 
850 	if (error == 0)
851 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
852 
853 	dmu_objset_close(os);
854 	return (error);
855 }
856 
857 static int
858 zfs_set_prop_nvlist(const char *name, dev_t dev, cred_t *cr, nvlist_t *nvl)
859 {
860 	nvpair_t *elem;
861 	int error;
862 	const char *propname;
863 	zfs_prop_t prop;
864 	uint64_t intval;
865 	char *strval;
866 	char buf[MAXNAMELEN];
867 	const char *p;
868 	spa_t *spa;
869 
870 	elem = NULL;
871 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
872 		propname = nvpair_name(elem);
873 
874 		if ((prop = zfs_name_to_prop(propname)) ==
875 		    ZFS_PROP_INVAL) {
876 			/*
877 			 * If this is a user-defined property, it must be a
878 			 * string, and there is no further validation to do.
879 			 */
880 			if (!zfs_prop_user(propname) ||
881 			    nvpair_type(elem) != DATA_TYPE_STRING)
882 				return (EINVAL);
883 
884 			VERIFY(nvpair_value_string(elem, &strval) == 0);
885 			error = dsl_prop_set(name, propname, 1,
886 			    strlen(strval) + 1, strval);
887 			if (error == 0)
888 				continue;
889 			else
890 				return (error);
891 		}
892 
893 		/*
894 		 * Check permissions for special properties.
895 		 */
896 		switch (prop) {
897 		case ZFS_PROP_ZONED:
898 			/*
899 			 * Disallow setting of 'zoned' from within a local zone.
900 			 */
901 			if (!INGLOBALZONE(curproc))
902 				return (EPERM);
903 			break;
904 
905 		case ZFS_PROP_QUOTA:
906 			if (error = zfs_dozonecheck(name, cr))
907 				return (error);
908 
909 			if (!INGLOBALZONE(curproc)) {
910 				uint64_t zoned;
911 				char setpoint[MAXNAMELEN];
912 				int dslen;
913 				/*
914 				 * Unprivileged users are allowed to modify the
915 				 * quota on things *under* (ie. contained by)
916 				 * the thing they own.
917 				 */
918 				if (dsl_prop_get_integer(name, "zoned", &zoned,
919 				    setpoint))
920 					return (EPERM);
921 				if (!zoned) /* this shouldn't happen */
922 					return (EPERM);
923 				dslen = strlen(name);
924 				if (dslen <= strlen(setpoint))
925 					return (EPERM);
926 			}
927 			break;
928 
929 		case ZFS_PROP_COMPRESSION:
930 			/*
931 			 * If the user specified gzip compression, make sure
932 			 * the SPA supports it. We ignore any errors here since
933 			 * we'll catch them later.
934 			 */
935 			if (nvpair_type(elem) == DATA_TYPE_UINT64 &&
936 			    nvpair_value_uint64(elem, &intval) == 0 &&
937 			    intval >= ZIO_COMPRESS_GZIP_1 &&
938 			    intval <= ZIO_COMPRESS_GZIP_9) {
939 				if ((p = strchr(name, '/')) == NULL) {
940 					p = name;
941 				} else {
942 					bcopy(name, buf, p - name);
943 					buf[p - name] = '\0';
944 					p = buf;
945 				}
946 
947 				if (spa_open(p, &spa, FTAG) == 0) {
948 					if (spa_version(spa) <
949 					    ZFS_VERSION_GZIP_COMPRESSION) {
950 						spa_close(spa, FTAG);
951 						return (ENOTSUP);
952 					}
953 
954 					spa_close(spa, FTAG);
955 				}
956 			}
957 			break;
958 		}
959 
960 		switch (prop) {
961 		case ZFS_PROP_QUOTA:
962 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
963 			    (error = dsl_dir_set_quota(name,
964 			    intval)) != 0)
965 				return (error);
966 			break;
967 
968 		case ZFS_PROP_RESERVATION:
969 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
970 			    (error = dsl_dir_set_reservation(name,
971 			    intval)) != 0)
972 				return (error);
973 			break;
974 
975 		case ZFS_PROP_VOLSIZE:
976 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
977 			    (error = zvol_set_volsize(name, dev,
978 			    intval)) != 0)
979 				return (error);
980 			break;
981 
982 		case ZFS_PROP_VOLBLOCKSIZE:
983 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
984 			    (error = zvol_set_volblocksize(name,
985 			    intval)) != 0)
986 				return (error);
987 			break;
988 
989 		default:
990 			if (nvpair_type(elem) == DATA_TYPE_STRING) {
991 				if (zfs_prop_get_type(prop) !=
992 				    prop_type_string)
993 					return (EINVAL);
994 				VERIFY(nvpair_value_string(elem, &strval) == 0);
995 				if ((error = dsl_prop_set(name,
996 				    nvpair_name(elem), 1, strlen(strval) + 1,
997 				    strval)) != 0)
998 					return (error);
999 			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
1000 				const char *unused;
1001 
1002 				VERIFY(nvpair_value_uint64(elem, &intval) == 0);
1003 
1004 				switch (zfs_prop_get_type(prop)) {
1005 				case prop_type_number:
1006 					break;
1007 				case prop_type_boolean:
1008 					if (intval > 1)
1009 						return (EINVAL);
1010 					break;
1011 				case prop_type_string:
1012 					return (EINVAL);
1013 				case prop_type_index:
1014 					if (zfs_prop_index_to_string(prop,
1015 					    intval, &unused) != 0)
1016 						return (EINVAL);
1017 					break;
1018 				default:
1019 					cmn_err(CE_PANIC, "unknown property "
1020 					    "type");
1021 					break;
1022 				}
1023 
1024 				if ((error = dsl_prop_set(name, propname,
1025 				    8, 1, &intval)) != 0)
1026 					return (error);
1027 			} else {
1028 				return (EINVAL);
1029 			}
1030 			break;
1031 		}
1032 	}
1033 
1034 	return (0);
1035 }
1036 
1037 static int
1038 zfs_ioc_set_prop(zfs_cmd_t *zc)
1039 {
1040 	nvlist_t *nvl;
1041 	int error;
1042 	zfs_prop_t prop;
1043 
1044 	/*
1045 	 * If zc_value is set, then this is an attempt to inherit a value.
1046 	 * Otherwise, zc_nvlist refers to a list of properties to set.
1047 	 */
1048 	if (zc->zc_value[0] != '\0') {
1049 		if (!zfs_prop_user(zc->zc_value) &&
1050 		    ((prop = zfs_name_to_prop(zc->zc_value)) ==
1051 		    ZFS_PROP_INVAL ||
1052 		    !zfs_prop_inheritable(prop)))
1053 			return (EINVAL);
1054 
1055 		return (dsl_prop_set(zc->zc_name, zc->zc_value, 0, 0, NULL));
1056 	}
1057 
1058 	if ((error = get_nvlist(zc, &nvl)) != 0)
1059 		return (error);
1060 
1061 	error = zfs_set_prop_nvlist(zc->zc_name, zc->zc_dev,
1062 	    (cred_t *)(uintptr_t)zc->zc_cred, nvl);
1063 	nvlist_free(nvl);
1064 	return (error);
1065 }
1066 
1067 static int
1068 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
1069 {
1070 	nvlist_t *nvl;
1071 	int error, reset_bootfs = 0;
1072 	uint64_t objnum;
1073 	zpool_prop_t prop;
1074 	nvpair_t *elem;
1075 	char *propname, *strval;
1076 	spa_t *spa;
1077 	vdev_t *rvdev;
1078 	char *vdev_type;
1079 	objset_t *os;
1080 
1081 	if ((error = get_nvlist(zc, &nvl)) != 0)
1082 		return (error);
1083 
1084 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
1085 		nvlist_free(nvl);
1086 		return (error);
1087 	}
1088 
1089 	if (spa_version(spa) < ZFS_VERSION_BOOTFS) {
1090 		nvlist_free(nvl);
1091 		spa_close(spa, FTAG);
1092 		return (ENOTSUP);
1093 	}
1094 
1095 	elem = NULL;
1096 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
1097 
1098 		propname = nvpair_name(elem);
1099 
1100 		if ((prop = zpool_name_to_prop(propname)) ==
1101 		    ZFS_PROP_INVAL) {
1102 			nvlist_free(nvl);
1103 			spa_close(spa, FTAG);
1104 			return (EINVAL);
1105 		}
1106 
1107 		switch (prop) {
1108 		case ZPOOL_PROP_BOOTFS:
1109 			/*
1110 			 * A bootable filesystem can not be on a RAIDZ pool
1111 			 * nor a striped pool with more than 1 device.
1112 			 */
1113 			rvdev = spa->spa_root_vdev;
1114 			vdev_type =
1115 			    rvdev->vdev_child[0]->vdev_ops->vdev_op_type;
1116 			if (strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 ||
1117 			    (strcmp(vdev_type, VDEV_TYPE_MIRROR) != 0 &&
1118 			    rvdev->vdev_children > 1)) {
1119 				error = ENOTSUP;
1120 				break;
1121 			}
1122 
1123 			reset_bootfs = 1;
1124 
1125 			VERIFY(nvpair_value_string(elem, &strval) == 0);
1126 			if (strval == NULL || strval[0] == '\0') {
1127 				objnum = zpool_prop_default_numeric(
1128 				    ZPOOL_PROP_BOOTFS);
1129 				break;
1130 			}
1131 
1132 			if (error = dmu_objset_open(strval, DMU_OST_ZFS,
1133 			    DS_MODE_STANDARD | DS_MODE_READONLY, &os))
1134 				break;
1135 			objnum = dmu_objset_id(os);
1136 			dmu_objset_close(os);
1137 			break;
1138 		}
1139 
1140 		if (error)
1141 			break;
1142 	}
1143 	if (error == 0) {
1144 		if (reset_bootfs) {
1145 			VERIFY(nvlist_remove(nvl,
1146 			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS),
1147 			    DATA_TYPE_STRING) == 0);
1148 			VERIFY(nvlist_add_uint64(nvl,
1149 			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS),
1150 			    objnum) == 0);
1151 		}
1152 		error = spa_set_props(spa, nvl);
1153 	}
1154 
1155 	nvlist_free(nvl);
1156 	spa_close(spa, FTAG);
1157 
1158 	return (error);
1159 }
1160 
1161 static int
1162 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
1163 {
1164 	spa_t *spa;
1165 	int error;
1166 	nvlist_t *nvp = NULL;
1167 
1168 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1169 		return (error);
1170 
1171 	error = spa_get_props(spa, &nvp);
1172 
1173 	if (error == 0 && zc->zc_nvlist_dst != NULL)
1174 		error = put_nvlist(zc, nvp);
1175 	else
1176 		error = EFAULT;
1177 
1178 	spa_close(spa, FTAG);
1179 
1180 	if (nvp)
1181 		nvlist_free(nvp);
1182 	return (error);
1183 }
1184 
1185 static int
1186 zfs_ioc_create_minor(zfs_cmd_t *zc)
1187 {
1188 	return (zvol_create_minor(zc->zc_name, zc->zc_dev));
1189 }
1190 
1191 static int
1192 zfs_ioc_remove_minor(zfs_cmd_t *zc)
1193 {
1194 	return (zvol_remove_minor(zc->zc_name));
1195 }
1196 
1197 /*
1198  * Search the vfs list for a specified resource.  Returns a pointer to it
1199  * or NULL if no suitable entry is found. The caller of this routine
1200  * is responsible for releasing the returned vfs pointer.
1201  */
1202 static vfs_t *
1203 zfs_get_vfs(const char *resource)
1204 {
1205 	struct vfs *vfsp;
1206 	struct vfs *vfs_found = NULL;
1207 
1208 	vfs_list_read_lock();
1209 	vfsp = rootvfs;
1210 	do {
1211 		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
1212 			VFS_HOLD(vfsp);
1213 			vfs_found = vfsp;
1214 			break;
1215 		}
1216 		vfsp = vfsp->vfs_next;
1217 	} while (vfsp != rootvfs);
1218 	vfs_list_unlock();
1219 	return (vfs_found);
1220 }
1221 
1222 static void
1223 zfs_create_cb(objset_t *os, void *arg, dmu_tx_t *tx)
1224 {
1225 	zfs_create_data_t *zc = arg;
1226 	zfs_create_fs(os, (cred_t *)(uintptr_t)zc->zc_cred, tx);
1227 }
1228 
1229 static int
1230 zfs_ioc_create(zfs_cmd_t *zc)
1231 {
1232 	objset_t *clone;
1233 	int error = 0;
1234 	zfs_create_data_t cbdata = { 0 };
1235 	void (*cbfunc)(objset_t *os, void *arg, dmu_tx_t *tx);
1236 	dmu_objset_type_t type = zc->zc_objset_type;
1237 
1238 	switch (type) {
1239 
1240 	case DMU_OST_ZFS:
1241 		cbfunc = zfs_create_cb;
1242 		break;
1243 
1244 	case DMU_OST_ZVOL:
1245 		cbfunc = zvol_create_cb;
1246 		break;
1247 
1248 	default:
1249 		cbfunc = NULL;
1250 	}
1251 	if (strchr(zc->zc_name, '@'))
1252 		return (EINVAL);
1253 
1254 	if (zc->zc_nvlist_src != NULL &&
1255 	    (error = get_nvlist(zc, &cbdata.zc_props)) != 0)
1256 		return (error);
1257 
1258 	cbdata.zc_cred = (cred_t *)(uintptr_t)zc->zc_cred;
1259 	cbdata.zc_dev = (dev_t)zc->zc_dev;
1260 
1261 	if (zc->zc_value[0] != '\0') {
1262 		/*
1263 		 * We're creating a clone of an existing snapshot.
1264 		 */
1265 		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
1266 		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
1267 			nvlist_free(cbdata.zc_props);
1268 			return (EINVAL);
1269 		}
1270 
1271 		error = dmu_objset_open(zc->zc_value, type,
1272 		    DS_MODE_STANDARD | DS_MODE_READONLY, &clone);
1273 		if (error) {
1274 			nvlist_free(cbdata.zc_props);
1275 			return (error);
1276 		}
1277 		error = dmu_objset_create(zc->zc_name, type, clone, NULL, NULL);
1278 		dmu_objset_close(clone);
1279 	} else {
1280 		if (cbfunc == NULL) {
1281 			nvlist_free(cbdata.zc_props);
1282 			return (EINVAL);
1283 		}
1284 
1285 		if (type == DMU_OST_ZVOL) {
1286 			uint64_t volsize, volblocksize;
1287 
1288 			if (cbdata.zc_props == NULL ||
1289 			    nvlist_lookup_uint64(cbdata.zc_props,
1290 			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
1291 			    &volsize) != 0) {
1292 				nvlist_free(cbdata.zc_props);
1293 				return (EINVAL);
1294 			}
1295 
1296 			if ((error = nvlist_lookup_uint64(cbdata.zc_props,
1297 			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
1298 			    &volblocksize)) != 0 && error != ENOENT) {
1299 				nvlist_free(cbdata.zc_props);
1300 				return (EINVAL);
1301 			}
1302 
1303 			if (error != 0)
1304 				volblocksize = zfs_prop_default_numeric(
1305 				    ZFS_PROP_VOLBLOCKSIZE);
1306 
1307 			if ((error = zvol_check_volblocksize(
1308 			    volblocksize)) != 0 ||
1309 			    (error = zvol_check_volsize(volsize,
1310 			    volblocksize)) != 0) {
1311 				nvlist_free(cbdata.zc_props);
1312 				return (error);
1313 			}
1314 		}
1315 
1316 		error = dmu_objset_create(zc->zc_name, type, NULL, cbfunc,
1317 		    &cbdata);
1318 	}
1319 
1320 	/*
1321 	 * It would be nice to do this atomically.
1322 	 */
1323 	if (error == 0) {
1324 		if ((error = zfs_set_prop_nvlist(zc->zc_name,
1325 		    zc->zc_dev, (cred_t *)(uintptr_t)zc->zc_cred,
1326 		    cbdata.zc_props)) != 0)
1327 			(void) dmu_objset_destroy(zc->zc_name);
1328 	}
1329 
1330 	nvlist_free(cbdata.zc_props);
1331 	return (error);
1332 }
1333 
1334 static int
1335 zfs_ioc_snapshot(zfs_cmd_t *zc)
1336 {
1337 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
1338 		return (EINVAL);
1339 	return (dmu_objset_snapshot(zc->zc_name,
1340 	    zc->zc_value, zc->zc_cookie));
1341 }
1342 
1343 int
1344 zfs_unmount_snap(char *name, void *arg)
1345 {
1346 	char *snapname = arg;
1347 	char *cp;
1348 	vfs_t *vfsp = NULL;
1349 
1350 	/*
1351 	 * Snapshots (which are under .zfs control) must be unmounted
1352 	 * before they can be destroyed.
1353 	 */
1354 
1355 	if (snapname) {
1356 		(void) strcat(name, "@");
1357 		(void) strcat(name, snapname);
1358 		vfsp = zfs_get_vfs(name);
1359 		cp = strchr(name, '@');
1360 		*cp = '\0';
1361 	} else if (strchr(name, '@')) {
1362 		vfsp = zfs_get_vfs(name);
1363 	}
1364 
1365 	if (vfsp) {
1366 		/*
1367 		 * Always force the unmount for snapshots.
1368 		 */
1369 		int flag = MS_FORCE;
1370 		int err;
1371 
1372 		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
1373 			VFS_RELE(vfsp);
1374 			return (err);
1375 		}
1376 		VFS_RELE(vfsp);
1377 		if ((err = dounmount(vfsp, flag, kcred)) != 0)
1378 			return (err);
1379 	}
1380 	return (0);
1381 }
1382 
1383 static int
1384 zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
1385 {
1386 	int err;
1387 
1388 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
1389 		return (EINVAL);
1390 	err = dmu_objset_find(zc->zc_name,
1391 	    zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
1392 	if (err)
1393 		return (err);
1394 	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value));
1395 }
1396 
1397 static int
1398 zfs_ioc_destroy(zfs_cmd_t *zc)
1399 {
1400 	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
1401 		int err = zfs_unmount_snap(zc->zc_name, NULL);
1402 		if (err)
1403 			return (err);
1404 	}
1405 
1406 	return (dmu_objset_destroy(zc->zc_name));
1407 }
1408 
1409 static int
1410 zfs_ioc_rollback(zfs_cmd_t *zc)
1411 {
1412 	return (dmu_objset_rollback(zc->zc_name));
1413 }
1414 
1415 static int
1416 zfs_ioc_rename(zfs_cmd_t *zc)
1417 {
1418 	boolean_t recursive = zc->zc_cookie & 1;
1419 
1420 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
1421 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0)
1422 		return (EINVAL);
1423 
1424 	/*
1425 	 * Unmount snapshot unless we're doing a recursive rename,
1426 	 * in which case the dataset code figures out which snapshots
1427 	 * to unmount.
1428 	 */
1429 	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
1430 	    zc->zc_objset_type == DMU_OST_ZFS) {
1431 		int err = zfs_unmount_snap(zc->zc_name, NULL);
1432 		if (err)
1433 			return (err);
1434 	}
1435 
1436 	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
1437 }
1438 
1439 static int
1440 zfs_ioc_recvbackup(zfs_cmd_t *zc)
1441 {
1442 	file_t *fp;
1443 	int error, fd;
1444 	offset_t new_off;
1445 
1446 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
1447 	    strchr(zc->zc_value, '@') == NULL)
1448 		return (EINVAL);
1449 
1450 	fd = zc->zc_cookie;
1451 	fp = getf(fd);
1452 	if (fp == NULL)
1453 		return (EBADF);
1454 	error = dmu_recvbackup(zc->zc_value, &zc->zc_begin_record,
1455 	    &zc->zc_cookie, (boolean_t)zc->zc_guid, fp->f_vnode,
1456 	    fp->f_offset);
1457 
1458 	new_off = fp->f_offset + zc->zc_cookie;
1459 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &new_off) == 0)
1460 		fp->f_offset = new_off;
1461 
1462 	releasef(fd);
1463 	return (error);
1464 }
1465 
1466 static int
1467 zfs_ioc_sendbackup(zfs_cmd_t *zc)
1468 {
1469 	objset_t *fromsnap = NULL;
1470 	objset_t *tosnap;
1471 	file_t *fp;
1472 	int error;
1473 
1474 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
1475 	    DS_MODE_STANDARD | DS_MODE_READONLY, &tosnap);
1476 	if (error)
1477 		return (error);
1478 
1479 	if (zc->zc_value[0] != '\0') {
1480 		char buf[MAXPATHLEN];
1481 		char *cp;
1482 
1483 		(void) strncpy(buf, zc->zc_name, sizeof (buf));
1484 		cp = strchr(buf, '@');
1485 		if (cp)
1486 			*(cp+1) = 0;
1487 		(void) strncat(buf, zc->zc_value, sizeof (buf));
1488 		error = dmu_objset_open(buf, DMU_OST_ANY,
1489 		    DS_MODE_STANDARD | DS_MODE_READONLY, &fromsnap);
1490 		if (error) {
1491 			dmu_objset_close(tosnap);
1492 			return (error);
1493 		}
1494 	}
1495 
1496 	fp = getf(zc->zc_cookie);
1497 	if (fp == NULL) {
1498 		dmu_objset_close(tosnap);
1499 		if (fromsnap)
1500 			dmu_objset_close(fromsnap);
1501 		return (EBADF);
1502 	}
1503 
1504 	error = dmu_sendbackup(tosnap, fromsnap, fp->f_vnode);
1505 
1506 	releasef(zc->zc_cookie);
1507 	if (fromsnap)
1508 		dmu_objset_close(fromsnap);
1509 	dmu_objset_close(tosnap);
1510 	return (error);
1511 }
1512 
1513 static int
1514 zfs_ioc_inject_fault(zfs_cmd_t *zc)
1515 {
1516 	int id, error;
1517 
1518 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
1519 	    &zc->zc_inject_record);
1520 
1521 	if (error == 0)
1522 		zc->zc_guid = (uint64_t)id;
1523 
1524 	return (error);
1525 }
1526 
1527 static int
1528 zfs_ioc_clear_fault(zfs_cmd_t *zc)
1529 {
1530 	return (zio_clear_fault((int)zc->zc_guid));
1531 }
1532 
1533 static int
1534 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
1535 {
1536 	int id = (int)zc->zc_guid;
1537 	int error;
1538 
1539 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
1540 	    &zc->zc_inject_record);
1541 
1542 	zc->zc_guid = id;
1543 
1544 	return (error);
1545 }
1546 
1547 static int
1548 zfs_ioc_error_log(zfs_cmd_t *zc)
1549 {
1550 	spa_t *spa;
1551 	int error;
1552 	size_t count = (size_t)zc->zc_nvlist_dst_size;
1553 
1554 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1555 		return (error);
1556 
1557 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
1558 	    &count);
1559 	if (error == 0)
1560 		zc->zc_nvlist_dst_size = count;
1561 	else
1562 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
1563 
1564 	spa_close(spa, FTAG);
1565 
1566 	return (error);
1567 }
1568 
1569 static int
1570 zfs_ioc_clear(zfs_cmd_t *zc)
1571 {
1572 	spa_t *spa;
1573 	vdev_t *vd;
1574 	int error;
1575 	uint64_t txg;
1576 
1577 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1578 		return (error);
1579 
1580 	txg = spa_vdev_enter(spa);
1581 
1582 	if (zc->zc_guid == 0) {
1583 		vd = NULL;
1584 	} else if ((vd = spa_lookup_by_guid(spa, zc->zc_guid)) == NULL) {
1585 		(void) spa_vdev_exit(spa, NULL, txg, ENODEV);
1586 		spa_close(spa, FTAG);
1587 		return (ENODEV);
1588 	}
1589 
1590 	vdev_clear(spa, vd);
1591 
1592 	(void) spa_vdev_exit(spa, NULL, txg, 0);
1593 
1594 	spa_close(spa, FTAG);
1595 
1596 	return (0);
1597 }
1598 
1599 static int
1600 zfs_ioc_promote(zfs_cmd_t *zc)
1601 {
1602 	char *cp;
1603 
1604 	/*
1605 	 * We don't need to unmount *all* the origin fs's snapshots, but
1606 	 * it's easier.
1607 	 */
1608 	cp = strchr(zc->zc_value, '@');
1609 	if (cp)
1610 		*cp = '\0';
1611 	(void) dmu_objset_find(zc->zc_value,
1612 	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
1613 	return (dsl_dataset_promote(zc->zc_name));
1614 }
1615 
/*
 * ioctl dispatch table.  zfsdev_ioctl() indexes it with (cmd - ZFS_IOC), so
 * the position of each entry is significant and must stay in step with the
 * ioctl numbering; do not reorder or insert entries in the middle.
 *
 * Each entry holds, in order:
 *   - the handler invoked with the copied-in zfs_cmd_t,
 *   - the security policy routine run on zc_name before the handler,
 *   - how zc_name is validated: as a pool name, as a dataset name, or not
 *     at all (no_name).
 */
static zfs_ioc_vec_t zfs_ioc_vec[] = {
	{ zfs_ioc_pool_create,		zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_pool_destroy,		zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_pool_import,		zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_pool_export,		zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_pool_configs,		zfs_secpolicy_none,	no_name },
	{ zfs_ioc_pool_stats,		zfs_secpolicy_read,	pool_name },
	{ zfs_ioc_pool_tryimport,	zfs_secpolicy_config,	no_name },
	{ zfs_ioc_pool_scrub,		zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_pool_freeze,		zfs_secpolicy_config,	no_name },
	{ zfs_ioc_pool_upgrade,		zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_pool_get_history,	zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_pool_log_history,	zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_vdev_add,		zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_vdev_remove,		zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_vdev_set_state,	zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_vdev_attach,		zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_vdev_detach,		zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_vdev_setpath,		zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_objset_stats,		zfs_secpolicy_read,	dataset_name },
	{ zfs_ioc_dataset_list_next,	zfs_secpolicy_read,	dataset_name },
	{ zfs_ioc_snapshot_list_next,	zfs_secpolicy_read,	dataset_name },
	{ zfs_ioc_set_prop,		zfs_secpolicy_write,	dataset_name },
	{ zfs_ioc_create_minor,		zfs_secpolicy_config,	dataset_name },
	{ zfs_ioc_remove_minor,		zfs_secpolicy_config,	dataset_name },
	{ zfs_ioc_create,		zfs_secpolicy_parent,	dataset_name },
	{ zfs_ioc_destroy,		zfs_secpolicy_parent,	dataset_name },
	{ zfs_ioc_rollback,		zfs_secpolicy_write,	dataset_name },
	{ zfs_ioc_rename,		zfs_secpolicy_write,	dataset_name },
	{ zfs_ioc_recvbackup,		zfs_secpolicy_write,	dataset_name },
	{ zfs_ioc_sendbackup,		zfs_secpolicy_write,	dataset_name },
	{ zfs_ioc_inject_fault,		zfs_secpolicy_inject,	no_name },
	{ zfs_ioc_clear_fault,		zfs_secpolicy_inject,	no_name },
	{ zfs_ioc_inject_list_next,	zfs_secpolicy_inject,	no_name },
	{ zfs_ioc_error_log,		zfs_secpolicy_inject,	pool_name },
	{ zfs_ioc_clear,		zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_promote,		zfs_secpolicy_write,	dataset_name },
	{ zfs_ioc_destroy_snaps,	zfs_secpolicy_write,	dataset_name },
	{ zfs_ioc_snapshot,		zfs_secpolicy_write,	dataset_name },
	{ zfs_ioc_dsobj_to_dsname,	zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_obj_to_path,		zfs_secpolicy_config,	no_name },
	{ zfs_ioc_pool_set_props,	zfs_secpolicy_config,	pool_name },
	{ zfs_ioc_pool_get_props,	zfs_secpolicy_read,	pool_name },
};
1660 
1661 static int
1662 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
1663 {
1664 	zfs_cmd_t *zc;
1665 	uint_t vec;
1666 	int error, rc;
1667 
1668 	if (getminor(dev) != 0)
1669 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
1670 
1671 	vec = cmd - ZFS_IOC;
1672 
1673 	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
1674 		return (EINVAL);
1675 
1676 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
1677 
1678 	error = xcopyin((void *)arg, zc, sizeof (zfs_cmd_t));
1679 
1680 	if (error == 0) {
1681 		zc->zc_cred = (uintptr_t)cr;
1682 		zc->zc_dev = dev;
1683 		error = zfs_ioc_vec[vec].zvec_secpolicy(zc->zc_name, cr);
1684 	}
1685 
1686 	/*
1687 	 * Ensure that all pool/dataset names are valid before we pass down to
1688 	 * the lower layers.
1689 	 */
1690 	if (error == 0) {
1691 		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
1692 		switch (zfs_ioc_vec[vec].zvec_namecheck) {
1693 		case pool_name:
1694 			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
1695 				error = EINVAL;
1696 			break;
1697 
1698 		case dataset_name:
1699 			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
1700 				error = EINVAL;
1701 			break;
1702 
1703 		case no_name:
1704 			break;
1705 		}
1706 	}
1707 
1708 	if (error == 0)
1709 		error = zfs_ioc_vec[vec].zvec_func(zc);
1710 
1711 	rc = xcopyout(zc, (void *)arg, sizeof (zfs_cmd_t));
1712 	if (error == 0)
1713 		error = rc;
1714 
1715 	kmem_free(zc, sizeof (zfs_cmd_t));
1716 	return (error);
1717 }
1718 
1719 static int
1720 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1721 {
1722 	if (cmd != DDI_ATTACH)
1723 		return (DDI_FAILURE);
1724 
1725 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
1726 	    DDI_PSEUDO, 0) == DDI_FAILURE)
1727 		return (DDI_FAILURE);
1728 
1729 	zfs_dip = dip;
1730 
1731 	ddi_report_dev(dip);
1732 
1733 	return (DDI_SUCCESS);
1734 }
1735 
1736 static int
1737 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1738 {
1739 	if (spa_busy() || zfs_busy() || zvol_busy())
1740 		return (DDI_FAILURE);
1741 
1742 	if (cmd != DDI_DETACH)
1743 		return (DDI_FAILURE);
1744 
1745 	zfs_dip = NULL;
1746 
1747 	ddi_prop_remove_all(dip);
1748 	ddi_remove_minor_node(dip, NULL);
1749 
1750 	return (DDI_SUCCESS);
1751 }
1752 
1753 /*ARGSUSED*/
1754 static int
1755 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1756 {
1757 	switch (infocmd) {
1758 	case DDI_INFO_DEVT2DEVINFO:
1759 		*result = zfs_dip;
1760 		return (DDI_SUCCESS);
1761 
1762 	case DDI_INFO_DEVT2INSTANCE:
1763 		*result = (void *)0;
1764 		return (DDI_SUCCESS);
1765 	}
1766 
1767 	return (DDI_FAILURE);
1768 }
1769 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * The only entry point below that is implemented in this file is the ioctl
 * routine, zfsdev_ioctl(), which itself forwards requests on a non-zero
 * minor to zvol_ioctl().  The initializer is positional, so the order of
 * the entries must not be changed.
 */
static struct cb_ops zfs_cb_ops = {
	zvol_open,	/* open */
	zvol_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
1799 
/*
 * Device operations for the zfs driver: wires up the info, attach and
 * detach routines defined in this file and points at zfs_cb_ops for the
 * character/block entry points.  The initializer is positional.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL		/* no bus operations */
};
1812 
/*
 * Driver linkage for the module: the driver module operations, a
 * description string that embeds ZFS_VERSION_STRING, and the device
 * operations defined above.
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops, "ZFS storage pool version " ZFS_VERSION_STRING,
	    &zfs_dev_ops
};
1817 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info().
 * It carries two linkage entries in a NULL-terminated list: zfs_modlfs
 * (defined outside this section of the file) and the driver linkage
 * zfs_modldrv.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
1824 
1825 int
1826 _init(void)
1827 {
1828 	int error;
1829 
1830 	spa_init(FREAD | FWRITE);
1831 	zfs_init();
1832 	zvol_init();
1833 
1834 	if ((error = mod_install(&modlinkage)) != 0) {
1835 		zvol_fini();
1836 		zfs_fini();
1837 		spa_fini();
1838 		return (error);
1839 	}
1840 
1841 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
1842 	ASSERT(error == 0);
1843 
1844 	return (0);
1845 }
1846 
1847 int
1848 _fini(void)
1849 {
1850 	int error;
1851 
1852 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
1853 		return (EBUSY);
1854 
1855 	if ((error = mod_remove(&modlinkage)) != 0)
1856 		return (error);
1857 
1858 	zvol_fini();
1859 	zfs_fini();
1860 	spa_fini();
1861 
1862 	ldi_ident_release(zfs_li);
1863 	zfs_li = NULL;
1864 
1865 	return (error);
1866 }
1867 
1868 int
1869 _info(struct modinfo *modinfop)
1870 {
1871 	return (mod_info(&modlinkage, modinfop));
1872 }
1873