xref: /illumos-gate/usr/src/lib/libzfs/common/libzfs_mount.c (revision 591e0e133f9980083db5d64ac33a30bcc3382ff7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
26  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
27  * Copyright 2017 Joyent, Inc.
28  * Copyright 2017 RackTop Systems.
29  */
30 
31 /*
32  * Routines to manage ZFS mounts.  We separate all the nasty routines that have
33  * to deal with the OS.  The following functions are the main entry points --
34  * they are used by mount and unmount and when changing a filesystem's
35  * mountpoint.
36  *
37  *	zfs_is_mounted()
38  *	zfs_mount()
39  *	zfs_unmount()
40  *	zfs_unmountall()
41  *
42  * This file also contains the functions used to manage sharing filesystems via
 * NFS and SMB:
44  *
45  *	zfs_is_shared()
46  *	zfs_share()
47  *	zfs_unshare()
48  *
49  *	zfs_is_shared_nfs()
50  *	zfs_is_shared_smb()
51  *	zfs_share_proto()
 *	zfs_shareall()
53  *	zfs_unshare_nfs()
54  *	zfs_unshare_smb()
55  *	zfs_unshareall_nfs()
56  *	zfs_unshareall_smb()
57  *	zfs_unshareall()
58  *	zfs_unshareall_bypath()
59  *
60  * The following functions are available for pool consumers, and will
61  * mount/unmount and share/unshare all datasets within pool:
62  *
63  *	zpool_enable_datasets()
64  *	zpool_disable_datasets()
65  */
66 
67 #include <dirent.h>
68 #include <dlfcn.h>
69 #include <errno.h>
70 #include <fcntl.h>
71 #include <libgen.h>
72 #include <libintl.h>
73 #include <stdio.h>
74 #include <stdlib.h>
75 #include <strings.h>
76 #include <unistd.h>
77 #include <zone.h>
78 #include <sys/mntent.h>
79 #include <sys/mount.h>
80 #include <sys/stat.h>
81 #include <sys/statvfs.h>
82 
83 #include <libzfs.h>
84 
85 #include "libzfs_impl.h"
86 #include "libzfs_taskq.h"
87 
88 #include <libshare.h>
89 #include <sys/systeminfo.h>
/* Size of the buffer handed to sysinfo(2) when querying the ISA name. */
#define	MAXISALEN	257	/* based on sysinfo(2) man page */

static int mount_tq_nthr = 512;	/* taskq threads for multi-threaded mounting */

/*
 * Forward declarations.  zfs_share_proto() and zfs_is_shared_proto() are
 * defined further down in this file; zfs_is_shared_proto() is deliberately
 * not static.
 */
static void zfs_mount_task(void *);
static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
    zfs_share_proto_t);
98 
/*
 * The share protocols table must be in the same order as the zfs_share_proto_t
 * enum in libzfs_impl.h
 */
typedef struct {
	zfs_prop_t p_prop;	/* dataset property holding the share options */
	char *p_name;		/* protocol name as handed to libshare */
	int p_share_err;	/* libzfs error code used when sharing fails */
	int p_unshare_err;	/* libzfs error code used when unsharing fails */
} proto_table_t;

/* Indexed by zfs_share_proto_t value; one row per supported protocol. */
proto_table_t proto_table[PROTO_END] = {
	{ZFS_PROP_SHARENFS, "nfs", EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},
	{ZFS_PROP_SHARESMB, "smb", EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},
};
114 
/*
 * Protocol lists passed to the *_proto() routines below.  Each list is
 * terminated by PROTO_END, which is what the iteration loops test for.
 */

/* NFS only. */
zfs_share_proto_t nfs_only[] = {
	PROTO_NFS,
	PROTO_END
};

/* SMB only. */
zfs_share_proto_t smb_only[] = {
	PROTO_SMB,
	PROTO_END
};
/* Every protocol in proto_table. */
zfs_share_proto_t share_all_proto[] = {
	PROTO_NFS,
	PROTO_SMB,
	PROTO_END
};
129 
130 /*
131  * Search the sharetab for the given mountpoint and protocol, returning
132  * a zfs_share_type_t value.
133  */
134 static zfs_share_type_t
135 is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto)
136 {
137 	char buf[MAXPATHLEN], *tab;
138 	char *ptr;
139 
140 	if (hdl->libzfs_sharetab == NULL)
141 		return (SHARED_NOT_SHARED);
142 
143 	(void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET);
144 
145 	while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) {
146 
147 		/* the mountpoint is the first entry on each line */
148 		if ((tab = strchr(buf, '\t')) == NULL)
149 			continue;
150 
151 		*tab = '\0';
152 		if (strcmp(buf, mountpoint) == 0) {
153 			/*
154 			 * the protocol field is the third field
155 			 * skip over second field
156 			 */
157 			ptr = ++tab;
158 			if ((tab = strchr(ptr, '\t')) == NULL)
159 				continue;
160 			ptr = ++tab;
161 			if ((tab = strchr(ptr, '\t')) == NULL)
162 				continue;
163 			*tab = '\0';
164 			if (strcmp(ptr,
165 			    proto_table[proto].p_name) == 0) {
166 				switch (proto) {
167 				case PROTO_NFS:
168 					return (SHARED_NFS);
169 				case PROTO_SMB:
170 					return (SHARED_SMB);
171 				default:
172 					return (0);
173 				}
174 			}
175 		}
176 	}
177 
178 	return (SHARED_NOT_SHARED);
179 }
180 
181 static boolean_t
182 dir_is_empty_stat(const char *dirname)
183 {
184 	struct stat st;
185 
186 	/*
187 	 * We only want to return false if the given path is a non empty
188 	 * directory, all other errors are handled elsewhere.
189 	 */
190 	if (stat(dirname, &st) < 0 || !S_ISDIR(st.st_mode)) {
191 		return (B_TRUE);
192 	}
193 
194 	/*
195 	 * An empty directory will still have two entries in it, one
196 	 * entry for each of "." and "..".
197 	 */
198 	if (st.st_size > 2) {
199 		return (B_FALSE);
200 	}
201 
202 	return (B_TRUE);
203 }
204 
205 static boolean_t
206 dir_is_empty_readdir(const char *dirname)
207 {
208 	DIR *dirp;
209 	struct dirent64 *dp;
210 	int dirfd;
211 
212 	if ((dirfd = openat(AT_FDCWD, dirname,
213 	    O_RDONLY | O_NDELAY | O_LARGEFILE | O_CLOEXEC, 0)) < 0) {
214 		return (B_TRUE);
215 	}
216 
217 	if ((dirp = fdopendir(dirfd)) == NULL) {
218 		(void) close(dirfd);
219 		return (B_TRUE);
220 	}
221 
222 	while ((dp = readdir64(dirp)) != NULL) {
223 
224 		if (strcmp(dp->d_name, ".") == 0 ||
225 		    strcmp(dp->d_name, "..") == 0)
226 			continue;
227 
228 		(void) closedir(dirp);
229 		return (B_FALSE);
230 	}
231 
232 	(void) closedir(dirp);
233 	return (B_TRUE);
234 }
235 
236 /*
237  * Returns true if the specified directory is empty.  If we can't open the
238  * directory at all, return true so that the mount can fail with a more
239  * informative error message.
240  */
241 static boolean_t
242 dir_is_empty(const char *dirname)
243 {
244 	struct statvfs64 st;
245 
246 	/*
247 	 * If the statvfs call fails or the filesystem is not a ZFS
248 	 * filesystem, fall back to the slow path which uses readdir.
249 	 */
250 	if ((statvfs64(dirname, &st) != 0) ||
251 	    (strcmp(st.f_basetype, "zfs") != 0)) {
252 		return (dir_is_empty_readdir(dirname));
253 	}
254 
255 	/*
256 	 * At this point, we know the provided path is on a ZFS
257 	 * filesystem, so we can use stat instead of readdir to
258 	 * determine if the directory is empty or not. We try to avoid
259 	 * using readdir because that requires opening "dirname"; this
260 	 * open file descriptor can potentially end up in a child
261 	 * process if there's a concurrent fork, thus preventing the
262 	 * zfs_mount() from otherwise succeeding (the open file
263 	 * descriptor inherited by the child process will cause the
264 	 * parent's mount to fail with EBUSY). The performance
265 	 * implications of replacing the open, read, and close with a
266 	 * single stat is nice; but is not the main motivation for the
267 	 * added complexity.
268 	 */
269 	return (dir_is_empty_stat(dirname));
270 }
271 
272 /*
273  * Checks to see if the mount is active.  If the filesystem is mounted, we fill
274  * in 'where' with the current mountpoint, and return 1.  Otherwise, we return
275  * 0.
276  */
277 boolean_t
278 is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
279 {
280 	struct mnttab entry;
281 
282 	if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0)
283 		return (B_FALSE);
284 
285 	if (where != NULL)
286 		*where = zfs_strdup(zfs_hdl, entry.mnt_mountp);
287 
288 	return (B_TRUE);
289 }
290 
291 boolean_t
292 zfs_is_mounted(zfs_handle_t *zhp, char **where)
293 {
294 	return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
295 }
296 
297 /*
298  * Returns true if the given dataset is mountable, false otherwise.  Returns the
299  * mountpoint in 'buf'.
300  */
301 static boolean_t
302 zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
303     zprop_source_t *source)
304 {
305 	char sourceloc[MAXNAMELEN];
306 	zprop_source_t sourcetype;
307 
308 	if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type))
309 		return (B_FALSE);
310 
311 	verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,
312 	    &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0);
313 
314 	if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||
315 	    strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)
316 		return (B_FALSE);
317 
318 	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF)
319 		return (B_FALSE);
320 
321 	if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
322 	    getzoneid() == GLOBAL_ZONEID)
323 		return (B_FALSE);
324 
325 	if (source)
326 		*source = sourcetype;
327 
328 	return (B_TRUE);
329 }
330 
331 /*
332  * Mount the given filesystem.
333  */
334 int
335 zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
336 {
337 	struct stat buf;
338 	char mountpoint[ZFS_MAXPROPLEN];
339 	char mntopts[MNT_LINE_MAX];
340 	libzfs_handle_t *hdl = zhp->zfs_hdl;
341 
342 	if (options == NULL)
343 		mntopts[0] = '\0';
344 	else
345 		(void) strlcpy(mntopts, options, sizeof (mntopts));
346 
347 	/*
348 	 * If the pool is imported read-only then all mounts must be read-only
349 	 */
350 	if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))
351 		flags |= MS_RDONLY;
352 
353 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
354 		return (0);
355 
356 	/* Create the directory if it doesn't already exist */
357 	if (lstat(mountpoint, &buf) != 0) {
358 		if (mkdirp(mountpoint, 0755) != 0) {
359 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
360 			    "failed to create mountpoint"));
361 			return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
362 			    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
363 			    mountpoint));
364 		}
365 	}
366 
367 	/*
368 	 * Determine if the mountpoint is empty.  If so, refuse to perform the
369 	 * mount.  We don't perform this check if MS_OVERLAY is specified, which
370 	 * would defeat the point.  We also avoid this check if 'remount' is
371 	 * specified.
372 	 */
373 	if ((flags & MS_OVERLAY) == 0 &&
374 	    strstr(mntopts, MNTOPT_REMOUNT) == NULL &&
375 	    !dir_is_empty(mountpoint)) {
376 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
377 		    "directory is not empty"));
378 		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
379 		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
380 	}
381 
382 	/* perform the mount */
383 	if (mount(zfs_get_name(zhp), mountpoint, MS_OPTIONSTR | flags,
384 	    MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) {
385 		/*
386 		 * Generic errors are nasty, but there are just way too many
387 		 * from mount(), and they're well-understood.  We pick a few
388 		 * common ones to improve upon.
389 		 */
390 		if (errno == EBUSY) {
391 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
392 			    "mountpoint or dataset is busy"));
393 		} else if (errno == EPERM) {
394 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
395 			    "Insufficient privileges"));
396 		} else if (errno == ENOTSUP) {
397 			char buf[256];
398 			int spa_version;
399 
400 			VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
401 			(void) snprintf(buf, sizeof (buf),
402 			    dgettext(TEXT_DOMAIN, "Can't mount a version %lld "
403 			    "file system on a version %d pool. Pool must be"
404 			    " upgraded to mount this file system."),
405 			    (u_longlong_t)zfs_prop_get_int(zhp,
406 			    ZFS_PROP_VERSION), spa_version);
407 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf));
408 		} else {
409 			zfs_error_aux(hdl, strerror(errno));
410 		}
411 		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
412 		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
413 		    zhp->zfs_name));
414 	}
415 
416 	/* add the mounted entry into our cache */
417 	libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint,
418 	    mntopts);
419 	return (0);
420 }
421 
422 /*
423  * Unmount a single filesystem.
424  */
425 static int
426 unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
427 {
428 	if (umount2(mountpoint, flags) != 0) {
429 		zfs_error_aux(hdl, strerror(errno));
430 		return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
431 		    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
432 		    mountpoint));
433 	}
434 
435 	return (0);
436 }
437 
438 /*
439  * Unmount the given filesystem.
440  */
441 int
442 zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
443 {
444 	libzfs_handle_t *hdl = zhp->zfs_hdl;
445 	struct mnttab entry;
446 	char *mntpt = NULL;
447 
448 	/* check to see if we need to unmount the filesystem */
449 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
450 	    libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) {
451 		/*
452 		 * mountpoint may have come from a call to
453 		 * getmnt/getmntany if it isn't NULL. If it is NULL,
454 		 * we know it comes from libzfs_mnttab_find which can
455 		 * then get freed later. We strdup it to play it safe.
456 		 */
457 		if (mountpoint == NULL)
458 			mntpt = zfs_strdup(hdl, entry.mnt_mountp);
459 		else
460 			mntpt = zfs_strdup(hdl, mountpoint);
461 
462 		/*
463 		 * Unshare and unmount the filesystem
464 		 */
465 		if (zfs_unshare_proto(zhp, mntpt, share_all_proto) != 0)
466 			return (-1);
467 
468 		if (unmount_one(hdl, mntpt, flags) != 0) {
469 			free(mntpt);
470 			(void) zfs_shareall(zhp);
471 			return (-1);
472 		}
473 		libzfs_mnttab_remove(hdl, zhp->zfs_name);
474 		free(mntpt);
475 	}
476 
477 	return (0);
478 }
479 
480 /*
481  * Unmount this filesystem and any children inheriting the mountpoint property.
482  * To do this, just act like we're changing the mountpoint property, but don't
483  * remount the filesystems afterwards.
484  */
485 int
486 zfs_unmountall(zfs_handle_t *zhp, int flags)
487 {
488 	prop_changelist_t *clp;
489 	int ret;
490 
491 	clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, flags);
492 	if (clp == NULL)
493 		return (-1);
494 
495 	ret = changelist_prefix(clp);
496 	changelist_free(clp);
497 
498 	return (ret);
499 }
500 
501 boolean_t
502 zfs_is_shared(zfs_handle_t *zhp)
503 {
504 	zfs_share_type_t rc = 0;
505 	zfs_share_proto_t *curr_proto;
506 
507 	if (ZFS_IS_VOLUME(zhp))
508 		return (B_FALSE);
509 
510 	for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
511 	    curr_proto++)
512 		rc |= zfs_is_shared_proto(zhp, NULL, *curr_proto);
513 
514 	return (rc ? B_TRUE : B_FALSE);
515 }
516 
517 int
518 zfs_share(zfs_handle_t *zhp)
519 {
520 	assert(!ZFS_IS_VOLUME(zhp));
521 	return (zfs_share_proto(zhp, share_all_proto));
522 }
523 
524 int
525 zfs_unshare(zfs_handle_t *zhp)
526 {
527 	assert(!ZFS_IS_VOLUME(zhp));
528 	return (zfs_unshareall(zhp));
529 }
530 
531 /*
532  * Check to see if the filesystem is currently shared.
533  */
534 zfs_share_type_t
535 zfs_is_shared_proto(zfs_handle_t *zhp, char **where, zfs_share_proto_t proto)
536 {
537 	char *mountpoint;
538 	zfs_share_type_t rc;
539 
540 	if (!zfs_is_mounted(zhp, &mountpoint))
541 		return (SHARED_NOT_SHARED);
542 
543 	if ((rc = is_shared(zhp->zfs_hdl, mountpoint, proto))
544 	    != SHARED_NOT_SHARED) {
545 		if (where != NULL)
546 			*where = mountpoint;
547 		else
548 			free(mountpoint);
549 		return (rc);
550 	} else {
551 		free(mountpoint);
552 		return (SHARED_NOT_SHARED);
553 	}
554 }
555 
556 boolean_t
557 zfs_is_shared_nfs(zfs_handle_t *zhp, char **where)
558 {
559 	return (zfs_is_shared_proto(zhp, where,
560 	    PROTO_NFS) != SHARED_NOT_SHARED);
561 }
562 
563 boolean_t
564 zfs_is_shared_smb(zfs_handle_t *zhp, char **where)
565 {
566 	return (zfs_is_shared_proto(zhp, where,
567 	    PROTO_SMB) != SHARED_NOT_SHARED);
568 }
569 
/*
 * Make sure things will work if libshare isn't installed by using
 * wrapper functions that check to see that the pointers to functions
 * initialized in _zfs_init_libshare() are actually present.
 */

/*
 * Each pointer below is filled in by _zfs_init_libshare() with the libshare
 * symbol of the same name minus the leading underscore, and stays NULL when
 * libshare could not be loaded.
 */
static sa_handle_t (*_sa_init)(int);
static sa_handle_t (*_sa_init_arg)(int, void *);
static void (*_sa_fini)(sa_handle_t);
static sa_share_t (*_sa_find_share)(sa_handle_t, char *);
static int (*_sa_enable_share)(sa_share_t, char *);
static int (*_sa_disable_share)(sa_share_t, char *);
static char *(*_sa_errorstr)(int);
static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *);
static boolean_t (*_sa_needs_refresh)(sa_handle_t *);
static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t);
static int (*_sa_zfs_process_share)(sa_handle_t, sa_group_t, sa_share_t,
    char *, char *, zprop_source_t, char *, char *, char *);
static void (*_sa_update_sharetab_ts)(sa_handle_t);
589 
590 /*
591  * _zfs_init_libshare()
592  *
593  * Find the libshare.so.1 entry points that we use here and save the
594  * values to be used later. This is triggered by the runtime loader.
595  * Make sure the correct ISA version is loaded.
596  */
597 
598 #pragma init(_zfs_init_libshare)
599 static void
600 _zfs_init_libshare(void)
601 {
602 	void *libshare;
603 	char path[MAXPATHLEN];
604 	char isa[MAXISALEN];
605 
606 #if defined(_LP64)
607 	if (sysinfo(SI_ARCHITECTURE_64, isa, MAXISALEN) == -1)
608 		isa[0] = '\0';
609 #else
610 	isa[0] = '\0';
611 #endif
612 	(void) snprintf(path, MAXPATHLEN,
613 	    "/usr/lib/%s/libshare.so.1", isa);
614 
615 	if ((libshare = dlopen(path, RTLD_LAZY | RTLD_GLOBAL)) != NULL) {
616 		_sa_init = (sa_handle_t (*)(int))dlsym(libshare, "sa_init");
617 		_sa_init_arg = (sa_handle_t (*)(int, void *))dlsym(libshare,
618 		    "sa_init_arg");
619 		_sa_fini = (void (*)(sa_handle_t))dlsym(libshare, "sa_fini");
620 		_sa_find_share = (sa_share_t (*)(sa_handle_t, char *))
621 		    dlsym(libshare, "sa_find_share");
622 		_sa_enable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
623 		    "sa_enable_share");
624 		_sa_disable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
625 		    "sa_disable_share");
626 		_sa_errorstr = (char *(*)(int))dlsym(libshare, "sa_errorstr");
627 		_sa_parse_legacy_options = (int (*)(sa_group_t, char *, char *))
628 		    dlsym(libshare, "sa_parse_legacy_options");
629 		_sa_needs_refresh = (boolean_t (*)(sa_handle_t *))
630 		    dlsym(libshare, "sa_needs_refresh");
631 		_sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t))
632 		    dlsym(libshare, "sa_get_zfs_handle");
633 		_sa_zfs_process_share = (int (*)(sa_handle_t, sa_group_t,
634 		    sa_share_t, char *, char *, zprop_source_t, char *,
635 		    char *, char *))dlsym(libshare, "sa_zfs_process_share");
636 		_sa_update_sharetab_ts = (void (*)(sa_handle_t))
637 		    dlsym(libshare, "sa_update_sharetab_ts");
638 		if (_sa_init == NULL || _sa_init_arg == NULL ||
639 		    _sa_fini == NULL || _sa_find_share == NULL ||
640 		    _sa_enable_share == NULL || _sa_disable_share == NULL ||
641 		    _sa_errorstr == NULL || _sa_parse_legacy_options == NULL ||
642 		    _sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL ||
643 		    _sa_zfs_process_share == NULL ||
644 		    _sa_update_sharetab_ts == NULL) {
645 			_sa_init = NULL;
646 			_sa_init_arg = NULL;
647 			_sa_fini = NULL;
648 			_sa_disable_share = NULL;
649 			_sa_enable_share = NULL;
650 			_sa_errorstr = NULL;
651 			_sa_parse_legacy_options = NULL;
652 			(void) dlclose(libshare);
653 			_sa_needs_refresh = NULL;
654 			_sa_get_zfs_handle = NULL;
655 			_sa_zfs_process_share = NULL;
656 			_sa_update_sharetab_ts = NULL;
657 		}
658 	}
659 }
660 
661 /*
662  * zfs_init_libshare(zhandle, service)
663  *
664  * Initialize the libshare API if it hasn't already been initialized.
665  * In all cases it returns 0 if it succeeded and an error if not. The
666  * service value is which part(s) of the API to initialize and is a
667  * direct map to the libshare sa_init(service) interface.
668  */
669 static int
670 zfs_init_libshare_impl(libzfs_handle_t *zhandle, int service, void *arg)
671 {
672 	/*
673 	 * libshare is either not installed or we're in a branded zone. The
674 	 * rest of the wrapper functions around the libshare calls already
675 	 * handle NULL function pointers, but we don't want the callers of
676 	 * zfs_init_libshare() to fail prematurely if libshare is not available.
677 	 */
678 	if (_sa_init == NULL)
679 		return (SA_OK);
680 
681 	/*
682 	 * Attempt to refresh libshare. This is necessary if there was a cache
683 	 * miss for a new ZFS dataset that was just created, or if state of the
684 	 * sharetab file has changed since libshare was last initialized. We
685 	 * want to make sure so check timestamps to see if a different process
686 	 * has updated any of the configuration. If there was some non-ZFS
687 	 * change, we need to re-initialize the internal cache.
688 	 */
689 	if (_sa_needs_refresh != NULL &&
690 	    _sa_needs_refresh(zhandle->libzfs_sharehdl)) {
691 		zfs_uninit_libshare(zhandle);
692 		zhandle->libzfs_sharehdl = _sa_init_arg(service, arg);
693 	}
694 
695 	if (zhandle && zhandle->libzfs_sharehdl == NULL)
696 		zhandle->libzfs_sharehdl = _sa_init_arg(service, arg);
697 
698 	if (zhandle->libzfs_sharehdl == NULL)
699 		return (SA_NO_MEMORY);
700 
701 	return (SA_OK);
702 }
703 int
704 zfs_init_libshare(libzfs_handle_t *zhandle, int service)
705 {
706 	return (zfs_init_libshare_impl(zhandle, service, NULL));
707 }
708 
709 int
710 zfs_init_libshare_arg(libzfs_handle_t *zhandle, int service, void *arg)
711 {
712 	return (zfs_init_libshare_impl(zhandle, service, arg));
713 }
714 
715 
716 /*
717  * zfs_uninit_libshare(zhandle)
718  *
719  * Uninitialize the libshare API if it hasn't already been
720  * uninitialized. It is OK to call multiple times.
721  */
722 void
723 zfs_uninit_libshare(libzfs_handle_t *zhandle)
724 {
725 	if (zhandle != NULL && zhandle->libzfs_sharehdl != NULL) {
726 		if (_sa_fini != NULL)
727 			_sa_fini(zhandle->libzfs_sharehdl);
728 		zhandle->libzfs_sharehdl = NULL;
729 	}
730 }
731 
732 /*
733  * zfs_parse_options(options, proto)
734  *
735  * Call the legacy parse interface to get the protocol specific
736  * options using the NULL arg to indicate that this is a "parse" only.
737  */
738 int
739 zfs_parse_options(char *options, zfs_share_proto_t proto)
740 {
741 	if (_sa_parse_legacy_options != NULL) {
742 		return (_sa_parse_legacy_options(NULL, options,
743 		    proto_table[proto].p_name));
744 	}
745 	return (SA_CONFIG_ERR);
746 }
747 
748 /*
749  * zfs_sa_find_share(handle, path)
750  *
751  * wrapper around sa_find_share to find a share path in the
752  * configuration.
753  */
754 static sa_share_t
755 zfs_sa_find_share(sa_handle_t handle, char *path)
756 {
757 	if (_sa_find_share != NULL)
758 		return (_sa_find_share(handle, path));
759 	return (NULL);
760 }
761 
762 /*
763  * zfs_sa_enable_share(share, proto)
764  *
765  * Wrapper for sa_enable_share which enables a share for a specified
766  * protocol.
767  */
768 static int
769 zfs_sa_enable_share(sa_share_t share, char *proto)
770 {
771 	if (_sa_enable_share != NULL)
772 		return (_sa_enable_share(share, proto));
773 	return (SA_CONFIG_ERR);
774 }
775 
776 /*
777  * zfs_sa_disable_share(share, proto)
778  *
779  * Wrapper for sa_enable_share which disables a share for a specified
780  * protocol.
781  */
782 static int
783 zfs_sa_disable_share(sa_share_t share, char *proto)
784 {
785 	if (_sa_disable_share != NULL)
786 		return (_sa_disable_share(share, proto));
787 	return (SA_CONFIG_ERR);
788 }
789 
790 /*
791  * Share the given filesystem according to the options in the specified
792  * protocol specific properties (sharenfs, sharesmb).  We rely
793  * on "libshare" to the dirty work for us.
794  */
795 static int
796 zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
797 {
798 	char mountpoint[ZFS_MAXPROPLEN];
799 	char shareopts[ZFS_MAXPROPLEN];
800 	char sourcestr[ZFS_MAXPROPLEN];
801 	libzfs_handle_t *hdl = zhp->zfs_hdl;
802 	sa_share_t share;
803 	zfs_share_proto_t *curr_proto;
804 	zprop_source_t sourcetype;
805 	int ret;
806 
807 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
808 		return (0);
809 
810 	for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
811 		/*
812 		 * Return success if there are no share options.
813 		 */
814 		if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
815 		    shareopts, sizeof (shareopts), &sourcetype, sourcestr,
816 		    ZFS_MAXPROPLEN, B_FALSE) != 0 ||
817 		    strcmp(shareopts, "off") == 0)
818 			continue;
819 		ret = zfs_init_libshare_arg(hdl, SA_INIT_ONE_SHARE_FROM_HANDLE,
820 		    zhp);
821 		if (ret != SA_OK) {
822 			(void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
823 			    dgettext(TEXT_DOMAIN, "cannot share '%s': %s"),
824 			    zfs_get_name(zhp), _sa_errorstr != NULL ?
825 			    _sa_errorstr(ret) : "");
826 			return (-1);
827 		}
828 
829 		/*
830 		 * If the 'zoned' property is set, then zfs_is_mountable()
831 		 * will have already bailed out if we are in the global zone.
832 		 * But local zones cannot be NFS servers, so we ignore it for
833 		 * local zones as well.
834 		 */
835 		if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
836 			continue;
837 
838 		share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint);
839 		if (share == NULL) {
840 			/*
841 			 * This may be a new file system that was just
842 			 * created so isn't in the internal cache
843 			 * (second time through). Rather than
844 			 * reloading the entire configuration, we can
845 			 * assume ZFS has done the checking and it is
846 			 * safe to add this to the internal
847 			 * configuration.
848 			 */
849 			if (_sa_zfs_process_share(hdl->libzfs_sharehdl,
850 			    NULL, NULL, mountpoint,
851 			    proto_table[*curr_proto].p_name, sourcetype,
852 			    shareopts, sourcestr, zhp->zfs_name) != SA_OK) {
853 				(void) zfs_error_fmt(hdl,
854 				    proto_table[*curr_proto].p_share_err,
855 				    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
856 				    zfs_get_name(zhp));
857 				return (-1);
858 			}
859 			share = zfs_sa_find_share(hdl->libzfs_sharehdl,
860 			    mountpoint);
861 		}
862 		if (share != NULL) {
863 			int err;
864 			err = zfs_sa_enable_share(share,
865 			    proto_table[*curr_proto].p_name);
866 			if (err != SA_OK) {
867 				(void) zfs_error_fmt(hdl,
868 				    proto_table[*curr_proto].p_share_err,
869 				    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
870 				    zfs_get_name(zhp));
871 				return (-1);
872 			}
873 		} else {
874 			(void) zfs_error_fmt(hdl,
875 			    proto_table[*curr_proto].p_share_err,
876 			    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
877 			    zfs_get_name(zhp));
878 			return (-1);
879 		}
880 
881 	}
882 	return (0);
883 }
884 
885 
886 int
887 zfs_share_nfs(zfs_handle_t *zhp)
888 {
889 	return (zfs_share_proto(zhp, nfs_only));
890 }
891 
892 int
893 zfs_share_smb(zfs_handle_t *zhp)
894 {
895 	return (zfs_share_proto(zhp, smb_only));
896 }
897 
898 int
899 zfs_shareall(zfs_handle_t *zhp)
900 {
901 	return (zfs_share_proto(zhp, share_all_proto));
902 }
903 
904 /*
905  * Unshare a filesystem by mountpoint.
906  */
907 static int
908 unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
909     zfs_share_proto_t proto)
910 {
911 	sa_share_t share;
912 	int err;
913 	char *mntpt;
914 
915 	/*
916 	 * Mountpoint could get trashed if libshare calls getmntany
917 	 * which it does during API initialization, so strdup the
918 	 * value.
919 	 */
920 	mntpt = zfs_strdup(hdl, mountpoint);
921 
922 	/*
923 	 * make sure libshare initialized, initialize everything because we
924 	 * don't know what other unsharing may happen later. Functions up the
925 	 * stack are allowed to initialize instead a subset of shares at the
926 	 * time the set is known.
927 	 */
928 	if ((err = zfs_init_libshare_arg(hdl, SA_INIT_ONE_SHARE_FROM_NAME,
929 	    (void *)name)) != SA_OK) {
930 		free(mntpt);	/* don't need the copy anymore */
931 		return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
932 		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
933 		    name, _sa_errorstr(err)));
934 	}
935 
936 	share = zfs_sa_find_share(hdl->libzfs_sharehdl, mntpt);
937 	free(mntpt);	/* don't need the copy anymore */
938 
939 	if (share != NULL) {
940 		err = zfs_sa_disable_share(share, proto_table[proto].p_name);
941 		if (err != SA_OK) {
942 			return (zfs_error_fmt(hdl,
943 			    proto_table[proto].p_unshare_err,
944 			    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
945 			    name, _sa_errorstr(err)));
946 		}
947 	} else {
948 		return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
949 		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"),
950 		    name));
951 	}
952 	return (0);
953 }
954 
955 /*
956  * Unshare the given filesystem.
957  */
958 int
959 zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint,
960     zfs_share_proto_t *proto)
961 {
962 	libzfs_handle_t *hdl = zhp->zfs_hdl;
963 	struct mnttab entry;
964 	char *mntpt = NULL;
965 
966 	/* check to see if need to unmount the filesystem */
967 	rewind(zhp->zfs_hdl->libzfs_mnttab);
968 	if (mountpoint != NULL)
969 		mountpoint = mntpt = zfs_strdup(hdl, mountpoint);
970 
971 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
972 	    libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) {
973 		zfs_share_proto_t *curr_proto;
974 
975 		if (mountpoint == NULL)
976 			mntpt = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp);
977 
978 		for (curr_proto = proto; *curr_proto != PROTO_END;
979 		    curr_proto++) {
980 
981 			if (is_shared(hdl, mntpt, *curr_proto) &&
982 			    unshare_one(hdl, zhp->zfs_name,
983 			    mntpt, *curr_proto) != 0) {
984 				if (mntpt != NULL)
985 					free(mntpt);
986 				return (-1);
987 			}
988 		}
989 	}
990 	if (mntpt != NULL)
991 		free(mntpt);
992 
993 	return (0);
994 }
995 
996 int
997 zfs_unshare_nfs(zfs_handle_t *zhp, const char *mountpoint)
998 {
999 	return (zfs_unshare_proto(zhp, mountpoint, nfs_only));
1000 }
1001 
1002 int
1003 zfs_unshare_smb(zfs_handle_t *zhp, const char *mountpoint)
1004 {
1005 	return (zfs_unshare_proto(zhp, mountpoint, smb_only));
1006 }
1007 
1008 /*
1009  * Same as zfs_unmountall(), but for NFS and SMB unshares.
1010  */
1011 int
1012 zfs_unshareall_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
1013 {
1014 	prop_changelist_t *clp;
1015 	int ret;
1016 
1017 	clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0);
1018 	if (clp == NULL)
1019 		return (-1);
1020 
1021 	ret = changelist_unshare(clp, proto);
1022 	changelist_free(clp);
1023 
1024 	return (ret);
1025 }
1026 
1027 int
1028 zfs_unshareall_nfs(zfs_handle_t *zhp)
1029 {
1030 	return (zfs_unshareall_proto(zhp, nfs_only));
1031 }
1032 
1033 int
1034 zfs_unshareall_smb(zfs_handle_t *zhp)
1035 {
1036 	return (zfs_unshareall_proto(zhp, smb_only));
1037 }
1038 
1039 int
1040 zfs_unshareall(zfs_handle_t *zhp)
1041 {
1042 	return (zfs_unshareall_proto(zhp, share_all_proto));
1043 }
1044 
1045 int
1046 zfs_unshareall_bypath(zfs_handle_t *zhp, const char *mountpoint)
1047 {
1048 	return (zfs_unshare_proto(zhp, mountpoint, share_all_proto));
1049 }
1050 
1051 /*
1052  * Remove the mountpoint associated with the current dataset, if necessary.
1053  * We only remove the underlying directory if:
1054  *
1055  *	- The mountpoint is not 'none' or 'legacy'
1056  *	- The mountpoint is non-empty
1057  *	- The mountpoint is the default or inherited
1058  *	- The 'zoned' property is set, or we're in a local zone
1059  *
1060  * Any other directories we leave alone.
1061  */
1062 void
1063 remove_mountpoint(zfs_handle_t *zhp)
1064 {
1065 	char mountpoint[ZFS_MAXPROPLEN];
1066 	zprop_source_t source;
1067 
1068 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
1069 	    &source))
1070 		return;
1071 
1072 	if (source == ZPROP_SRC_DEFAULT ||
1073 	    source == ZPROP_SRC_INHERITED) {
1074 		/*
1075 		 * Try to remove the directory, silently ignoring any errors.
1076 		 * The filesystem may have since been removed or moved around,
1077 		 * and this error isn't really useful to the administrator in
1078 		 * any way.
1079 		 */
1080 		(void) rmdir(mountpoint);
1081 	}
1082 }
1083 
1084 /*
1085  * Add the given zfs handle to the cb_handles array, dynamically reallocating
1086  * the array if it is out of space.
1087  */
1088 void
1089 libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
1090 {
1091 	if (cbp->cb_alloc == cbp->cb_used) {
1092 		size_t newsz;
1093 		zfs_handle_t **newhandles;
1094 
1095 		newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64;
1096 		newhandles = zfs_realloc(zhp->zfs_hdl,
1097 		    cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),
1098 		    newsz * sizeof (zfs_handle_t *));
1099 		cbp->cb_handles = newhandles;
1100 		cbp->cb_alloc = newsz;
1101 	}
1102 	cbp->cb_handles[cbp->cb_used++] = zhp;
1103 }
1104 
1105 /*
1106  * Recursive helper function used during file system enumeration
1107  */
1108 static int
1109 zfs_iter_cb(zfs_handle_t *zhp, void *data)
1110 {
1111 	get_all_cb_t *cbp = data;
1112 
1113 	if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) {
1114 		zfs_close(zhp);
1115 		return (0);
1116 	}
1117 
1118 	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) {
1119 		zfs_close(zhp);
1120 		return (0);
1121 	}
1122 
1123 	/*
1124 	 * If this filesystem is inconsistent and has a receive resume
1125 	 * token, we can not mount it.
1126 	 */
1127 	if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
1128 	    zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
1129 	    NULL, 0, NULL, NULL, 0, B_TRUE) == 0) {
1130 		zfs_close(zhp);
1131 		return (0);
1132 	}
1133 
1134 	libzfs_add_handle(cbp, zhp);
1135 	if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) != 0) {
1136 		zfs_close(zhp);
1137 		return (-1);
1138 	}
1139 	return (0);
1140 }
1141 
1142 /*
1143  * Sort comparator that compares two mountpoint paths. We sort these paths so
1144  * that subdirectories immediately follow their parents. This means that we
1145  * effectively treat the '/' character as the lowest value non-nul char. An
1146  * example sorted list using this comparator would look like:
1147  *
1148  * /foo
1149  * /foo/bar
1150  * /foo/bar/baz
1151  * /foo/baz
1152  * /foo.bar
1153  *
1154  * The mounting code depends on this ordering to deterministically iterate
1155  * over filesystems in order to spawn parallel mount tasks.
1156  */
1157 int
1158 mountpoint_cmp(const void *arga, const void *argb)
1159 {
1160 	zfs_handle_t *const *zap = arga;
1161 	zfs_handle_t *za = *zap;
1162 	zfs_handle_t *const *zbp = argb;
1163 	zfs_handle_t *zb = *zbp;
1164 	char mounta[MAXPATHLEN];
1165 	char mountb[MAXPATHLEN];
1166 	const char *a = mounta;
1167 	const char *b = mountb;
1168 	boolean_t gota, gotb;
1169 
1170 	gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM);
1171 	if (gota) {
1172 		verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,
1173 		    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
1174 	}
1175 	gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM);
1176 	if (gotb) {
1177 		verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,
1178 		    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
1179 	}
1180 
1181 	if (gota && gotb) {
1182 		while (*a != '\0' && (*a == *b)) {
1183 			a++;
1184 			b++;
1185 		}
1186 		if (*a == *b)
1187 			return (0);
1188 		if (*a == '\0')
1189 			return (-1);
1190 		if (*b == '\0')
1191 			return (1);
1192 		if (*a == '/')
1193 			return (-1);
1194 		if (*b == '/')
1195 			return (1);
1196 		return (*a < *b ? -1 : *a > *b);
1197 	}
1198 
1199 	if (gota)
1200 		return (-1);
1201 	if (gotb)
1202 		return (1);
1203 
1204 	/*
1205 	 * If neither filesystem has a mountpoint, revert to sorting by
1206 	 * dataset name.
1207 	 */
1208 	return (strcmp(zfs_get_name(za), zfs_get_name(zb)));
1209 }
1210 
1211 /*
1212  * Return true if path2 is a child of path1.
1213  */
1214 static boolean_t
1215 libzfs_path_contains(const char *path1, const char *path2)
1216 {
1217 	return (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/');
1218 }
1219 
1220 /*
1221  * Given a mountpoint specified by idx in the handles array, find the first
1222  * non-descendent of that mountpoint and return its index. Descendant paths
1223  * start with the parent's path. This function relies on the ordering
1224  * enforced by mountpoint_cmp().
1225  */
1226 static int
1227 non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx)
1228 {
1229 	char parent[ZFS_MAXPROPLEN];
1230 	char child[ZFS_MAXPROPLEN];
1231 	int i;
1232 
1233 	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent,
1234 	    sizeof (parent), NULL, NULL, 0, B_FALSE) == 0);
1235 
1236 	for (i = idx + 1; i < num_handles; i++) {
1237 		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child,
1238 		    sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1239 		if (!libzfs_path_contains(parent, child))
1240 			break;
1241 	}
1242 	return (i);
1243 }
1244 
1245 typedef struct mnt_param {
1246 	libzfs_handle_t	*mnt_hdl;
1247 	zfs_taskq_t	*mnt_tq;
1248 	zfs_handle_t	**mnt_zhps; /* filesystems to mount */
1249 	size_t		mnt_num_handles;
1250 	int		mnt_idx;	/* Index of selected entry to mount */
1251 	zfs_iter_f	mnt_func;
1252 	void		*mnt_data;
1253 } mnt_param_t;
1254 
1255 /*
1256  * Allocate and populate the parameter struct for mount function, and
1257  * schedule mounting of the entry selected by idx.
1258  */
1259 static void
1260 zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,
1261     size_t num_handles, int idx, zfs_iter_f func, void *data, zfs_taskq_t *tq)
1262 {
1263 	mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t));
1264 
1265 	mnt_param->mnt_hdl = hdl;
1266 	mnt_param->mnt_tq = tq;
1267 	mnt_param->mnt_zhps = handles;
1268 	mnt_param->mnt_num_handles = num_handles;
1269 	mnt_param->mnt_idx = idx;
1270 	mnt_param->mnt_func = func;
1271 	mnt_param->mnt_data = data;
1272 
1273 	(void) zfs_taskq_dispatch(tq, zfs_mount_task, (void*)mnt_param,
1274 	    ZFS_TQ_SLEEP);
1275 }
1276 
/*
 * State shared by the mounting and sharing passes of a call to
 * zpool_enable_datasets().  It is handed to zfs_mount_one() and
 * zfs_share_one() as their callback argument.
 */
typedef struct mount_state {
	/*
	 * Set to -1 if any mount (or share) fails.  Several threads may
	 * update this concurrently, but no synchronization is needed
	 * because the only value ever stored by them is -1.
	 */
	int		ms_mntstatus;
	/* flags argument passed to zfs_mount() */
	int		ms_mntflags;
	/* options argument passed to zfs_mount() */
	const char	*ms_mntopts;
} mount_state_t;
1291 
1292 static int
1293 zfs_mount_one(zfs_handle_t *zhp, void *arg)
1294 {
1295 	mount_state_t *ms = arg;
1296 	int ret = 0;
1297 
1298 	if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0)
1299 		ret = ms->ms_mntstatus = -1;
1300 	return (ret);
1301 }
1302 
1303 static int
1304 zfs_share_one(zfs_handle_t *zhp, void *arg)
1305 {
1306 	mount_state_t *ms = arg;
1307 	int ret = 0;
1308 
1309 	if (zfs_share(zhp) != 0)
1310 		ret = ms->ms_mntstatus = -1;
1311 	return (ret);
1312 }
1313 
1314 /*
1315  * Task queue function to mount one file system. On completion, it finds and
1316  * schedules its children to be mounted. This depends on the sorting done in
1317  * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries
1318  * each descending from the previous) will have no parallelism since we always
1319  * have to wait for the parent to finish mounting before we can schedule
1320  * its children.
1321  */
1322 static void
1323 zfs_mount_task(void *arg)
1324 {
1325 	mnt_param_t *mp = arg;
1326 	int idx = mp->mnt_idx;
1327 	zfs_handle_t **handles = mp->mnt_zhps;
1328 	size_t num_handles = mp->mnt_num_handles;
1329 	char mountpoint[ZFS_MAXPROPLEN];
1330 
1331 	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint,
1332 	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
1333 
1334 	if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)
1335 		return;
1336 
1337 	/*
1338 	 * We dispatch tasks to mount filesystems with mountpoints underneath
1339 	 * this one. We do this by dispatching the next filesystem with a
1340 	 * descendant mountpoint of the one we just mounted, then skip all of
1341 	 * its descendants, dispatch the next descendant mountpoint, and so on.
1342 	 * The non_descendant_idx() function skips over filesystems that are
1343 	 * descendants of the filesystem we just dispatched.
1344 	 */
1345 	for (int i = idx + 1; i < num_handles;
1346 	    i = non_descendant_idx(handles, num_handles, i)) {
1347 		char child[ZFS_MAXPROPLEN];
1348 		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,
1349 		    child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1350 
1351 		if (!libzfs_path_contains(mountpoint, child))
1352 			break; /* not a descendant, return */
1353 		zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,
1354 		    mp->mnt_func, mp->mnt_data, mp->mnt_tq);
1355 	}
1356 	free(mp);
1357 }
1358 
1359 /*
1360  * Issue the func callback for each ZFS handle contained in the handles
1361  * array. This function is used to mount all datasets, and so this function
1362  * guarantees that filesystems for parent mountpoints are called before their
1363  * children. As such, before issuing any callbacks, we first sort the array
1364  * of handles by mountpoint.
1365  *
1366  * Callbacks are issued in one of two ways:
1367  *
1368  * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT
1369  *    environment variable is set, then we issue callbacks sequentially.
1370  *
1371  * 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT
1372  *    environment variable is not set, then we use a taskq to dispatch threads
1373  *    to mount filesystems is parallel. This function dispatches tasks to mount
1374  *    the filesystems at the top-level mountpoints, and these tasks in turn
1375  *    are responsible for recursively mounting filesystems in their children
1376  *    mountpoints.
1377  */
1378 void
1379 zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
1380     size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel)
1381 {
1382 	/*
1383 	 * The ZFS_SERIAL_MOUNT environment variable is an undocumented
1384 	 * variable that can be used as a convenience to do a/b comparison
1385 	 * of serial vs. parallel mounting.
1386 	 */
1387 	boolean_t serial_mount = !parallel ||
1388 	    (getenv("ZFS_SERIAL_MOUNT") != NULL);
1389 
1390 	/*
1391 	 * Sort the datasets by mountpoint. See mountpoint_cmp for details
1392 	 * of how these are sorted.
1393 	 */
1394 	qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp);
1395 
1396 	if (serial_mount) {
1397 		for (int i = 0; i < num_handles; i++) {
1398 			func(handles[i], data);
1399 		}
1400 		return;
1401 	}
1402 
1403 	/*
1404 	 * Issue the callback function for each dataset using a parallel
1405 	 * algorithm that uses a taskq to manage threads.
1406 	 */
1407 	zfs_taskq_t *tq = zfs_taskq_create("mount_taskq", mount_tq_nthr, 0,
1408 	    mount_tq_nthr, mount_tq_nthr, ZFS_TASKQ_PREPOPULATE);
1409 
1410 	/*
1411 	 * There may be multiple "top level" mountpoints outside of the pool's
1412 	 * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of
1413 	 * these.
1414 	 */
1415 	for (int i = 0; i < num_handles;
1416 	    i = non_descendant_idx(handles, num_handles, i)) {
1417 		zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,
1418 		    tq);
1419 	}
1420 
1421 	zfs_taskq_wait(tq); /* wait for all scheduled mounts to complete */
1422 	zfs_taskq_destroy(tq);
1423 }
1424 
1425 /*
1426  * Mount and share all datasets within the given pool.  This assumes that no
1427  * datasets within the pool are currently mounted.
1428  */
1429 #pragma weak zpool_mount_datasets = zpool_enable_datasets
1430 int
1431 zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
1432 {
1433 	get_all_cb_t cb = { 0 };
1434 	mount_state_t ms = { 0 };
1435 	zfs_handle_t *zfsp;
1436 	sa_init_selective_arg_t sharearg;
1437 	int ret = 0;
1438 
1439 	if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
1440 	    ZFS_TYPE_DATASET)) == NULL)
1441 		goto out;
1442 
1443 
1444 	/*
1445 	 * Gather all non-snapshot datasets within the pool. Start by adding
1446 	 * the root filesystem for this pool to the list, and then iterate
1447 	 * over all child filesystems.
1448 	 */
1449 	libzfs_add_handle(&cb, zfsp);
1450 	if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) != 0)
1451 		goto out;
1452 
1453 	ms.ms_mntopts = mntopts;
1454 	ms.ms_mntflags = flags;
1455 	zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1456 	    zfs_mount_one, &ms, B_TRUE);
1457 	if (ms.ms_mntstatus != 0)
1458 		ret = ms.ms_mntstatus;
1459 
1460 	/*
1461 	 * Share all filesystems that need to be shared. This needs to be
1462 	 * a separate pass because libshare is not mt-safe, and so we need
1463 	 * to share serially.
1464 	 */
1465 	sharearg.zhandle_arr = cb.cb_handles;
1466 	sharearg.zhandle_len = cb.cb_used;
1467 	if ((ret = zfs_init_libshare_arg(zhp->zpool_hdl,
1468 	    SA_INIT_SHARE_API_SELECTIVE, &sharearg)) != 0)
1469 		goto out;
1470 
1471 	ms.ms_mntstatus = 0;
1472 	zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1473 	    zfs_share_one, &ms, B_FALSE);
1474 	if (ms.ms_mntstatus != 0)
1475 		ret = ms.ms_mntstatus;
1476 
1477 out:
1478 	for (int i = 0; i < cb.cb_used; i++)
1479 		zfs_close(cb.cb_handles[i]);
1480 	free(cb.cb_handles);
1481 
1482 	return (ret);
1483 }
1484 
/*
 * qsort() comparator for an array of mountpoint strings.  The operands of
 * strcmp() are swapped on purpose, so the array ends up in descending order.
 */
static int
mountpoint_compare(const void *a, const void *b)
{
	char *const *lhs = a;
	char *const *rhs = b;

	return (strcmp(*rhs, *lhs));
}
1493 
1494 /* alias for 2002/240 */
1495 #pragma weak zpool_unmount_datasets = zpool_disable_datasets
1496 /*
1497  * Unshare and unmount all datasets within the given pool.  We don't want to
1498  * rely on traversing the DSL to discover the filesystems within the pool,
1499  * because this may be expensive (if not all of them are mounted), and can fail
1500  * arbitrarily (on I/O error, for example).  Instead, we walk /etc/mnttab and
1501  * gather all the filesystems that are currently mounted.
1502  */
1503 int
1504 zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
1505 {
1506 	int used, alloc;
1507 	struct mnttab entry;
1508 	size_t namelen;
1509 	char **mountpoints = NULL;
1510 	zfs_handle_t **datasets = NULL;
1511 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1512 	int i;
1513 	int ret = -1;
1514 	int flags = (force ? MS_FORCE : 0);
1515 	sa_init_selective_arg_t sharearg;
1516 
1517 	namelen = strlen(zhp->zpool_name);
1518 
1519 	rewind(hdl->libzfs_mnttab);
1520 	used = alloc = 0;
1521 	while (getmntent(hdl->libzfs_mnttab, &entry) == 0) {
1522 		/*
1523 		 * Ignore non-ZFS entries.
1524 		 */
1525 		if (entry.mnt_fstype == NULL ||
1526 		    strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
1527 			continue;
1528 
1529 		/*
1530 		 * Ignore filesystems not within this pool.
1531 		 */
1532 		if (entry.mnt_mountp == NULL ||
1533 		    strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||
1534 		    (entry.mnt_special[namelen] != '/' &&
1535 		    entry.mnt_special[namelen] != '\0'))
1536 			continue;
1537 
1538 		/*
1539 		 * At this point we've found a filesystem within our pool.  Add
1540 		 * it to our growing list.
1541 		 */
1542 		if (used == alloc) {
1543 			if (alloc == 0) {
1544 				if ((mountpoints = zfs_alloc(hdl,
1545 				    8 * sizeof (void *))) == NULL)
1546 					goto out;
1547 
1548 				if ((datasets = zfs_alloc(hdl,
1549 				    8 * sizeof (void *))) == NULL)
1550 					goto out;
1551 
1552 				alloc = 8;
1553 			} else {
1554 				void *ptr;
1555 
1556 				if ((ptr = zfs_realloc(hdl, mountpoints,
1557 				    alloc * sizeof (void *),
1558 				    alloc * 2 * sizeof (void *))) == NULL)
1559 					goto out;
1560 				mountpoints = ptr;
1561 
1562 				if ((ptr = zfs_realloc(hdl, datasets,
1563 				    alloc * sizeof (void *),
1564 				    alloc * 2 * sizeof (void *))) == NULL)
1565 					goto out;
1566 				datasets = ptr;
1567 
1568 				alloc *= 2;
1569 			}
1570 		}
1571 
1572 		if ((mountpoints[used] = zfs_strdup(hdl,
1573 		    entry.mnt_mountp)) == NULL)
1574 			goto out;
1575 
1576 		/*
1577 		 * This is allowed to fail, in case there is some I/O error.  It
1578 		 * is only used to determine if we need to remove the underlying
1579 		 * mountpoint, so failure is not fatal.
1580 		 */
1581 		datasets[used] = make_dataset_handle(hdl, entry.mnt_special);
1582 
1583 		used++;
1584 	}
1585 
1586 	/*
1587 	 * At this point, we have the entire list of filesystems, so sort it by
1588 	 * mountpoint.
1589 	 */
1590 	sharearg.zhandle_arr = datasets;
1591 	sharearg.zhandle_len = used;
1592 	ret = zfs_init_libshare_arg(hdl, SA_INIT_SHARE_API_SELECTIVE,
1593 	    &sharearg);
1594 	if (ret != 0)
1595 		goto out;
1596 	qsort(mountpoints, used, sizeof (char *), mountpoint_compare);
1597 
1598 	/*
1599 	 * Walk through and first unshare everything.
1600 	 */
1601 	for (i = 0; i < used; i++) {
1602 		zfs_share_proto_t *curr_proto;
1603 		for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
1604 		    curr_proto++) {
1605 			if (is_shared(hdl, mountpoints[i], *curr_proto) &&
1606 			    unshare_one(hdl, mountpoints[i],
1607 			    mountpoints[i], *curr_proto) != 0)
1608 				goto out;
1609 		}
1610 	}
1611 
1612 	/*
1613 	 * Now unmount everything, removing the underlying directories as
1614 	 * appropriate.
1615 	 */
1616 	for (i = 0; i < used; i++) {
1617 		if (unmount_one(hdl, mountpoints[i], flags) != 0)
1618 			goto out;
1619 	}
1620 
1621 	for (i = 0; i < used; i++) {
1622 		if (datasets[i])
1623 			remove_mountpoint(datasets[i]);
1624 	}
1625 
1626 	ret = 0;
1627 out:
1628 	for (i = 0; i < used; i++) {
1629 		if (datasets[i])
1630 			zfs_close(datasets[i]);
1631 		free(mountpoints[i]);
1632 	}
1633 	free(datasets);
1634 	free(mountpoints);
1635 
1636 	return (ret);
1637 }
1638