1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
26  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
27  * Copyright 2017 Joyent, Inc.
28  * Copyright 2017 RackTop Systems.
29  * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
30  */
31 
32 /*
33  * Routines to manage ZFS mounts.  We separate all the nasty routines that have
34  * to deal with the OS.  The following functions are the main entry points --
35  * they are used by mount and unmount and when changing a filesystem's
36  * mountpoint.
37  *
38  *	zfs_is_mounted()
39  *	zfs_mount()
40  *	zfs_unmount()
41  *	zfs_unmountall()
42  *
 * This file also contains the functions used to manage sharing filesystems via
 * NFS and SMB:
45  *
46  *	zfs_is_shared()
47  *	zfs_share()
48  *	zfs_unshare()
49  *
50  *	zfs_is_shared_nfs()
51  *	zfs_is_shared_smb()
52  *	zfs_share_proto()
 *	zfs_shareall()
54  *	zfs_unshare_nfs()
55  *	zfs_unshare_smb()
56  *	zfs_unshareall_nfs()
57  *	zfs_unshareall_smb()
58  *	zfs_unshareall()
59  *	zfs_unshareall_bypath()
60  *
61  * The following functions are available for pool consumers, and will
62  * mount/unmount and share/unshare all datasets within pool:
63  *
64  *	zpool_enable_datasets()
65  *	zpool_disable_datasets()
66  */
67 
68 #include <dirent.h>
69 #include <dlfcn.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <libgen.h>
73 #include <libintl.h>
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <strings.h>
77 #include <unistd.h>
78 #include <zone.h>
79 #include <sys/mntent.h>
80 #include <sys/mount.h>
81 #include <sys/stat.h>
82 #include <sys/statvfs.h>
83 
84 #include <libzfs.h>
85 
86 #include "libzfs_impl.h"
87 #include "libzfs_taskq.h"
88 
89 #include <libshare.h>
90 #include <sys/systeminfo.h>
91 #define	MAXISALEN	257	/* based on sysinfo(2) man page */
92 
93 static int mount_tq_nthr = 512;	/* taskq threads for multi-threaded mounting */
94 
95 static void zfs_mount_task(void *);
96 static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
97 zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
98     zfs_share_proto_t);
99 
100 /*
101  * The share protocols table must be in the same order as the zfs_share_proto_t
102  * enum in libzfs_impl.h
103  */
104 typedef struct {
105 	zfs_prop_t p_prop;
106 	char *p_name;
107 	int p_share_err;
108 	int p_unshare_err;
109 } proto_table_t;
110 
111 proto_table_t proto_table[PROTO_END] = {
112 	{ZFS_PROP_SHARENFS, "nfs", EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},
113 	{ZFS_PROP_SHARESMB, "smb", EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},
114 };
115 
116 zfs_share_proto_t nfs_only[] = {
117 	PROTO_NFS,
118 	PROTO_END
119 };
120 
121 zfs_share_proto_t smb_only[] = {
122 	PROTO_SMB,
123 	PROTO_END
124 };
125 zfs_share_proto_t share_all_proto[] = {
126 	PROTO_NFS,
127 	PROTO_SMB,
128 	PROTO_END
129 };
130 
131 /*
132  * Search the sharetab for the given mountpoint and protocol, returning
133  * a zfs_share_type_t value.
134  */
135 static zfs_share_type_t
136 is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto)
137 {
138 	char buf[MAXPATHLEN], *tab;
139 	char *ptr;
140 
141 	if (hdl->libzfs_sharetab == NULL)
142 		return (SHARED_NOT_SHARED);
143 
144 	(void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET);
145 
146 	while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) {
147 
148 		/* the mountpoint is the first entry on each line */
149 		if ((tab = strchr(buf, '\t')) == NULL)
150 			continue;
151 
152 		*tab = '\0';
153 		if (strcmp(buf, mountpoint) == 0) {
154 			/*
155 			 * the protocol field is the third field
156 			 * skip over second field
157 			 */
158 			ptr = ++tab;
159 			if ((tab = strchr(ptr, '\t')) == NULL)
160 				continue;
161 			ptr = ++tab;
162 			if ((tab = strchr(ptr, '\t')) == NULL)
163 				continue;
164 			*tab = '\0';
165 			if (strcmp(ptr,
166 			    proto_table[proto].p_name) == 0) {
167 				switch (proto) {
168 				case PROTO_NFS:
169 					return (SHARED_NFS);
170 				case PROTO_SMB:
171 					return (SHARED_SMB);
172 				default:
173 					return (0);
174 				}
175 			}
176 		}
177 	}
178 
179 	return (SHARED_NOT_SHARED);
180 }
181 
182 static boolean_t
183 dir_is_empty_stat(const char *dirname)
184 {
185 	struct stat st;
186 
187 	/*
188 	 * We only want to return false if the given path is a non empty
189 	 * directory, all other errors are handled elsewhere.
190 	 */
191 	if (stat(dirname, &st) < 0 || !S_ISDIR(st.st_mode)) {
192 		return (B_TRUE);
193 	}
194 
195 	/*
196 	 * An empty directory will still have two entries in it, one
197 	 * entry for each of "." and "..".
198 	 */
199 	if (st.st_size > 2) {
200 		return (B_FALSE);
201 	}
202 
203 	return (B_TRUE);
204 }
205 
206 static boolean_t
207 dir_is_empty_readdir(const char *dirname)
208 {
209 	DIR *dirp;
210 	struct dirent64 *dp;
211 	int dirfd;
212 
213 	if ((dirfd = openat(AT_FDCWD, dirname,
214 	    O_RDONLY | O_NDELAY | O_LARGEFILE | O_CLOEXEC, 0)) < 0) {
215 		return (B_TRUE);
216 	}
217 
218 	if ((dirp = fdopendir(dirfd)) == NULL) {
219 		(void) close(dirfd);
220 		return (B_TRUE);
221 	}
222 
223 	while ((dp = readdir64(dirp)) != NULL) {
224 
225 		if (strcmp(dp->d_name, ".") == 0 ||
226 		    strcmp(dp->d_name, "..") == 0)
227 			continue;
228 
229 		(void) closedir(dirp);
230 		return (B_FALSE);
231 	}
232 
233 	(void) closedir(dirp);
234 	return (B_TRUE);
235 }
236 
237 /*
238  * Returns true if the specified directory is empty.  If we can't open the
239  * directory at all, return true so that the mount can fail with a more
240  * informative error message.
241  */
242 static boolean_t
243 dir_is_empty(const char *dirname)
244 {
245 	struct statvfs64 st;
246 
247 	/*
248 	 * If the statvfs call fails or the filesystem is not a ZFS
249 	 * filesystem, fall back to the slow path which uses readdir.
250 	 */
251 	if ((statvfs64(dirname, &st) != 0) ||
252 	    (strcmp(st.f_basetype, "zfs") != 0)) {
253 		return (dir_is_empty_readdir(dirname));
254 	}
255 
256 	/*
257 	 * At this point, we know the provided path is on a ZFS
258 	 * filesystem, so we can use stat instead of readdir to
259 	 * determine if the directory is empty or not. We try to avoid
260 	 * using readdir because that requires opening "dirname"; this
261 	 * open file descriptor can potentially end up in a child
262 	 * process if there's a concurrent fork, thus preventing the
263 	 * zfs_mount() from otherwise succeeding (the open file
264 	 * descriptor inherited by the child process will cause the
265 	 * parent's mount to fail with EBUSY). The performance
266 	 * implications of replacing the open, read, and close with a
267 	 * single stat is nice; but is not the main motivation for the
268 	 * added complexity.
269 	 */
270 	return (dir_is_empty_stat(dirname));
271 }
272 
273 /*
274  * Checks to see if the mount is active.  If the filesystem is mounted, we fill
275  * in 'where' with the current mountpoint, and return 1.  Otherwise, we return
276  * 0.
277  */
278 boolean_t
279 is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
280 {
281 	struct mnttab entry;
282 
283 	if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0)
284 		return (B_FALSE);
285 
286 	if (where != NULL)
287 		*where = zfs_strdup(zfs_hdl, entry.mnt_mountp);
288 
289 	return (B_TRUE);
290 }
291 
292 boolean_t
293 zfs_is_mounted(zfs_handle_t *zhp, char **where)
294 {
295 	return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
296 }
297 
298 /*
299  * Returns true if the given dataset is mountable, false otherwise.  Returns the
300  * mountpoint in 'buf'.
301  */
302 static boolean_t
303 zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
304     zprop_source_t *source)
305 {
306 	char sourceloc[MAXNAMELEN];
307 	zprop_source_t sourcetype;
308 
309 	if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type))
310 		return (B_FALSE);
311 
312 	verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,
313 	    &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0);
314 
315 	if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||
316 	    strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)
317 		return (B_FALSE);
318 
319 	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF)
320 		return (B_FALSE);
321 
322 	if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
323 	    getzoneid() == GLOBAL_ZONEID)
324 		return (B_FALSE);
325 
326 	if (source)
327 		*source = sourcetype;
328 
329 	return (B_TRUE);
330 }
331 
332 /*
333  * Mount the given filesystem.
334  */
335 int
336 zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
337 {
338 	struct stat buf;
339 	char mountpoint[ZFS_MAXPROPLEN];
340 	char mntopts[MNT_LINE_MAX];
341 	libzfs_handle_t *hdl = zhp->zfs_hdl;
342 
343 	if (options == NULL)
344 		mntopts[0] = '\0';
345 	else
346 		(void) strlcpy(mntopts, options, sizeof (mntopts));
347 
348 	/*
349 	 * If the pool is imported read-only then all mounts must be read-only
350 	 */
351 	if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))
352 		flags |= MS_RDONLY;
353 
354 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
355 		return (0);
356 
357 	/* Create the directory if it doesn't already exist */
358 	if (lstat(mountpoint, &buf) != 0) {
359 		if (mkdirp(mountpoint, 0755) != 0) {
360 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
361 			    "failed to create mountpoint"));
362 			return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
363 			    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
364 			    mountpoint));
365 		}
366 	}
367 
368 	/*
369 	 * Determine if the mountpoint is empty.  If so, refuse to perform the
370 	 * mount.  We don't perform this check if MS_OVERLAY is specified, which
371 	 * would defeat the point.  We also avoid this check if 'remount' is
372 	 * specified.
373 	 */
374 	if ((flags & MS_OVERLAY) == 0 &&
375 	    strstr(mntopts, MNTOPT_REMOUNT) == NULL &&
376 	    !dir_is_empty(mountpoint)) {
377 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
378 		    "directory is not empty"));
379 		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
380 		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
381 	}
382 
383 	/* perform the mount */
384 	if (mount(zfs_get_name(zhp), mountpoint, MS_OPTIONSTR | flags,
385 	    MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) {
386 		/*
387 		 * Generic errors are nasty, but there are just way too many
388 		 * from mount(), and they're well-understood.  We pick a few
389 		 * common ones to improve upon.
390 		 */
391 		if (errno == EBUSY) {
392 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
393 			    "mountpoint or dataset is busy"));
394 		} else if (errno == EPERM) {
395 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
396 			    "Insufficient privileges"));
397 		} else if (errno == ENOTSUP) {
398 			char buf[256];
399 			int spa_version;
400 
401 			VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
402 			(void) snprintf(buf, sizeof (buf),
403 			    dgettext(TEXT_DOMAIN, "Can't mount a version %lld "
404 			    "file system on a version %d pool. Pool must be"
405 			    " upgraded to mount this file system."),
406 			    (u_longlong_t)zfs_prop_get_int(zhp,
407 			    ZFS_PROP_VERSION), spa_version);
408 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf));
409 		} else {
410 			zfs_error_aux(hdl, strerror(errno));
411 		}
412 		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
413 		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
414 		    zhp->zfs_name));
415 	}
416 
417 	/* add the mounted entry into our cache */
418 	libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint,
419 	    mntopts);
420 	return (0);
421 }
422 
423 /*
424  * Unmount a single filesystem.
425  */
426 static int
427 unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
428 {
429 	if (umount2(mountpoint, flags) != 0) {
430 		zfs_error_aux(hdl, strerror(errno));
431 		return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
432 		    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
433 		    mountpoint));
434 	}
435 
436 	return (0);
437 }
438 
439 /*
440  * Unmount the given filesystem.
441  */
442 int
443 zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
444 {
445 	libzfs_handle_t *hdl = zhp->zfs_hdl;
446 	struct mnttab entry;
447 	char *mntpt = NULL;
448 
449 	/* check to see if we need to unmount the filesystem */
450 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
451 	    libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) {
452 		/*
453 		 * mountpoint may have come from a call to
454 		 * getmnt/getmntany if it isn't NULL. If it is NULL,
455 		 * we know it comes from libzfs_mnttab_find which can
456 		 * then get freed later. We strdup it to play it safe.
457 		 */
458 		if (mountpoint == NULL)
459 			mntpt = zfs_strdup(hdl, entry.mnt_mountp);
460 		else
461 			mntpt = zfs_strdup(hdl, mountpoint);
462 
463 		/*
464 		 * Unshare and unmount the filesystem
465 		 */
466 		if (zfs_unshare_proto(zhp, mntpt, share_all_proto) != 0)
467 			return (-1);
468 
469 		if (unmount_one(hdl, mntpt, flags) != 0) {
470 			free(mntpt);
471 			(void) zfs_shareall(zhp);
472 			return (-1);
473 		}
474 		libzfs_mnttab_remove(hdl, zhp->zfs_name);
475 		free(mntpt);
476 	}
477 
478 	return (0);
479 }
480 
481 /*
482  * Unmount this filesystem and any children inheriting the mountpoint property.
483  * To do this, just act like we're changing the mountpoint property, but don't
484  * remount the filesystems afterwards.
485  */
486 int
487 zfs_unmountall(zfs_handle_t *zhp, int flags)
488 {
489 	prop_changelist_t *clp;
490 	int ret;
491 
492 	clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, flags);
493 	if (clp == NULL)
494 		return (-1);
495 
496 	ret = changelist_prefix(clp);
497 	changelist_free(clp);
498 
499 	return (ret);
500 }
501 
502 boolean_t
503 zfs_is_shared(zfs_handle_t *zhp)
504 {
505 	zfs_share_type_t rc = 0;
506 	zfs_share_proto_t *curr_proto;
507 
508 	if (ZFS_IS_VOLUME(zhp))
509 		return (B_FALSE);
510 
511 	for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
512 	    curr_proto++)
513 		rc |= zfs_is_shared_proto(zhp, NULL, *curr_proto);
514 
515 	return (rc ? B_TRUE : B_FALSE);
516 }
517 
518 int
519 zfs_share(zfs_handle_t *zhp)
520 {
521 	assert(!ZFS_IS_VOLUME(zhp));
522 	return (zfs_share_proto(zhp, share_all_proto));
523 }
524 
525 int
526 zfs_unshare(zfs_handle_t *zhp)
527 {
528 	assert(!ZFS_IS_VOLUME(zhp));
529 	return (zfs_unshareall(zhp));
530 }
531 
532 /*
533  * Check to see if the filesystem is currently shared.
534  */
535 zfs_share_type_t
536 zfs_is_shared_proto(zfs_handle_t *zhp, char **where, zfs_share_proto_t proto)
537 {
538 	char *mountpoint;
539 	zfs_share_type_t rc;
540 
541 	if (!zfs_is_mounted(zhp, &mountpoint))
542 		return (SHARED_NOT_SHARED);
543 
544 	if ((rc = is_shared(zhp->zfs_hdl, mountpoint, proto))
545 	    != SHARED_NOT_SHARED) {
546 		if (where != NULL)
547 			*where = mountpoint;
548 		else
549 			free(mountpoint);
550 		return (rc);
551 	} else {
552 		free(mountpoint);
553 		return (SHARED_NOT_SHARED);
554 	}
555 }
556 
557 boolean_t
558 zfs_is_shared_nfs(zfs_handle_t *zhp, char **where)
559 {
560 	return (zfs_is_shared_proto(zhp, where,
561 	    PROTO_NFS) != SHARED_NOT_SHARED);
562 }
563 
564 boolean_t
565 zfs_is_shared_smb(zfs_handle_t *zhp, char **where)
566 {
567 	return (zfs_is_shared_proto(zhp, where,
568 	    PROTO_SMB) != SHARED_NOT_SHARED);
569 }
570 
571 /*
572  * Make sure things will work if libshare isn't installed by using
573  * wrapper functions that check to see that the pointers to functions
574  * initialized in _zfs_init_libshare() are actually present.
575  */
576 
577 static sa_handle_t (*_sa_init)(int);
578 static sa_handle_t (*_sa_init_arg)(int, void *);
579 static void (*_sa_fini)(sa_handle_t);
580 static sa_share_t (*_sa_find_share)(sa_handle_t, char *);
581 static int (*_sa_enable_share)(sa_share_t, char *);
582 static int (*_sa_disable_share)(sa_share_t, char *);
583 static char *(*_sa_errorstr)(int);
584 static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *);
585 static boolean_t (*_sa_needs_refresh)(sa_handle_t *);
586 static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t);
587 static int (*_sa_zfs_process_share)(sa_handle_t, sa_group_t, sa_share_t,
588     char *, char *, zprop_source_t, char *, char *, char *);
589 static void (*_sa_update_sharetab_ts)(sa_handle_t);
590 
591 /*
592  * _zfs_init_libshare()
593  *
594  * Find the libshare.so.1 entry points that we use here and save the
595  * values to be used later. This is triggered by the runtime loader.
596  * Make sure the correct ISA version is loaded.
597  */
598 
599 #pragma init(_zfs_init_libshare)
600 static void
601 _zfs_init_libshare(void)
602 {
603 	void *libshare;
604 	char path[MAXPATHLEN];
605 	char isa[MAXISALEN];
606 
607 #if defined(_LP64)
608 	if (sysinfo(SI_ARCHITECTURE_64, isa, MAXISALEN) == -1)
609 		isa[0] = '\0';
610 #else
611 	isa[0] = '\0';
612 #endif
613 	(void) snprintf(path, MAXPATHLEN,
614 	    "/usr/lib/%s/libshare.so.1", isa);
615 
616 	if ((libshare = dlopen(path, RTLD_LAZY | RTLD_GLOBAL)) != NULL) {
617 		_sa_init = (sa_handle_t (*)(int))dlsym(libshare, "sa_init");
618 		_sa_init_arg = (sa_handle_t (*)(int, void *))dlsym(libshare,
619 		    "sa_init_arg");
620 		_sa_fini = (void (*)(sa_handle_t))dlsym(libshare, "sa_fini");
621 		_sa_find_share = (sa_share_t (*)(sa_handle_t, char *))
622 		    dlsym(libshare, "sa_find_share");
623 		_sa_enable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
624 		    "sa_enable_share");
625 		_sa_disable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
626 		    "sa_disable_share");
627 		_sa_errorstr = (char *(*)(int))dlsym(libshare, "sa_errorstr");
628 		_sa_parse_legacy_options = (int (*)(sa_group_t, char *, char *))
629 		    dlsym(libshare, "sa_parse_legacy_options");
630 		_sa_needs_refresh = (boolean_t (*)(sa_handle_t *))
631 		    dlsym(libshare, "sa_needs_refresh");
632 		_sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t))
633 		    dlsym(libshare, "sa_get_zfs_handle");
634 		_sa_zfs_process_share = (int (*)(sa_handle_t, sa_group_t,
635 		    sa_share_t, char *, char *, zprop_source_t, char *,
636 		    char *, char *))dlsym(libshare, "sa_zfs_process_share");
637 		_sa_update_sharetab_ts = (void (*)(sa_handle_t))
638 		    dlsym(libshare, "sa_update_sharetab_ts");
639 		if (_sa_init == NULL || _sa_init_arg == NULL ||
640 		    _sa_fini == NULL || _sa_find_share == NULL ||
641 		    _sa_enable_share == NULL || _sa_disable_share == NULL ||
642 		    _sa_errorstr == NULL || _sa_parse_legacy_options == NULL ||
643 		    _sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL ||
644 		    _sa_zfs_process_share == NULL ||
645 		    _sa_update_sharetab_ts == NULL) {
646 			_sa_init = NULL;
647 			_sa_init_arg = NULL;
648 			_sa_fini = NULL;
649 			_sa_disable_share = NULL;
650 			_sa_enable_share = NULL;
651 			_sa_errorstr = NULL;
652 			_sa_parse_legacy_options = NULL;
653 			(void) dlclose(libshare);
654 			_sa_needs_refresh = NULL;
655 			_sa_get_zfs_handle = NULL;
656 			_sa_zfs_process_share = NULL;
657 			_sa_update_sharetab_ts = NULL;
658 		}
659 	}
660 }
661 
662 /*
663  * zfs_init_libshare(zhandle, service)
664  *
665  * Initialize the libshare API if it hasn't already been initialized.
666  * In all cases it returns 0 if it succeeded and an error if not. The
667  * service value is which part(s) of the API to initialize and is a
668  * direct map to the libshare sa_init(service) interface.
669  */
670 static int
671 zfs_init_libshare_impl(libzfs_handle_t *zhandle, int service, void *arg)
672 {
673 	/*
674 	 * libshare is either not installed or we're in a branded zone. The
675 	 * rest of the wrapper functions around the libshare calls already
676 	 * handle NULL function pointers, but we don't want the callers of
677 	 * zfs_init_libshare() to fail prematurely if libshare is not available.
678 	 */
679 	if (_sa_init == NULL)
680 		return (SA_OK);
681 
682 	/*
683 	 * Attempt to refresh libshare. This is necessary if there was a cache
684 	 * miss for a new ZFS dataset that was just created, or if state of the
685 	 * sharetab file has changed since libshare was last initialized. We
686 	 * want to make sure so check timestamps to see if a different process
687 	 * has updated any of the configuration. If there was some non-ZFS
688 	 * change, we need to re-initialize the internal cache.
689 	 */
690 	if (_sa_needs_refresh != NULL &&
691 	    _sa_needs_refresh(zhandle->libzfs_sharehdl)) {
692 		zfs_uninit_libshare(zhandle);
693 		zhandle->libzfs_sharehdl = _sa_init_arg(service, arg);
694 	}
695 
696 	if (zhandle && zhandle->libzfs_sharehdl == NULL)
697 		zhandle->libzfs_sharehdl = _sa_init_arg(service, arg);
698 
699 	if (zhandle->libzfs_sharehdl == NULL)
700 		return (SA_NO_MEMORY);
701 
702 	return (SA_OK);
703 }
704 int
705 zfs_init_libshare(libzfs_handle_t *zhandle, int service)
706 {
707 	return (zfs_init_libshare_impl(zhandle, service, NULL));
708 }
709 
710 int
711 zfs_init_libshare_arg(libzfs_handle_t *zhandle, int service, void *arg)
712 {
713 	return (zfs_init_libshare_impl(zhandle, service, arg));
714 }
715 
716 
717 /*
718  * zfs_uninit_libshare(zhandle)
719  *
720  * Uninitialize the libshare API if it hasn't already been
721  * uninitialized. It is OK to call multiple times.
722  */
723 void
724 zfs_uninit_libshare(libzfs_handle_t *zhandle)
725 {
726 	if (zhandle != NULL && zhandle->libzfs_sharehdl != NULL) {
727 		if (_sa_fini != NULL)
728 			_sa_fini(zhandle->libzfs_sharehdl);
729 		zhandle->libzfs_sharehdl = NULL;
730 	}
731 }
732 
733 /*
734  * zfs_parse_options(options, proto)
735  *
736  * Call the legacy parse interface to get the protocol specific
737  * options using the NULL arg to indicate that this is a "parse" only.
738  */
739 int
740 zfs_parse_options(char *options, zfs_share_proto_t proto)
741 {
742 	if (_sa_parse_legacy_options != NULL) {
743 		return (_sa_parse_legacy_options(NULL, options,
744 		    proto_table[proto].p_name));
745 	}
746 	return (SA_CONFIG_ERR);
747 }
748 
749 /*
750  * zfs_sa_find_share(handle, path)
751  *
752  * wrapper around sa_find_share to find a share path in the
753  * configuration.
754  */
755 static sa_share_t
756 zfs_sa_find_share(sa_handle_t handle, char *path)
757 {
758 	if (_sa_find_share != NULL)
759 		return (_sa_find_share(handle, path));
760 	return (NULL);
761 }
762 
763 /*
764  * zfs_sa_enable_share(share, proto)
765  *
766  * Wrapper for sa_enable_share which enables a share for a specified
767  * protocol.
768  */
769 static int
770 zfs_sa_enable_share(sa_share_t share, char *proto)
771 {
772 	if (_sa_enable_share != NULL)
773 		return (_sa_enable_share(share, proto));
774 	return (SA_CONFIG_ERR);
775 }
776 
777 /*
778  * zfs_sa_disable_share(share, proto)
779  *
780  * Wrapper for sa_enable_share which disables a share for a specified
781  * protocol.
782  */
783 static int
784 zfs_sa_disable_share(sa_share_t share, char *proto)
785 {
786 	if (_sa_disable_share != NULL)
787 		return (_sa_disable_share(share, proto));
788 	return (SA_CONFIG_ERR);
789 }
790 
791 /*
792  * Share the given filesystem according to the options in the specified
793  * protocol specific properties (sharenfs, sharesmb).  We rely
794  * on "libshare" to the dirty work for us.
795  */
796 static int
797 zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
798 {
799 	char mountpoint[ZFS_MAXPROPLEN];
800 	char shareopts[ZFS_MAXPROPLEN];
801 	char sourcestr[ZFS_MAXPROPLEN];
802 	libzfs_handle_t *hdl = zhp->zfs_hdl;
803 	sa_share_t share;
804 	zfs_share_proto_t *curr_proto;
805 	zprop_source_t sourcetype;
806 	int ret;
807 
808 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
809 		return (0);
810 
811 	for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
812 		/*
813 		 * Return success if there are no share options.
814 		 */
815 		if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
816 		    shareopts, sizeof (shareopts), &sourcetype, sourcestr,
817 		    ZFS_MAXPROPLEN, B_FALSE) != 0 ||
818 		    strcmp(shareopts, "off") == 0)
819 			continue;
820 		ret = zfs_init_libshare_arg(hdl, SA_INIT_ONE_SHARE_FROM_HANDLE,
821 		    zhp);
822 		if (ret != SA_OK) {
823 			(void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
824 			    dgettext(TEXT_DOMAIN, "cannot share '%s': %s"),
825 			    zfs_get_name(zhp), _sa_errorstr != NULL ?
826 			    _sa_errorstr(ret) : "");
827 			return (-1);
828 		}
829 
830 		/*
831 		 * If the 'zoned' property is set, then zfs_is_mountable()
832 		 * will have already bailed out if we are in the global zone.
833 		 * But local zones cannot be NFS servers, so we ignore it for
834 		 * local zones as well.
835 		 */
836 		if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
837 			continue;
838 
839 		share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint);
840 		if (share == NULL) {
841 			/*
842 			 * This may be a new file system that was just
843 			 * created so isn't in the internal cache
844 			 * (second time through). Rather than
845 			 * reloading the entire configuration, we can
846 			 * assume ZFS has done the checking and it is
847 			 * safe to add this to the internal
848 			 * configuration.
849 			 */
850 			if (_sa_zfs_process_share(hdl->libzfs_sharehdl,
851 			    NULL, NULL, mountpoint,
852 			    proto_table[*curr_proto].p_name, sourcetype,
853 			    shareopts, sourcestr, zhp->zfs_name) != SA_OK) {
854 				(void) zfs_error_fmt(hdl,
855 				    proto_table[*curr_proto].p_share_err,
856 				    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
857 				    zfs_get_name(zhp));
858 				return (-1);
859 			}
860 			share = zfs_sa_find_share(hdl->libzfs_sharehdl,
861 			    mountpoint);
862 		}
863 		if (share != NULL) {
864 			int err;
865 			err = zfs_sa_enable_share(share,
866 			    proto_table[*curr_proto].p_name);
867 			if (err != SA_OK) {
868 				(void) zfs_error_fmt(hdl,
869 				    proto_table[*curr_proto].p_share_err,
870 				    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
871 				    zfs_get_name(zhp));
872 				return (-1);
873 			}
874 		} else {
875 			(void) zfs_error_fmt(hdl,
876 			    proto_table[*curr_proto].p_share_err,
877 			    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
878 			    zfs_get_name(zhp));
879 			return (-1);
880 		}
881 
882 	}
883 	return (0);
884 }
885 
886 
887 int
888 zfs_share_nfs(zfs_handle_t *zhp)
889 {
890 	return (zfs_share_proto(zhp, nfs_only));
891 }
892 
893 int
894 zfs_share_smb(zfs_handle_t *zhp)
895 {
896 	return (zfs_share_proto(zhp, smb_only));
897 }
898 
899 int
900 zfs_shareall(zfs_handle_t *zhp)
901 {
902 	return (zfs_share_proto(zhp, share_all_proto));
903 }
904 
905 /*
906  * Unshare a filesystem by mountpoint.
907  */
908 static int
909 unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
910     zfs_share_proto_t proto)
911 {
912 	sa_share_t share;
913 	int err;
914 	char *mntpt;
915 
916 	/*
917 	 * Mountpoint could get trashed if libshare calls getmntany
918 	 * which it does during API initialization, so strdup the
919 	 * value.
920 	 */
921 	mntpt = zfs_strdup(hdl, mountpoint);
922 
923 	/*
924 	 * make sure libshare initialized, initialize everything because we
925 	 * don't know what other unsharing may happen later. Functions up the
926 	 * stack are allowed to initialize instead a subset of shares at the
927 	 * time the set is known.
928 	 */
929 	if ((err = zfs_init_libshare_arg(hdl, SA_INIT_ONE_SHARE_FROM_NAME,
930 	    (void *)name)) != SA_OK) {
931 		free(mntpt);	/* don't need the copy anymore */
932 		return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
933 		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
934 		    name, _sa_errorstr(err)));
935 	}
936 
937 	share = zfs_sa_find_share(hdl->libzfs_sharehdl, mntpt);
938 	free(mntpt);	/* don't need the copy anymore */
939 
940 	if (share != NULL) {
941 		err = zfs_sa_disable_share(share, proto_table[proto].p_name);
942 		if (err != SA_OK) {
943 			return (zfs_error_fmt(hdl,
944 			    proto_table[proto].p_unshare_err,
945 			    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
946 			    name, _sa_errorstr(err)));
947 		}
948 	} else {
949 		return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
950 		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"),
951 		    name));
952 	}
953 	return (0);
954 }
955 
956 /*
957  * Unshare the given filesystem.
958  */
959 int
960 zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint,
961     zfs_share_proto_t *proto)
962 {
963 	libzfs_handle_t *hdl = zhp->zfs_hdl;
964 	struct mnttab entry;
965 	char *mntpt = NULL;
966 
967 	/* check to see if need to unmount the filesystem */
968 	rewind(zhp->zfs_hdl->libzfs_mnttab);
969 	if (mountpoint != NULL)
970 		mountpoint = mntpt = zfs_strdup(hdl, mountpoint);
971 
972 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
973 	    libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) {
974 		zfs_share_proto_t *curr_proto;
975 
976 		if (mountpoint == NULL)
977 			mntpt = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp);
978 
979 		for (curr_proto = proto; *curr_proto != PROTO_END;
980 		    curr_proto++) {
981 
982 			if (is_shared(hdl, mntpt, *curr_proto) &&
983 			    unshare_one(hdl, zhp->zfs_name,
984 			    mntpt, *curr_proto) != 0) {
985 				if (mntpt != NULL)
986 					free(mntpt);
987 				return (-1);
988 			}
989 		}
990 	}
991 	if (mntpt != NULL)
992 		free(mntpt);
993 
994 	return (0);
995 }
996 
997 int
998 zfs_unshare_nfs(zfs_handle_t *zhp, const char *mountpoint)
999 {
1000 	return (zfs_unshare_proto(zhp, mountpoint, nfs_only));
1001 }
1002 
1003 int
1004 zfs_unshare_smb(zfs_handle_t *zhp, const char *mountpoint)
1005 {
1006 	return (zfs_unshare_proto(zhp, mountpoint, smb_only));
1007 }
1008 
1009 /*
1010  * Same as zfs_unmountall(), but for NFS and SMB unshares.
1011  */
1012 int
1013 zfs_unshareall_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
1014 {
1015 	prop_changelist_t *clp;
1016 	int ret;
1017 
1018 	clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0);
1019 	if (clp == NULL)
1020 		return (-1);
1021 
1022 	ret = changelist_unshare(clp, proto);
1023 	changelist_free(clp);
1024 
1025 	return (ret);
1026 }
1027 
1028 int
1029 zfs_unshareall_nfs(zfs_handle_t *zhp)
1030 {
1031 	return (zfs_unshareall_proto(zhp, nfs_only));
1032 }
1033 
1034 int
1035 zfs_unshareall_smb(zfs_handle_t *zhp)
1036 {
1037 	return (zfs_unshareall_proto(zhp, smb_only));
1038 }
1039 
1040 int
1041 zfs_unshareall(zfs_handle_t *zhp)
1042 {
1043 	return (zfs_unshareall_proto(zhp, share_all_proto));
1044 }
1045 
1046 int
1047 zfs_unshareall_bypath(zfs_handle_t *zhp, const char *mountpoint)
1048 {
1049 	return (zfs_unshare_proto(zhp, mountpoint, share_all_proto));
1050 }
1051 
1052 /*
1053  * Remove the mountpoint associated with the current dataset, if necessary.
1054  * We only remove the underlying directory if:
1055  *
1056  *	- The mountpoint is not 'none' or 'legacy'
1057  *	- The mountpoint is non-empty
1058  *	- The mountpoint is the default or inherited
1059  *	- The 'zoned' property is set, or we're in a local zone
1060  *
1061  * Any other directories we leave alone.
1062  */
1063 void
1064 remove_mountpoint(zfs_handle_t *zhp)
1065 {
1066 	char mountpoint[ZFS_MAXPROPLEN];
1067 	zprop_source_t source;
1068 
1069 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
1070 	    &source))
1071 		return;
1072 
1073 	if (source == ZPROP_SRC_DEFAULT ||
1074 	    source == ZPROP_SRC_INHERITED) {
1075 		/*
1076 		 * Try to remove the directory, silently ignoring any errors.
1077 		 * The filesystem may have since been removed or moved around,
1078 		 * and this error isn't really useful to the administrator in
1079 		 * any way.
1080 		 */
1081 		(void) rmdir(mountpoint);
1082 	}
1083 }
1084 
1085 /*
1086  * Add the given zfs handle to the cb_handles array, dynamically reallocating
1087  * the array if it is out of space.
1088  */
1089 void
1090 libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
1091 {
1092 	if (cbp->cb_alloc == cbp->cb_used) {
1093 		size_t newsz;
1094 		zfs_handle_t **newhandles;
1095 
1096 		newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64;
1097 		newhandles = zfs_realloc(zhp->zfs_hdl,
1098 		    cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),
1099 		    newsz * sizeof (zfs_handle_t *));
1100 		cbp->cb_handles = newhandles;
1101 		cbp->cb_alloc = newsz;
1102 	}
1103 	cbp->cb_handles[cbp->cb_used++] = zhp;
1104 }
1105 
1106 /*
1107  * Recursive helper function used during file system enumeration
1108  */
1109 static int
1110 zfs_iter_cb(zfs_handle_t *zhp, void *data)
1111 {
1112 	get_all_cb_t *cbp = data;
1113 
1114 	if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) {
1115 		zfs_close(zhp);
1116 		return (0);
1117 	}
1118 
1119 	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) {
1120 		zfs_close(zhp);
1121 		return (0);
1122 	}
1123 
1124 	/*
1125 	 * If this filesystem is inconsistent and has a receive resume
1126 	 * token, we can not mount it.
1127 	 */
1128 	if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
1129 	    zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
1130 	    NULL, 0, NULL, NULL, 0, B_TRUE) == 0) {
1131 		zfs_close(zhp);
1132 		return (0);
1133 	}
1134 
1135 	libzfs_add_handle(cbp, zhp);
1136 	if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) != 0) {
1137 		zfs_close(zhp);
1138 		return (-1);
1139 	}
1140 	return (0);
1141 }
1142 
1143 /*
1144  * Sort comparator that compares two mountpoint paths. We sort these paths so
1145  * that subdirectories immediately follow their parents. This means that we
1146  * effectively treat the '/' character as the lowest value non-nul char.
1147  * Since filesystems from non-global zones can have the same mountpoint
1148  * as other filesystems, the comparator sorts global zone filesystems to
1149  * the top of the list. This means that the global zone will traverse the
1150  * filesystem list in the correct order and can stop when it sees the
1151  * first zoned filesystem. In a non-global zone, only the delegated
1152  * filesystems are seen.
1153  *
1154  * An example sorted list using this comparator would look like:
1155  *
1156  * /foo
1157  * /foo/bar
1158  * /foo/bar/baz
1159  * /foo/baz
1160  * /foo.bar
1161  * /foo (NGZ1)
1162  * /foo (NGZ2)
1163  *
1164  * The mounting code depends on this ordering to deterministically iterate
1165  * over filesystems in order to spawn parallel mount tasks.
1166  */
1167 static int
1168 mountpoint_cmp(const void *arga, const void *argb)
1169 {
1170 	zfs_handle_t *const *zap = arga;
1171 	zfs_handle_t *za = *zap;
1172 	zfs_handle_t *const *zbp = argb;
1173 	zfs_handle_t *zb = *zbp;
1174 	char mounta[MAXPATHLEN];
1175 	char mountb[MAXPATHLEN];
1176 	const char *a = mounta;
1177 	const char *b = mountb;
1178 	boolean_t gota, gotb;
1179 	uint64_t zoneda, zonedb;
1180 
1181 	zoneda = zfs_prop_get_int(za, ZFS_PROP_ZONED);
1182 	zonedb = zfs_prop_get_int(zb, ZFS_PROP_ZONED);
1183 	if (zoneda && !zonedb)
1184 		return (1);
1185 	if (!zoneda && zonedb)
1186 		return (-1);
1187 
1188 	gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM);
1189 	if (gota) {
1190 		verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,
1191 		    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
1192 	}
1193 	gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM);
1194 	if (gotb) {
1195 		verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,
1196 		    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
1197 	}
1198 
1199 	if (gota && gotb) {
1200 		while (*a != '\0' && (*a == *b)) {
1201 			a++;
1202 			b++;
1203 		}
1204 		if (*a == *b)
1205 			return (0);
1206 		if (*a == '\0')
1207 			return (-1);
1208 		if (*b == '\0')
1209 			return (1);
1210 		if (*a == '/')
1211 			return (-1);
1212 		if (*b == '/')
1213 			return (1);
1214 		return (*a < *b ? -1 : *a > *b);
1215 	}
1216 
1217 	if (gota)
1218 		return (-1);
1219 	if (gotb)
1220 		return (1);
1221 
1222 	/*
1223 	 * If neither filesystem has a mountpoint, revert to sorting by
1224 	 * dataset name.
1225 	 */
1226 	return (strcmp(zfs_get_name(za), zfs_get_name(zb)));
1227 }
1228 
1229 /*
1230  * Return true if path2 is a child of path1.
1231  */
1232 static boolean_t
1233 libzfs_path_contains(const char *path1, const char *path2)
1234 {
1235 	return (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/');
1236 }
1237 
1238 /*
1239  * Given a mountpoint specified by idx in the handles array, find the first
1240  * non-descendent of that mountpoint and return its index. Descendant paths
1241  * start with the parent's path. This function relies on the ordering
1242  * enforced by mountpoint_cmp().
1243  */
1244 static int
1245 non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx)
1246 {
1247 	char parent[ZFS_MAXPROPLEN];
1248 	char child[ZFS_MAXPROPLEN];
1249 	int i;
1250 
1251 	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent,
1252 	    sizeof (parent), NULL, NULL, 0, B_FALSE) == 0);
1253 
1254 	for (i = idx + 1; i < num_handles; i++) {
1255 		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child,
1256 		    sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1257 		if (!libzfs_path_contains(parent, child))
1258 			break;
1259 	}
1260 	return (i);
1261 }
1262 
/*
 * Argument block handed to zfs_mount_task() through the task queue.  One
 * block is allocated by zfs_dispatch_mount() for every dispatched task.
 */
typedef struct mnt_param {
	/* libzfs handle, reused when dispatching tasks for children */
	libzfs_handle_t	*mnt_hdl;
	/* task queue that child tasks are dispatched to */
	zfs_taskq_t	*mnt_tq;
	zfs_handle_t	**mnt_zhps; /* filesystems to mount */
	/* number of entries in mnt_zhps */
	size_t		mnt_num_handles;
	int		mnt_idx;	/* Index of selected entry to mount */
	/* callback invoked on mnt_zhps[mnt_idx] */
	zfs_iter_f	mnt_func;
	/* opaque second argument passed to mnt_func */
	void		*mnt_data;
} mnt_param_t;
1272 
1273 /*
1274  * Allocate and populate the parameter struct for mount function, and
1275  * schedule mounting of the entry selected by idx.
1276  */
1277 static void
1278 zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,
1279     size_t num_handles, int idx, zfs_iter_f func, void *data, zfs_taskq_t *tq)
1280 {
1281 	mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t));
1282 
1283 	mnt_param->mnt_hdl = hdl;
1284 	mnt_param->mnt_tq = tq;
1285 	mnt_param->mnt_zhps = handles;
1286 	mnt_param->mnt_num_handles = num_handles;
1287 	mnt_param->mnt_idx = idx;
1288 	mnt_param->mnt_func = func;
1289 	mnt_param->mnt_data = data;
1290 
1291 	(void) zfs_taskq_dispatch(tq, zfs_mount_task, (void*)mnt_param,
1292 	    ZFS_TQ_SLEEP);
1293 }
1294 
1295 /*
1296  * This is the structure used to keep state of mounting or sharing operations
1297  * during a call to zpool_enable_datasets().
1298  */
1299 typedef struct mount_state {
1300 	/*
1301 	 * ms_mntstatus is set to -1 if any mount fails. While multiple threads
1302 	 * could update this variable concurrently, no synchronization is
1303 	 * needed as it's only ever set to -1.
1304 	 */
1305 	int		ms_mntstatus;
1306 	int		ms_mntflags;
1307 	const char	*ms_mntopts;
1308 } mount_state_t;
1309 
1310 static int
1311 zfs_mount_one(zfs_handle_t *zhp, void *arg)
1312 {
1313 	mount_state_t *ms = arg;
1314 	int ret = 0;
1315 
1316 	if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0)
1317 		ret = ms->ms_mntstatus = -1;
1318 	return (ret);
1319 }
1320 
1321 static int
1322 zfs_share_one(zfs_handle_t *zhp, void *arg)
1323 {
1324 	mount_state_t *ms = arg;
1325 	int ret = 0;
1326 
1327 	if (zfs_share(zhp) != 0)
1328 		ret = ms->ms_mntstatus = -1;
1329 	return (ret);
1330 }
1331 
1332 /*
1333  * Task queue function to mount one file system. On completion, it finds and
1334  * schedules its children to be mounted. This depends on the sorting done in
1335  * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries
1336  * each descending from the previous) will have no parallelism since we always
1337  * have to wait for the parent to finish mounting before we can schedule
1338  * its children.
1339  */
1340 static void
1341 zfs_mount_task(void *arg)
1342 {
1343 	mnt_param_t *mp = arg;
1344 	int idx = mp->mnt_idx;
1345 	zfs_handle_t **handles = mp->mnt_zhps;
1346 	size_t num_handles = mp->mnt_num_handles;
1347 	char mountpoint[ZFS_MAXPROPLEN];
1348 
1349 	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint,
1350 	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
1351 
1352 	if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)
1353 		return;
1354 
1355 	/*
1356 	 * We dispatch tasks to mount filesystems with mountpoints underneath
1357 	 * this one. We do this by dispatching the next filesystem with a
1358 	 * descendant mountpoint of the one we just mounted, then skip all of
1359 	 * its descendants, dispatch the next descendant mountpoint, and so on.
1360 	 * The non_descendant_idx() function skips over filesystems that are
1361 	 * descendants of the filesystem we just dispatched.
1362 	 */
1363 	for (int i = idx + 1; i < num_handles;
1364 	    i = non_descendant_idx(handles, num_handles, i)) {
1365 		char child[ZFS_MAXPROPLEN];
1366 		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,
1367 		    child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1368 
1369 		if (!libzfs_path_contains(mountpoint, child))
1370 			break; /* not a descendant, return */
1371 		zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,
1372 		    mp->mnt_func, mp->mnt_data, mp->mnt_tq);
1373 	}
1374 	free(mp);
1375 }
1376 
1377 /*
1378  * Issue the func callback for each ZFS handle contained in the handles
1379  * array. This function is used to mount all datasets, and so this function
1380  * guarantees that filesystems for parent mountpoints are called before their
1381  * children. As such, before issuing any callbacks, we first sort the array
1382  * of handles by mountpoint.
1383  *
1384  * Callbacks are issued in one of two ways:
1385  *
1386  * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT
1387  *    environment variable is set, then we issue callbacks sequentially.
1388  *
1389  * 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT
1390  *    environment variable is not set, then we use a taskq to dispatch threads
1391  *    to mount filesystems is parallel. This function dispatches tasks to mount
1392  *    the filesystems at the top-level mountpoints, and these tasks in turn
1393  *    are responsible for recursively mounting filesystems in their children
1394  *    mountpoints.
1395  */
1396 void
1397 zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
1398     size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel)
1399 {
1400 	zoneid_t zoneid = getzoneid();
1401 
1402 	/*
1403 	 * The ZFS_SERIAL_MOUNT environment variable is an undocumented
1404 	 * variable that can be used as a convenience to do a/b comparison
1405 	 * of serial vs. parallel mounting.
1406 	 */
1407 	boolean_t serial_mount = !parallel ||
1408 	    (getenv("ZFS_SERIAL_MOUNT") != NULL);
1409 
1410 	/*
1411 	 * Sort the datasets by mountpoint. See mountpoint_cmp for details
1412 	 * of how these are sorted.
1413 	 */
1414 	qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp);
1415 
1416 	if (serial_mount) {
1417 		for (int i = 0; i < num_handles; i++) {
1418 			func(handles[i], data);
1419 		}
1420 		return;
1421 	}
1422 
1423 	/*
1424 	 * Issue the callback function for each dataset using a parallel
1425 	 * algorithm that uses a taskq to manage threads.
1426 	 */
1427 	zfs_taskq_t *tq = zfs_taskq_create("mount_taskq", mount_tq_nthr, 0,
1428 	    mount_tq_nthr, mount_tq_nthr, ZFS_TASKQ_PREPOPULATE);
1429 
1430 	/*
1431 	 * There may be multiple "top level" mountpoints outside of the pool's
1432 	 * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of
1433 	 * these.
1434 	 */
1435 	for (int i = 0; i < num_handles;
1436 	    i = non_descendant_idx(handles, num_handles, i)) {
1437 		/*
1438 		 * Since the mountpoints have been sorted so that the zoned
1439 		 * filesystems are at the end, a zoned filesystem seen from
1440 		 * the global zone means that we're done.
1441 		 */
1442 		if (zoneid == GLOBAL_ZONEID &&
1443 		    zfs_prop_get_int(handles[i], ZFS_PROP_ZONED))
1444 			break;
1445 		zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,
1446 		    tq);
1447 	}
1448 
1449 	zfs_taskq_wait(tq); /* wait for all scheduled mounts to complete */
1450 	zfs_taskq_destroy(tq);
1451 }
1452 
1453 /*
1454  * Mount and share all datasets within the given pool.  This assumes that no
1455  * datasets within the pool are currently mounted.
1456  */
1457 #pragma weak zpool_mount_datasets = zpool_enable_datasets
1458 int
1459 zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
1460 {
1461 	get_all_cb_t cb = { 0 };
1462 	mount_state_t ms = { 0 };
1463 	zfs_handle_t *zfsp;
1464 	sa_init_selective_arg_t sharearg;
1465 	int ret = 0;
1466 
1467 	if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
1468 	    ZFS_TYPE_DATASET)) == NULL)
1469 		goto out;
1470 
1471 
1472 	/*
1473 	 * Gather all non-snapshot datasets within the pool. Start by adding
1474 	 * the root filesystem for this pool to the list, and then iterate
1475 	 * over all child filesystems.
1476 	 */
1477 	libzfs_add_handle(&cb, zfsp);
1478 	if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) != 0)
1479 		goto out;
1480 
1481 	ms.ms_mntopts = mntopts;
1482 	ms.ms_mntflags = flags;
1483 	zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1484 	    zfs_mount_one, &ms, B_TRUE);
1485 	if (ms.ms_mntstatus != 0)
1486 		ret = ms.ms_mntstatus;
1487 
1488 	/*
1489 	 * Share all filesystems that need to be shared. This needs to be
1490 	 * a separate pass because libshare is not mt-safe, and so we need
1491 	 * to share serially.
1492 	 */
1493 	sharearg.zhandle_arr = cb.cb_handles;
1494 	sharearg.zhandle_len = cb.cb_used;
1495 	if ((ret = zfs_init_libshare_arg(zhp->zpool_hdl,
1496 	    SA_INIT_SHARE_API_SELECTIVE, &sharearg)) != 0)
1497 		goto out;
1498 
1499 	ms.ms_mntstatus = 0;
1500 	zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1501 	    zfs_share_one, &ms, B_FALSE);
1502 	if (ms.ms_mntstatus != 0)
1503 		ret = ms.ms_mntstatus;
1504 
1505 out:
1506 	for (int i = 0; i < cb.cb_used; i++)
1507 		zfs_close(cb.cb_handles[i]);
1508 	free(cb.cb_handles);
1509 
1510 	return (ret);
1511 }
1512 
/*
 * qsort() comparator for an array of mountpoint strings.  The strcmp()
 * operands are swapped, so the array ends up in descending order and a path
 * sorts ahead of any path that is a proper prefix of it (e.g. "/a/b" before
 * "/a").
 */
static int
mountpoint_compare(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return (strcmp(*rhs, *lhs));
}
1521 
1522 /* alias for 2002/240 */
1523 #pragma weak zpool_unmount_datasets = zpool_disable_datasets
1524 /*
1525  * Unshare and unmount all datasets within the given pool.  We don't want to
1526  * rely on traversing the DSL to discover the filesystems within the pool,
1527  * because this may be expensive (if not all of them are mounted), and can fail
1528  * arbitrarily (on I/O error, for example).  Instead, we walk /etc/mnttab and
1529  * gather all the filesystems that are currently mounted.
1530  */
1531 int
1532 zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
1533 {
1534 	int used, alloc;
1535 	struct mnttab entry;
1536 	size_t namelen;
1537 	char **mountpoints = NULL;
1538 	zfs_handle_t **datasets = NULL;
1539 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1540 	int i;
1541 	int ret = -1;
1542 	int flags = (force ? MS_FORCE : 0);
1543 	sa_init_selective_arg_t sharearg;
1544 
1545 	namelen = strlen(zhp->zpool_name);
1546 
1547 	rewind(hdl->libzfs_mnttab);
1548 	used = alloc = 0;
1549 	while (getmntent(hdl->libzfs_mnttab, &entry) == 0) {
1550 		/*
1551 		 * Ignore non-ZFS entries.
1552 		 */
1553 		if (entry.mnt_fstype == NULL ||
1554 		    strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
1555 			continue;
1556 
1557 		/*
1558 		 * Ignore filesystems not within this pool.
1559 		 */
1560 		if (entry.mnt_mountp == NULL ||
1561 		    strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||
1562 		    (entry.mnt_special[namelen] != '/' &&
1563 		    entry.mnt_special[namelen] != '\0'))
1564 			continue;
1565 
1566 		/*
1567 		 * At this point we've found a filesystem within our pool.  Add
1568 		 * it to our growing list.
1569 		 */
1570 		if (used == alloc) {
1571 			if (alloc == 0) {
1572 				if ((mountpoints = zfs_alloc(hdl,
1573 				    8 * sizeof (void *))) == NULL)
1574 					goto out;
1575 
1576 				if ((datasets = zfs_alloc(hdl,
1577 				    8 * sizeof (void *))) == NULL)
1578 					goto out;
1579 
1580 				alloc = 8;
1581 			} else {
1582 				void *ptr;
1583 
1584 				if ((ptr = zfs_realloc(hdl, mountpoints,
1585 				    alloc * sizeof (void *),
1586 				    alloc * 2 * sizeof (void *))) == NULL)
1587 					goto out;
1588 				mountpoints = ptr;
1589 
1590 				if ((ptr = zfs_realloc(hdl, datasets,
1591 				    alloc * sizeof (void *),
1592 				    alloc * 2 * sizeof (void *))) == NULL)
1593 					goto out;
1594 				datasets = ptr;
1595 
1596 				alloc *= 2;
1597 			}
1598 		}
1599 
1600 		if ((mountpoints[used] = zfs_strdup(hdl,
1601 		    entry.mnt_mountp)) == NULL)
1602 			goto out;
1603 
1604 		/*
1605 		 * This is allowed to fail, in case there is some I/O error.  It
1606 		 * is only used to determine if we need to remove the underlying
1607 		 * mountpoint, so failure is not fatal.
1608 		 */
1609 		datasets[used] = make_dataset_handle(hdl, entry.mnt_special);
1610 
1611 		used++;
1612 	}
1613 
1614 	/*
1615 	 * At this point, we have the entire list of filesystems, so sort it by
1616 	 * mountpoint.
1617 	 */
1618 	sharearg.zhandle_arr = datasets;
1619 	sharearg.zhandle_len = used;
1620 	ret = zfs_init_libshare_arg(hdl, SA_INIT_SHARE_API_SELECTIVE,
1621 	    &sharearg);
1622 	if (ret != 0)
1623 		goto out;
1624 	qsort(mountpoints, used, sizeof (char *), mountpoint_compare);
1625 
1626 	/*
1627 	 * Walk through and first unshare everything.
1628 	 */
1629 	for (i = 0; i < used; i++) {
1630 		zfs_share_proto_t *curr_proto;
1631 		for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
1632 		    curr_proto++) {
1633 			if (is_shared(hdl, mountpoints[i], *curr_proto) &&
1634 			    unshare_one(hdl, mountpoints[i],
1635 			    mountpoints[i], *curr_proto) != 0)
1636 				goto out;
1637 		}
1638 	}
1639 
1640 	/*
1641 	 * Now unmount everything, removing the underlying directories as
1642 	 * appropriate.
1643 	 */
1644 	for (i = 0; i < used; i++) {
1645 		if (unmount_one(hdl, mountpoints[i], flags) != 0)
1646 			goto out;
1647 	}
1648 
1649 	for (i = 0; i < used; i++) {
1650 		if (datasets[i])
1651 			remove_mountpoint(datasets[i]);
1652 	}
1653 
1654 	ret = 0;
1655 out:
1656 	for (i = 0; i < used; i++) {
1657 		if (datasets[i])
1658 			zfs_close(datasets[i]);
1659 		free(mountpoints[i]);
1660 	}
1661 	free(datasets);
1662 	free(mountpoints);
1663 
1664 	return (ret);
1665 }
1666