/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate #include <sys/param.h>
297c478bd9Sstevel@tonic-gate #include <sys/errno.h>
307c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
317c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
327c478bd9Sstevel@tonic-gate #include <sys/uio.h>
337c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
347c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
357c478bd9Sstevel@tonic-gate #include <sys/cred.h>
367c478bd9Sstevel@tonic-gate #include <sys/statvfs.h>
377c478bd9Sstevel@tonic-gate #include <sys/fs/lofs_info.h>
387c478bd9Sstevel@tonic-gate #include <sys/fs/lofs_node.h>
397c478bd9Sstevel@tonic-gate #include <sys/mount.h>
407c478bd9Sstevel@tonic-gate #include <sys/mntent.h>
417c478bd9Sstevel@tonic-gate #include <sys/mkdev.h>
4245916cd2Sjpk #include <sys/priv.h>
437c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
447c478bd9Sstevel@tonic-gate #include <sys/systm.h>
457c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
467c478bd9Sstevel@tonic-gate #include <sys/policy.h>
4745916cd2Sjpk #include <sys/tsol/label.h>
487c478bd9Sstevel@tonic-gate #include "fs/fs_subr.h"
497c478bd9Sstevel@tonic-gate 
507c478bd9Sstevel@tonic-gate /*
517c478bd9Sstevel@tonic-gate  * This is the loadable module wrapper.
527c478bd9Sstevel@tonic-gate  */
537c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
547c478bd9Sstevel@tonic-gate 
557c478bd9Sstevel@tonic-gate static mntopts_t lofs_mntopts;
567c478bd9Sstevel@tonic-gate 
577c478bd9Sstevel@tonic-gate static int lofsinit(int, char *);
587c478bd9Sstevel@tonic-gate 
597c478bd9Sstevel@tonic-gate static vfsdef_t vfw = {
607c478bd9Sstevel@tonic-gate 	VFSDEF_VERSION,
617c478bd9Sstevel@tonic-gate 	"lofs",
627c478bd9Sstevel@tonic-gate 	lofsinit,
635a59a8b3Srsb 	VSW_HASPROTO|VSW_STATS,
647c478bd9Sstevel@tonic-gate 	&lofs_mntopts
657c478bd9Sstevel@tonic-gate };
667c478bd9Sstevel@tonic-gate 
677c478bd9Sstevel@tonic-gate /*
687c478bd9Sstevel@tonic-gate  * Stuff needed to support "zonedevfs" mode.
697c478bd9Sstevel@tonic-gate  */
707c478bd9Sstevel@tonic-gate static major_t lofs_major;
717c478bd9Sstevel@tonic-gate static minor_t lofs_minor;
727c478bd9Sstevel@tonic-gate static kmutex_t lofs_minor_lock;
737c478bd9Sstevel@tonic-gate 
747c478bd9Sstevel@tonic-gate /*
757c478bd9Sstevel@tonic-gate  * LOFS mount options table
767c478bd9Sstevel@tonic-gate  */
777c478bd9Sstevel@tonic-gate static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
787c478bd9Sstevel@tonic-gate static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
797c478bd9Sstevel@tonic-gate static char *zonedevfs_cancel[] = { MNTOPT_LOFS_NOZONEDEVFS, NULL };
807c478bd9Sstevel@tonic-gate static char *nozonedevfs_cancel[] = { MNTOPT_LOFS_ZONEDEVFS, NULL };
817c478bd9Sstevel@tonic-gate static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL };
827c478bd9Sstevel@tonic-gate static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL };
837c478bd9Sstevel@tonic-gate 
847c478bd9Sstevel@tonic-gate static mntopt_t mntopts[] = {
857c478bd9Sstevel@tonic-gate /*
867c478bd9Sstevel@tonic-gate  *	option name		cancel option	default arg	flags
877c478bd9Sstevel@tonic-gate  *		private data
887c478bd9Sstevel@tonic-gate  */
897c478bd9Sstevel@tonic-gate 	{ MNTOPT_XATTR,		xattr_cancel,	NULL,		0,
907c478bd9Sstevel@tonic-gate 		(void *)0 },
917c478bd9Sstevel@tonic-gate 	{ MNTOPT_NOXATTR,	noxattr_cancel,	NULL,		0,
927c478bd9Sstevel@tonic-gate 		(void *)0 },
937c478bd9Sstevel@tonic-gate 	{ MNTOPT_LOFS_ZONEDEVFS,	zonedevfs_cancel,	NULL,	0,
947c478bd9Sstevel@tonic-gate 		(void *)0 },
957c478bd9Sstevel@tonic-gate 	{ MNTOPT_LOFS_NOZONEDEVFS,	nozonedevfs_cancel,	NULL,	0,
967c478bd9Sstevel@tonic-gate 		(void *)0 },
977c478bd9Sstevel@tonic-gate 	{ MNTOPT_LOFS_SUB,	sub_cancel,	NULL,		0,
987c478bd9Sstevel@tonic-gate 		(void *)0 },
997c478bd9Sstevel@tonic-gate 	{ MNTOPT_LOFS_NOSUB,	nosub_cancel,	NULL,		0,
1007c478bd9Sstevel@tonic-gate 		(void *)0 },
1017c478bd9Sstevel@tonic-gate };
1027c478bd9Sstevel@tonic-gate 
1037c478bd9Sstevel@tonic-gate static mntopts_t lofs_mntopts = {
1047c478bd9Sstevel@tonic-gate 	sizeof (mntopts) / sizeof (mntopt_t),
1057c478bd9Sstevel@tonic-gate 	mntopts
1067c478bd9Sstevel@tonic-gate };
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate /*
1097c478bd9Sstevel@tonic-gate  * Module linkage information for the kernel.
1107c478bd9Sstevel@tonic-gate  */
1117c478bd9Sstevel@tonic-gate 
1127c478bd9Sstevel@tonic-gate static struct modlfs modlfs = {
1137c478bd9Sstevel@tonic-gate 	&mod_fsops, "filesystem for lofs", &vfw
1147c478bd9Sstevel@tonic-gate };
1157c478bd9Sstevel@tonic-gate 
1167c478bd9Sstevel@tonic-gate static struct modlinkage modlinkage = {
1177c478bd9Sstevel@tonic-gate 	MODREV_1, (void *)&modlfs, NULL
1187c478bd9Sstevel@tonic-gate };
1197c478bd9Sstevel@tonic-gate 
1207c478bd9Sstevel@tonic-gate /*
1217c478bd9Sstevel@tonic-gate  * This is the module initialization routine.
1227c478bd9Sstevel@tonic-gate  */
12345916cd2Sjpk 
1247c478bd9Sstevel@tonic-gate int
12545916cd2Sjpk _init(void)
1267c478bd9Sstevel@tonic-gate {
1277c478bd9Sstevel@tonic-gate 	int status;
1287c478bd9Sstevel@tonic-gate 
1297c478bd9Sstevel@tonic-gate 	lofs_subrinit();
1307c478bd9Sstevel@tonic-gate 	status = mod_install(&modlinkage);
1317c478bd9Sstevel@tonic-gate 	if (status != 0) {
1327c478bd9Sstevel@tonic-gate 		/*
1337c478bd9Sstevel@tonic-gate 		 * Cleanup previously initialized work.
1347c478bd9Sstevel@tonic-gate 		 */
1357c478bd9Sstevel@tonic-gate 		lofs_subrfini();
1367c478bd9Sstevel@tonic-gate 	}
1377c478bd9Sstevel@tonic-gate 
1387c478bd9Sstevel@tonic-gate 	return (status);
1397c478bd9Sstevel@tonic-gate }
1407c478bd9Sstevel@tonic-gate 
/*
 * Module unload routine.
 *
 * Unloading lofs is refused for now because it would leak memory, so
 * this unconditionally reports the module as busy.
 */
int
_fini(void)
{
	return (EBUSY);
}
1517c478bd9Sstevel@tonic-gate 
1527c478bd9Sstevel@tonic-gate int
1537c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop)
1547c478bd9Sstevel@tonic-gate {
1557c478bd9Sstevel@tonic-gate 	return (mod_info(&modlinkage, modinfop));
1567c478bd9Sstevel@tonic-gate }
1577c478bd9Sstevel@tonic-gate 
1587c478bd9Sstevel@tonic-gate 
1597c478bd9Sstevel@tonic-gate static int lofsfstype;
1607c478bd9Sstevel@tonic-gate vfsops_t *lo_vfsops;
1617c478bd9Sstevel@tonic-gate 
1627c478bd9Sstevel@tonic-gate /*
1637c478bd9Sstevel@tonic-gate  * lo mount vfsop
1647c478bd9Sstevel@tonic-gate  * Set up mount info record and attach it to vfs struct.
1657c478bd9Sstevel@tonic-gate  */
1667c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1677c478bd9Sstevel@tonic-gate static int
1687c478bd9Sstevel@tonic-gate lo_mount(struct vfs *vfsp,
1697c478bd9Sstevel@tonic-gate 	struct vnode *vp,
1707c478bd9Sstevel@tonic-gate 	struct mounta *uap,
1717c478bd9Sstevel@tonic-gate 	struct cred *cr)
1727c478bd9Sstevel@tonic-gate {
1737c478bd9Sstevel@tonic-gate 	int error;
1747c478bd9Sstevel@tonic-gate 	struct vnode *srootvp = NULL;	/* the server's root */
1757c478bd9Sstevel@tonic-gate 	struct vnode *realrootvp;
1767c478bd9Sstevel@tonic-gate 	struct loinfo *li;
1777c478bd9Sstevel@tonic-gate 	int is_zonedevfs = 0;
1787c478bd9Sstevel@tonic-gate 	int nodev;
1797c478bd9Sstevel@tonic-gate 
1807c478bd9Sstevel@tonic-gate 	nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL);
1817c478bd9Sstevel@tonic-gate 
1827c478bd9Sstevel@tonic-gate 	if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0)
1837c478bd9Sstevel@tonic-gate 		return (EPERM);
1847c478bd9Sstevel@tonic-gate 
1857c478bd9Sstevel@tonic-gate 	/*
1867c478bd9Sstevel@tonic-gate 	 * Loopback devices which get "nodevices" added can be done without
1877c478bd9Sstevel@tonic-gate 	 * "nodevices" set because we cannot import devices into a zone
1887c478bd9Sstevel@tonic-gate 	 * with loopback.  Note that we have all zone privileges when
1897c478bd9Sstevel@tonic-gate 	 * this happens; if not, we'd have gotten "nosuid".
1907c478bd9Sstevel@tonic-gate 	 */
1917c478bd9Sstevel@tonic-gate 	if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
1927c478bd9Sstevel@tonic-gate 		vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY);
1937c478bd9Sstevel@tonic-gate 
1947c478bd9Sstevel@tonic-gate 	/*
1957c478bd9Sstevel@tonic-gate 	 * We must ensure that only the global zone applies the 'zonedevfs'
1967c478bd9Sstevel@tonic-gate 	 * option; we don't want non-global zones to be able to establish
1977c478bd9Sstevel@tonic-gate 	 * lofs mounts using the special dev_t we use to ensure that the
1987c478bd9Sstevel@tonic-gate 	 * contents of a zone's /dev cannot be victim to link(2) or rename(2).
1997c478bd9Sstevel@tonic-gate 	 * See below, where we set all of this up.
2007c478bd9Sstevel@tonic-gate 	 *
2017c478bd9Sstevel@tonic-gate 	 * Since this is more like a privilege check, we use crgetzoneid(cr)
2027c478bd9Sstevel@tonic-gate 	 * instead of getzoneid().
2037c478bd9Sstevel@tonic-gate 	 */
2047c478bd9Sstevel@tonic-gate 	is_zonedevfs = vfs_optionisset(vfsp, MNTOPT_LOFS_ZONEDEVFS, NULL);
2057c478bd9Sstevel@tonic-gate 	if (crgetzoneid(cr) != GLOBAL_ZONEID && is_zonedevfs)
2067c478bd9Sstevel@tonic-gate 		return (EPERM);
2077c478bd9Sstevel@tonic-gate 
2087c478bd9Sstevel@tonic-gate 	mutex_enter(&vp->v_lock);
2097c478bd9Sstevel@tonic-gate 	if (!(uap->flags & MS_OVERLAY) &&
21045916cd2Sjpk 	    (vp->v_count != 1 || (vp->v_flag & VROOT))) {
2117c478bd9Sstevel@tonic-gate 		mutex_exit(&vp->v_lock);
2127c478bd9Sstevel@tonic-gate 		return (EBUSY);
2137c478bd9Sstevel@tonic-gate 	}
2147c478bd9Sstevel@tonic-gate 	mutex_exit(&vp->v_lock);
2157c478bd9Sstevel@tonic-gate 
2167c478bd9Sstevel@tonic-gate 	/*
2177c478bd9Sstevel@tonic-gate 	 * Find real root, and make vfs point to real vfs
2187c478bd9Sstevel@tonic-gate 	 */
2197c478bd9Sstevel@tonic-gate 	if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ?
2207c478bd9Sstevel@tonic-gate 		UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP,
2217c478bd9Sstevel@tonic-gate 	    &realrootvp))
2227c478bd9Sstevel@tonic-gate 		return (error);
2237c478bd9Sstevel@tonic-gate 
22445916cd2Sjpk 	/*
22545916cd2Sjpk 	 * Enforce MAC policy if needed.
22645916cd2Sjpk 	 *
22745916cd2Sjpk 	 * Loopback mounts must not allow writing up. The dominance test
22845916cd2Sjpk 	 * is intended to prevent a global zone caller from accidentally
22945916cd2Sjpk 	 * creating write-up conditions between two labeled zones.
23045916cd2Sjpk 	 * Local zones can't violate MAC on their own without help from
23145916cd2Sjpk 	 * the global zone because they can't name a pathname that
23245916cd2Sjpk 	 * they don't already have.
23345916cd2Sjpk 	 *
23445916cd2Sjpk 	 * The special case check for the NET_MAC_AWARE process flag is
23545916cd2Sjpk 	 * to support the case of the automounter in the global zone. We
23645916cd2Sjpk 	 * permit automounting of local zone directories such as home
23745916cd2Sjpk 	 * directories, into the global zone as required by setlabel,
23845916cd2Sjpk 	 * zonecopy, and saving of desktop sessions. Such mounts are
23945916cd2Sjpk 	 * trusted not to expose the contents of one zone's directories
24045916cd2Sjpk 	 * to another by leaking them through the global zone.
24145916cd2Sjpk 	 */
24245916cd2Sjpk 	if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) {
24366aa579dSrica 		char	specname[MAXPATHLEN];
24466aa579dSrica 		zone_t	*from_zptr;
24566aa579dSrica 		zone_t	*to_zptr;
24666aa579dSrica 
24766aa579dSrica 		if (vnodetopath(NULL, realrootvp, specname,
24866aa579dSrica 		    sizeof (specname), CRED()) != 0)
24966aa579dSrica 			return (EACCES);
25045916cd2Sjpk 
25166aa579dSrica 		from_zptr = zone_find_by_path(specname);
25245916cd2Sjpk 		to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
25345916cd2Sjpk 
25445916cd2Sjpk 		/*
25545916cd2Sjpk 		 * Special case for zone devfs: the zone for /dev will
25645916cd2Sjpk 		 * incorrectly appear as the global zone since it's not
25745916cd2Sjpk 		 * under the zone rootpath.  So for zone devfs check allow
25845916cd2Sjpk 		 * read-write mounts.
259*48451833Scarlsonj 		 *
260*48451833Scarlsonj 		 * Second special case for scratch zones used for Live Upgrade:
261*48451833Scarlsonj 		 * this is used to mount the zone's root from /root to /a in
262*48451833Scarlsonj 		 * the scratch zone.  As with the other special case, this
263*48451833Scarlsonj 		 * appears to be outside of the zone because it's not under
264*48451833Scarlsonj 		 * the zone rootpath, which is $ZONEPATH/lu in the scratch
265*48451833Scarlsonj 		 * zone case.
26645916cd2Sjpk 		 */
26745916cd2Sjpk 
268*48451833Scarlsonj 		if (from_zptr != to_zptr && !is_zonedevfs &&
269*48451833Scarlsonj 		    !(to_zptr->zone_flags & ZF_IS_SCRATCH)) {
27045916cd2Sjpk 			/*
27145916cd2Sjpk 			 * We know at this point that the labels aren't equal
27245916cd2Sjpk 			 * because the zone pointers aren't equal, and zones
27345916cd2Sjpk 			 * can't share a label.
27445916cd2Sjpk 			 *
27545916cd2Sjpk 			 * If the source is the global zone then making
27645916cd2Sjpk 			 * it available to a local zone must be done in
27745916cd2Sjpk 			 * read-only mode as the label will become admin_low.
27845916cd2Sjpk 			 *
27945916cd2Sjpk 			 * If it is a mount between local zones then if
28045916cd2Sjpk 			 * the current process is in the global zone and has
28145916cd2Sjpk 			 * the NET_MAC_AWARE flag, then regular read-write
28245916cd2Sjpk 			 * access is allowed.  If it's in some other zone, but
28345916cd2Sjpk 			 * the label on the mount point dominates the original
28445916cd2Sjpk 			 * source, then allow the mount as read-only
28545916cd2Sjpk 			 * ("read-down").
28645916cd2Sjpk 			 */
28745916cd2Sjpk 			if (from_zptr->zone_id == GLOBAL_ZONEID) {
28845916cd2Sjpk 				/* make the mount read-only */
28945916cd2Sjpk 				vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
29045916cd2Sjpk 			} else { /* cross-zone mount */
29145916cd2Sjpk 				if (to_zptr->zone_id == GLOBAL_ZONEID &&
29245916cd2Sjpk 				    /* LINTED: no consequent */
29345916cd2Sjpk 				    getpflags(NET_MAC_AWARE, cr) != 0) {
29445916cd2Sjpk 					/* Allow the mount as read-write */
29545916cd2Sjpk 				} else if (bldominates(
29645916cd2Sjpk 				    label2bslabel(to_zptr->zone_slabel),
29745916cd2Sjpk 				    label2bslabel(from_zptr->zone_slabel))) {
29845916cd2Sjpk 					/* make the mount read-only */
29945916cd2Sjpk 					vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
30045916cd2Sjpk 				} else {
30145916cd2Sjpk 					zone_rele(to_zptr);
30245916cd2Sjpk 					zone_rele(from_zptr);
30345916cd2Sjpk 					return (EACCES);
30445916cd2Sjpk 				}
30545916cd2Sjpk 			}
30645916cd2Sjpk 		}
30745916cd2Sjpk 		zone_rele(to_zptr);
30845916cd2Sjpk 		zone_rele(from_zptr);
30945916cd2Sjpk 	}
31045916cd2Sjpk 
3117c478bd9Sstevel@tonic-gate 	/*
3127c478bd9Sstevel@tonic-gate 	 * realrootvp may be an AUTOFS node, in which case we
3137c478bd9Sstevel@tonic-gate 	 * perform a VOP_ACCESS() to trigger the mount of the
3147c478bd9Sstevel@tonic-gate 	 * intended filesystem, so we loopback mount the intended
3157c478bd9Sstevel@tonic-gate 	 * filesystem instead of the AUTOFS filesystem.
3167c478bd9Sstevel@tonic-gate 	 */
3177c478bd9Sstevel@tonic-gate 	(void) VOP_ACCESS(realrootvp, 0, 0, cr);
3187c478bd9Sstevel@tonic-gate 
3197c478bd9Sstevel@tonic-gate 	/*
3207c478bd9Sstevel@tonic-gate 	 * We're interested in the top most filesystem.
3217c478bd9Sstevel@tonic-gate 	 * This is specially important when uap->spec is a trigger
3227c478bd9Sstevel@tonic-gate 	 * AUTOFS node, since we're really interested in mounting the
3237c478bd9Sstevel@tonic-gate 	 * filesystem AUTOFS mounted as result of the VOP_ACCESS()
3247c478bd9Sstevel@tonic-gate 	 * call not the AUTOFS node itself.
3257c478bd9Sstevel@tonic-gate 	 */
3267c478bd9Sstevel@tonic-gate 	if (vn_mountedvfs(realrootvp) != NULL) {
3277c478bd9Sstevel@tonic-gate 		if (error = traverse(&realrootvp)) {
3287c478bd9Sstevel@tonic-gate 			VN_RELE(realrootvp);
3297c478bd9Sstevel@tonic-gate 			return (error);
3307c478bd9Sstevel@tonic-gate 		}
3317c478bd9Sstevel@tonic-gate 	}
3327c478bd9Sstevel@tonic-gate 
3337c478bd9Sstevel@tonic-gate 	/*
3347c478bd9Sstevel@tonic-gate 	 * Allocate a vfs info struct and attach it
3357c478bd9Sstevel@tonic-gate 	 */
3367c478bd9Sstevel@tonic-gate 	li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP);
3377c478bd9Sstevel@tonic-gate 	li->li_realvfs = realrootvp->v_vfsp;
3387c478bd9Sstevel@tonic-gate 	li->li_mountvfs = vfsp;
3397c478bd9Sstevel@tonic-gate 
3407c478bd9Sstevel@tonic-gate 	/*
3417c478bd9Sstevel@tonic-gate 	 * Set mount flags to be inherited by loopback vfs's
3427c478bd9Sstevel@tonic-gate 	 */
3437c478bd9Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
3447c478bd9Sstevel@tonic-gate 		li->li_mflag |= VFS_RDONLY;
3457c478bd9Sstevel@tonic-gate 	}
3467c478bd9Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
3477c478bd9Sstevel@tonic-gate 		li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES);
3487c478bd9Sstevel@tonic-gate 	}
3497c478bd9Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
3507c478bd9Sstevel@tonic-gate 		li->li_mflag |= VFS_NODEVICES;
3517c478bd9Sstevel@tonic-gate 	}
3527c478bd9Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
3537c478bd9Sstevel@tonic-gate 		li->li_mflag |= VFS_NOSETUID;
3547c478bd9Sstevel@tonic-gate 	}
3557c478bd9Sstevel@tonic-gate 	/*
3567c478bd9Sstevel@tonic-gate 	 * Permissive flags are added to the "deny" bitmap.
3577c478bd9Sstevel@tonic-gate 	 */
3587c478bd9Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
3597c478bd9Sstevel@tonic-gate 		li->li_dflag |= VFS_XATTR;
3607c478bd9Sstevel@tonic-gate 	}
3617c478bd9Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
3627c478bd9Sstevel@tonic-gate 		li->li_dflag |= VFS_NBMAND;
3637c478bd9Sstevel@tonic-gate 	}
3647c478bd9Sstevel@tonic-gate 
3657c478bd9Sstevel@tonic-gate 	/*
3667c478bd9Sstevel@tonic-gate 	 * Propagate inheritable mount flags from the real vfs.
3677c478bd9Sstevel@tonic-gate 	 */
3687c478bd9Sstevel@tonic-gate 	if ((li->li_realvfs->vfs_flag & VFS_RDONLY) &&
3697c478bd9Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_RO, NULL))
3707c478bd9Sstevel@tonic-gate 		vfs_setmntopt(vfsp, MNTOPT_RO, NULL,
3717c478bd9Sstevel@tonic-gate 		    VFS_NODISPLAY);
3727c478bd9Sstevel@tonic-gate 	if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) &&
3737c478bd9Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
3747c478bd9Sstevel@tonic-gate 		vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL,
3757c478bd9Sstevel@tonic-gate 		    VFS_NODISPLAY);
3767c478bd9Sstevel@tonic-gate 	if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) &&
3777c478bd9Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
3787c478bd9Sstevel@tonic-gate 		vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL,
3797c478bd9Sstevel@tonic-gate 		    VFS_NODISPLAY);
3807c478bd9Sstevel@tonic-gate 	/*
3817c478bd9Sstevel@tonic-gate 	 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags
3827c478bd9Sstevel@tonic-gate 	 * such as VFS_RDONLY, are handled differently.  An explicit
3837c478bd9Sstevel@tonic-gate 	 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR.
3847c478bd9Sstevel@tonic-gate 	 */
3857c478bd9Sstevel@tonic-gate 	if ((li->li_realvfs->vfs_flag & VFS_XATTR) &&
3867c478bd9Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) &&
3877c478bd9Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL))
3887c478bd9Sstevel@tonic-gate 		vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL,
3897c478bd9Sstevel@tonic-gate 		    VFS_NODISPLAY);
3907c478bd9Sstevel@tonic-gate 	if ((li->li_realvfs->vfs_flag & VFS_NBMAND) &&
3917c478bd9Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) &&
3927c478bd9Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
3937c478bd9Sstevel@tonic-gate 		vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL,
3947c478bd9Sstevel@tonic-gate 		    VFS_NODISPLAY);
3957c478bd9Sstevel@tonic-gate 
3967c478bd9Sstevel@tonic-gate 	li->li_refct = 0;
3977c478bd9Sstevel@tonic-gate 	vfsp->vfs_data = (caddr_t)li;
3987c478bd9Sstevel@tonic-gate 	vfsp->vfs_bcount = 0;
3997c478bd9Sstevel@tonic-gate 	vfsp->vfs_fstype = lofsfstype;
4007c478bd9Sstevel@tonic-gate 	vfsp->vfs_bsize = li->li_realvfs->vfs_bsize;
4017c478bd9Sstevel@tonic-gate 
4027c478bd9Sstevel@tonic-gate 	/*
4037c478bd9Sstevel@tonic-gate 	 * Test to see if we need to be in "zone /dev" mode.  In zonedevfs
4047c478bd9Sstevel@tonic-gate 	 * mode, we pull a nasty trick; we make sure that the lofs dev_t does
4057c478bd9Sstevel@tonic-gate 	 * *not* reflect the underlying device, so that no renames or links
4067c478bd9Sstevel@tonic-gate 	 * can occur to or from the /dev hierarchy.
4077c478bd9Sstevel@tonic-gate 	 */
4087c478bd9Sstevel@tonic-gate 	if (is_zonedevfs) {
4097c478bd9Sstevel@tonic-gate 		dev_t dev;
4107c478bd9Sstevel@tonic-gate 
4117c478bd9Sstevel@tonic-gate 		mutex_enter(&lofs_minor_lock);
4127c478bd9Sstevel@tonic-gate 		do {
4137c478bd9Sstevel@tonic-gate 			lofs_minor = (lofs_minor + 1) & MAXMIN32;
4147c478bd9Sstevel@tonic-gate 			dev = makedevice(lofs_major, lofs_minor);
4157c478bd9Sstevel@tonic-gate 		} while (vfs_devismounted(dev));
4167c478bd9Sstevel@tonic-gate 		mutex_exit(&lofs_minor_lock);
4177c478bd9Sstevel@tonic-gate 
4187c478bd9Sstevel@tonic-gate 		vfsp->vfs_dev = dev;
4197c478bd9Sstevel@tonic-gate 		vfs_make_fsid(&vfsp->vfs_fsid, dev, lofsfstype);
4207c478bd9Sstevel@tonic-gate 
4217c478bd9Sstevel@tonic-gate 		li->li_flag |= LO_ZONEDEVFS;
4227c478bd9Sstevel@tonic-gate 	} else {
4237c478bd9Sstevel@tonic-gate 		vfsp->vfs_dev = li->li_realvfs->vfs_dev;
4247c478bd9Sstevel@tonic-gate 		vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0];
4257c478bd9Sstevel@tonic-gate 		vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1];
4267c478bd9Sstevel@tonic-gate 	}
4277c478bd9Sstevel@tonic-gate 
4287c478bd9Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) {
4297c478bd9Sstevel@tonic-gate 		li->li_flag |= LO_NOSUB;
4307c478bd9Sstevel@tonic-gate 	}
4317c478bd9Sstevel@tonic-gate 
4327c478bd9Sstevel@tonic-gate 	/*
4337c478bd9Sstevel@tonic-gate 	 * Setup the hashtable. If the root of this mount isn't a directory,
4347c478bd9Sstevel@tonic-gate 	 * there's no point in allocating a large hashtable. A table with one
4357c478bd9Sstevel@tonic-gate 	 * bucket is sufficient.
4367c478bd9Sstevel@tonic-gate 	 */
4377c478bd9Sstevel@tonic-gate 	if (realrootvp->v_type != VDIR)
4387c478bd9Sstevel@tonic-gate 		lsetup(li, 1);
4397c478bd9Sstevel@tonic-gate 	else
4407c478bd9Sstevel@tonic-gate 		lsetup(li, 0);
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate 	/*
4437c478bd9Sstevel@tonic-gate 	 * Make the root vnode
4447c478bd9Sstevel@tonic-gate 	 */
445b431137cSowenr 	srootvp = makelonode(realrootvp, li, 0);
4467c478bd9Sstevel@tonic-gate 	srootvp->v_flag |= VROOT;
4477c478bd9Sstevel@tonic-gate 	li->li_rootvp = srootvp;
4487c478bd9Sstevel@tonic-gate 
4497c478bd9Sstevel@tonic-gate #ifdef LODEBUG
4507c478bd9Sstevel@tonic-gate 	lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n",
4517c478bd9Sstevel@tonic-gate 	    vfsp, li->li_realvfs, srootvp, realrootvp, li);
4527c478bd9Sstevel@tonic-gate #endif
4537c478bd9Sstevel@tonic-gate 	return (0);
4547c478bd9Sstevel@tonic-gate }
4557c478bd9Sstevel@tonic-gate 
4567c478bd9Sstevel@tonic-gate /*
4577c478bd9Sstevel@tonic-gate  * Undo loopback mount
4587c478bd9Sstevel@tonic-gate  */
4597c478bd9Sstevel@tonic-gate static int
4607c478bd9Sstevel@tonic-gate lo_unmount(struct vfs *vfsp, int flag, struct cred *cr)
4617c478bd9Sstevel@tonic-gate {
4627c478bd9Sstevel@tonic-gate 	struct loinfo *li;
4637c478bd9Sstevel@tonic-gate 
4647c478bd9Sstevel@tonic-gate 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
4657c478bd9Sstevel@tonic-gate 		return (EPERM);
4667c478bd9Sstevel@tonic-gate 
4677c478bd9Sstevel@tonic-gate 	/*
4687c478bd9Sstevel@tonic-gate 	 * Forced unmount is not supported by this file system
4697c478bd9Sstevel@tonic-gate 	 * and thus, ENOTSUP, is being returned.
4707c478bd9Sstevel@tonic-gate 	 */
4717c478bd9Sstevel@tonic-gate 	if (flag & MS_FORCE)
4727c478bd9Sstevel@tonic-gate 		return (ENOTSUP);
4737c478bd9Sstevel@tonic-gate 
4747c478bd9Sstevel@tonic-gate 	li = vtoli(vfsp);
4757c478bd9Sstevel@tonic-gate #ifdef LODEBUG
4767c478bd9Sstevel@tonic-gate 	lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li);
4777c478bd9Sstevel@tonic-gate #endif
4787c478bd9Sstevel@tonic-gate 	if (li->li_refct != 1 || li->li_rootvp->v_count != 1) {
4797c478bd9Sstevel@tonic-gate #ifdef LODEBUG
4807c478bd9Sstevel@tonic-gate 		lo_dprint(4, "refct %d v_ct %d\n", li->li_refct,
4817c478bd9Sstevel@tonic-gate 		    li->li_rootvp->v_count);
4827c478bd9Sstevel@tonic-gate #endif
4837c478bd9Sstevel@tonic-gate 		return (EBUSY);
4847c478bd9Sstevel@tonic-gate 	}
4857c478bd9Sstevel@tonic-gate 	VN_RELE(li->li_rootvp);
4867c478bd9Sstevel@tonic-gate 	return (0);
4877c478bd9Sstevel@tonic-gate }
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate /*
4907c478bd9Sstevel@tonic-gate  * Find root of lofs mount.
4917c478bd9Sstevel@tonic-gate  */
4927c478bd9Sstevel@tonic-gate static int
4937c478bd9Sstevel@tonic-gate lo_root(struct vfs *vfsp, struct vnode **vpp)
4947c478bd9Sstevel@tonic-gate {
4957c478bd9Sstevel@tonic-gate 	*vpp = vtoli(vfsp)->li_rootvp;
4967c478bd9Sstevel@tonic-gate #ifdef LODEBUG
4977c478bd9Sstevel@tonic-gate 	lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp);
4987c478bd9Sstevel@tonic-gate #endif
4997c478bd9Sstevel@tonic-gate 	/*
5007c478bd9Sstevel@tonic-gate 	 * If the root of the filesystem is a special file, return the specvp
5017c478bd9Sstevel@tonic-gate 	 * version of the vnode. We don't save the specvp vnode in our
5027c478bd9Sstevel@tonic-gate 	 * hashtable since that's exclusively for lnodes.
5037c478bd9Sstevel@tonic-gate 	 */
5047c478bd9Sstevel@tonic-gate 	if (IS_DEVVP(*vpp)) {
5057c478bd9Sstevel@tonic-gate 		struct vnode *svp;
5067c478bd9Sstevel@tonic-gate 
5077c478bd9Sstevel@tonic-gate 		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred);
5087c478bd9Sstevel@tonic-gate 		if (svp == NULL)
5097c478bd9Sstevel@tonic-gate 			return (ENOSYS);
5107c478bd9Sstevel@tonic-gate 		*vpp = svp;
5117c478bd9Sstevel@tonic-gate 	} else {
5127c478bd9Sstevel@tonic-gate 		VN_HOLD(*vpp);
5137c478bd9Sstevel@tonic-gate 	}
5147c478bd9Sstevel@tonic-gate 
5157c478bd9Sstevel@tonic-gate 	return (0);
5167c478bd9Sstevel@tonic-gate }
5177c478bd9Sstevel@tonic-gate 
5187c478bd9Sstevel@tonic-gate /*
5197c478bd9Sstevel@tonic-gate  * Get file system statistics.
5207c478bd9Sstevel@tonic-gate  */
5217c478bd9Sstevel@tonic-gate static int
5227c478bd9Sstevel@tonic-gate lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp)
5237c478bd9Sstevel@tonic-gate {
5247c478bd9Sstevel@tonic-gate 	vnode_t *realrootvp;
5257c478bd9Sstevel@tonic-gate 
5267c478bd9Sstevel@tonic-gate #ifdef LODEBUG
5277c478bd9Sstevel@tonic-gate 	lo_dprint(4, "lostatvfs %p\n", vfsp);
5287c478bd9Sstevel@tonic-gate #endif
5297c478bd9Sstevel@tonic-gate 	/*
5307c478bd9Sstevel@tonic-gate 	 * Using realrootvp->v_vfsp (instead of the realvfsp that was
5317c478bd9Sstevel@tonic-gate 	 * cached) is necessary to make lofs work woth forced UFS unmounts.
5327c478bd9Sstevel@tonic-gate 	 * In the case of a forced unmount, UFS stores a set of dummy vfsops
5337c478bd9Sstevel@tonic-gate 	 * in all the (i)vnodes in the filesystem. The dummy ops simply
5347c478bd9Sstevel@tonic-gate 	 * returns back EIO.
5357c478bd9Sstevel@tonic-gate 	 */
5367c478bd9Sstevel@tonic-gate 	(void) lo_realvfs(vfsp, &realrootvp);
5377c478bd9Sstevel@tonic-gate 	if (realrootvp != NULL)
5387c478bd9Sstevel@tonic-gate 		return (VFS_STATVFS(realrootvp->v_vfsp, sbp));
5397c478bd9Sstevel@tonic-gate 	else
5407c478bd9Sstevel@tonic-gate 		return (EIO);
5417c478bd9Sstevel@tonic-gate }
5427c478bd9Sstevel@tonic-gate 
/*
 * Sync vfsop.
 *
 * LOFS has no data or metadata of its own to flush; pending I/O on the
 * underlying filesystem is flushed when that filesystem is synched.
 * This is therefore a no-op that always returns 0.
 */
/* ARGSUSED */
static int
lo_sync(struct vfs *vfsp, short flag, struct cred *cr)
{
#ifdef LODEBUG
	lo_dprint(4, "lo_sync: %p\n", vfsp);
#endif
	return (0);
}
5587c478bd9Sstevel@tonic-gate 
5597c478bd9Sstevel@tonic-gate /*
5607c478bd9Sstevel@tonic-gate  * Obtain the vnode from the underlying filesystem.
5617c478bd9Sstevel@tonic-gate  */
5627c478bd9Sstevel@tonic-gate static int
5637c478bd9Sstevel@tonic-gate lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
5647c478bd9Sstevel@tonic-gate {
5657c478bd9Sstevel@tonic-gate 	vnode_t *realrootvp;
5667c478bd9Sstevel@tonic-gate 
5677c478bd9Sstevel@tonic-gate #ifdef LODEBUG
5687c478bd9Sstevel@tonic-gate 	lo_dprint(4, "lo_vget: %p\n", vfsp);
5697c478bd9Sstevel@tonic-gate #endif
5707c478bd9Sstevel@tonic-gate 	(void) lo_realvfs(vfsp, &realrootvp);
5717c478bd9Sstevel@tonic-gate 	if (realrootvp != NULL)
5727c478bd9Sstevel@tonic-gate 		return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp));
5737c478bd9Sstevel@tonic-gate 	else
5747c478bd9Sstevel@tonic-gate 		return (EIO);
5757c478bd9Sstevel@tonic-gate }
5767c478bd9Sstevel@tonic-gate 
5777c478bd9Sstevel@tonic-gate /*
5787c478bd9Sstevel@tonic-gate  * Free mount-specific data.
5797c478bd9Sstevel@tonic-gate  */
5807c478bd9Sstevel@tonic-gate static void
5817c478bd9Sstevel@tonic-gate lo_freevfs(struct vfs *vfsp)
5827c478bd9Sstevel@tonic-gate {
5837c478bd9Sstevel@tonic-gate 	struct loinfo *li = vtoli(vfsp);
5847c478bd9Sstevel@tonic-gate 
5857c478bd9Sstevel@tonic-gate 	ldestroy(li);
5867c478bd9Sstevel@tonic-gate 	kmem_free(li, sizeof (struct loinfo));
5877c478bd9Sstevel@tonic-gate }
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate static int
5907c478bd9Sstevel@tonic-gate lofsinit(int fstyp, char *name)
5917c478bd9Sstevel@tonic-gate {
5927c478bd9Sstevel@tonic-gate 	static const fs_operation_def_t lo_vfsops_template[] = {
5937c478bd9Sstevel@tonic-gate 		VFSNAME_MOUNT, lo_mount,
5947c478bd9Sstevel@tonic-gate 		VFSNAME_UNMOUNT, lo_unmount,
5957c478bd9Sstevel@tonic-gate 		VFSNAME_ROOT, lo_root,
5967c478bd9Sstevel@tonic-gate 		VFSNAME_STATVFS, lo_statvfs,
5977c478bd9Sstevel@tonic-gate 		VFSNAME_SYNC, (fs_generic_func_p) lo_sync,
5987c478bd9Sstevel@tonic-gate 		VFSNAME_VGET, lo_vget,
5997c478bd9Sstevel@tonic-gate 		VFSNAME_FREEVFS, (fs_generic_func_p) lo_freevfs,
6007c478bd9Sstevel@tonic-gate 		NULL, NULL
6017c478bd9Sstevel@tonic-gate 	};
6027c478bd9Sstevel@tonic-gate 	int error;
6037c478bd9Sstevel@tonic-gate 
6047c478bd9Sstevel@tonic-gate 	error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops);
6057c478bd9Sstevel@tonic-gate 	if (error != 0) {
6067c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "lofsinit: bad vfs ops template");
6077c478bd9Sstevel@tonic-gate 		return (error);
6087c478bd9Sstevel@tonic-gate 	}
6097c478bd9Sstevel@tonic-gate 
6107c478bd9Sstevel@tonic-gate 	error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops);
6117c478bd9Sstevel@tonic-gate 	if (error != 0) {
6127c478bd9Sstevel@tonic-gate 		(void) vfs_freevfsops_by_type(fstyp);
6137c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "lofsinit: bad vnode ops template");
6147c478bd9Sstevel@tonic-gate 		return (error);
6157c478bd9Sstevel@tonic-gate 	}
6167c478bd9Sstevel@tonic-gate 
6177c478bd9Sstevel@tonic-gate 	lofsfstype = fstyp;
6187c478bd9Sstevel@tonic-gate 
6197c478bd9Sstevel@tonic-gate 	if ((lofs_major = getudev()) == (major_t)-1) {
6207c478bd9Sstevel@tonic-gate 		(void) vfs_freevfsops_by_type(fstyp);
6217c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "lofsinit: Can't get unique device number.");
6227c478bd9Sstevel@tonic-gate 		return (ENXIO);
6237c478bd9Sstevel@tonic-gate 	}
6247c478bd9Sstevel@tonic-gate 
6257c478bd9Sstevel@tonic-gate 	lofs_minor = 0;
6267c478bd9Sstevel@tonic-gate 	mutex_init(&lofs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
6277c478bd9Sstevel@tonic-gate 
6287c478bd9Sstevel@tonic-gate 	return (0);
6297c478bd9Sstevel@tonic-gate }
630