1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/param.h>
26 #include <sys/errno.h>
27 #include <sys/vfs.h>
28 #include <sys/vfs_opreg.h>
29 #include <sys/vnode.h>
30 #include <sys/uio.h>
31 #include <sys/pathname.h>
32 #include <sys/kmem.h>
33 #include <sys/cred.h>
34 #include <sys/statvfs.h>
35 #include <sys/fs/lofs_info.h>
36 #include <sys/fs/lofs_node.h>
37 #include <sys/mount.h>
38 #include <sys/mntent.h>
39 #include <sys/mkdev.h>
40 #include <sys/priv.h>
41 #include <sys/sysmacros.h>
42 #include <sys/systm.h>
43 #include <sys/cmn_err.h>
44 #include <sys/policy.h>
45 #include <sys/tsol/label.h>
46 #include "fs/fs_subr.h"
47 
48 /*
49  * This is the loadable module wrapper.
50  */
51 #include <sys/modctl.h>
52 
53 static mntopts_t lofs_mntopts;
54 
55 static int lofsinit(int, char *);
56 
57 static vfsdef_t vfw = {
58 	VFSDEF_VERSION,
59 	"lofs",
60 	lofsinit,
61 	VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT,
62 	&lofs_mntopts
63 };
64 
65 /*
66  * LOFS mount options table
67  */
68 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
69 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
70 static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL };
71 static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL };
72 
73 static mntopt_t mntopts[] = {
74 /*
75  *	option name		cancel option	default arg	flags
76  *		private data
77  */
78 	{ MNTOPT_XATTR,		xattr_cancel,	NULL,		0,
79 		(void *)0 },
80 	{ MNTOPT_NOXATTR,	noxattr_cancel,	NULL,		0,
81 		(void *)0 },
82 	{ MNTOPT_LOFS_SUB,	sub_cancel,	NULL,		0,
83 		(void *)0 },
84 	{ MNTOPT_LOFS_NOSUB,	nosub_cancel,	NULL,		0,
85 		(void *)0 },
86 };
87 
88 static mntopts_t lofs_mntopts = {
89 	sizeof (mntopts) / sizeof (mntopt_t),
90 	mntopts
91 };
92 
93 /*
94  * Module linkage information for the kernel.
95  */
96 
97 static struct modlfs modlfs = {
98 	&mod_fsops, "filesystem for lofs", &vfw
99 };
100 
101 static struct modlinkage modlinkage = {
102 	MODREV_1, (void *)&modlfs, NULL
103 };
104 
105 /*
106  * This is the module initialization routine.
107  */
108 
109 int
110 _init(void)
111 {
112 	int status;
113 
114 	lofs_subrinit();
115 	status = mod_install(&modlinkage);
116 	if (status != 0) {
117 		/*
118 		 * Cleanup previously initialized work.
119 		 */
120 		lofs_subrfini();
121 	}
122 
123 	return (status);
124 }
125 
/*
 * Module unload routine.
 *
 * Unloading the lofs module is refused for now, because there is a
 * memory leak if it gets unloaded.  Always returns EBUSY.
 */
int
_fini(void)
{
	return (EBUSY);
}
136 
137 int
138 _info(struct modinfo *modinfop)
139 {
140 	return (mod_info(&modlinkage, modinfop));
141 }
142 
143 
144 static int lofsfstype;
145 vfsops_t *lo_vfsops;
146 
147 /*
148  * lo mount vfsop
149  * Set up mount info record and attach it to vfs struct.
150  */
151 /*ARGSUSED*/
152 static int
153 lo_mount(struct vfs *vfsp,
154 	struct vnode *vp,
155 	struct mounta *uap,
156 	struct cred *cr)
157 {
158 	int error;
159 	struct vnode *srootvp = NULL;	/* the server's root */
160 	struct vnode *realrootvp;
161 	struct loinfo *li;
162 	int nodev;
163 
164 	nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL);
165 
166 	if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0)
167 		return (EPERM);
168 
169 	/*
170 	 * Loopback devices which get "nodevices" added can be done without
171 	 * "nodevices" set because we cannot import devices into a zone
172 	 * with loopback.  Note that we have all zone privileges when
173 	 * this happens; if not, we'd have gotten "nosuid".
174 	 */
175 	if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
176 		vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY);
177 
178 	mutex_enter(&vp->v_lock);
179 	if (!(uap->flags & MS_OVERLAY) &&
180 	    (vp->v_count != 1 || (vp->v_flag & VROOT))) {
181 		mutex_exit(&vp->v_lock);
182 		return (EBUSY);
183 	}
184 	mutex_exit(&vp->v_lock);
185 
186 	/*
187 	 * Find real root, and make vfs point to real vfs
188 	 */
189 
190 	if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ?
191 	    UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, &realrootvp))
192 		return (error);
193 
194 	/*
195 	 * Enforce MAC policy if needed.
196 	 *
197 	 * Loopback mounts must not allow writing up. The dominance test
198 	 * is intended to prevent a global zone caller from accidentally
199 	 * creating write-up conditions between two labeled zones.
200 	 * Local zones can't violate MAC on their own without help from
201 	 * the global zone because they can't name a pathname that
202 	 * they don't already have.
203 	 *
204 	 * The special case check for the NET_MAC_AWARE process flag is
205 	 * to support the case of the automounter in the global zone. We
206 	 * permit automounting of local zone directories such as home
207 	 * directories, into the global zone as required by setlabel,
208 	 * zonecopy, and saving of desktop sessions. Such mounts are
209 	 * trusted not to expose the contents of one zone's directories
210 	 * to another by leaking them through the global zone.
211 	 */
212 	if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) {
213 		char	specname[MAXPATHLEN];
214 		zone_t	*from_zptr;
215 		zone_t	*to_zptr;
216 
217 		if (vnodetopath(NULL, realrootvp, specname,
218 		    sizeof (specname), CRED()) != 0) {
219 			VN_RELE(realrootvp);
220 			return (EACCES);
221 		}
222 
223 		from_zptr = zone_find_by_path(specname);
224 		to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
225 
226 		/*
227 		 * Special case for scratch zones used for Live Upgrade:
228 		 * this is used to mount the zone's root from /root to /a in
229 		 * the scratch zone.  As with the other special case, this
230 		 * appears to be outside of the zone because it's not under
231 		 * the zone rootpath, which is $ZONEPATH/lu in the scratch
232 		 * zone case.
233 		 */
234 
235 		if (from_zptr != to_zptr &&
236 		    !(to_zptr->zone_flags & ZF_IS_SCRATCH)) {
237 			/*
238 			 * We know at this point that the labels aren't equal
239 			 * because the zone pointers aren't equal, and zones
240 			 * can't share a label.
241 			 *
242 			 * If the source is the global zone then making
243 			 * it available to a local zone must be done in
244 			 * read-only mode as the label will become admin_low.
245 			 *
246 			 * If it is a mount between local zones then if
247 			 * the current process is in the global zone and has
248 			 * the NET_MAC_AWARE flag, then regular read-write
249 			 * access is allowed.  If it's in some other zone, but
250 			 * the label on the mount point dominates the original
251 			 * source, then allow the mount as read-only
252 			 * ("read-down").
253 			 */
254 			if (from_zptr->zone_id == GLOBAL_ZONEID) {
255 				/* make the mount read-only */
256 				vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
257 			} else { /* cross-zone mount */
258 				if (to_zptr->zone_id == GLOBAL_ZONEID &&
259 				    /* LINTED: no consequent */
260 				    getpflags(NET_MAC_AWARE, cr) != 0) {
261 					/* Allow the mount as read-write */
262 				} else if (bldominates(
263 				    label2bslabel(to_zptr->zone_slabel),
264 				    label2bslabel(from_zptr->zone_slabel))) {
265 					/* make the mount read-only */
266 					vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
267 				} else {
268 					VN_RELE(realrootvp);
269 					zone_rele(to_zptr);
270 					zone_rele(from_zptr);
271 					return (EACCES);
272 				}
273 			}
274 		}
275 		zone_rele(to_zptr);
276 		zone_rele(from_zptr);
277 	}
278 
279 	/*
280 	 * realrootvp may be an AUTOFS node, in which case we
281 	 * perform a VOP_ACCESS() to trigger the mount of the
282 	 * intended filesystem, so we loopback mount the intended
283 	 * filesystem instead of the AUTOFS filesystem.
284 	 */
285 	(void) VOP_ACCESS(realrootvp, 0, 0, cr, NULL);
286 
287 	/*
288 	 * We're interested in the top most filesystem.
289 	 * This is specially important when uap->spec is a trigger
290 	 * AUTOFS node, since we're really interested in mounting the
291 	 * filesystem AUTOFS mounted as result of the VOP_ACCESS()
292 	 * call not the AUTOFS node itself.
293 	 */
294 	if (vn_mountedvfs(realrootvp) != NULL) {
295 		if (error = traverse(&realrootvp)) {
296 			VN_RELE(realrootvp);
297 			return (error);
298 		}
299 	}
300 
301 	/*
302 	 * Allocate a vfs info struct and attach it
303 	 */
304 	li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP);
305 	li->li_realvfs = realrootvp->v_vfsp;
306 	li->li_mountvfs = vfsp;
307 
308 	/*
309 	 * Set mount flags to be inherited by loopback vfs's
310 	 */
311 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
312 		li->li_mflag |= VFS_RDONLY;
313 	}
314 	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
315 		li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES);
316 	}
317 	if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
318 		li->li_mflag |= VFS_NODEVICES;
319 	}
320 	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
321 		li->li_mflag |= VFS_NOSETUID;
322 	}
323 	/*
324 	 * Permissive flags are added to the "deny" bitmap.
325 	 */
326 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
327 		li->li_dflag |= VFS_XATTR;
328 	}
329 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
330 		li->li_dflag |= VFS_NBMAND;
331 	}
332 
333 	/*
334 	 * Propagate inheritable mount flags from the real vfs.
335 	 */
336 	if ((li->li_realvfs->vfs_flag & VFS_RDONLY) &&
337 	    !vfs_optionisset(vfsp, MNTOPT_RO, NULL))
338 		vfs_setmntopt(vfsp, MNTOPT_RO, NULL,
339 		    VFS_NODISPLAY);
340 	if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) &&
341 	    !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
342 		vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL,
343 		    VFS_NODISPLAY);
344 	if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) &&
345 	    !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
346 		vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL,
347 		    VFS_NODISPLAY);
348 	/*
349 	 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags
350 	 * such as VFS_RDONLY, are handled differently.  An explicit
351 	 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR.
352 	 */
353 	if ((li->li_realvfs->vfs_flag & VFS_XATTR) &&
354 	    !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) &&
355 	    !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL))
356 		vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL,
357 		    VFS_NODISPLAY);
358 	if ((li->li_realvfs->vfs_flag & VFS_NBMAND) &&
359 	    !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) &&
360 	    !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
361 		vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL,
362 		    VFS_NODISPLAY);
363 
364 	li->li_refct = 0;
365 	vfsp->vfs_data = (caddr_t)li;
366 	vfsp->vfs_bcount = 0;
367 	vfsp->vfs_fstype = lofsfstype;
368 	vfsp->vfs_bsize = li->li_realvfs->vfs_bsize;
369 
370 	vfsp->vfs_dev = li->li_realvfs->vfs_dev;
371 	vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0];
372 	vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1];
373 
374 	if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) {
375 		li->li_flag |= LO_NOSUB;
376 	}
377 
378 	/*
379 	 * Propagate any VFS features
380 	 */
381 
382 	vfs_propagate_features(li->li_realvfs, vfsp);
383 
384 	/*
385 	 * Setup the hashtable. If the root of this mount isn't a directory,
386 	 * there's no point in allocating a large hashtable. A table with one
387 	 * bucket is sufficient.
388 	 */
389 	if (realrootvp->v_type != VDIR)
390 		lsetup(li, 1);
391 	else
392 		lsetup(li, 0);
393 
394 	/*
395 	 * Make the root vnode
396 	 */
397 	srootvp = makelonode(realrootvp, li, 0);
398 	srootvp->v_flag |= VROOT;
399 	li->li_rootvp = srootvp;
400 
401 #ifdef LODEBUG
402 	lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n",
403 	    vfsp, li->li_realvfs, srootvp, realrootvp, li);
404 #endif
405 	return (0);
406 }
407 
408 /*
409  * Undo loopback mount
410  */
411 static int
412 lo_unmount(struct vfs *vfsp, int flag, struct cred *cr)
413 {
414 	struct loinfo *li;
415 
416 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
417 		return (EPERM);
418 
419 	/*
420 	 * Forced unmount is not supported by this file system
421 	 * and thus, ENOTSUP, is being returned.
422 	 */
423 	if (flag & MS_FORCE)
424 		return (ENOTSUP);
425 
426 	li = vtoli(vfsp);
427 #ifdef LODEBUG
428 	lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li);
429 #endif
430 	if (li->li_refct != 1 || li->li_rootvp->v_count != 1) {
431 #ifdef LODEBUG
432 		lo_dprint(4, "refct %d v_ct %d\n", li->li_refct,
433 		    li->li_rootvp->v_count);
434 #endif
435 		return (EBUSY);
436 	}
437 	VN_RELE(li->li_rootvp);
438 	return (0);
439 }
440 
441 /*
442  * Find root of lofs mount.
443  */
444 static int
445 lo_root(struct vfs *vfsp, struct vnode **vpp)
446 {
447 	*vpp = vtoli(vfsp)->li_rootvp;
448 #ifdef LODEBUG
449 	lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp);
450 #endif
451 	/*
452 	 * If the root of the filesystem is a special file, return the specvp
453 	 * version of the vnode. We don't save the specvp vnode in our
454 	 * hashtable since that's exclusively for lnodes.
455 	 */
456 	if (IS_DEVVP(*vpp)) {
457 		struct vnode *svp;
458 
459 		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred);
460 		if (svp == NULL)
461 			return (ENOSYS);
462 		*vpp = svp;
463 	} else {
464 		VN_HOLD(*vpp);
465 	}
466 
467 	return (0);
468 }
469 
470 /*
471  * Get file system statistics.
472  */
473 static int
474 lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp)
475 {
476 	vnode_t *realrootvp;
477 
478 #ifdef LODEBUG
479 	lo_dprint(4, "lostatvfs %p\n", vfsp);
480 #endif
481 	/*
482 	 * Using realrootvp->v_vfsp (instead of the realvfsp that was
483 	 * cached) is necessary to make lofs work woth forced UFS unmounts.
484 	 * In the case of a forced unmount, UFS stores a set of dummy vfsops
485 	 * in all the (i)vnodes in the filesystem. The dummy ops simply
486 	 * returns back EIO.
487 	 */
488 	(void) lo_realvfs(vfsp, &realrootvp);
489 	if (realrootvp != NULL)
490 		return (VFS_STATVFS(realrootvp->v_vfsp, sbp));
491 	else
492 		return (EIO);
493 }
494 
/*
 * Sync vfsop.
 *
 * LOFS doesn't have any data or metadata to flush; pending I/O on the
 * underlying filesystem will be flushed when that filesystem is synced.
 * Nothing is done here and 0 is always returned.
 */
/* ARGSUSED */
static int
lo_sync(struct vfs *vfsp, short flag, struct cred *cr)
{
#ifdef LODEBUG
	lo_dprint(4, "lo_sync: %p\n", vfsp);
#endif
	return (0);
}
510 
511 /*
512  * Obtain the vnode from the underlying filesystem.
513  */
514 static int
515 lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
516 {
517 	vnode_t *realrootvp;
518 
519 #ifdef LODEBUG
520 	lo_dprint(4, "lo_vget: %p\n", vfsp);
521 #endif
522 	(void) lo_realvfs(vfsp, &realrootvp);
523 	if (realrootvp != NULL)
524 		return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp));
525 	else
526 		return (EIO);
527 }
528 
529 /*
530  * Free mount-specific data.
531  */
532 static void
533 lo_freevfs(struct vfs *vfsp)
534 {
535 	struct loinfo *li = vtoli(vfsp);
536 
537 	ldestroy(li);
538 	kmem_free(li, sizeof (struct loinfo));
539 }
540 
541 static int
542 lofsinit(int fstyp, char *name)
543 {
544 	static const fs_operation_def_t lo_vfsops_template[] = {
545 		VFSNAME_MOUNT,		{ .vfs_mount = lo_mount },
546 		VFSNAME_UNMOUNT,	{ .vfs_unmount = lo_unmount },
547 		VFSNAME_ROOT,		{ .vfs_root = lo_root },
548 		VFSNAME_STATVFS,	{ .vfs_statvfs = lo_statvfs },
549 		VFSNAME_SYNC,		{ .vfs_sync = lo_sync },
550 		VFSNAME_VGET,		{ .vfs_vget = lo_vget },
551 		VFSNAME_FREEVFS,	{ .vfs_freevfs = lo_freevfs },
552 		NULL,			NULL
553 	};
554 	int error;
555 
556 	error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops);
557 	if (error != 0) {
558 		cmn_err(CE_WARN, "lofsinit: bad vfs ops template");
559 		return (error);
560 	}
561 
562 	error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops);
563 	if (error != 0) {
564 		(void) vfs_freevfsops_by_type(fstyp);
565 		cmn_err(CE_WARN, "lofsinit: bad vnode ops template");
566 		return (error);
567 	}
568 
569 	lofsfstype = fstyp;
570 
571 	return (0);
572 }
573