1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/errno.h>
31 #include <sys/vnode.h>
32 #include <sys/vfs.h>
33 #include <sys/uio.h>
34 #include <sys/cred.h>
35 #include <sys/pathname.h>
36 #include <sys/debug.h>
37 #include <sys/fs/lofs_node.h>
38 #include <sys/fs/lofs_info.h>
39 #include <fs/fs_subr.h>
40 #include <vm/as.h>
41 #include <vm/seg.h>
42 
/*
 * Evaluates to non-zero when the lofs mount that vp belongs to has the
 * LO_ZONEDEVFS bit set in its per-mount loinfo flags (li_flag).
 */
#define	IS_ZONEDEVFS(vp) \
	(vtoli((vp)->v_vfsp)->li_flag & LO_ZONEDEVFS)
45 
/*
 * These are the vnode ops routines which implement the vnode interface to
 * the looped-back file system.  These routines just take their parameters
 * and then call the appropriate real vnode routine(s) to do the work.
 */
51 
52 static int
53 lo_open(vnode_t **vpp, int flag, struct cred *cr)
54 {
55 	vnode_t *vp = *vpp;
56 	vnode_t *rvp;
57 	vnode_t *oldvp;
58 	int error;
59 
60 #ifdef LODEBUG
61 	lo_dprint(4, "lo_open vp %p cnt=%d realvp %p cnt=%d\n",
62 		vp, vp->v_count, realvp(vp), realvp(vp)->v_count);
63 #endif
64 
65 	oldvp = vp;
66 	vp = rvp = realvp(vp);
67 	/*
68 	 * Need to hold new reference to vp since VOP_OPEN() may
69 	 * decide to release it.
70 	 */
71 	VN_HOLD(vp);
72 	error = VOP_OPEN(&rvp, flag, cr);
73 
74 	if (!error && rvp != vp) {
75 		/*
76 		 * the FS which we called should have released the
77 		 * new reference on vp
78 		 */
79 		*vpp = makelonode(rvp, vtoli(oldvp->v_vfsp), 0);
80 		if ((*vpp)->v_type == VDIR) {
81 			/*
82 			 * Copy over any looping flags to the new lnode.
83 			 */
84 			(vtol(*vpp))->lo_looping |= (vtol(oldvp))->lo_looping;
85 		}
86 		if (IS_DEVVP(*vpp)) {
87 			vnode_t *svp;
88 
89 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
90 			VN_RELE(*vpp);
91 			if (svp == NULL)
92 				error = ENOSYS;
93 			else
94 				*vpp = svp;
95 		}
96 		VN_RELE(oldvp);
97 	} else {
98 		ASSERT(rvp->v_count > 1);
99 		VN_RELE(rvp);
100 	}
101 
102 	return (error);
103 }
104 
105 static int
106 lo_close(
107 	vnode_t *vp,
108 	int flag,
109 	int count,
110 	offset_t offset,
111 	struct cred *cr)
112 {
113 #ifdef LODEBUG
114 	lo_dprint(4, "lo_close vp %p realvp %p\n", vp, realvp(vp));
115 #endif
116 	vp = realvp(vp);
117 	return (VOP_CLOSE(vp, flag, count, offset, cr));
118 }
119 
120 static int
121 lo_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
122 	caller_context_t *ct)
123 {
124 #ifdef LODEBUG
125 	lo_dprint(4, "lo_read vp %p realvp %p\n", vp, realvp(vp));
126 #endif
127 	vp = realvp(vp);
128 	return (VOP_READ(vp, uiop, ioflag, cr, ct));
129 }
130 
131 static int
132 lo_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
133 	caller_context_t *ct)
134 {
135 #ifdef LODEBUG
136 	lo_dprint(4, "lo_write vp %p realvp %p\n", vp, realvp(vp));
137 #endif
138 	vp = realvp(vp);
139 	return (VOP_WRITE(vp, uiop, ioflag, cr, ct));
140 }
141 
142 static int
143 lo_ioctl(
144 	vnode_t *vp,
145 	int cmd,
146 	intptr_t arg,
147 	int flag,
148 	struct cred *cr,
149 	int *rvalp)
150 {
151 #ifdef LODEBUG
152 	lo_dprint(4, "lo_ioctl vp %p realvp %p\n", vp, realvp(vp));
153 #endif
154 	vp = realvp(vp);
155 	return (VOP_IOCTL(vp, cmd, arg, flag, cr, rvalp));
156 }
157 
158 static int
159 lo_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr)
160 {
161 	vp = realvp(vp);
162 	return (VOP_SETFL(vp, oflags, nflags, cr));
163 }
164 
165 static int
166 lo_getattr(
167 	vnode_t *vp,
168 	struct vattr *vap,
169 	int flags,
170 	struct cred *cr)
171 {
172 	int error;
173 
174 #ifdef LODEBUG
175 	lo_dprint(4, "lo_getattr vp %p realvp %p\n", vp, realvp(vp));
176 #endif
177 	if (error = VOP_GETATTR(realvp(vp), vap, flags, cr))
178 		return (error);
179 
180 	/*
181 	 * In zonedevfs mode, we pull a nasty trick; we make sure that
182 	 * the dev_t does *not* reflect the underlying device, so that
183 	 * no renames can occur to or from the /dev hierarchy.
184 	 */
185 	if (IS_ZONEDEVFS(vp)) {
186 		vap->va_fsid = expldev(vp->v_vfsp->vfs_fsid.val[0]);
187 	}
188 
189 	return (0);
190 }
191 
192 static int
193 lo_setattr(
194 	vnode_t *vp,
195 	struct vattr *vap,
196 	int flags,
197 	struct cred *cr,
198 	caller_context_t *ct)
199 {
200 #ifdef LODEBUG
201 	lo_dprint(4, "lo_setattr vp %p realvp %p\n", vp, realvp(vp));
202 #endif
203 	if (IS_ZONEDEVFS(vp) && !IS_DEVVP(vp)) {
204 		return (EACCES);
205 	}
206 	vp = realvp(vp);
207 	return (VOP_SETATTR(vp, vap, flags, cr, ct));
208 }
209 
210 static int
211 lo_access(vnode_t *vp, int mode, int flags, struct cred *cr)
212 {
213 #ifdef LODEBUG
214 	lo_dprint(4, "lo_access vp %p realvp %p\n", vp, realvp(vp));
215 #endif
216 	if (mode & VWRITE) {
217 		if (vp->v_type == VREG && vn_is_readonly(vp))
218 			return (EROFS);
219 		if (IS_ZONEDEVFS(vp) && !IS_DEVVP(vp))
220 			return (EACCES);
221 	}
222 	vp = realvp(vp);
223 	return (VOP_ACCESS(vp, mode, flags, cr));
224 }
225 
226 static int
227 lo_fsync(vnode_t *vp, int syncflag, struct cred *cr)
228 {
229 #ifdef LODEBUG
230 	lo_dprint(4, "lo_fsync vp %p realvp %p\n", vp, realvp(vp));
231 #endif
232 	vp = realvp(vp);
233 	return (VOP_FSYNC(vp, syncflag, cr));
234 }
235 
236 /*ARGSUSED*/
237 static void
238 lo_inactive(vnode_t *vp, struct cred *cr)
239 {
240 #ifdef LODEBUG
241 	lo_dprint(4, "lo_inactive %p, realvp %p\n", vp, realvp(vp));
242 #endif
243 	freelonode(vtol(vp));
244 }
245 
246 /* ARGSUSED */
247 static int
248 lo_fid(vnode_t *vp, struct fid *fidp)
249 {
250 #ifdef LODEBUG
251 	lo_dprint(4, "lo_fid %p, realvp %p\n", vp, realvp(vp));
252 #endif
253 	vp = realvp(vp);
254 	return (VOP_FID(vp, fidp));
255 }
256 
257 /*
258  * Given a vnode of lofs type, lookup nm name and
259  * return a shadow vnode (of lofs type) of the
260  * real vnode found.
261  *
262  * Due to the nature of lofs, there is a potential
263  * looping in path traversal.
264  *
265  * starting from the mount point of an lofs;
266  * a loop is defined to be a traversal path
267  * where the mount point or the real vnode of
268  * the root of this lofs is encountered twice.
269  * Once at the start of traversal and second
270  * when the looping is found.
271  *
272  * When a loop is encountered, a shadow of the
273  * covered vnode is returned to stop the looping.
274  *
275  * This normally works, but with the advent of
276  * the new automounter, returning the shadow of the
277  * covered vnode (autonode, in this case) does not
278  * stop the loop.  Because further lookup on this
279  * lonode will cause the autonode to call lo_lookup()
280  * on the lonode covering it.
281  *
282  * example "/net/jurassic/net/jurassic" is a loop.
283  * returning the shadow of the autonode corresponding to
284  * "/net/jurassic/net/jurassic" will not terminate the
285  * loop.   To solve this problem we allow the loop to go
286  * through one more level component lookup.  Whichever
287  * directory is then looked up in "/net/jurassic/net/jurassic"
288  * the vnode returned is the vnode covered by the autonode
289  * "net" and this will terminate the loop.
290  *
291  * Lookup for dot dot has to be dealt with separately.
292  * It will be nice to have a "one size fits all" kind
293  * of solution, so that we don't have so many ifs statement
294  * in the lo_lookup() to handle dotdot.  But, since
295  * there are so many special cases to handle different
296  * kinds looping above, we need special codes to handle
297  * dotdot lookup as well.
298  */
299 static int
300 lo_lookup(
301 	vnode_t *dvp,
302 	char *nm,
303 	vnode_t **vpp,
304 	struct pathname *pnp,
305 	int flags,
306 	vnode_t *rdir,
307 	struct cred *cr)
308 {
309 	vnode_t *vp = NULL, *tvp = NULL, *nonlovp;
310 	int error, is_indirectloop;
311 	vnode_t *realdvp = realvp(dvp);
312 	struct loinfo *li = vtoli(dvp->v_vfsp);
313 	int looping = 0;
314 	int autoloop = 0;
315 	int doingdotdot = 0;
316 	int nosub = 0;
317 	int mkflag = 0;
318 
319 	/*
320 	 * If name is empty and no XATTR flags are set, then return
321 	 * dvp (empty name == lookup ".").  If an XATTR flag is set
322 	 * then we need to call VOP_LOOKUP to get the xattr dir.
323 	 */
324 	if (nm[0] == '\0' && ! (flags & (CREATE_XATTR_DIR|LOOKUP_XATTR))) {
325 		VN_HOLD(dvp);
326 		*vpp = dvp;
327 		return (0);
328 	}
329 
330 	if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') {
331 		doingdotdot++;
332 		/*
333 		 * Handle ".." out of mounted filesystem
334 		 */
335 		while ((realdvp->v_flag & VROOT) && realdvp != rootdir) {
336 			realdvp = realdvp->v_vfsp->vfs_vnodecovered;
337 			ASSERT(realdvp != NULL);
338 		}
339 	}
340 
341 	*vpp = NULL;	/* default(error) case */
342 
343 	/*
344 	 * Do the normal lookup
345 	 */
346 	if (error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr)) {
347 		vp = NULL;
348 		goto out;
349 	}
350 
351 	/*
352 	 * We do this check here to avoid returning a stale file handle to the
353 	 * caller.
354 	 */
355 	if (nm[0] == '.' && nm[1] == '\0') {
356 		ASSERT(vp == realdvp);
357 		VN_HOLD(dvp);
358 		VN_RELE(vp);
359 		*vpp = dvp;
360 		return (0);
361 	}
362 
363 	if (doingdotdot) {
364 		if ((vtol(dvp))->lo_looping & LO_LOOPING) {
365 			vfs_t *vfsp;
366 
367 			error = vn_vfsrlock_wait(realdvp);
368 			if (error)
369 				goto out;
370 			vfsp = vn_mountedvfs(realdvp);
371 			/*
372 			 * In the standard case if the looping flag is set and
373 			 * performing dotdot we would be returning from a
374 			 * covered vnode, implying vfsp could not be null. The
375 			 * exceptions being if we have looping and overlay
376 			 * mounts or looping and covered file systems.
377 			 */
378 			if (vfsp == NULL) {
379 				/*
380 				 * Overlay mount or covered file system,
381 				 * so just make the shadow node.
382 				 */
383 				vn_vfsunlock(realdvp);
384 				*vpp = makelonode(vp, li, 0);
385 				(vtol(*vpp))->lo_looping |= LO_LOOPING;
386 				return (0);
387 			}
388 			/*
389 			 * When looping get the actual found vnode
390 			 * instead of the vnode covered.
391 			 * Here we have to hold the lock for realdvp
392 			 * since an unmount during the traversal to the
393 			 * root vnode would turn *vfsp into garbage
394 			 * which would be fatal.
395 			 */
396 			error = VFS_ROOT(vfsp, &tvp);
397 			vn_vfsunlock(realdvp);
398 
399 			if (error)
400 				goto out;
401 
402 			if ((tvp == li->li_rootvp) && (vp == realvp(tvp))) {
403 				/*
404 				 * we're back at the real vnode
405 				 * of the rootvp
406 				 *
407 				 * return the rootvp
408 				 * Ex: /mnt/mnt/..
409 				 * where / has been lofs-mounted
410 				 * onto /mnt.  Return the lofs
411 				 * node mounted at /mnt.
412 				 */
413 				*vpp = tvp;
414 				VN_RELE(vp);
415 				return (0);
416 			} else {
417 				/*
418 				 * We are returning from a covered
419 				 * node whose vfs_mountedhere is
420 				 * not pointing to vfs of the current
421 				 * root vnode.
422 				 * This is a condn where in we
423 				 * returned a covered node say Zc
424 				 * but Zc is not the cover of current
425 				 * root.
426 				 * i.e.., if X is the root vnode
427 				 * lookup(Zc,"..") is taking us to
428 				 * X.
429 				 * Ex: /net/X/net/X/Y
430 				 *
431 				 * If LO_AUTOLOOP (autofs/lofs looping detected)
432 				 * has been set then we are encountering the
433 				 * cover of Y (Y being any directory vnode
434 				 * under /net/X/net/X/).
435 				 * When performing a dotdot set the
436 				 * returned vp to the vnode covered
437 				 * by the mounted lofs, ie /net/X/net/X
438 				 */
439 				VN_RELE(tvp);
440 				if ((vtol(dvp))->lo_looping & LO_AUTOLOOP) {
441 					VN_RELE(vp);
442 					vp = li->li_rootvp;
443 					vp = vp->v_vfsp->vfs_vnodecovered;
444 					VN_HOLD(vp);
445 					*vpp = makelonode(vp, li, 0);
446 					(vtol(*vpp))->lo_looping |= LO_LOOPING;
447 					return (0);
448 				}
449 			}
450 		} else {
451 			/*
452 			 * No frills just make the shadow node.
453 			 */
454 			*vpp = makelonode(vp, li, 0);
455 			return (0);
456 		}
457 	}
458 
459 	nosub = (vtoli(dvp->v_vfsp)->li_flag & LO_NOSUB);
460 
461 	/*
462 	 * If this vnode is mounted on, then we
463 	 * traverse to the vnode which is the root of
464 	 * the mounted file system.
465 	 */
466 	if (!nosub && (error = traverse(&vp)))
467 		goto out;
468 
469 	/*
470 	 * Make a lnode for the real vnode.
471 	 */
472 	if (vp->v_type != VDIR || nosub) {
473 		*vpp = makelonode(vp, li, 0);
474 		if (IS_DEVVP(*vpp)) {
475 			vnode_t *svp;
476 
477 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
478 			VN_RELE(*vpp);
479 			if (svp == NULL)
480 				error = ENOSYS;
481 			else
482 				*vpp = svp;
483 		}
484 		return (error);
485 	}
486 
487 	/*
488 	 * if the found vnode (vp) is not of type lofs
489 	 * then we're just going to make a shadow of that
490 	 * vp and get out.
491 	 *
492 	 * If the found vnode (vp) is of lofs type, and
493 	 * we're not doing dotdot, check if we are
494 	 * looping.
495 	 */
496 	if (!doingdotdot && vfs_matchops(vp->v_vfsp, lo_vfsops)) {
497 		/*
498 		 * Check if we're looping, i.e.
499 		 * vp equals the root vp of the lofs, directly
500 		 * or indirectly, return the covered node.
501 		 */
502 
503 		if (!((vtol(dvp))->lo_looping & LO_LOOPING)) {
504 			if (vp == li->li_rootvp) {
505 				/*
506 				 * Direct looping condn.
507 				 * Ex:- X is / mounted directory so lookup of
508 				 * /X/X is a direct looping condn.
509 				 */
510 				tvp = vp;
511 				vp = vp->v_vfsp->vfs_vnodecovered;
512 				VN_HOLD(vp);
513 				VN_RELE(tvp);
514 				looping++;
515 			} else {
516 				/*
517 				 * Indirect looping can be defined as
518 				 * real lookup returning rootvp of the current
519 				 * tree in any level of recursion.
520 				 *
521 				 * This check is useful if there are multiple
522 				 * levels of lofs indirections. Suppose vnode X
523 				 * in the current lookup has as its real vnode
524 				 * another lofs node. Y = realvp(X) Y should be
525 				 * a lofs node for the check to continue or Y
526 				 * is not the rootvp of X.
527 				 * Ex:- say X and Y are two vnodes
528 				 * say real(Y) is X and real(X) is Z
529 				 * parent vnode for X and Y is Z
530 				 * lookup(Y,"path") say we are looking for Y
531 				 * again under Y and we have to return Yc.
532 				 * but the lookup of Y under Y doesnot return
533 				 * Y the root vnode again here is why.
534 				 * 1. lookup(Y,"path of Y") will go to
535 				 * 2. lookup(real(Y),"path of Y") and then to
536 				 * 3. lookup(real(X),"path of Y").
537 				 * and now what lookup level 1 sees is the
538 				 * outcome of 2 but the vnode Y is due to
539 				 * lookup(Z,"path of Y") so we have to skip
540 				 * intermediate levels to find if in any level
541 				 * there is a looping.
542 				 */
543 				is_indirectloop = 0;
544 				nonlovp = vp;
545 				while (
546 				    vfs_matchops(nonlovp->v_vfsp, lo_vfsops) &&
547 				    !(is_indirectloop)) {
548 					if (li->li_rootvp  == nonlovp) {
549 						is_indirectloop++;
550 						break;
551 					}
552 					nonlovp = realvp(nonlovp);
553 				}
554 
555 				if (is_indirectloop) {
556 					VN_RELE(vp);
557 					vp = nonlovp;
558 					vp = vp->v_vfsp->vfs_vnodecovered;
559 					VN_HOLD(vp);
560 					looping++;
561 				}
562 			}
563 		} else {
564 			/*
565 			 * come here only because of the interaction between
566 			 * the autofs and lofs.
567 			 *
568 			 * Lookup of "/net/X/net/X" will return a shadow of
569 			 * an autonode X_a which we call X_l.
570 			 *
571 			 * Lookup of anything under X_l, will trigger a call to
572 			 * auto_lookup(X_a,nm) which will eventually call
573 			 * lo_lookup(X_lr,nm) where X_lr is the root vnode of
574 			 * the current lofs.
575 			 *
576 			 * We come here only when we are called with X_l as dvp
577 			 * and look for something underneath.
578 			 *
579 			 * Now that an autofs/lofs looping condition has been
580 			 * identified any directory vnode contained within
581 			 * dvp will be set to the vnode covered by the
582 			 * mounted autofs. Thus all directories within dvp
583 			 * will appear empty hence teminating the looping.
584 			 * The LO_AUTOLOOP flag is set on the returned lonode
585 			 * to indicate the termination of the autofs/lofs
586 			 * looping. This is required for the correct behaviour
587 			 * when performing a dotdot.
588 			 */
589 			realdvp = realvp(dvp);
590 			while (vfs_matchops(realdvp->v_vfsp, lo_vfsops)) {
591 				realdvp = realvp(realdvp);
592 			}
593 
594 			error = VFS_ROOT(realdvp->v_vfsp, &tvp);
595 			if (error)
596 				goto out;
597 			/*
598 			 * tvp now contains the rootvp of the vfs of the
599 			 * real vnode of dvp. The directory vnode vp is set
600 			 * to the covered vnode to terminate looping. No
601 			 * distinction is made between any vp as all directory
602 			 * vnodes contained in dvp are returned as the covered
603 			 * vnode.
604 			 */
605 			VN_RELE(vp);
606 			vp = tvp;	/* possibly is an autonode */
607 
608 			/*
609 			 * Need to find the covered vnode
610 			 */
611 			if (vp->v_vfsp->vfs_vnodecovered == NULL) {
612 				/*
613 				 * We don't have a covered vnode so this isn't
614 				 * an autonode. To find the autonode simply
615 				 * find the vnode covered by the lofs rootvp.
616 				 */
617 				vp = li->li_rootvp;
618 				vp = vp->v_vfsp->vfs_vnodecovered;
619 				VN_RELE(tvp);
620 				error = VFS_ROOT(vp->v_vfsp, &tvp);
621 				if (error)
622 					goto out;
623 				vp = tvp;	/* now this is an autonode */
624 				if (vp->v_vfsp->vfs_vnodecovered == NULL) {
625 					/*
626 					 * Still can't find a covered vnode.
627 					 * Fail the lookup, or we'd loop.
628 					 */
629 					error = ENOENT;
630 					goto out;
631 				}
632 			}
633 			vp = vp->v_vfsp->vfs_vnodecovered;
634 			VN_HOLD(vp);
635 			VN_RELE(tvp);
636 			/*
637 			 * Force the creation of a new lnode even if the hash
638 			 * table contains a lnode that references this vnode.
639 			 */
640 			mkflag = LOF_FORCE;
641 			autoloop++;
642 		}
643 	}
644 	*vpp = makelonode(vp, li, mkflag);
645 
646 	if ((looping) ||
647 	    (((vtol(dvp))->lo_looping & LO_LOOPING) && !doingdotdot)) {
648 		(vtol(*vpp))->lo_looping |= LO_LOOPING;
649 	}
650 
651 	if (autoloop) {
652 		(vtol(*vpp))->lo_looping |= LO_AUTOLOOP;
653 	}
654 
655 out:
656 	if (error != 0 && vp != NULL)
657 		VN_RELE(vp);
658 #ifdef LODEBUG
659 	lo_dprint(4,
660 	"lo_lookup dvp %x realdvp %x nm '%s' newvp %x real vp %x error %d\n",
661 		dvp, realvp(dvp), nm, *vpp, vp, error);
662 #endif
663 	return (error);
664 }
665 
666 /*ARGSUSED*/
667 static int
668 lo_create(
669 	vnode_t *dvp,
670 	char *nm,
671 	struct vattr *va,
672 	enum vcexcl exclusive,
673 	int mode,
674 	vnode_t **vpp,
675 	struct cred *cr,
676 	int flag)
677 {
678 	int error;
679 	vnode_t *vp = NULL;
680 	vnode_t *tvp = NULL;
681 
682 #ifdef LODEBUG
683 	lo_dprint(4, "lo_create vp %p realvp %p\n", dvp, realvp(dvp));
684 #endif
685 	if (*nm == '\0') {
686 		ASSERT(vpp && dvp == *vpp);
687 		vp = realvp(*vpp);
688 	}
689 
690 	if (IS_ZONEDEVFS(dvp)) {
691 
692 		/*
693 		 * In the case of an exclusive create, *vpp will not
694 		 * be populated.  We must check to see if the file exists.
695 		 */
696 		if ((exclusive == EXCL) && (*nm != '\0')) {
697 			(void) VOP_LOOKUP(dvp, nm, &tvp, NULL, 0, NULL, cr);
698 		}
699 
700 		/* Is this truly a create?  If so, fail */
701 		if ((*vpp == NULL) && (tvp == NULL))
702 			return (EACCES);
703 
704 		if (tvp != NULL)
705 			VN_RELE(tvp);
706 
707 		/* Is this an open of a non-special for writing?  If so, fail */
708 		if (*vpp != NULL && (mode & VWRITE) && !IS_DEVVP(*vpp))
709 			return (EACCES);
710 	}
711 
712 	error = VOP_CREATE(realvp(dvp), nm, va, exclusive, mode, &vp, cr, flag);
713 	if (!error) {
714 		*vpp = makelonode(vp, vtoli(dvp->v_vfsp), 0);
715 		if (IS_DEVVP(*vpp)) {
716 			vnode_t *svp;
717 
718 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
719 			VN_RELE(*vpp);
720 			if (svp == NULL)
721 				error = ENOSYS;
722 			else
723 				*vpp = svp;
724 		}
725 	}
726 	return (error);
727 }
728 
729 static int
730 lo_remove(vnode_t *dvp, char *nm, struct cred *cr)
731 {
732 #ifdef LODEBUG
733 	lo_dprint(4, "lo_remove vp %p realvp %p\n", dvp, realvp(dvp));
734 #endif
735 	if (IS_ZONEDEVFS(dvp))
736 		return (EACCES);
737 	dvp = realvp(dvp);
738 	return (VOP_REMOVE(dvp, nm, cr));
739 }
740 
741 static int
742 lo_link(vnode_t *tdvp, vnode_t *vp, char *tnm, struct cred *cr)
743 {
744 #ifdef LODEBUG
745 	lo_dprint(4, "lo_link vp %p realvp %p\n", vp, realvp(vp));
746 #endif
747 	while (vn_matchops(vp, lo_vnodeops)) {
748 		if (IS_ZONEDEVFS(vp))
749 			return (EACCES);
750 		vp = realvp(vp);
751 	}
752 	while (vn_matchops(tdvp, lo_vnodeops)) {
753 		if (IS_ZONEDEVFS(tdvp))
754 			return (EACCES);
755 		tdvp = realvp(tdvp);
756 	}
757 	if (vp->v_vfsp != tdvp->v_vfsp)
758 		return (EXDEV);
759 	return (VOP_LINK(tdvp, vp, tnm, cr));
760 }
761 
762 static int
763 lo_rename(
764 	vnode_t *odvp,
765 	char *onm,
766 	vnode_t *ndvp,
767 	char *nnm,
768 	struct cred *cr)
769 {
770 	vnode_t *tnvp;
771 
772 #ifdef LODEBUG
773 	lo_dprint(4, "lo_rename vp %p realvp %p\n", odvp, realvp(odvp));
774 #endif
775 	if (IS_ZONEDEVFS(odvp))
776 		return (EACCES);
777 	/*
778 	 * If we are coming from a loop back mounted fs, that has been
779 	 * mounted in the same filesystem as where we want to move to,
780 	 * and that filesystem is read/write, but the lofs filesystem is
781 	 * read only, we don't want to allow a rename of the file. The
782 	 * vn_rename code checks to be sure the target is read/write already
783 	 * so that is not necessary here. However, consider the following
784 	 * example:
785 	 *		/ - regular root fs
786 	 *		/foo - directory in root
787 	 *		/foo/bar - file in foo directory(in root fs)
788 	 *		/baz - directory in root
789 	 *		mount -F lofs -o ro /foo /baz - all still in root
790 	 *			directory
791 	 * The fact that we mounted /foo on /baz read only should stop us
792 	 * from renaming the file /foo/bar /bar, but it doesn't since
793 	 * / is read/write. We are still renaming here since we are still
794 	 * in the same filesystem, it is just that we do not check to see
795 	 * if the filesystem we are coming from in this case is read only.
796 	 */
797 	if (odvp->v_vfsp->vfs_flag & VFS_RDONLY)
798 		return (EROFS);
799 	/*
800 	 * We need to make sure we're not trying to remove a mount point for a
801 	 * filesystem mounted on top of lofs, which only we know about.
802 	 */
803 	if (vn_matchops(ndvp, lo_vnodeops))	/* Not our problem. */
804 		goto rename;
805 	if (VOP_LOOKUP(ndvp, nnm, &tnvp, NULL, 0, NULL, cr) != 0)
806 		goto rename;
807 	if (tnvp->v_type != VDIR) {
808 		VN_RELE(tnvp);
809 		goto rename;
810 	}
811 	if (vn_mountedvfs(tnvp)) {
812 		VN_RELE(tnvp);
813 		return (EBUSY);
814 	}
815 	VN_RELE(tnvp);
816 rename:
817 	/*
818 	 * Since the case we're dealing with above can happen at any layer in
819 	 * the stack of lofs filesystems, we need to recurse down the stack,
820 	 * checking to see if there are any instances of a filesystem mounted on
821 	 * top of lofs. In order to keep on using the lofs version of
822 	 * VOP_RENAME(), we make sure that while the target directory is of type
823 	 * lofs, the source directory (the one used for getting the fs-specific
824 	 * version of VOP_RENAME()) is also of type lofs.
825 	 */
826 	if (vn_matchops(ndvp, lo_vnodeops)) {
827 		if (IS_ZONEDEVFS(ndvp))
828 			return (EACCES);
829 		ndvp = realvp(ndvp);	/* Check the next layer */
830 	} else {
831 		/*
832 		 * We can go fast here
833 		 */
834 		while (vn_matchops(odvp, lo_vnodeops)) {
835 			if (IS_ZONEDEVFS(odvp))
836 				return (EACCES);
837 			odvp = realvp(odvp);
838 		}
839 		if (odvp->v_vfsp != ndvp->v_vfsp)
840 			return (EXDEV);
841 	}
842 	return (VOP_RENAME(odvp, onm, ndvp, nnm, cr));
843 }
844 
845 static int
846 lo_mkdir(
847 	vnode_t *dvp,
848 	char *nm,
849 	struct vattr *va,
850 	vnode_t **vpp,
851 	struct cred *cr)
852 {
853 	int error;
854 
855 #ifdef LODEBUG
856 	lo_dprint(4, "lo_mkdir vp %p realvp %p\n", dvp, realvp(dvp));
857 #endif
858 	if (IS_ZONEDEVFS(dvp))
859 		return (EACCES);
860 	error = VOP_MKDIR(realvp(dvp), nm, va, vpp, cr);
861 	if (!error)
862 		*vpp = makelonode(*vpp, vtoli(dvp->v_vfsp), 0);
863 	return (error);
864 }
865 
866 static int
867 lo_realvp(vnode_t *vp, vnode_t **vpp)
868 {
869 #ifdef LODEBUG
870 	lo_dprint(4, "lo_realvp %p\n", vp);
871 #endif
872 	while (vn_matchops(vp, lo_vnodeops))
873 		vp = realvp(vp);
874 
875 	if (VOP_REALVP(vp, vpp) != 0)
876 		*vpp = vp;
877 	return (0);
878 }
879 
880 static int
881 lo_rmdir(
882 	vnode_t *dvp,
883 	char *nm,
884 	vnode_t *cdir,
885 	struct cred *cr)
886 {
887 	vnode_t *rvp = cdir;
888 
889 #ifdef LODEBUG
890 	lo_dprint(4, "lo_rmdir vp %p realvp %p\n", dvp, realvp(dvp));
891 #endif
892 	if (IS_ZONEDEVFS(dvp))
893 		return (EACCES);
894 	/* if cdir is lofs vnode ptr get its real vnode ptr */
895 	if (vn_matchops(dvp, vn_getops(rvp)))
896 		(void) lo_realvp(cdir, &rvp);
897 	dvp = realvp(dvp);
898 	return (VOP_RMDIR(dvp, nm, rvp, cr));
899 }
900 
901 static int
902 lo_symlink(
903 	vnode_t *dvp,
904 	char *lnm,
905 	struct vattr *tva,
906 	char *tnm,
907 	struct cred *cr)
908 {
909 #ifdef LODEBUG
910 	lo_dprint(4, "lo_symlink vp %p realvp %p\n", dvp, realvp(dvp));
911 #endif
912 	if (IS_ZONEDEVFS(dvp))
913 		return (EACCES);
914 	dvp = realvp(dvp);
915 	return (VOP_SYMLINK(dvp, lnm, tva, tnm, cr));
916 }
917 
918 static int
919 lo_readlink(vnode_t *vp, struct uio *uiop, struct cred *cr)
920 {
921 	vp = realvp(vp);
922 	return (VOP_READLINK(vp, uiop, cr));
923 }
924 
925 static int
926 lo_readdir(vnode_t *vp, struct uio *uiop, struct cred *cr, int *eofp)
927 {
928 #ifdef LODEBUG
929 	lo_dprint(4, "lo_readdir vp %p realvp %p\n", vp, realvp(vp));
930 #endif
931 	vp = realvp(vp);
932 	return (VOP_READDIR(vp, uiop, cr, eofp));
933 }
934 
935 static int
936 lo_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
937 {
938 	vp = realvp(vp);
939 	return (VOP_RWLOCK(vp, write_lock, ct));
940 }
941 
942 static void
943 lo_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
944 {
945 	vp = realvp(vp);
946 	VOP_RWUNLOCK(vp, write_lock, ct);
947 }
948 
949 static int
950 lo_seek(vnode_t *vp, offset_t ooff, offset_t *noffp)
951 {
952 	vp = realvp(vp);
953 	return (VOP_SEEK(vp, ooff, noffp));
954 }
955 
956 static int
957 lo_cmp(vnode_t *vp1, vnode_t *vp2)
958 {
959 	while (vn_matchops(vp1, lo_vnodeops))
960 		vp1 = realvp(vp1);
961 	while (vn_matchops(vp2, lo_vnodeops))
962 		vp2 = realvp(vp2);
963 	return (VOP_CMP(vp1, vp2));
964 }
965 
966 static int
967 lo_frlock(
968 	vnode_t *vp,
969 	int cmd,
970 	struct flock64 *bfp,
971 	int flag,
972 	offset_t offset,
973 	struct flk_callback *flk_cbp,
974 	cred_t *cr)
975 {
976 	vp = realvp(vp);
977 	return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr));
978 }
979 
980 static int
981 lo_space(
982 	vnode_t *vp,
983 	int cmd,
984 	struct flock64 *bfp,
985 	int flag,
986 	offset_t offset,
987 	struct cred *cr,
988 	caller_context_t *ct)
989 {
990 	vp = realvp(vp);
991 	return (VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct));
992 }
993 
994 static int
995 lo_getpage(
996 	vnode_t *vp,
997 	offset_t off,
998 	size_t len,
999 	uint_t *prot,
1000 	struct page *parr[],
1001 	size_t psz,
1002 	struct seg *seg,
1003 	caddr_t addr,
1004 	enum seg_rw rw,
1005 	struct cred *cr)
1006 {
1007 	vp = realvp(vp);
1008 	return (VOP_GETPAGE(vp, off, len, prot, parr, psz, seg, addr, rw, cr));
1009 }
1010 
1011 static int
1012 lo_putpage(vnode_t *vp, offset_t off, size_t len, int flags, struct cred *cr)
1013 {
1014 	vp = realvp(vp);
1015 	return (VOP_PUTPAGE(vp, off, len, flags, cr));
1016 }
1017 
1018 static int
1019 lo_map(
1020 	vnode_t *vp,
1021 	offset_t off,
1022 	struct as *as,
1023 	caddr_t *addrp,
1024 	size_t len,
1025 	uchar_t prot,
1026 	uchar_t maxprot,
1027 	uint_t flags,
1028 	struct cred *cr)
1029 {
1030 	vp = realvp(vp);
1031 	return (VOP_MAP(vp, off, as, addrp, len, prot, maxprot, flags, cr));
1032 }
1033 
1034 static int
1035 lo_addmap(
1036 	vnode_t *vp,
1037 	offset_t off,
1038 	struct as *as,
1039 	caddr_t addr,
1040 	size_t len,
1041 	uchar_t prot,
1042 	uchar_t maxprot,
1043 	uint_t flags,
1044 	struct cred *cr)
1045 {
1046 	vp = realvp(vp);
1047 	return (VOP_ADDMAP(vp, off, as, addr, len, prot, maxprot, flags, cr));
1048 }
1049 
1050 static int
1051 lo_delmap(
1052 	vnode_t *vp,
1053 	offset_t off,
1054 	struct as *as,
1055 	caddr_t addr,
1056 	size_t len,
1057 	uint_t prot,
1058 	uint_t maxprot,
1059 	uint_t flags,
1060 	struct cred *cr)
1061 {
1062 	vp = realvp(vp);
1063 	return (VOP_DELMAP(vp, off, as, addr, len, prot, maxprot, flags, cr));
1064 }
1065 
1066 static int
1067 lo_poll(
1068 	vnode_t *vp,
1069 	short events,
1070 	int anyyet,
1071 	short *reventsp,
1072 	struct pollhead **phpp)
1073 {
1074 	vp = realvp(vp);
1075 	return (VOP_POLL(vp, events, anyyet, reventsp, phpp));
1076 }
1077 
1078 static int
1079 lo_dump(vnode_t *vp, caddr_t addr, int bn, int count)
1080 {
1081 	vp = realvp(vp);
1082 	return (VOP_DUMP(vp, addr, bn, count));
1083 }
1084 
1085 static int
1086 lo_pathconf(vnode_t *vp, int cmd, ulong_t *valp, struct cred *cr)
1087 {
1088 	vp = realvp(vp);
1089 	return (VOP_PATHCONF(vp, cmd, valp, cr));
1090 }
1091 
1092 static int
1093 lo_pageio(
1094 	vnode_t *vp,
1095 	struct page *pp,
1096 	u_offset_t io_off,
1097 	size_t io_len,
1098 	int flags,
1099 	cred_t *cr)
1100 {
1101 	vp = realvp(vp);
1102 	return (VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr));
1103 }
1104 
1105 static void
1106 lo_dispose(vnode_t *vp, page_t *pp, int fl, int dn, cred_t *cr)
1107 {
1108 	vp = realvp(vp);
1109 	if (vp != NULL && vp != &kvp)
1110 		VOP_DISPOSE(vp, pp, fl, dn, cr);
1111 }
1112 
1113 static int
1114 lo_setsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr)
1115 {
1116 	if (vn_is_readonly(vp))
1117 		return (EROFS);
1118 	vp = realvp(vp);
1119 	return (VOP_SETSECATTR(vp, secattr, flags, cr));
1120 }
1121 
1122 static int
1123 lo_getsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr)
1124 {
1125 	vp = realvp(vp);
1126 	return (VOP_GETSECATTR(vp, secattr, flags, cr));
1127 }
1128 
1129 static int
1130 lo_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr)
1131 {
1132 	vp = realvp(vp);
1133 	return (VOP_SHRLOCK(vp, cmd, shr, flag, cr));
1134 }
1135 
/*
 * Loopback vnode operations vector.
 */

/*
 * Vnode ops pointer for lofs vnodes.  The routines in this file compare
 * against it with vn_matchops() to recognise lofs vnodes.  It is not
 * assigned in this file; presumably it is built from the template below
 * when the file system is initialised (verify against the lofs vfs code).
 */
struct vnodeops *lo_vnodeops;

/*
 * Template pairing each VOPNAME_* operation name with the lofs routine
 * that implements it.  The list is terminated by a NULL, NULL entry.
 * Handlers whose signature differs from the generic one are cast to
 * fs_generic_func_p.
 */
const fs_operation_def_t lo_vnodeops_template[] = {
	VOPNAME_OPEN, lo_open,
	VOPNAME_CLOSE, lo_close,
	VOPNAME_READ, lo_read,
	VOPNAME_WRITE, lo_write,
	VOPNAME_IOCTL, lo_ioctl,
	VOPNAME_SETFL, lo_setfl,
	VOPNAME_GETATTR, lo_getattr,
	VOPNAME_SETATTR, lo_setattr,
	VOPNAME_ACCESS, lo_access,
	VOPNAME_LOOKUP, lo_lookup,
	VOPNAME_CREATE, lo_create,
	VOPNAME_REMOVE, lo_remove,
	VOPNAME_LINK, lo_link,
	VOPNAME_RENAME, lo_rename,
	VOPNAME_MKDIR, lo_mkdir,
	VOPNAME_RMDIR, lo_rmdir,
	VOPNAME_READDIR, lo_readdir,
	VOPNAME_SYMLINK, lo_symlink,
	VOPNAME_READLINK, lo_readlink,
	VOPNAME_FSYNC, lo_fsync,
	VOPNAME_INACTIVE, (fs_generic_func_p) lo_inactive,
	VOPNAME_FID, lo_fid,
	VOPNAME_RWLOCK, lo_rwlock,
	VOPNAME_RWUNLOCK, (fs_generic_func_p) lo_rwunlock,
	VOPNAME_SEEK, lo_seek,
	VOPNAME_CMP, lo_cmp,
	VOPNAME_FRLOCK, lo_frlock,
	VOPNAME_SPACE, lo_space,
	VOPNAME_REALVP, lo_realvp,
	VOPNAME_GETPAGE, lo_getpage,
	VOPNAME_PUTPAGE, lo_putpage,
	VOPNAME_MAP, (fs_generic_func_p) lo_map,
	VOPNAME_ADDMAP, (fs_generic_func_p) lo_addmap,
	VOPNAME_DELMAP, lo_delmap,
	VOPNAME_POLL, (fs_generic_func_p) lo_poll,
	VOPNAME_DUMP, lo_dump,
	/* Dump control is the only entry wired to the generic fs_error. */
	VOPNAME_DUMPCTL, fs_error,		/* XXX - why? */
	VOPNAME_PATHCONF, lo_pathconf,
	VOPNAME_PAGEIO, lo_pageio,
	VOPNAME_DISPOSE, (fs_generic_func_p) lo_dispose,
	VOPNAME_SETSECATTR, lo_setsecattr,
	VOPNAME_GETSECATTR, lo_getsecattr,
	VOPNAME_SHRLOCK, lo_shrlock,
	NULL, NULL
};
1188