1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/errno.h>
32 #include <sys/vnode.h>
33 #include <sys/vfs.h>
34 #include <sys/uio.h>
35 #include <sys/cred.h>
36 #include <sys/pathname.h>
37 #include <sys/debug.h>
38 #include <sys/fs/lofs_node.h>
39 #include <sys/fs/lofs_info.h>
40 #include <fs/fs_subr.h>
41 #include <vm/as.h>
42 #include <vm/seg.h>
43 
/*
 * Evaluates to non-zero when the loinfo attached to vp's vfs (obtained
 * through vtoli()) has the LO_ZONEDEVFS bit set in li_flag.  The result is
 * the masked flag value, not a normalized 0/1.
 * NOTE(review): presumably only meaningful for vnodes that live on a lofs
 * mount, since v_vfsp is interpreted via vtoli() -- confirm with callers.
 */
#define	IS_ZONEDEVFS(vp) \
	(vtoli((vp)->v_vfsp)->li_flag & LO_ZONEDEVFS)
46 
47 /*
48  * These are the vnode ops routines which implement the vnode interface to
 * the looped-back file system.  These routines just take their parameters
 * and then call the appropriate real vnode routine(s) to do the work.
51  */
52 
53 static int
54 lo_open(vnode_t **vpp, int flag, struct cred *cr)
55 {
56 	vnode_t *vp = *vpp;
57 	vnode_t *rvp;
58 	vnode_t *oldvp;
59 	int error;
60 
61 #ifdef LODEBUG
62 	lo_dprint(4, "lo_open vp %p cnt=%d realvp %p cnt=%d\n",
63 		vp, vp->v_count, realvp(vp), realvp(vp)->v_count);
64 #endif
65 
66 	oldvp = vp;
67 	vp = rvp = realvp(vp);
68 	/*
69 	 * Need to hold new reference to vp since VOP_OPEN() may
70 	 * decide to release it.
71 	 */
72 	VN_HOLD(vp);
73 	error = VOP_OPEN(&rvp, flag, cr);
74 
75 	if (!error && rvp != vp) {
76 		/*
77 		 * the FS which we called should have released the
78 		 * new reference on vp
79 		 */
80 		*vpp = makelonode(rvp, vtoli(oldvp->v_vfsp));
81 		if (IS_DEVVP(*vpp)) {
82 			vnode_t *svp;
83 
84 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
85 			VN_RELE(*vpp);
86 			if (svp == NULL)
87 				error = ENOSYS;
88 			else
89 				*vpp = svp;
90 		}
91 		VN_RELE(oldvp);
92 	} else {
93 		ASSERT(rvp->v_count > 1);
94 		VN_RELE(rvp);
95 	}
96 
97 	return (error);
98 }
99 
100 static int
101 lo_close(
102 	vnode_t *vp,
103 	int flag,
104 	int count,
105 	offset_t offset,
106 	struct cred *cr)
107 {
108 #ifdef LODEBUG
109 	lo_dprint(4, "lo_close vp %p realvp %p\n", vp, realvp(vp));
110 #endif
111 	vp = realvp(vp);
112 	return (VOP_CLOSE(vp, flag, count, offset, cr));
113 }
114 
115 static int
116 lo_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
117 	caller_context_t *ct)
118 {
119 #ifdef LODEBUG
120 	lo_dprint(4, "lo_read vp %p realvp %p\n", vp, realvp(vp));
121 #endif
122 	vp = realvp(vp);
123 	return (VOP_READ(vp, uiop, ioflag, cr, ct));
124 }
125 
126 static int
127 lo_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
128 	caller_context_t *ct)
129 {
130 #ifdef LODEBUG
131 	lo_dprint(4, "lo_write vp %p realvp %p\n", vp, realvp(vp));
132 #endif
133 	vp = realvp(vp);
134 	return (VOP_WRITE(vp, uiop, ioflag, cr, ct));
135 }
136 
137 static int
138 lo_ioctl(
139 	vnode_t *vp,
140 	int cmd,
141 	intptr_t arg,
142 	int flag,
143 	struct cred *cr,
144 	int *rvalp)
145 {
146 #ifdef LODEBUG
147 	lo_dprint(4, "lo_ioctl vp %p realvp %p\n", vp, realvp(vp));
148 #endif
149 	vp = realvp(vp);
150 	return (VOP_IOCTL(vp, cmd, arg, flag, cr, rvalp));
151 }
152 
153 static int
154 lo_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr)
155 {
156 	vp = realvp(vp);
157 	return (VOP_SETFL(vp, oflags, nflags, cr));
158 }
159 
160 static int
161 lo_getattr(
162 	vnode_t *vp,
163 	struct vattr *vap,
164 	int flags,
165 	struct cred *cr)
166 {
167 	int error;
168 
169 #ifdef LODEBUG
170 	lo_dprint(4, "lo_getattr vp %p realvp %p\n", vp, realvp(vp));
171 #endif
172 	if (error = VOP_GETATTR(realvp(vp), vap, flags, cr))
173 		return (error);
174 
175 	/*
176 	 * In zonedevfs mode, we pull a nasty trick; we make sure that
177 	 * the dev_t does *not* reflect the underlying device, so that
178 	 * no renames can occur to or from the /dev hierarchy.
179 	 */
180 	if (IS_ZONEDEVFS(vp)) {
181 		vap->va_fsid = expldev(vp->v_vfsp->vfs_fsid.val[0]);
182 	}
183 
184 	return (0);
185 }
186 
187 static int
188 lo_setattr(
189 	vnode_t *vp,
190 	struct vattr *vap,
191 	int flags,
192 	struct cred *cr,
193 	caller_context_t *ct)
194 {
195 #ifdef LODEBUG
196 	lo_dprint(4, "lo_setattr vp %p realvp %p\n", vp, realvp(vp));
197 #endif
198 	if (IS_ZONEDEVFS(vp) && !IS_DEVVP(vp)) {
199 		return (EACCES);
200 	}
201 	vp = realvp(vp);
202 	return (VOP_SETATTR(vp, vap, flags, cr, ct));
203 }
204 
205 static int
206 lo_access(vnode_t *vp, int mode, int flags, struct cred *cr)
207 {
208 #ifdef LODEBUG
209 	lo_dprint(4, "lo_access vp %p realvp %p\n", vp, realvp(vp));
210 #endif
211 	if (mode & VWRITE) {
212 		if (vp->v_type == VREG && vn_is_readonly(vp))
213 			return (EROFS);
214 		if (IS_ZONEDEVFS(vp) && !IS_DEVVP(vp))
215 			return (EACCES);
216 	}
217 	vp = realvp(vp);
218 	return (VOP_ACCESS(vp, mode, flags, cr));
219 }
220 
221 static int
222 lo_fsync(vnode_t *vp, int syncflag, struct cred *cr)
223 {
224 #ifdef LODEBUG
225 	lo_dprint(4, "lo_fsync vp %p realvp %p\n", vp, realvp(vp));
226 #endif
227 	vp = realvp(vp);
228 	return (VOP_FSYNC(vp, syncflag, cr));
229 }
230 
231 /*ARGSUSED*/
232 static void
233 lo_inactive(vnode_t *vp, struct cred *cr)
234 {
235 #ifdef LODEBUG
236 	lo_dprint(4, "lo_inactive %p, realvp %p\n", vp, realvp(vp));
237 #endif
238 	freelonode(vtol(vp));
239 }
240 
241 /* ARGSUSED */
242 static int
243 lo_fid(vnode_t *vp, struct fid *fidp)
244 {
245 #ifdef LODEBUG
246 	lo_dprint(4, "lo_fid %p, realvp %p\n", vp, realvp(vp));
247 #endif
248 	vp = realvp(vp);
249 	return (VOP_FID(vp, fidp));
250 }
251 
/*
 * Given a vnode of lofs type, look up the name nm and
 * return a shadow vnode (of lofs type) of the
 * real vnode found.
 *
 * Due to the nature of lofs, there is a potential for
 * looping in path traversal.
 *
 * Starting from the mount point of an lofs,
 * a loop is defined to be a traversal path
 * where the mount point or the real vnode of
 * the root of this lofs is encountered twice:
 * once at the start of traversal and a second time
 * when the looping is found.
 *
 * When a loop is encountered, a shadow of the
 * covered vnode is returned to stop the looping.
 *
 * This normally works, but with the advent of
 * the new automounter, returning the shadow of the
 * covered vnode (autonode, in this case) does not
 * stop the loop, because a further lookup on this
 * lonode will cause the autonode to call lo_lookup()
 * on the lonode covering it.
 *
 * Example: "/net/jurassic/net/jurassic" is a loop.
 * Returning the shadow of the autonode corresponding to
 * "/net/jurassic/net/jurassic" will not terminate the
 * loop.  To solve this problem we allow the loop to go
 * through one more level of component lookup.  If it hits
 * "net" after the loop, as in "/net/jurassic/net/jurassic/net",
 * then returning the vnode covered by the autonode "net"
 * will terminate the loop.
 *
 * Lookup for dot dot has to be dealt with separately.
 * It would be nice to have a "one size fits all" kind
 * of solution, so that we don't have so many if statements
 * in lo_lookup() to handle dotdot.  But, since
 * there are so many special cases to handle the different
 * kinds of looping above, we need special code to handle
 * dotdot lookup as well.
 *
 * Arguments:
 *	dvp	lofs directory vnode to search in
 *	nm	component name; "" and "." resolve to dvp itself
 *	vpp	out: held result vnode on success
 *	pnp, flags, rdir, cr
 *		handed unchanged to VOP_LOOKUP() on the real directory
 *		(flags is also tested for the XATTR bits; cr is also
 *		given to specvp())
 *
 * Returns 0 on success.  Errors come from VOP_LOOKUP(),
 * vn_vfswlock_wait(), VFS_ROOT() or traverse(), or are ENOSYS when
 * specvp() returns NULL.  On the "goto out" error paths *vpp is left NULL.
 */
static int
lo_lookup(
	vnode_t *dvp,
	char *nm,
	vnode_t **vpp,
	struct pathname *pnp,
	int flags,
	vnode_t *rdir,
	struct cred *cr)
{
	vnode_t *vp = NULL, *tvp = NULL, *nonlovp;
	int error, is_indirectloop;
	vnode_t *realdvp = realvp(dvp);
	struct loinfo *li = vtoli(dvp->v_vfsp);
	int looping = 0;	/* set when this lookup detects a loop */
	int doingdotdot = 0;	/* set when nm is ".." */
	int nosub = 0;		/* LO_NOSUB bit of this mount's li_flag */

	/*
	 * If name is empty and no XATTR flags are set, then return
	 * dvp (empty name == lookup ".").  If an XATTR flag is set
	 * then we need to call VOP_LOOKUP to get the xattr dir.
	 */
	if (nm[0] == '\0' && ! (flags & (CREATE_XATTR_DIR|LOOKUP_XATTR))) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') {
		doingdotdot++;
		/*
		 * Handle ".." out of mounted filesystem: while the real
		 * directory is the root of its vfs (and not the system
		 * root), step to the vnode that vfs covers.
		 */
		while ((realdvp->v_flag & VROOT) && realdvp != rootdir) {
			realdvp = realdvp->v_vfsp->vfs_vnodecovered;
			ASSERT(realdvp != NULL);
		}
	}

	*vpp = NULL;	/* default(error) case */

	/*
	 * Do the normal lookup
	 */
	if (error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr))
		goto out;

	/*
	 * We do this check here to avoid returning a stale file handle to the
	 * caller.
	 */
	if (nm[0] == '.' && nm[1] == '\0') {
		ASSERT(vp == realdvp);
		/* Return the lofs directory itself; drop the real-vnode hold */
		VN_HOLD(dvp);
		VN_RELE(vp);
		*vpp = dvp;
		return (0);
	}

	if (doingdotdot) {
		if ((vtol(dvp))->lo_looping) {
			vfs_t *vfsp;

			error = vn_vfswlock_wait(realdvp);
			if (error)
				goto out;
			vfsp = vn_mountedvfs(realdvp);
			if (vfsp != NULL) {
				/*
				 * If looping, get the actual found vnode
				 * instead of the vnode covered.
				 * Here we have to hold the lock for realdvp
				 * since an unmount during the traversal to the
				 * root vnode would turn *vfsp into garbage
				 * which would be fatal.
				 */
				vfs_lock_wait(vfsp);
				vn_vfsunlock(realdvp);

				error = VFS_ROOT(vfsp, &tvp);

				vfs_unlock(vfsp);
				if (error)
					goto out;
				if ((tvp == li->li_rootvp)&&
				    (vp == realvp(tvp))) {
					/*
					 * we're back at the real vnode
					 * of the rootvp
					 *
					 * return the rootvp
					 * Ex: /mnt/mnt/..
					 * where / has been lofs-mounted
					 * onto /mnt.  Return the lofs
					 * node mounted at /mnt.
					 */
					*vpp = tvp;
					VN_RELE(vp);
					return (0);
				} else {
					/*
					 * We are returning from a covered
					 * node whose vfs_mountedhere is
					 * not pointing to vfs of the current
					 * root vnode.
					 * This is a condition wherein we
					 * returned a covered node, say Zc,
					 * but Zc is not the cover of current
					 * root.
					 * i.e., if X is the root vnode
					 * lookup(Zc,"..") is taking us to
					 * X.
					 * Ex: /net/X/net/X/net
					 * We are encountering cover of net.
					 * Doing a dotdot from here means we
					 * have to take the lookup to the same
					 * state that would have happened when
					 * we do lookup of any Y under
					 * /net/X/net/X
					 */
					VN_RELE(tvp);
					if (vp == realvp(li->li_rootvp)) {
						VN_RELE(vp);
						vp = li->li_rootvp;
						vp = vp->v_vfsp->
							vfs_vnodecovered;
						VN_HOLD(vp);
						*vpp = makelonode(vp, li);
						(vtol(*vpp))->lo_looping = 1;
						return (0);
					}
					/*
					 * Otherwise fall out of the dotdot
					 * handling and continue with the
					 * common code below.
					 */
				}
			} else {
				/*
				 * We are returning from a looping dvp.
				 * If we are returning to rootvp return
				 * the covered node with looping bit set.
				 *
				 * This means we are not returning from cover
				 * but we should return to the root node by
				 * giving the covered node with looping flag
				 * set. We are returning from a non-covernode
				 * with looping bit set means we couldn't stop
				 * by giving the cover of root vnode.
				 *
				 *	Say X is the root vnode and lookup of
				 * X again under X returns Xc (due to looping
				 * condition). Let Z=lookup(Xc,"path") and
				 * if lookup(Z,"..") returns the root vp X
				 * return Xc with looping bit set or if a new
				 * node Z.. is returned make a shadow with a
				 * looping flag.
				 *
				 * Ex:- lookup of /net/X/net/X/Y/.. or
				 * lookup of /net/X/net/X/Y/Z/.. .
				 * In the first case we are returning to root
				 * we will return the cover of root with
				 * looping bit set.
				 */
				vn_vfsunlock(realdvp);
				if (vp == li->li_rootvp) {
					tvp = vp;
					vp = (vp)->v_vfsp->vfs_vnodecovered;
					VN_RELE(tvp);
					VN_HOLD(vp);
				}
				*vpp = makelonode(vp, li);
				(vtol(*vpp))->lo_looping = 1;
				return (0);
			}
		} else {
			/*
			 * No frills just make the shadow node.
			 */
			*vpp = makelonode(vp, li);
			return (0);
		}
	}

	nosub = (vtoli(dvp->v_vfsp)->li_flag & LO_NOSUB);

	/*
	 * If this vnode is mounted on, then we
	 * traverse to the vnode which is the root of
	 * the mounted file system.  Skipped when the mount has LO_NOSUB set.
	 */
	if (!nosub && (error = traverse(&vp)))
		goto out;

	/*
	 * Make a lnode for the real vnode.  Non-directories (and everything
	 * on a LO_NOSUB mount) need no loop detection, so they return here.
	 */
	if (vp->v_type != VDIR || nosub) {
		*vpp = makelonode(vp, li);
		if (IS_DEVVP(*vpp)) {
			vnode_t *svp;

			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
			VN_RELE(*vpp);
			/*
			 * NOTE(review): on the ENOSYS path *vpp still holds
			 * the pointer that was just released.
			 */
			if (svp == NULL)
				error = ENOSYS;
			else
				*vpp = svp;
		}
		return (error);
	}

	/*
	 * if the found vnode (vp) is not of type lofs
	 * then we're just going to make a shadow of that
	 * vp and get out.
	 *
	 * If the found vnode (vp) is of lofs type, and
	 * we're not doing dotdot, check if we are
	 * looping.
	 */
	if (!doingdotdot && vfs_matchops(vp->v_vfsp, lo_vfsops)) {
		/*
		 * Check if we're looping, i.e.
		 * vp equals the root vp of the lofs, directly
		 * or indirectly, return the covered node.
		 */

		if (!(vtol(dvp))->lo_looping) {
			if (vp == li->li_rootvp) {
				/*
				 * Direct looping condition.
				 * Ex:- X is / mounted directory so lookup of
				 * /X/X is a direct looping condition.
				 */
				tvp = vp;
				vp = vp->v_vfsp->vfs_vnodecovered;
				VN_HOLD(vp);
				VN_RELE(tvp);
				looping++;
			} else {
				/*
				 * Indirect looping can be defined as
				 * real lookup returning rootvp of the current
				 * tree in any level of recursion.
				 *
				 * This check is useful if there are multiple
				 * levels of lofs indirections. Suppose vnode X
				 * in the current lookup has as its real vnode
				 * another lofs node. Y = realvp(X) Y should be
				 * a lofs node for the check to continue or Y
				 * is not the rootvp of X.
				 * Ex:- say X and Y are two vnodes
				 * say real(Y) is X and real(X) is Z
				 * parent vnode for X and Y is Z
				 * lookup(Y,"path") say we are looking for Y
				 * again under Y and we have to return Yc.
				 * but the lookup of Y under Y does not return
				 * Y the root vnode again here is why.
				 * 1. lookup(Y,"path of Y") will go to
				 * 2. lookup(real(Y),"path of Y") and then to
				 * 3. lookup(real(X),"path of Y").
				 * and now what lookup level 1 sees is the
				 * outcome of 2 but the vnode Y is due to
				 * lookup(Z,"path of Y") so we have to skip
				 * intermediate levels to find if in any level
				 * there is a looping.
				 */
				is_indirectloop = 0;
				nonlovp = vp;
				/*
				 * Peel lofs layers off vp until either the
				 * lofs root is found or a non-lofs vnode is
				 * reached.
				 */
				while (
				    vfs_matchops(nonlovp->v_vfsp, lo_vfsops) &&
				    !(is_indirectloop)) {
					if (li->li_rootvp  == nonlovp) {
						is_indirectloop++;
						break;
					}
					nonlovp = realvp(nonlovp);
				}

				if (is_indirectloop) {
					/* nonlovp == li->li_rootvp here */
					VN_RELE(vp);
					vp = nonlovp;
					vp = vp->v_vfsp->vfs_vnodecovered;
					VN_HOLD(vp);
					looping++;
				}
			}
		} else {
			/*
			 * come here only because of the interaction between
			 * the autofs and lofs.
			 *
			 * Lookup of "/net/X/net/X" will return a shadow of
			 * an autonode X_a which we call X_l.
			 *
			 * Lookup of anything under X_l, will trigger a call to
			 * auto_lookup(X_a,nm) which will eventually call
			 * lo_lookup(X_lr,nm) where X_lr is the root vnode of
			 * the current lofs.
			 *
			 * We come here only when we are called with X_l as dvp
			 * and look for something underneath.
			 *
			 * We need to find out if the vnode, which vp is
			 * shadowing, is the rootvp of the autofs.
			 *
			 */
			realdvp = realvp(dvp);
			/* Strip every lofs layer to reach the non-lofs vnode */
			while (vfs_matchops(realdvp->v_vfsp, lo_vfsops)) {
				realdvp = realvp(realdvp);
			}

			error = VFS_ROOT(realdvp->v_vfsp, &tvp);
			if (error)
				goto out;
			/*
			 * tvp now contains the rootvp of the vfs of the
			 * real vnode of dvp
			 */

			if (realvp(dvp)->v_vfsp == realvp(vp)->v_vfsp &&
			    tvp == realvp(vp)) {
				/*
				 * vp is the shadow of "net",
				 * the rootvp of autofs
				 */
				VN_RELE(vp);
				vp = tvp;	/* this is an autonode */

				/*
				 * Need to find the covered vnode
				 */
				vp = vp->v_vfsp->vfs_vnodecovered;
				ASSERT(vp);
				VN_HOLD(vp);
				VN_RELE(tvp);
			} else {
				VN_RELE(tvp);
			}
		}
	}
	*vpp = makelonode(vp, li);

	/*
	 * Mark the new shadow as looping if a loop was detected above, or if
	 * the parent was already marked and this is not a ".." lookup.
	 */
	if ((looping) || ((vtol(dvp))->lo_looping && !doingdotdot)) {
		(vtol(*vpp))->lo_looping = 1;
	}

out:
	/*
	 * On failure drop the hold on whatever vnode is still in vp.  On the
	 * fall-through success path error is 0 and the hold is kept
	 * (presumably now owned by the lnode made above -- confirm against
	 * makelonode()).
	 */
	if (error != 0 && vp != NULL)
		VN_RELE(vp);
#ifdef LODEBUG
	/* NOTE(review): pointers are printed with %x here -- confirm intent */
	lo_dprint(4,
	"lo_lookup dvp %x realdvp %x nm '%s' newvp %x real vp %x error %d\n",
		dvp, realvp(dvp), nm, *vpp, vp, error);
#endif
	return (error);
}
647 
648 /*ARGSUSED*/
649 static int
650 lo_create(
651 	vnode_t *dvp,
652 	char *nm,
653 	struct vattr *va,
654 	enum vcexcl exclusive,
655 	int mode,
656 	vnode_t **vpp,
657 	struct cred *cr,
658 	int flag)
659 {
660 	int error;
661 	vnode_t *vp = NULL;
662 
663 #ifdef LODEBUG
664 	lo_dprint(4, "lo_create vp %p realvp %p\n", dvp, realvp(dvp));
665 #endif
666 	if (*nm == '\0') {
667 		ASSERT(vpp && dvp == *vpp);
668 		vp = realvp(*vpp);
669 	}
670 
671 	if (IS_ZONEDEVFS(dvp)) {
672 		/* Is this truly a create?  If so, fail */
673 		if (*vpp == NULL)
674 			return (EACCES);
675 
676 		/* Is this an open of a non-special for writing?  If so, fail */
677 		if (*vpp != NULL && (mode & VWRITE) && !IS_DEVVP(*vpp))
678 			return (EACCES);
679 	}
680 
681 	error = VOP_CREATE(realvp(dvp), nm, va, exclusive, mode, &vp, cr, flag);
682 	if (!error) {
683 		*vpp = makelonode(vp, vtoli(dvp->v_vfsp));
684 		if (IS_DEVVP(*vpp)) {
685 			vnode_t *svp;
686 
687 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
688 			VN_RELE(*vpp);
689 			if (svp == NULL)
690 				error = ENOSYS;
691 			else
692 				*vpp = svp;
693 		}
694 	}
695 	return (error);
696 }
697 
698 static int
699 lo_remove(vnode_t *dvp, char *nm, struct cred *cr)
700 {
701 #ifdef LODEBUG
702 	lo_dprint(4, "lo_remove vp %p realvp %p\n", dvp, realvp(dvp));
703 #endif
704 	if (IS_ZONEDEVFS(dvp))
705 		return (EACCES);
706 	dvp = realvp(dvp);
707 	return (VOP_REMOVE(dvp, nm, cr));
708 }
709 
710 static int
711 lo_link(vnode_t *tdvp, vnode_t *vp, char *tnm, struct cred *cr)
712 {
713 #ifdef LODEBUG
714 	lo_dprint(4, "lo_link vp %p realvp %p\n", vp, realvp(vp));
715 #endif
716 	while (vn_matchops(vp, lo_vnodeops)) {
717 		if (IS_ZONEDEVFS(vp))
718 			return (EACCES);
719 		vp = realvp(vp);
720 	}
721 	while (vn_matchops(tdvp, lo_vnodeops)) {
722 		if (IS_ZONEDEVFS(tdvp))
723 			return (EACCES);
724 		tdvp = realvp(tdvp);
725 	}
726 	if (vp->v_vfsp != tdvp->v_vfsp)
727 		return (EXDEV);
728 	return (VOP_LINK(tdvp, vp, tnm, cr));
729 }
730 
731 static int
732 lo_rename(
733 	vnode_t *odvp,
734 	char *onm,
735 	vnode_t *ndvp,
736 	char *nnm,
737 	struct cred *cr)
738 {
739 	vnode_t *tnvp;
740 
741 #ifdef LODEBUG
742 	lo_dprint(4, "lo_rename vp %p realvp %p\n", odvp, realvp(odvp));
743 #endif
744 	if (IS_ZONEDEVFS(odvp))
745 		return (EACCES);
746 	/*
747 	 * We need to make sure we're not trying to remove a mount point for a
748 	 * filesystem mounted on top of lofs, which only we know about.
749 	 */
750 	if (vn_matchops(ndvp, lo_vnodeops))	/* Not our problem. */
751 		goto rename;
752 	if (VOP_LOOKUP(ndvp, nnm, &tnvp, NULL, 0, NULL, cr) != 0)
753 		goto rename;
754 	if (tnvp->v_type != VDIR) {
755 		VN_RELE(tnvp);
756 		goto rename;
757 	}
758 	if (vn_mountedvfs(tnvp)) {
759 		VN_RELE(tnvp);
760 		return (EBUSY);
761 	}
762 	VN_RELE(tnvp);
763 rename:
764 	/*
765 	 * Since the case we're dealing with above can happen at any layer in
766 	 * the stack of lofs filesystems, we need to recurse down the stack,
767 	 * checking to see if there are any instances of a filesystem mounted on
768 	 * top of lofs. In order to keep on using the lofs version of
769 	 * VOP_RENAME(), we make sure that while the target directory is of type
770 	 * lofs, the source directory (the one used for getting the fs-specific
771 	 * version of VOP_RENAME()) is also of type lofs.
772 	 */
773 	if (vn_matchops(ndvp, lo_vnodeops)) {
774 		if (IS_ZONEDEVFS(ndvp))
775 			return (EACCES);
776 		ndvp = realvp(ndvp);	/* Check the next layer */
777 	} else {
778 		/*
779 		 * We can go fast here
780 		 */
781 		while (vn_matchops(odvp, lo_vnodeops)) {
782 			if (IS_ZONEDEVFS(odvp))
783 				return (EACCES);
784 			odvp = realvp(odvp);
785 		}
786 		if (odvp->v_vfsp != ndvp->v_vfsp)
787 			return (EXDEV);
788 	}
789 	return (VOP_RENAME(odvp, onm, ndvp, nnm, cr));
790 }
791 
792 static int
793 lo_mkdir(
794 	vnode_t *dvp,
795 	char *nm,
796 	struct vattr *va,
797 	vnode_t **vpp,
798 	struct cred *cr)
799 {
800 	int error;
801 
802 #ifdef LODEBUG
803 	lo_dprint(4, "lo_mkdir vp %p realvp %p\n", dvp, realvp(dvp));
804 #endif
805 	if (IS_ZONEDEVFS(dvp))
806 		return (EACCES);
807 	error = VOP_MKDIR(realvp(dvp), nm, va, vpp, cr);
808 	if (!error)
809 		*vpp = makelonode(*vpp, vtoli(dvp->v_vfsp));
810 	return (error);
811 }
812 
813 static int
814 lo_realvp(vnode_t *vp, vnode_t **vpp)
815 {
816 #ifdef LODEBUG
817 	lo_dprint(4, "lo_realvp %p\n", vp);
818 #endif
819 	while (vn_matchops(vp, lo_vnodeops))
820 		vp = realvp(vp);
821 
822 	if (VOP_REALVP(vp, vpp) != 0)
823 		*vpp = vp;
824 	return (0);
825 }
826 
827 static int
828 lo_rmdir(
829 	vnode_t *dvp,
830 	char *nm,
831 	vnode_t *cdir,
832 	struct cred *cr)
833 {
834 	vnode_t *rvp = cdir;
835 
836 #ifdef LODEBUG
837 	lo_dprint(4, "lo_rmdir vp %p realvp %p\n", dvp, realvp(dvp));
838 #endif
839 	if (IS_ZONEDEVFS(dvp))
840 		return (EACCES);
841 	/* if cdir is lofs vnode ptr get its real vnode ptr */
842 	if (vn_matchops(dvp, vn_getops(rvp)))
843 		(void) lo_realvp(cdir, &rvp);
844 	dvp = realvp(dvp);
845 	return (VOP_RMDIR(dvp, nm, rvp, cr));
846 }
847 
848 static int
849 lo_symlink(
850 	vnode_t *dvp,
851 	char *lnm,
852 	struct vattr *tva,
853 	char *tnm,
854 	struct cred *cr)
855 {
856 #ifdef LODEBUG
857 	lo_dprint(4, "lo_symlink vp %p realvp %p\n", dvp, realvp(dvp));
858 #endif
859 	if (IS_ZONEDEVFS(dvp))
860 		return (EACCES);
861 	dvp = realvp(dvp);
862 	return (VOP_SYMLINK(dvp, lnm, tva, tnm, cr));
863 }
864 
865 static int
866 lo_readlink(vnode_t *vp, struct uio *uiop, struct cred *cr)
867 {
868 	vp = realvp(vp);
869 	return (VOP_READLINK(vp, uiop, cr));
870 }
871 
872 static int
873 lo_readdir(vnode_t *vp, struct uio *uiop, struct cred *cr, int *eofp)
874 {
875 #ifdef LODEBUG
876 	lo_dprint(4, "lo_readdir vp %p realvp %p\n", vp, realvp(vp));
877 #endif
878 	vp = realvp(vp);
879 	return (VOP_READDIR(vp, uiop, cr, eofp));
880 }
881 
882 static int
883 lo_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
884 {
885 	vp = realvp(vp);
886 	return (VOP_RWLOCK(vp, write_lock, ct));
887 }
888 
889 static void
890 lo_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
891 {
892 	vp = realvp(vp);
893 	VOP_RWUNLOCK(vp, write_lock, ct);
894 }
895 
896 static int
897 lo_seek(vnode_t *vp, offset_t ooff, offset_t *noffp)
898 {
899 	vp = realvp(vp);
900 	return (VOP_SEEK(vp, ooff, noffp));
901 }
902 
903 static int
904 lo_cmp(vnode_t *vp1, vnode_t *vp2)
905 {
906 	while (vn_matchops(vp1, lo_vnodeops))
907 		vp1 = realvp(vp1);
908 	while (vn_matchops(vp2, lo_vnodeops))
909 		vp2 = realvp(vp2);
910 	return (VOP_CMP(vp1, vp2));
911 }
912 
913 static int
914 lo_frlock(
915 	vnode_t *vp,
916 	int cmd,
917 	struct flock64 *bfp,
918 	int flag,
919 	offset_t offset,
920 	struct flk_callback *flk_cbp,
921 	cred_t *cr)
922 {
923 	vp = realvp(vp);
924 	return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr));
925 }
926 
927 static int
928 lo_space(
929 	vnode_t *vp,
930 	int cmd,
931 	struct flock64 *bfp,
932 	int flag,
933 	offset_t offset,
934 	struct cred *cr,
935 	caller_context_t *ct)
936 {
937 	vp = realvp(vp);
938 	return (VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct));
939 }
940 
941 static int
942 lo_getpage(
943 	vnode_t *vp,
944 	offset_t off,
945 	size_t len,
946 	uint_t *prot,
947 	struct page *parr[],
948 	size_t psz,
949 	struct seg *seg,
950 	caddr_t addr,
951 	enum seg_rw rw,
952 	struct cred *cr)
953 {
954 	vp = realvp(vp);
955 	return (VOP_GETPAGE(vp, off, len, prot, parr, psz, seg, addr, rw, cr));
956 }
957 
958 static int
959 lo_putpage(vnode_t *vp, offset_t off, size_t len, int flags, struct cred *cr)
960 {
961 	vp = realvp(vp);
962 	return (VOP_PUTPAGE(vp, off, len, flags, cr));
963 }
964 
965 static int
966 lo_map(
967 	vnode_t *vp,
968 	offset_t off,
969 	struct as *as,
970 	caddr_t *addrp,
971 	size_t len,
972 	uchar_t prot,
973 	uchar_t maxprot,
974 	uint_t flags,
975 	struct cred *cr)
976 {
977 	vp = realvp(vp);
978 	return (VOP_MAP(vp, off, as, addrp, len, prot, maxprot, flags, cr));
979 }
980 
981 static int
982 lo_addmap(
983 	vnode_t *vp,
984 	offset_t off,
985 	struct as *as,
986 	caddr_t addr,
987 	size_t len,
988 	uchar_t prot,
989 	uchar_t maxprot,
990 	uint_t flags,
991 	struct cred *cr)
992 {
993 	vp = realvp(vp);
994 	return (VOP_ADDMAP(vp, off, as, addr, len, prot, maxprot, flags, cr));
995 }
996 
997 static int
998 lo_delmap(
999 	vnode_t *vp,
1000 	offset_t off,
1001 	struct as *as,
1002 	caddr_t addr,
1003 	size_t len,
1004 	uint_t prot,
1005 	uint_t maxprot,
1006 	uint_t flags,
1007 	struct cred *cr)
1008 {
1009 	vp = realvp(vp);
1010 	return (VOP_DELMAP(vp, off, as, addr, len, prot, maxprot, flags, cr));
1011 }
1012 
1013 static int
1014 lo_poll(
1015 	vnode_t *vp,
1016 	short events,
1017 	int anyyet,
1018 	short *reventsp,
1019 	struct pollhead **phpp)
1020 {
1021 	vp = realvp(vp);
1022 	return (VOP_POLL(vp, events, anyyet, reventsp, phpp));
1023 }
1024 
1025 static int
1026 lo_dump(vnode_t *vp, caddr_t addr, int bn, int count)
1027 {
1028 	vp = realvp(vp);
1029 	return (VOP_DUMP(vp, addr, bn, count));
1030 }
1031 
1032 static int
1033 lo_pathconf(vnode_t *vp, int cmd, ulong_t *valp, struct cred *cr)
1034 {
1035 	vp = realvp(vp);
1036 	return (VOP_PATHCONF(vp, cmd, valp, cr));
1037 }
1038 
1039 static int
1040 lo_pageio(
1041 	vnode_t *vp,
1042 	struct page *pp,
1043 	u_offset_t io_off,
1044 	size_t io_len,
1045 	int flags,
1046 	cred_t *cr)
1047 {
1048 	vp = realvp(vp);
1049 	return (VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr));
1050 }
1051 
1052 static void
1053 lo_dispose(vnode_t *vp, page_t *pp, int fl, int dn, cred_t *cr)
1054 {
1055 	vp = realvp(vp);
1056 	if (vp != NULL && vp != &kvp)
1057 		VOP_DISPOSE(vp, pp, fl, dn, cr);
1058 }
1059 
1060 static int
1061 lo_setsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr)
1062 {
1063 	if (vn_is_readonly(vp))
1064 		return (EROFS);
1065 	vp = realvp(vp);
1066 	return (VOP_SETSECATTR(vp, secattr, flags, cr));
1067 }
1068 
1069 static int
1070 lo_getsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr)
1071 {
1072 	vp = realvp(vp);
1073 	return (VOP_GETSECATTR(vp, secattr, flags, cr));
1074 }
1075 
1076 static int
1077 lo_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr)
1078 {
1079 	vp = realvp(vp);
1080 	return (VOP_SHRLOCK(vp, cmd, shr, flag, cr));
1081 }
1082 
1083 /*
1084  * Loopback vnode operations vector.
1085  */
1086 
/*
 * Ops vector used to recognize lofs vnodes (see the vn_matchops() calls in
 * this file).  It is only declared here.
 * NOTE(review): presumably built from lo_vnodeops_template during file
 * system initialization elsewhere -- confirm.
 */
struct vnodeops *lo_vnodeops;

/*
 * Name/handler pairs binding each vnode operation to its lofs
 * implementation in this file; the list ends with a NULL, NULL pair.
 * Handlers whose signature is not the generic one are cast to
 * fs_generic_func_p.
 */
const fs_operation_def_t lo_vnodeops_template[] = {
	VOPNAME_OPEN, lo_open,
	VOPNAME_CLOSE, lo_close,
	VOPNAME_READ, lo_read,
	VOPNAME_WRITE, lo_write,
	VOPNAME_IOCTL, lo_ioctl,
	VOPNAME_SETFL, lo_setfl,
	VOPNAME_GETATTR, lo_getattr,
	VOPNAME_SETATTR, lo_setattr,
	VOPNAME_ACCESS, lo_access,
	VOPNAME_LOOKUP, lo_lookup,
	VOPNAME_CREATE, lo_create,
	VOPNAME_REMOVE, lo_remove,
	VOPNAME_LINK, lo_link,
	VOPNAME_RENAME, lo_rename,
	VOPNAME_MKDIR, lo_mkdir,
	VOPNAME_RMDIR, lo_rmdir,
	VOPNAME_READDIR, lo_readdir,
	VOPNAME_SYMLINK, lo_symlink,
	VOPNAME_READLINK, lo_readlink,
	VOPNAME_FSYNC, lo_fsync,
	VOPNAME_INACTIVE, (fs_generic_func_p) lo_inactive,
	VOPNAME_FID, lo_fid,
	VOPNAME_RWLOCK, lo_rwlock,
	VOPNAME_RWUNLOCK, (fs_generic_func_p) lo_rwunlock,
	VOPNAME_SEEK, lo_seek,
	VOPNAME_CMP, lo_cmp,
	VOPNAME_FRLOCK, lo_frlock,
	VOPNAME_SPACE, lo_space,
	VOPNAME_REALVP, lo_realvp,
	VOPNAME_GETPAGE, lo_getpage,
	VOPNAME_PUTPAGE, lo_putpage,
	VOPNAME_MAP, (fs_generic_func_p) lo_map,
	VOPNAME_ADDMAP, (fs_generic_func_p) lo_addmap,
	VOPNAME_DELMAP, lo_delmap,
	VOPNAME_POLL, (fs_generic_func_p) lo_poll,
	VOPNAME_DUMP, lo_dump,
	/* The only entry not backed by a lo_* routine in this file. */
	VOPNAME_DUMPCTL, fs_error,		/* XXX - why? */
	VOPNAME_PATHCONF, lo_pathconf,
	VOPNAME_PAGEIO, lo_pageio,
	VOPNAME_DISPOSE, (fs_generic_func_p) lo_dispose,
	VOPNAME_SETSECATTR, lo_setsecattr,
	VOPNAME_GETSECATTR, lo_getsecattr,
	VOPNAME_SHRLOCK, lo_shrlock,
	NULL, NULL
};
1135