1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2015 Joyent, Inc.
25 */
26
27#include <sys/param.h>
28#include <sys/systm.h>
29#include <sys/errno.h>
30#include <sys/vnode.h>
31#include <sys/vfs.h>
32#include <sys/vfs_opreg.h>
33#include <sys/uio.h>
34#include <sys/cred.h>
35#include <sys/pathname.h>
36#include <sys/debug.h>
37#include <sys/fs/lofs_node.h>
38#include <sys/fs/lofs_info.h>
39#include <fs/fs_subr.h>
40#include <vm/as.h>
41#include <vm/seg.h>
42
/*
 * These are the vnode ops routines which implement the vnode interface to
 * the looped-back file system.  These routines just take their parameters
 * and then call the appropriate real vnode routine(s) to do the work.
 */
48
/*
 * Open the real vnode that the lofs vnode *vpp shadows.
 *
 * The underlying VOP_OPEN() is allowed to hand back a different vnode
 * through its first argument.  When it does, a new lofs node shadowing the
 * returned vnode replaces *vpp (wrapped by specvp() for device vnodes) and
 * the reference on the original lofs vnode is dropped.
 *
 * Returns 0 on success or the error from VOP_OPEN(); ENOSYS if specvp()
 * fails to produce a vnode.
 */
static int
lo_open(vnode_t **vpp, int flag, struct cred *cr, caller_context_t *ct)
{
	vnode_t *vp = *vpp;
	vnode_t *rvp;
	vnode_t *oldvp;
	int error;

#ifdef LODEBUG
	lo_dprint(4, "lo_open vp %p cnt=%d realvp %p cnt=%d\n",
	    vp, vp->v_count, realvp(vp), realvp(vp)->v_count);
#endif

	/* Remember the lofs vnode; vp and rvp both become the real vnode. */
	oldvp = vp;
	vp = rvp = realvp(vp);
	/*
	 * Need to hold new reference to vp since VOP_OPEN() may
	 * decide to release it.
	 */
	VN_HOLD(vp);
	error = VOP_OPEN(&rvp, flag, cr, ct);

	if (!error && rvp != vp) {
		/*
		 * the FS which we called should have released the
		 * new reference on vp
		 */
		*vpp = makelonode(rvp, vtoli(oldvp->v_vfsp), 0);
		if ((*vpp)->v_type == VDIR) {
			/*
			 * Copy over any looping flags to the new lnode.
			 */
			(vtol(*vpp))->lo_looping |= (vtol(oldvp))->lo_looping;
		}
		if (IS_DEVVP(*vpp)) {
			vnode_t *svp;

			/*
			 * Device vnodes are returned as the vnode produced
			 * by specvp(); the lofs node reference taken above
			 * is dropped either way.
			 *
			 * NOTE(review): when specvp() returns NULL, *vpp is
			 * left pointing at the lnode that was just released
			 * and oldvp is released below as well -- confirm
			 * what callers do with *vpp on error.
			 */
			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
			VN_RELE(*vpp);
			if (svp == NULL)
				error = ENOSYS;
			else
				*vpp = svp;
		}
		VN_RELE(oldvp);
	} else {
		/*
		 * Open failed, or the same real vnode came back: drop the
		 * extra hold taken before VOP_OPEN().
		 */
		ASSERT(rvp->v_count > 1);
		VN_RELE(rvp);
	}

	return (error);
}
101
102static int
103lo_close(
104	vnode_t *vp,
105	int flag,
106	int count,
107	offset_t offset,
108	struct cred *cr,
109	caller_context_t *ct)
110{
111#ifdef LODEBUG
112	lo_dprint(4, "lo_close vp %p realvp %p\n", vp, realvp(vp));
113#endif
114	vp = realvp(vp);
115	return (VOP_CLOSE(vp, flag, count, offset, cr, ct));
116}
117
118static int
119lo_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
120	caller_context_t *ct)
121{
122#ifdef LODEBUG
123	lo_dprint(4, "lo_read vp %p realvp %p\n", vp, realvp(vp));
124#endif
125	vp = realvp(vp);
126	return (VOP_READ(vp, uiop, ioflag, cr, ct));
127}
128
129static int
130lo_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
131	caller_context_t *ct)
132{
133#ifdef LODEBUG
134	lo_dprint(4, "lo_write vp %p realvp %p\n", vp, realvp(vp));
135#endif
136	vp = realvp(vp);
137	return (VOP_WRITE(vp, uiop, ioflag, cr, ct));
138}
139
140static int
141lo_ioctl(
142	vnode_t *vp,
143	int cmd,
144	intptr_t arg,
145	int flag,
146	struct cred *cr,
147	int *rvalp,
148	caller_context_t *ct)
149{
150#ifdef LODEBUG
151	lo_dprint(4, "lo_ioctl vp %p realvp %p\n", vp, realvp(vp));
152#endif
153	vp = realvp(vp);
154	return (VOP_IOCTL(vp, cmd, arg, flag, cr, rvalp, ct));
155}
156
157static int
158lo_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
159{
160	vp = realvp(vp);
161	return (VOP_SETFL(vp, oflags, nflags, cr, ct));
162}
163
164static int
165lo_getattr(
166	vnode_t *vp,
167	struct vattr *vap,
168	int flags,
169	struct cred *cr,
170	caller_context_t *ct)
171{
172	int error;
173
174#ifdef LODEBUG
175	lo_dprint(4, "lo_getattr vp %p realvp %p\n", vp, realvp(vp));
176#endif
177	if (error = VOP_GETATTR(realvp(vp), vap, flags, cr, ct))
178		return (error);
179
180	return (0);
181}
182
183static int
184lo_setattr(
185	vnode_t *vp,
186	struct vattr *vap,
187	int flags,
188	struct cred *cr,
189	caller_context_t *ct)
190{
191#ifdef LODEBUG
192	lo_dprint(4, "lo_setattr vp %p realvp %p\n", vp, realvp(vp));
193#endif
194	vp = realvp(vp);
195	return (VOP_SETATTR(vp, vap, flags, cr, ct));
196}
197
198static int
199lo_access(
200	vnode_t *vp,
201	int mode,
202	int flags,
203	struct cred *cr,
204	caller_context_t *ct)
205{
206#ifdef LODEBUG
207	lo_dprint(4, "lo_access vp %p realvp %p\n", vp, realvp(vp));
208#endif
209	if (mode & VWRITE) {
210		if (vp->v_type == VREG && vn_is_readonly(vp))
211			return (EROFS);
212	}
213	vp = realvp(vp);
214	return (VOP_ACCESS(vp, mode, flags, cr, ct));
215}
216
217static int
218lo_fsync(vnode_t *vp, int syncflag, struct cred *cr, caller_context_t *ct)
219{
220#ifdef LODEBUG
221	lo_dprint(4, "lo_fsync vp %p realvp %p\n", vp, realvp(vp));
222#endif
223	vp = realvp(vp);
224	return (VOP_FSYNC(vp, syncflag, cr, ct));
225}
226
227/*ARGSUSED*/
228static void
229lo_inactive(vnode_t *vp, struct cred *cr, caller_context_t *ct)
230{
231#ifdef LODEBUG
232	lo_dprint(4, "lo_inactive %p, realvp %p\n", vp, realvp(vp));
233#endif
234	freelonode(vtol(vp));
235}
236
237/* ARGSUSED */
238static int
239lo_fid(vnode_t *vp, struct fid *fidp, caller_context_t *ct)
240{
241#ifdef LODEBUG
242	lo_dprint(4, "lo_fid %p, realvp %p\n", vp, realvp(vp));
243#endif
244	vp = realvp(vp);
245	return (VOP_FID(vp, fidp, ct));
246}
247
248/*
249 * Given a vnode of lofs type, lookup nm name and
250 * return a shadow vnode (of lofs type) of the
251 * real vnode found.
252 *
253 * Due to the nature of lofs, there is a potential
254 * looping in path traversal.
255 *
256 * starting from the mount point of an lofs;
257 * a loop is defined to be a traversal path
258 * where the mount point or the real vnode of
259 * the root of this lofs is encountered twice.
260 * Once at the start of traversal and second
261 * when the looping is found.
262 *
263 * When a loop is encountered, a shadow of the
264 * covered vnode is returned to stop the looping.
265 *
266 * This normally works, but with the advent of
267 * the new automounter, returning the shadow of the
268 * covered vnode (autonode, in this case) does not
269 * stop the loop.  Because further lookup on this
270 * lonode will cause the autonode to call lo_lookup()
271 * on the lonode covering it.
272 *
273 * example "/net/jurassic/net/jurassic" is a loop.
274 * returning the shadow of the autonode corresponding to
275 * "/net/jurassic/net/jurassic" will not terminate the
276 * loop.   To solve this problem we allow the loop to go
277 * through one more level component lookup.  Whichever
278 * directory is then looked up in "/net/jurassic/net/jurassic"
279 * the vnode returned is the vnode covered by the autonode
280 * "net" and this will terminate the loop.
281 *
282 * Lookup for dot dot has to be dealt with separately.
283 * It will be nice to have a "one size fits all" kind
284 * of solution, so that we don't have so many ifs statement
285 * in the lo_lookup() to handle dotdot.  But, since
286 * there are so many special cases to handle different
287 * kinds looping above, we need special codes to handle
288 * dotdot lookup as well.
289 */
290static int
291lo_lookup(
292	vnode_t *dvp,
293	char *nm,
294	vnode_t **vpp,
295	struct pathname *pnp,
296	int flags,
297	vnode_t *rdir,
298	struct cred *cr,
299	caller_context_t *ct,
300	int *direntflags,
301	pathname_t *realpnp)
302{
303	vnode_t *vp = NULL, *tvp = NULL, *nonlovp;
304	int error, is_indirectloop;
305	vnode_t *realdvp = realvp(dvp);
306	struct loinfo *li = vtoli(dvp->v_vfsp);
307	int looping = 0;
308	int autoloop = 0;
309	int doingdotdot = 0;
310	int nosub = 0;
311	int mkflag = 0;
312
313	/*
314	 * If name is empty and no XATTR flags are set, then return
315	 * dvp (empty name == lookup ".").  If an XATTR flag is set
316	 * then we need to call VOP_LOOKUP to get the xattr dir.
317	 */
318	if (nm[0] == '\0' && ! (flags & (CREATE_XATTR_DIR|LOOKUP_XATTR))) {
319		VN_HOLD(dvp);
320		*vpp = dvp;
321		return (0);
322	}
323
324	if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') {
325		doingdotdot++;
326		/*
327		 * Handle ".." out of mounted filesystem
328		 */
329		while ((realdvp->v_flag & VROOT) && realdvp != rootdir) {
330			realdvp = realdvp->v_vfsp->vfs_vnodecovered;
331			ASSERT(realdvp != NULL);
332		}
333	}
334
335	*vpp = NULL;	/* default(error) case */
336
337	/*
338	 * Do the normal lookup
339	 */
340	if (error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr,
341	    ct, direntflags, realpnp)) {
342		vp = NULL;
343		goto out;
344	}
345
346	/*
347	 * We do this check here to avoid returning a stale file handle to the
348	 * caller.
349	 */
350	if (nm[0] == '.' && nm[1] == '\0') {
351		ASSERT(vp == realdvp);
352		VN_HOLD(dvp);
353		VN_RELE(vp);
354		*vpp = dvp;
355		return (0);
356	}
357
358	if (doingdotdot) {
359		if ((vtol(dvp))->lo_looping & LO_LOOPING) {
360			vfs_t *vfsp;
361
362			error = vn_vfsrlock_wait(realdvp);
363			if (error)
364				goto out;
365			vfsp = vn_mountedvfs(realdvp);
366			/*
367			 * In the standard case if the looping flag is set and
368			 * performing dotdot we would be returning from a
369			 * covered vnode, implying vfsp could not be null. The
370			 * exceptions being if we have looping and overlay
371			 * mounts or looping and covered file systems.
372			 */
373			if (vfsp == NULL) {
374				/*
375				 * Overlay mount or covered file system,
376				 * so just make the shadow node.
377				 */
378				vn_vfsunlock(realdvp);
379				*vpp = makelonode(vp, li, 0);
380				(vtol(*vpp))->lo_looping |= LO_LOOPING;
381				return (0);
382			}
383			/*
384			 * When looping get the actual found vnode
385			 * instead of the vnode covered.
386			 * Here we have to hold the lock for realdvp
387			 * since an unmount during the traversal to the
388			 * root vnode would turn *vfsp into garbage
389			 * which would be fatal.
390			 */
391			error = VFS_ROOT(vfsp, &tvp);
392			vn_vfsunlock(realdvp);
393
394			if (error)
395				goto out;
396
397			if ((tvp == li->li_rootvp) && (vp == realvp(tvp))) {
398				/*
399				 * we're back at the real vnode
400				 * of the rootvp
401				 *
402				 * return the rootvp
403				 * Ex: /mnt/mnt/..
404				 * where / has been lofs-mounted
405				 * onto /mnt.  Return the lofs
406				 * node mounted at /mnt.
407				 */
408				*vpp = tvp;
409				VN_RELE(vp);
410				return (0);
411			} else {
412				/*
413				 * We are returning from a covered
414				 * node whose vfs_mountedhere is
415				 * not pointing to vfs of the current
416				 * root vnode.
417				 * This is a condn where in we
418				 * returned a covered node say Zc
419				 * but Zc is not the cover of current
420				 * root.
421				 * i.e.., if X is the root vnode
422				 * lookup(Zc,"..") is taking us to
423				 * X.
424				 * Ex: /net/X/net/X/Y
425				 *
426				 * If LO_AUTOLOOP (autofs/lofs looping detected)
427				 * has been set then we are encountering the
428				 * cover of Y (Y being any directory vnode
429				 * under /net/X/net/X/).
430				 * When performing a dotdot set the
431				 * returned vp to the vnode covered
432				 * by the mounted lofs, ie /net/X/net/X
433				 */
434				VN_RELE(tvp);
435				if ((vtol(dvp))->lo_looping & LO_AUTOLOOP) {
436					VN_RELE(vp);
437					vp = li->li_rootvp;
438					vp = vp->v_vfsp->vfs_vnodecovered;
439					VN_HOLD(vp);
440					*vpp = makelonode(vp, li, 0);
441					(vtol(*vpp))->lo_looping |= LO_LOOPING;
442					return (0);
443				}
444			}
445		} else {
446			/*
447			 * No frills just make the shadow node.
448			 */
449			*vpp = makelonode(vp, li, 0);
450			return (0);
451		}
452	}
453
454	nosub = (vtoli(dvp->v_vfsp)->li_flag & LO_NOSUB);
455
456	/*
457	 * If this vnode is mounted on, then we
458	 * traverse to the vnode which is the root of
459	 * the mounted file system.
460	 */
461	if (!nosub && (error = traverse(&vp)))
462		goto out;
463
464	/*
465	 * Make a lnode for the real vnode.
466	 */
467	if (vp->v_type != VDIR || nosub) {
468		*vpp = makelonode(vp, li, 0);
469		if (IS_DEVVP(*vpp)) {
470			vnode_t *svp;
471
472			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
473			VN_RELE(*vpp);
474			if (svp == NULL)
475				error = ENOSYS;
476			else
477				*vpp = svp;
478		}
479		return (error);
480	}
481
482	/*
483	 * if the found vnode (vp) is not of type lofs
484	 * then we're just going to make a shadow of that
485	 * vp and get out.
486	 *
487	 * If the found vnode (vp) is of lofs type, and
488	 * we're not doing dotdot, check if we are
489	 * looping.
490	 */
491	if (!doingdotdot && vfs_matchops(vp->v_vfsp, lo_vfsops)) {
492		/*
493		 * Check if we're looping, i.e.
494		 * vp equals the root vp of the lofs, directly
495		 * or indirectly, return the covered node.
496		 */
497
498		if (!((vtol(dvp))->lo_looping & LO_LOOPING)) {
499			if (vp == li->li_rootvp) {
500				/*
501				 * Direct looping condn.
502				 * Ex:- X is / mounted directory so lookup of
503				 * /X/X is a direct looping condn.
504				 */
505				tvp = vp;
506				vp = vp->v_vfsp->vfs_vnodecovered;
507				VN_HOLD(vp);
508				VN_RELE(tvp);
509				looping++;
510			} else {
511				/*
512				 * Indirect looping can be defined as
513				 * real lookup returning rootvp of the current
514				 * tree in any level of recursion.
515				 *
516				 * This check is useful if there are multiple
517				 * levels of lofs indirections. Suppose vnode X
518				 * in the current lookup has as its real vnode
519				 * another lofs node. Y = realvp(X) Y should be
520				 * a lofs node for the check to continue or Y
521				 * is not the rootvp of X.
522				 * Ex:- say X and Y are two vnodes
523				 * say real(Y) is X and real(X) is Z
524				 * parent vnode for X and Y is Z
525				 * lookup(Y,"path") say we are looking for Y
526				 * again under Y and we have to return Yc.
527				 * but the lookup of Y under Y doesnot return
528				 * Y the root vnode again here is why.
529				 * 1. lookup(Y,"path of Y") will go to
530				 * 2. lookup(real(Y),"path of Y") and then to
531				 * 3. lookup(real(X),"path of Y").
532				 * and now what lookup level 1 sees is the
533				 * outcome of 2 but the vnode Y is due to
534				 * lookup(Z,"path of Y") so we have to skip
535				 * intermediate levels to find if in any level
536				 * there is a looping.
537				 */
538				is_indirectloop = 0;
539				nonlovp = vp;
540				while (
541				    vfs_matchops(nonlovp->v_vfsp, lo_vfsops) &&
542				    !(is_indirectloop)) {
543					if (li->li_rootvp  == nonlovp) {
544						is_indirectloop++;
545						break;
546					}
547					nonlovp = realvp(nonlovp);
548				}
549
550				if (is_indirectloop) {
551					VN_RELE(vp);
552					vp = nonlovp;
553					vp = vp->v_vfsp->vfs_vnodecovered;
554					VN_HOLD(vp);
555					looping++;
556				}
557			}
558		} else {
559			/*
560			 * come here only because of the interaction between
561			 * the autofs and lofs.
562			 *
563			 * Lookup of "/net/X/net/X" will return a shadow of
564			 * an autonode X_a which we call X_l.
565			 *
566			 * Lookup of anything under X_l, will trigger a call to
567			 * auto_lookup(X_a,nm) which will eventually call
568			 * lo_lookup(X_lr,nm) where X_lr is the root vnode of
569			 * the current lofs.
570			 *
571			 * We come here only when we are called with X_l as dvp
572			 * and look for something underneath.
573			 *
574			 * Now that an autofs/lofs looping condition has been
575			 * identified any directory vnode contained within
576			 * dvp will be set to the vnode covered by the
577			 * mounted autofs. Thus all directories within dvp
578			 * will appear empty hence teminating the looping.
579			 * The LO_AUTOLOOP flag is set on the returned lonode
580			 * to indicate the termination of the autofs/lofs
581			 * looping. This is required for the correct behaviour
582			 * when performing a dotdot.
583			 */
584			realdvp = realvp(dvp);
585			while (vfs_matchops(realdvp->v_vfsp, lo_vfsops)) {
586				realdvp = realvp(realdvp);
587			}
588
589			error = VFS_ROOT(realdvp->v_vfsp, &tvp);
590			if (error)
591				goto out;
592			/*
593			 * tvp now contains the rootvp of the vfs of the
594			 * real vnode of dvp. The directory vnode vp is set
595			 * to the covered vnode to terminate looping. No
596			 * distinction is made between any vp as all directory
597			 * vnodes contained in dvp are returned as the covered
598			 * vnode.
599			 */
600			VN_RELE(vp);
601			vp = tvp;	/* possibly is an autonode */
602
603			/*
604			 * Need to find the covered vnode
605			 */
606			if (vp->v_vfsp->vfs_vnodecovered == NULL) {
607				/*
608				 * We don't have a covered vnode so this isn't
609				 * an autonode. To find the autonode simply
610				 * find the vnode covered by the lofs rootvp.
611				 */
612				vp = li->li_rootvp;
613				vp = vp->v_vfsp->vfs_vnodecovered;
614				VN_RELE(tvp);
615				error = VFS_ROOT(vp->v_vfsp, &tvp);
616				if (error)
617					goto out;
618				vp = tvp;	/* now this is an autonode */
619				if (vp->v_vfsp->vfs_vnodecovered == NULL) {
620					/*
621					 * Still can't find a covered vnode.
622					 * Fail the lookup, or we'd loop.
623					 */
624					error = ENOENT;
625					goto out;
626				}
627			}
628			vp = vp->v_vfsp->vfs_vnodecovered;
629			VN_HOLD(vp);
630			VN_RELE(tvp);
631			/*
632			 * Force the creation of a new lnode even if the hash
633			 * table contains a lnode that references this vnode.
634			 */
635			mkflag = LOF_FORCE;
636			autoloop++;
637		}
638	}
639	*vpp = makelonode(vp, li, mkflag);
640
641	if ((looping) ||
642	    (((vtol(dvp))->lo_looping & LO_LOOPING) && !doingdotdot)) {
643		(vtol(*vpp))->lo_looping |= LO_LOOPING;
644	}
645
646	if (autoloop) {
647		(vtol(*vpp))->lo_looping |= LO_AUTOLOOP;
648	}
649
650out:
651	if (error != 0 && vp != NULL)
652		VN_RELE(vp);
653#ifdef LODEBUG
654	lo_dprint(4,
655	"lo_lookup dvp %x realdvp %x nm '%s' newvp %x real vp %x error %d\n",
656	    dvp, realvp(dvp), nm, *vpp, vp, error);
657#endif
658	return (error);
659}
660
661/*ARGSUSED*/
662static int
663lo_create(
664	vnode_t *dvp,
665	char *nm,
666	struct vattr *va,
667	enum vcexcl exclusive,
668	int mode,
669	vnode_t **vpp,
670	struct cred *cr,
671	int flag,
672	caller_context_t *ct,
673	vsecattr_t *vsecp)
674{
675	int error;
676	vnode_t *vp = NULL;
677
678#ifdef LODEBUG
679	lo_dprint(4, "lo_create vp %p realvp %p\n", dvp, realvp(dvp));
680#endif
681	if (*nm == '\0') {
682		ASSERT(vpp && dvp == *vpp);
683		vp = realvp(*vpp);
684	}
685
686	error = VOP_CREATE(realvp(dvp), nm, va, exclusive, mode, &vp, cr, flag,
687	    ct, vsecp);
688	if (!error) {
689		*vpp = makelonode(vp, vtoli(dvp->v_vfsp), 0);
690		if (IS_DEVVP(*vpp)) {
691			vnode_t *svp;
692
693			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
694			VN_RELE(*vpp);
695			if (svp == NULL)
696				error = ENOSYS;
697			else
698				*vpp = svp;
699		}
700	} else if (error == ENOSYS && exclusive == NONEXCL &&
701	    dvp == vtoli(dvp->v_vfsp)->li_rootvp &&
702	    realvp(dvp)->v_type == VREG) {
703		/*
704		 * We have a single regular file lofs mounted, thus the file is
705		 * the root vnode (the directory vp is the file vp). Some
706		 * underlying file systems (e.g. tmpfs or ufs) properly handle
707		 * this style of create but at least zfs won't support create
708		 * this way (see zfs_fvnodeops_template which has fs_nosys for
709		 * the vop_create entry because zfs_create doesn't work
710		 * properly for this case).
711		 */
712		if ((error = VOP_ACCESS(dvp, mode, 0, cr, NULL)) == 0) {
713			/*
714			 * Since we already know the vnode for the existing
715			 * file we can handle create as a no-op, as expected,
716			 * truncating the file if necessary.
717			 */
718			struct vattr vattr;
719
720			vattr.va_size = 0;
721			vattr.va_mask = AT_SIZE;
722
723			if ((va->va_mask & AT_SIZE) != 0 && va->va_size == 0 &&
724			    VOP_SETATTR(dvp, &vattr, 0, CRED(), NULL) != 0)
725				return (error);
726
727			/*
728			 * vn_createat will do a vn_rele on the file if it is
729			 * pre-existing, which it is in the case of a single
730			 * file mounted as the root. Thus, when we eventually
731			 * close the file the count will already be 1 so the
732			 * vnode would be freed. To prevent that, we add an
733			 * extra hold here.
734			 */
735			VN_HOLD(dvp);
736			*vpp = dvp;
737			error = 0;
738		}
739	}
740
741	return (error);
742}
743
744static int
745lo_remove(
746	vnode_t *dvp,
747	char *nm,
748	struct cred *cr,
749	caller_context_t *ct,
750	int flags)
751{
752#ifdef LODEBUG
753	lo_dprint(4, "lo_remove vp %p realvp %p\n", dvp, realvp(dvp));
754#endif
755	dvp = realvp(dvp);
756	return (VOP_REMOVE(dvp, nm, cr, ct, flags));
757}
758
759static int
760lo_link(
761	vnode_t *tdvp,
762	vnode_t *vp,
763	char *tnm,
764	struct cred *cr,
765	caller_context_t *ct,
766	int flags)
767{
768	vnode_t *realvp;
769
770#ifdef LODEBUG
771	lo_dprint(4, "lo_link vp %p realvp %p\n", vp, realvp(vp));
772#endif
773
774	/*
775	 * The source and destination vnodes may be in different lofs
776	 * filesystems sharing the same underlying filesystem, so we need to
777	 * make sure that the filesystem containing the source vnode is not
778	 * mounted read-only (vn_link() has already checked the target vnode).
779	 *
780	 * In a situation such as:
781	 *
782	 * /data	- regular filesystem
783	 * /foo		- lofs mount of /data/foo
784	 * /bar		- read-only lofs mount of /data/bar
785	 *
786	 * This disallows a link from /bar/somefile to /foo/somefile,
787	 * which would otherwise allow changes to somefile on the read-only
788	 * mounted /bar.
789	 */
790
791	if (vn_is_readonly(vp)) {
792		return (EROFS);
793	}
794	while (vn_matchops(vp, lo_vnodeops)) {
795		vp = realvp(vp);
796	}
797
798	/*
799	 * In the case where the source vnode is on another stacking
800	 * filesystem (such as specfs), the loop above will
801	 * terminate before finding the true underlying vnode.
802	 *
803	 * We use VOP_REALVP here to continue the search.
804	 */
805	if (VOP_REALVP(vp, &realvp, ct) == 0)
806		vp = realvp;
807
808	while (vn_matchops(tdvp, lo_vnodeops)) {
809		tdvp = realvp(tdvp);
810	}
811	if (vp->v_vfsp != tdvp->v_vfsp)
812		return (EXDEV);
813	return (VOP_LINK(tdvp, vp, tnm, cr, ct, flags));
814}
815
816static int
817lo_rename(
818	vnode_t *odvp,
819	char *onm,
820	vnode_t *ndvp,
821	char *nnm,
822	struct cred *cr,
823	caller_context_t *ct,
824	int flags)
825{
826	vnode_t *tnvp;
827
828#ifdef LODEBUG
829	lo_dprint(4, "lo_rename vp %p realvp %p\n", odvp, realvp(odvp));
830#endif
831	/*
832	 * If we are coming from a loop back mounted fs, that has been
833	 * mounted in the same filesystem as where we want to move to,
834	 * and that filesystem is read/write, but the lofs filesystem is
835	 * read only, we don't want to allow a rename of the file. The
836	 * vn_rename code checks to be sure the target is read/write already
837	 * so that is not necessary here. However, consider the following
838	 * example:
839	 *		/ - regular root fs
840	 *		/foo - directory in root
841	 *		/foo/bar - file in foo directory(in root fs)
842	 *		/baz - directory in root
843	 *		mount -F lofs -o ro /foo /baz - all still in root
844	 *			directory
845	 * The fact that we mounted /foo on /baz read only should stop us
846	 * from renaming the file /foo/bar /bar, but it doesn't since
847	 * / is read/write. We are still renaming here since we are still
848	 * in the same filesystem, it is just that we do not check to see
849	 * if the filesystem we are coming from in this case is read only.
850	 */
851	if (odvp->v_vfsp->vfs_flag & VFS_RDONLY)
852		return (EROFS);
853	/*
854	 * We need to make sure we're not trying to remove a mount point for a
855	 * filesystem mounted on top of lofs, which only we know about.
856	 */
857	if (vn_matchops(ndvp, lo_vnodeops))	/* Not our problem. */
858		goto rename;
859
860	/*
861	 * XXXci - Once case-insensitive behavior is implemented, it should
862	 * be added here.
863	 */
864	if (VOP_LOOKUP(ndvp, nnm, &tnvp, NULL, 0, NULL, cr,
865	    ct, NULL, NULL) != 0)
866		goto rename;
867	if (tnvp->v_type != VDIR) {
868		VN_RELE(tnvp);
869		goto rename;
870	}
871	if (vn_mountedvfs(tnvp)) {
872		VN_RELE(tnvp);
873		return (EBUSY);
874	}
875	VN_RELE(tnvp);
876rename:
877	/*
878	 * Since the case we're dealing with above can happen at any layer in
879	 * the stack of lofs filesystems, we need to recurse down the stack,
880	 * checking to see if there are any instances of a filesystem mounted on
881	 * top of lofs. In order to keep on using the lofs version of
882	 * VOP_RENAME(), we make sure that while the target directory is of type
883	 * lofs, the source directory (the one used for getting the fs-specific
884	 * version of VOP_RENAME()) is also of type lofs.
885	 */
886	if (vn_matchops(ndvp, lo_vnodeops)) {
887		ndvp = realvp(ndvp);	/* Check the next layer */
888	} else {
889		/*
890		 * We can go fast here
891		 */
892		while (vn_matchops(odvp, lo_vnodeops)) {
893			odvp = realvp(odvp);
894		}
895		if (odvp->v_vfsp != ndvp->v_vfsp)
896			return (EXDEV);
897	}
898	return (VOP_RENAME(odvp, onm, ndvp, nnm, cr, ct, flags));
899}
900
901static int
902lo_mkdir(
903	vnode_t *dvp,
904	char *nm,
905	struct vattr *va,
906	vnode_t **vpp,
907	struct cred *cr,
908	caller_context_t *ct,
909	int flags,
910	vsecattr_t *vsecp)
911{
912	int error;
913
914#ifdef LODEBUG
915	lo_dprint(4, "lo_mkdir vp %p realvp %p\n", dvp, realvp(dvp));
916#endif
917	error = VOP_MKDIR(realvp(dvp), nm, va, vpp, cr, ct, flags, vsecp);
918	if (!error)
919		*vpp = makelonode(*vpp, vtoli(dvp->v_vfsp), 0);
920	return (error);
921}
922
923static int
924lo_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
925{
926#ifdef LODEBUG
927	lo_dprint(4, "lo_realvp %p\n", vp);
928#endif
929	while (vn_matchops(vp, lo_vnodeops))
930		vp = realvp(vp);
931
932	if (VOP_REALVP(vp, vpp, ct) != 0)
933		*vpp = vp;
934	return (0);
935}
936
937static int
938lo_rmdir(
939	vnode_t *dvp,
940	char *nm,
941	vnode_t *cdir,
942	struct cred *cr,
943	caller_context_t *ct,
944	int flags)
945{
946	vnode_t *rvp = cdir;
947
948#ifdef LODEBUG
949	lo_dprint(4, "lo_rmdir vp %p realvp %p\n", dvp, realvp(dvp));
950#endif
951	/* if cdir is lofs vnode ptr get its real vnode ptr */
952	if (vn_matchops(dvp, vn_getops(rvp)))
953		(void) lo_realvp(cdir, &rvp, ct);
954	dvp = realvp(dvp);
955	return (VOP_RMDIR(dvp, nm, rvp, cr, ct, flags));
956}
957
958static int
959lo_symlink(
960	vnode_t *dvp,
961	char *lnm,
962	struct vattr *tva,
963	char *tnm,
964	struct cred *cr,
965	caller_context_t *ct,
966	int flags)
967{
968#ifdef LODEBUG
969	lo_dprint(4, "lo_symlink vp %p realvp %p\n", dvp, realvp(dvp));
970#endif
971	dvp = realvp(dvp);
972	return (VOP_SYMLINK(dvp, lnm, tva, tnm, cr, ct, flags));
973}
974
975static int
976lo_readlink(
977	vnode_t *vp,
978	struct uio *uiop,
979	struct cred *cr,
980	caller_context_t *ct)
981{
982	vp = realvp(vp);
983	return (VOP_READLINK(vp, uiop, cr, ct));
984}
985
986static int
987lo_readdir(
988	vnode_t *vp,
989	struct uio *uiop,
990	struct cred *cr,
991	int *eofp,
992	caller_context_t *ct,
993	int flags)
994{
995#ifdef LODEBUG
996	lo_dprint(4, "lo_readdir vp %p realvp %p\n", vp, realvp(vp));
997#endif
998	vp = realvp(vp);
999	return (VOP_READDIR(vp, uiop, cr, eofp, ct, flags));
1000}
1001
1002static int
1003lo_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
1004{
1005	vp = realvp(vp);
1006	return (VOP_RWLOCK(vp, write_lock, ct));
1007}
1008
1009static void
1010lo_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
1011{
1012	vp = realvp(vp);
1013	VOP_RWUNLOCK(vp, write_lock, ct);
1014}
1015
1016static int
1017lo_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1018{
1019	vp = realvp(vp);
1020	return (VOP_SEEK(vp, ooff, noffp, ct));
1021}
1022
1023static int
1024lo_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
1025{
1026	while (vn_matchops(vp1, lo_vnodeops))
1027		vp1 = realvp(vp1);
1028	while (vn_matchops(vp2, lo_vnodeops))
1029		vp2 = realvp(vp2);
1030	return (VOP_CMP(vp1, vp2, ct));
1031}
1032
1033static int
1034lo_frlock(
1035	vnode_t *vp,
1036	int cmd,
1037	struct flock64 *bfp,
1038	int flag,
1039	offset_t offset,
1040	struct flk_callback *flk_cbp,
1041	cred_t *cr,
1042	caller_context_t *ct)
1043{
1044	vp = realvp(vp);
1045	return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1046}
1047
1048static int
1049lo_space(
1050	vnode_t *vp,
1051	int cmd,
1052	struct flock64 *bfp,
1053	int flag,
1054	offset_t offset,
1055	struct cred *cr,
1056	caller_context_t *ct)
1057{
1058	vp = realvp(vp);
1059	return (VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct));
1060}
1061
1062static int
1063lo_getpage(
1064	vnode_t *vp,
1065	offset_t off,
1066	size_t len,
1067	uint_t *prot,
1068	struct page *parr[],
1069	size_t psz,
1070	struct seg *seg,
1071	caddr_t addr,
1072	enum seg_rw rw,
1073	struct cred *cr,
1074	caller_context_t *ct)
1075{
1076	vp = realvp(vp);
1077	return (VOP_GETPAGE(vp, off, len, prot, parr, psz, seg, addr, rw, cr,
1078	    ct));
1079}
1080
1081static int
1082lo_putpage(
1083	vnode_t *vp,
1084	offset_t off,
1085	size_t len,
1086	int flags,
1087	struct cred *cr,
1088	caller_context_t *ct)
1089{
1090	vp = realvp(vp);
1091	return (VOP_PUTPAGE(vp, off, len, flags, cr, ct));
1092}
1093
1094static int
1095lo_map(
1096	vnode_t *vp,
1097	offset_t off,
1098	struct as *as,
1099	caddr_t *addrp,
1100	size_t len,
1101	uchar_t prot,
1102	uchar_t maxprot,
1103	uint_t flags,
1104	struct cred *cr,
1105	caller_context_t *ct)
1106{
1107	vp = realvp(vp);
1108	return (VOP_MAP(vp, off, as, addrp, len, prot, maxprot, flags, cr, ct));
1109}
1110
1111static int
1112lo_addmap(
1113	vnode_t *vp,
1114	offset_t off,
1115	struct as *as,
1116	caddr_t addr,
1117	size_t len,
1118	uchar_t prot,
1119	uchar_t maxprot,
1120	uint_t flags,
1121	struct cred *cr,
1122	caller_context_t *ct)
1123{
1124	vp = realvp(vp);
1125	return (VOP_ADDMAP(vp, off, as, addr, len, prot, maxprot, flags, cr,
1126	    ct));
1127}
1128
1129static int
1130lo_delmap(
1131	vnode_t *vp,
1132	offset_t off,
1133	struct as *as,
1134	caddr_t addr,
1135	size_t len,
1136	uint_t prot,
1137	uint_t maxprot,
1138	uint_t flags,
1139	struct cred *cr,
1140	caller_context_t *ct)
1141{
1142	vp = realvp(vp);
1143	return (VOP_DELMAP(vp, off, as, addr, len, prot, maxprot, flags, cr,
1144	    ct));
1145}
1146
1147static int
1148lo_poll(
1149	vnode_t *vp,
1150	short events,
1151	int anyyet,
1152	short *reventsp,
1153	struct pollhead **phpp,
1154	caller_context_t *ct)
1155{
1156	vp = realvp(vp);
1157	return (VOP_POLL(vp, events, anyyet, reventsp, phpp, ct));
1158}
1159
1160static int
1161lo_dump(vnode_t *vp, caddr_t addr, offset_t bn, offset_t count,
1162    caller_context_t *ct)
1163{
1164	vp = realvp(vp);
1165	return (VOP_DUMP(vp, addr, bn, count, ct));
1166}
1167
1168static int
1169lo_pathconf(
1170	vnode_t *vp,
1171	int cmd,
1172	ulong_t *valp,
1173	struct cred *cr,
1174	caller_context_t *ct)
1175{
1176	vp = realvp(vp);
1177	return (VOP_PATHCONF(vp, cmd, valp, cr, ct));
1178}
1179
1180static int
1181lo_pageio(
1182	vnode_t *vp,
1183	struct page *pp,
1184	u_offset_t io_off,
1185	size_t io_len,
1186	int flags,
1187	cred_t *cr,
1188	caller_context_t *ct)
1189{
1190	vp = realvp(vp);
1191	return (VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr, ct));
1192}
1193
1194static void
1195lo_dispose(
1196	vnode_t *vp,
1197	page_t *pp,
1198	int fl,
1199	int dn,
1200	cred_t *cr,
1201	caller_context_t *ct)
1202{
1203	vp = realvp(vp);
1204	if (vp != NULL && !VN_ISKAS(vp))
1205		VOP_DISPOSE(vp, pp, fl, dn, cr, ct);
1206}
1207
1208static int
1209lo_setsecattr(
1210	vnode_t *vp,
1211	vsecattr_t *secattr,
1212	int flags,
1213	struct cred *cr,
1214	caller_context_t *ct)
1215{
1216	if (vn_is_readonly(vp))
1217		return (EROFS);
1218	vp = realvp(vp);
1219	return (VOP_SETSECATTR(vp, secattr, flags, cr, ct));
1220}
1221
1222static int
1223lo_getsecattr(
1224	vnode_t *vp,
1225	vsecattr_t *secattr,
1226	int flags,
1227	struct cred *cr,
1228	caller_context_t *ct)
1229{
1230	vp = realvp(vp);
1231	return (VOP_GETSECATTR(vp, secattr, flags, cr, ct));
1232}
1233
1234static int
1235lo_shrlock(
1236	vnode_t *vp,
1237	int cmd,
1238	struct shrlock *shr,
1239	int flag,
1240	cred_t *cr,
1241	caller_context_t *ct)
1242{
1243	vp = realvp(vp);
1244	return (VOP_SHRLOCK(vp, cmd, shr, flag, cr, ct));
1245}
1246
/*
 * Loopback vnode operations vector.
 *
 * lo_vnodeops is the ops pointer that the routines in this file compare
 * against (via vn_matchops()) to recognise a lofs vnode.  It is not
 * assigned in this file -- presumably it is built from the template below
 * during file system initialisation; confirm against the lofs vfs code.
 *
 * lo_vnodeops_template pairs each vnode operation name with the lofs
 * routine implementing it.  The table is terminated by a NULL, NULL entry.
 */

struct vnodeops *lo_vnodeops;

const fs_operation_def_t lo_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = lo_open },
	VOPNAME_CLOSE,		{ .vop_close = lo_close },
	VOPNAME_READ,		{ .vop_read = lo_read },
	VOPNAME_WRITE,		{ .vop_write = lo_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = lo_ioctl },
	VOPNAME_SETFL,		{ .vop_setfl = lo_setfl },
	VOPNAME_GETATTR,	{ .vop_getattr = lo_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = lo_setattr },
	VOPNAME_ACCESS,		{ .vop_access = lo_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = lo_lookup },
	VOPNAME_CREATE,		{ .vop_create = lo_create },
	VOPNAME_REMOVE,		{ .vop_remove = lo_remove },
	VOPNAME_LINK,		{ .vop_link = lo_link },
	VOPNAME_RENAME,		{ .vop_rename = lo_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = lo_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = lo_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = lo_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = lo_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = lo_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = lo_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = lo_inactive },
	VOPNAME_FID,		{ .vop_fid = lo_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = lo_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = lo_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = lo_seek },
	VOPNAME_CMP,		{ .vop_cmp = lo_cmp },
	VOPNAME_FRLOCK,		{ .vop_frlock = lo_frlock },
	VOPNAME_SPACE,		{ .vop_space = lo_space },
	VOPNAME_REALVP,		{ .vop_realvp = lo_realvp },
	VOPNAME_GETPAGE,	{ .vop_getpage = lo_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = lo_putpage },
	VOPNAME_MAP,		{ .vop_map = lo_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = lo_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = lo_delmap },
	VOPNAME_POLL,		{ .vop_poll = lo_poll },
	VOPNAME_DUMP,		{ .vop_dump = lo_dump },
	/* Unlike every other entry, DUMPCTL is routed to fs_error. */
	VOPNAME_DUMPCTL,	{ .error = fs_error },	/* XXX - why? */
	VOPNAME_PATHCONF,	{ .vop_pathconf = lo_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = lo_pageio },
	VOPNAME_DISPOSE,	{ .vop_dispose = lo_dispose },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = lo_setsecattr },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = lo_getsecattr },
	VOPNAME_SHRLOCK,	{ .vop_shrlock = lo_shrlock },
	NULL,			NULL
};
1299