fake_lookup.c revision 54026d5ae0b8cbf8da5c14341d51f7a3cab03758
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
25 */
26
27/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28/*	  All Rights Reserved  	*/
29
30/*
31 * University Copyright- Copyright (c) 1982, 1986, 1988
32 * The Regents of the University of California
33 * All Rights Reserved
34 *
35 * University Acknowledgment- Portions of this document are derived from
36 * software developed by the University of California, Berkeley, and its
37 * contributors.
38 */
39
40#include <sys/types.h>
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/file.h>
44#include <sys/errno.h>
45#include <sys/cred.h>
46#include <sys/user.h>
47#include <sys/uio.h>
48#include <sys/vfs.h>
49#include <sys/vnode.h>
50#include <sys/pathname.h>
51#include <sys/proc.h>
52#include <sys/vtrace.h>
53#include <sys/sysmacros.h>
54#include <sys/debug.h>
55#include <sys/dirent.h>
56#include <sys/zone.h>
57#include <sys/dnlc.h>
58#include <sys/fs/snode.h>
59
60/*
61 * Starting at current directory, translate pathname pnp to end.
62 * Leave pathname of final component in pnp, return the vnode
63 * for the final component in *compvpp, and return the vnode
64 * for the parent of the final component in dirvpp.
65 *
66 * This is the central routine in pathname translation and handles
67 * multiple components in pathnames, separating them at /'s.  It also
68 * implements mounted file systems and processes symbolic links.
69 *
70 * vp is the vnode where the directory search should start.
71 *
72 * Reference counts: vp must be held prior to calling this function.  rootvp
73 * should only be held if rootvp != rootdir.
74 */
75int
76lookuppnvp(
77	struct pathname *pnp,		/* pathname to lookup */
78	struct pathname *rpnp,		/* if non-NULL, return resolved path */
79	int flags,			/* follow symlinks */
80	vnode_t **dirvpp,		/* ptr for parent vnode */
81	vnode_t **compvpp,		/* ptr for entry vnode */
82	vnode_t *rootvp,		/* rootvp */
83	vnode_t *vp,			/* directory to start search at */
84	cred_t *cr)			/* user's credential */
85{
86	vnode_t *cvp;	/* current component vp */
87	vnode_t *tvp;	/* addressable temp ptr */
88	char component[MAXNAMELEN];	/* buffer for component (incl null) */
89	int error;
90	int nlink;
91	int lookup_flags;
92	struct pathname presrvd; /* case preserved name */
93	struct pathname *pp = NULL;
94	vnode_t *startvp;
95	int must_be_directory = 0;
96	boolean_t retry_with_kcred;
97
98	nlink = 0;
99	cvp = NULL;
100	if (rpnp)
101		rpnp->pn_pathlen = 0;
102
103	lookup_flags = dirvpp ? LOOKUP_DIR : 0;
104	if (flags & FIGNORECASE) {
105		lookup_flags |= FIGNORECASE;
106		pn_alloc(&presrvd);
107		pp = &presrvd;
108	}
109
110	/*
111	 * Eliminate any trailing slashes in the pathname.
112	 * If there are any, we must follow all symlinks.
113	 * Also, we must guarantee that the last component is a directory.
114	 */
115	if (pn_fixslash(pnp)) {
116		flags |= FOLLOW;
117		must_be_directory = 1;
118	}
119
120	startvp = vp;
121next:
122	retry_with_kcred = B_FALSE;
123
124	/*
125	 * Make sure we have a directory.
126	 */
127	if (vp->v_type != VDIR) {
128		error = ENOTDIR;
129		goto bad;
130	}
131
132	if (rpnp && VN_CMP(vp, rootvp))
133		(void) pn_set(rpnp, "/");
134
135	/*
136	 * Process the next component of the pathname.
137	 */
138	if ((error = pn_getcomponent(pnp, component)) != 0) {
139		goto bad;
140	}
141
142	/*
143	 * Handle "..": two special cases.
144	 * 1. If we're at the root directory (e.g. after chroot or
145	 *    zone_enter) then change ".." to "." so we can't get
146	 *    out of this subtree.
147	 * 2. If this vnode is the root of a mounted file system,
148	 *    then replace it with the vnode that was mounted on
149	 *    so that we take the ".." in the other file system.
150	 */
151	if (component[0] == '.' && component[1] == '.' && component[2] == 0) {
152checkforroot:
153		if (VN_CMP(vp, rootvp)) {
154			component[1] = '\0';
155		} else if (vp->v_flag & VROOT) {
156			vfs_t *vfsp;
157			cvp = vp;
158
159			/*
160			 * While we deal with the vfs pointer from the vnode
161			 * the filesystem could have been forcefully unmounted
162			 * and the vnode's v_vfsp could have been invalidated
163			 * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it
164			 * with vfs_rlock_wait/vfs_unlock.
165			 * It is safe to use the v_vfsp even it is freed by
166			 * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock
167			 * do not dereference v_vfsp. It is just used as a
168			 * magic cookie.
169			 * One more corner case here is the memory getting
170			 * reused for another vfs structure. In this case
171			 * lookuppnvp's vfs_rlock_wait will succeed, domount's
172			 * vfs_lock will fail and domount will bail out with an
173			 * error (EBUSY).
174			 */
175			vfsp = cvp->v_vfsp;
176
177			/*
178			 * This lock is used to synchronize
179			 * mounts/unmounts and lookups.
180			 * Threads doing mounts/unmounts hold the
181			 * writers version vfs_lock_wait().
182			 */
183
184			vfs_rlock_wait(vfsp);
185
186			/*
187			 * If this vnode is on a file system that
188			 * has been forcibly unmounted,
189			 * we can't proceed. Cancel this operation
190			 * and return EIO.
191			 *
192			 * vfs_vnodecovered is NULL if unmounted.
193			 * Currently, nfs uses VFS_UNMOUNTED to
194			 * check if it's a forced-umount. Keep the
195			 * same checking here as well even though it
196			 * may not be needed.
197			 */
198			if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) ||
199			    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
200				vfs_unlock(vfsp);
201				VN_RELE(cvp);
202				if (pp)
203					pn_free(pp);
204				return (EIO);
205			}
206			VN_HOLD(vp);
207			vfs_unlock(vfsp);
208			VN_RELE(cvp);
209			cvp = NULL;
210			/*
211			 * Crossing mount points. For eg: We are doing
212			 * a lookup of ".." for file systems root vnode
213			 * mounted here, and VOP_LOOKUP() (with covered vnode)
214			 * will be on underlying file systems mount point
215			 * vnode. Set retry_with_kcred flag as we might end
216			 * up doing VOP_LOOKUP() with kcred if required.
217			 */
218			retry_with_kcred = B_TRUE;
219			goto checkforroot;
220		}
221	}
222
223	/*
224	 * Perform a lookup in the current directory.
225	 */
226	error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags,
227	    rootvp, cr, NULL, NULL, pp);
228
229	/*
230	 * Retry with kcred - If crossing mount points & error is EACCES.
231	 *
232	 * If we are crossing mount points here and doing ".." lookup,
233	 * VOP_LOOKUP() might fail if the underlying file systems
234	 * mount point has no execute permission. In cases like these,
235	 * we retry VOP_LOOKUP() by giving as much privilage as possible
236	 * by passing kcred credentials.
237	 *
238	 * In case of hierarchical file systems, passing kcred still may
239	 * or may not work.
240	 * For eg: UFS FS --> Mount NFS FS --> Again mount UFS on some
241	 *			directory inside NFS FS.
242	 */
243	if ((error == EACCES) && retry_with_kcred)
244		error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags,
245		    rootvp, zone_kcred(), NULL, NULL, pp);
246
247	cvp = tvp;
248	if (error) {
249		cvp = NULL;
250		/*
251		 * On error, return hard error if
252		 * (a) we're not at the end of the pathname yet, or
253		 * (b) the caller didn't want the parent directory, or
254		 * (c) we failed for some reason other than a missing entry.
255		 */
256		if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT)
257			goto bad;
258
259		pn_setlast(pnp);
260		/*
261		 * We inform the caller that the desired entry must be
262		 * a directory by adding a '/' to the component name.
263		 */
264		if (must_be_directory && (error = pn_addslash(pnp)) != 0)
265			goto bad;
266		*dirvpp = vp;
267		if (compvpp != NULL)
268			*compvpp = NULL;
269		if (rootvp != rootdir)
270			VN_RELE(rootvp);
271		if (pp)
272			pn_free(pp);
273		return (0);
274	}
275
276	/*
277	 * Traverse mount points.
278	 */
279	if (vn_mountedvfs(cvp) != NULL) {
280		tvp = cvp;
281		if ((error = traverse(&tvp)) != 0) {
282			/*
283			 * It is required to assign cvp here, because
284			 * traverse() will return a held vnode which
285			 * may different than the vnode that was passed
286			 * in (even in the error case).  If traverse()
287			 * changes the vnode it releases the original,
288			 * and holds the new one.
289			 */
290			cvp = tvp;
291			goto bad;
292		}
293		cvp = tvp;
294	}
295
296	/*
297	 * If we hit a symbolic link and there is more path to be
298	 * translated or this operation does not wish to apply
299	 * to a link, then place the contents of the link at the
300	 * front of the remaining pathname.
301	 */
302	if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) {
303		struct pathname linkpath;
304
305		if (++nlink > MAXSYMLINKS) {
306			error = ELOOP;
307			goto bad;
308		}
309		pn_alloc(&linkpath);
310		if ((error = pn_getsymlink(cvp, &linkpath, cr)) != 0) {
311			pn_free(&linkpath);
312			goto bad;
313		}
314
315		if (pn_pathleft(&linkpath) == 0)
316			(void) pn_set(&linkpath, ".");
317		error = pn_insert(pnp, &linkpath, strlen(component));
318		pn_free(&linkpath);
319		if (error)
320			goto bad;
321		VN_RELE(cvp);
322		cvp = NULL;
323		if (pnp->pn_pathlen == 0) {
324			error = ENOENT;
325			goto bad;
326		}
327		if (pnp->pn_path[0] == '/') {
328			do {
329				pnp->pn_path++;
330				pnp->pn_pathlen--;
331			} while (pnp->pn_path[0] == '/');
332			VN_RELE(vp);
333			vp = rootvp;
334			VN_HOLD(vp);
335		}
336		if (pn_fixslash(pnp)) {
337			flags |= FOLLOW;
338			must_be_directory = 1;
339		}
340		goto next;
341	}
342
343	/*
344	 * If rpnp is non-NULL, remember the resolved path name therein.
345	 * Do not include "." components.  Collapse occurrences of
346	 * "previous/..", so long as "previous" is not itself "..".
347	 * Exhausting rpnp results in error ENAMETOOLONG.
348	 */
349	if (rpnp && strcmp(component, ".") != 0) {
350		size_t len;
351
352		if (strcmp(component, "..") == 0 &&
353		    rpnp->pn_pathlen != 0 &&
354		    !((rpnp->pn_pathlen > 2 &&
355		    strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) ||
356		    (rpnp->pn_pathlen == 2 &&
357		    strncmp(rpnp->pn_path, "..", 2) == 0))) {
358			while (rpnp->pn_pathlen &&
359			    rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
360				rpnp->pn_pathlen--;
361			if (rpnp->pn_pathlen > 1)
362				rpnp->pn_pathlen--;
363			rpnp->pn_path[rpnp->pn_pathlen] = '\0';
364		} else {
365			if (rpnp->pn_pathlen != 0 &&
366			    rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
367				rpnp->pn_path[rpnp->pn_pathlen++] = '/';
368			if (flags & FIGNORECASE) {
369				/*
370				 * Return the case-preserved name
371				 * within the resolved path.
372				 */
373				error = copystr(pp->pn_buf,
374				    rpnp->pn_path + rpnp->pn_pathlen,
375				    rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
376			} else {
377				error = copystr(component,
378				    rpnp->pn_path + rpnp->pn_pathlen,
379				    rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
380			}
381			if (error)	/* copystr() returns ENAMETOOLONG */
382				goto bad;
383			rpnp->pn_pathlen += (len - 1);
384			ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen);
385		}
386	}
387
388	/*
389	 * If no more components, return last directory (if wanted) and
390	 * last component (if wanted).
391	 */
392	if (pn_pathleft(pnp) == 0) {
393		/*
394		 * If there was a trailing slash in the pathname,
395		 * make sure the last component is a directory.
396		 */
397		if (must_be_directory && cvp->v_type != VDIR) {
398			error = ENOTDIR;
399			goto bad;
400		}
401		if (dirvpp != NULL) {
402			/*
403			 * Check that we have the real parent and not
404			 * an alias of the last component.
405			 */
406			if (vn_compare(vp, cvp)) {
407				pn_setlast(pnp);
408				VN_RELE(vp);
409				VN_RELE(cvp);
410				if (rootvp != rootdir)
411					VN_RELE(rootvp);
412				if (pp)
413					pn_free(pp);
414				return (EINVAL);
415			}
416			*dirvpp = vp;
417		} else
418			VN_RELE(vp);
419		if (pnp->pn_path == pnp->pn_buf)
420			(void) pn_set(pnp, ".");
421		else
422			pn_setlast(pnp);
423		if (rpnp) {
424			if (VN_CMP(cvp, rootvp))
425				(void) pn_set(rpnp, "/");
426			else if (rpnp->pn_pathlen == 0)
427				(void) pn_set(rpnp, ".");
428		}
429
430		if (compvpp != NULL)
431			*compvpp = cvp;
432		else
433			VN_RELE(cvp);
434		if (rootvp != rootdir)
435			VN_RELE(rootvp);
436		if (pp)
437			pn_free(pp);
438		return (0);
439	}
440
441	/*
442	 * Skip over slashes from end of last component.
443	 */
444	while (pnp->pn_path[0] == '/') {
445		pnp->pn_path++;
446		pnp->pn_pathlen--;
447	}
448
449	/*
450	 * Searched through another level of directory:
451	 * release previous directory handle and save new (result
452	 * of lookup) as current directory.
453	 */
454	VN_RELE(vp);
455	vp = cvp;
456	cvp = NULL;
457	goto next;
458
459bad:
460	/*
461	 * Error.  Release vnodes and return.
462	 */
463	if (cvp)
464		VN_RELE(cvp);
465	/*
466	 * If the error was ESTALE and the current directory to look in
467	 * was the root for this lookup, the root for a mounted file
468	 * system, or the starting directory for lookups, then
469	 * return ENOENT instead of ESTALE.  In this case, no recovery
470	 * is possible by the higher level.  If ESTALE was returned for
471	 * some intermediate directory along the path, then recovery
472	 * is potentially possible and retrying from the higher level
473	 * will either correct the situation by purging stale cache
474	 * entries or eventually get back to the point where no recovery
475	 * is possible.
476	 */
477	if (error == ESTALE &&
478	    (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp))
479		error = ENOENT;
480	VN_RELE(vp);
481	if (rootvp != rootdir)
482		VN_RELE(rootvp);
483	if (pp)
484		pn_free(pp);
485	return (error);
486}
487
488/*
489 * Traverse a mount point.  Routine accepts a vnode pointer as a reference
490 * parameter and performs the indirection, releasing the original vnode.
491 */
492int
493traverse(vnode_t **cvpp)
494{
495	int error = 0;
496	vnode_t *cvp;
497	vnode_t *tvp;
498	vfs_t *vfsp;
499
500	cvp = *cvpp;
501
502	/*
503	 * If this vnode is mounted on, then we transparently indirect
504	 * to the vnode which is the root of the mounted file system.
505	 * Before we do this we must check that an unmount is not in
506	 * progress on this vnode.
507	 */
508
509	for (;;) {
510		/*
511		 * Used to try to read lock the vnode here.
512		 */
513
514		/*
515		 * Reached the end of the mount chain?
516		 */
517		vfsp = vn_mountedvfs(cvp);
518		if (vfsp == NULL) {
519			break;
520		}
521
522		/*
523		 * The read lock must be held across the call to VFS_ROOT() to
524		 * prevent a concurrent unmount from destroying the vfs.
525		 */
526		error = VFS_ROOT(vfsp, &tvp);
527		if (error)
528			break;
529
530		VN_RELE(cvp);
531
532		cvp = tvp;
533	}
534
535	*cvpp = cvp;
536	return (error);
537}
538
539/*
540 * Get the vnode path, relative to the passed rootvp.
541 * Our vncache always fills in v_path, so this is easy.
542 */
543/* ARGSUSED */
544int
545vnodetopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr)
546{
547	int len, rvp_len = 0;
548	const char *p = vp->v_path;
549
550	if (vrootp)
551		rvp_len = strlen(vrootp->v_path);
552	len = strlen(p);
553	if (rvp_len < len)
554		p += rvp_len;
555	else
556		p = "/";
557
558	(void) strlcpy(buf, p, buflen);
559	return (0);
560}
561