1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
25  */
26 
27 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 #include <sys/types.h>
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/file.h>
44 #include <sys/errno.h>
45 #include <sys/cred.h>
46 #include <sys/user.h>
47 #include <sys/uio.h>
48 #include <sys/vfs.h>
49 #include <sys/vnode.h>
50 #include <sys/pathname.h>
51 #include <sys/proc.h>
52 #include <sys/vtrace.h>
53 #include <sys/sysmacros.h>
54 #include <sys/debug.h>
55 #include <sys/dirent.h>
56 #include <sys/zone.h>
57 #include <sys/dnlc.h>
58 #include <sys/fs/snode.h>
59 
60 /*
61  * Starting at current directory, translate pathname pnp to end.
62  * Leave pathname of final component in pnp, return the vnode
63  * for the final component in *compvpp, and return the vnode
64  * for the parent of the final component in dirvpp.
65  *
66  * This is the central routine in pathname translation and handles
67  * multiple components in pathnames, separating them at /'s.  It also
68  * implements mounted file systems and processes symbolic links.
69  *
70  * vp is the vnode where the directory search should start.
71  *
72  * Reference counts: vp must be held prior to calling this function.  rootvp
73  * should only be held if rootvp != rootdir.
74  */
75 int
lookuppnvp(struct pathname * pnp,struct pathname * rpnp,int flags,vnode_t ** dirvpp,vnode_t ** compvpp,vnode_t * rootvp,vnode_t * vp,cred_t * cr)76 lookuppnvp(
77 	struct pathname *pnp,		/* pathname to lookup */
78 	struct pathname *rpnp,		/* if non-NULL, return resolved path */
79 	int flags,			/* follow symlinks */
80 	vnode_t **dirvpp,		/* ptr for parent vnode */
81 	vnode_t **compvpp,		/* ptr for entry vnode */
82 	vnode_t *rootvp,		/* rootvp */
83 	vnode_t *vp,			/* directory to start search at */
84 	cred_t *cr)			/* user's credential */
85 {
86 	vnode_t *cvp;	/* current component vp */
87 	vnode_t *tvp;	/* addressable temp ptr */
88 	char component[MAXNAMELEN];	/* buffer for component (incl null) */
89 	int error;
90 	int nlink;
91 	int lookup_flags;
92 	struct pathname presrvd; /* case preserved name */
93 	struct pathname *pp = NULL;
94 	vnode_t *startvp;
95 	int must_be_directory = 0;
96 	boolean_t retry_with_kcred;
97 
98 	nlink = 0;
99 	cvp = NULL;
100 	if (rpnp)
101 		rpnp->pn_pathlen = 0;
102 
103 	lookup_flags = dirvpp ? LOOKUP_DIR : 0;
104 	if (flags & FIGNORECASE) {
105 		lookup_flags |= FIGNORECASE;
106 		pn_alloc(&presrvd);
107 		pp = &presrvd;
108 	}
109 
110 	/*
111 	 * Eliminate any trailing slashes in the pathname.
112 	 * If there are any, we must follow all symlinks.
113 	 * Also, we must guarantee that the last component is a directory.
114 	 */
115 	if (pn_fixslash(pnp)) {
116 		flags |= FOLLOW;
117 		must_be_directory = 1;
118 	}
119 
120 	startvp = vp;
121 next:
122 	retry_with_kcred = B_FALSE;
123 
124 	/*
125 	 * Make sure we have a directory.
126 	 */
127 	if (vp->v_type != VDIR) {
128 		error = ENOTDIR;
129 		goto bad;
130 	}
131 
132 	if (rpnp && VN_CMP(vp, rootvp))
133 		(void) pn_set(rpnp, "/");
134 
135 	/*
136 	 * Process the next component of the pathname.
137 	 */
138 	if ((error = pn_getcomponent(pnp, component)) != 0) {
139 		goto bad;
140 	}
141 
142 	/*
143 	 * Handle "..": two special cases.
144 	 * 1. If we're at the root directory (e.g. after chroot or
145 	 *    zone_enter) then change ".." to "." so we can't get
146 	 *    out of this subtree.
147 	 * 2. If this vnode is the root of a mounted file system,
148 	 *    then replace it with the vnode that was mounted on
149 	 *    so that we take the ".." in the other file system.
150 	 */
151 	if (component[0] == '.' && component[1] == '.' && component[2] == 0) {
152 checkforroot:
153 		if (VN_CMP(vp, rootvp)) {
154 			component[1] = '\0';
155 		} else if (vp->v_flag & VROOT) {
156 			vfs_t *vfsp;
157 			cvp = vp;
158 
159 			/*
160 			 * While we deal with the vfs pointer from the vnode
161 			 * the filesystem could have been forcefully unmounted
162 			 * and the vnode's v_vfsp could have been invalidated
163 			 * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it
164 			 * with vfs_rlock_wait/vfs_unlock.
165 			 * It is safe to use the v_vfsp even it is freed by
166 			 * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock
167 			 * do not dereference v_vfsp. It is just used as a
168 			 * magic cookie.
169 			 * One more corner case here is the memory getting
170 			 * reused for another vfs structure. In this case
171 			 * lookuppnvp's vfs_rlock_wait will succeed, domount's
172 			 * vfs_lock will fail and domount will bail out with an
173 			 * error (EBUSY).
174 			 */
175 			vfsp = cvp->v_vfsp;
176 
177 			/*
178 			 * This lock is used to synchronize
179 			 * mounts/unmounts and lookups.
180 			 * Threads doing mounts/unmounts hold the
181 			 * writers version vfs_lock_wait().
182 			 */
183 
184 			vfs_rlock_wait(vfsp);
185 
186 			/*
187 			 * If this vnode is on a file system that
188 			 * has been forcibly unmounted,
189 			 * we can't proceed. Cancel this operation
190 			 * and return EIO.
191 			 *
192 			 * vfs_vnodecovered is NULL if unmounted.
193 			 * Currently, nfs uses VFS_UNMOUNTED to
194 			 * check if it's a forced-umount. Keep the
195 			 * same checking here as well even though it
196 			 * may not be needed.
197 			 */
198 			if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) ||
199 			    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
200 				vfs_unlock(vfsp);
201 				VN_RELE(cvp);
202 				if (pp)
203 					pn_free(pp);
204 				return (EIO);
205 			}
206 			VN_HOLD(vp);
207 			vfs_unlock(vfsp);
208 			VN_RELE(cvp);
209 			cvp = NULL;
210 			/*
211 			 * Crossing mount points. For eg: We are doing
212 			 * a lookup of ".." for file systems root vnode
213 			 * mounted here, and VOP_LOOKUP() (with covered vnode)
214 			 * will be on underlying file systems mount point
215 			 * vnode. Set retry_with_kcred flag as we might end
216 			 * up doing VOP_LOOKUP() with kcred if required.
217 			 */
218 			retry_with_kcred = B_TRUE;
219 			goto checkforroot;
220 		}
221 	}
222 
223 	/*
224 	 * Perform a lookup in the current directory.
225 	 */
226 	error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags,
227 	    rootvp, cr, NULL, NULL, pp);
228 
229 	/*
230 	 * Retry with kcred - If crossing mount points & error is EACCES.
231 	 *
232 	 * If we are crossing mount points here and doing ".." lookup,
233 	 * VOP_LOOKUP() might fail if the underlying file systems
234 	 * mount point has no execute permission. In cases like these,
235 	 * we retry VOP_LOOKUP() by giving as much privilage as possible
236 	 * by passing kcred credentials.
237 	 *
238 	 * In case of hierarchical file systems, passing kcred still may
239 	 * or may not work.
240 	 * For eg: UFS FS --> Mount NFS FS --> Again mount UFS on some
241 	 *			directory inside NFS FS.
242 	 */
243 	if ((error == EACCES) && retry_with_kcred)
244 		error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags,
245 		    rootvp, zone_kcred(), NULL, NULL, pp);
246 
247 	cvp = tvp;
248 	if (error) {
249 		cvp = NULL;
250 		/*
251 		 * On error, return hard error if
252 		 * (a) we're not at the end of the pathname yet, or
253 		 * (b) the caller didn't want the parent directory, or
254 		 * (c) we failed for some reason other than a missing entry.
255 		 */
256 		if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT)
257 			goto bad;
258 
259 		pn_setlast(pnp);
260 		/*
261 		 * We inform the caller that the desired entry must be
262 		 * a directory by adding a '/' to the component name.
263 		 */
264 		if (must_be_directory && (error = pn_addslash(pnp)) != 0)
265 			goto bad;
266 		*dirvpp = vp;
267 		if (compvpp != NULL)
268 			*compvpp = NULL;
269 		if (rootvp != rootdir)
270 			VN_RELE(rootvp);
271 		if (pp)
272 			pn_free(pp);
273 		return (0);
274 	}
275 
276 	/*
277 	 * Traverse mount points.
278 	 */
279 	if (vn_mountedvfs(cvp) != NULL) {
280 		tvp = cvp;
281 		if ((error = traverse(&tvp)) != 0) {
282 			/*
283 			 * It is required to assign cvp here, because
284 			 * traverse() will return a held vnode which
285 			 * may different than the vnode that was passed
286 			 * in (even in the error case).  If traverse()
287 			 * changes the vnode it releases the original,
288 			 * and holds the new one.
289 			 */
290 			cvp = tvp;
291 			goto bad;
292 		}
293 		cvp = tvp;
294 	}
295 
296 	/*
297 	 * If we hit a symbolic link and there is more path to be
298 	 * translated or this operation does not wish to apply
299 	 * to a link, then place the contents of the link at the
300 	 * front of the remaining pathname.
301 	 */
302 	if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) {
303 		struct pathname linkpath;
304 
305 		if (++nlink > MAXSYMLINKS) {
306 			error = ELOOP;
307 			goto bad;
308 		}
309 		pn_alloc(&linkpath);
310 		if ((error = pn_getsymlink(cvp, &linkpath, cr)) != 0) {
311 			pn_free(&linkpath);
312 			goto bad;
313 		}
314 
315 		if (pn_pathleft(&linkpath) == 0)
316 			(void) pn_set(&linkpath, ".");
317 		error = pn_insert(pnp, &linkpath, strlen(component));
318 		pn_free(&linkpath);
319 		if (error)
320 			goto bad;
321 		VN_RELE(cvp);
322 		cvp = NULL;
323 		if (pnp->pn_pathlen == 0) {
324 			error = ENOENT;
325 			goto bad;
326 		}
327 		if (pnp->pn_path[0] == '/') {
328 			do {
329 				pnp->pn_path++;
330 				pnp->pn_pathlen--;
331 			} while (pnp->pn_path[0] == '/');
332 			VN_RELE(vp);
333 			vp = rootvp;
334 			VN_HOLD(vp);
335 		}
336 		if (pn_fixslash(pnp)) {
337 			flags |= FOLLOW;
338 			must_be_directory = 1;
339 		}
340 		goto next;
341 	}
342 
343 	/*
344 	 * If rpnp is non-NULL, remember the resolved path name therein.
345 	 * Do not include "." components.  Collapse occurrences of
346 	 * "previous/..", so long as "previous" is not itself "..".
347 	 * Exhausting rpnp results in error ENAMETOOLONG.
348 	 */
349 	if (rpnp && strcmp(component, ".") != 0) {
350 		size_t len;
351 
352 		if (strcmp(component, "..") == 0 &&
353 		    rpnp->pn_pathlen != 0 &&
354 		    !((rpnp->pn_pathlen > 2 &&
355 		    strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) ||
356 		    (rpnp->pn_pathlen == 2 &&
357 		    strncmp(rpnp->pn_path, "..", 2) == 0))) {
358 			while (rpnp->pn_pathlen &&
359 			    rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
360 				rpnp->pn_pathlen--;
361 			if (rpnp->pn_pathlen > 1)
362 				rpnp->pn_pathlen--;
363 			rpnp->pn_path[rpnp->pn_pathlen] = '\0';
364 		} else {
365 			if (rpnp->pn_pathlen != 0 &&
366 			    rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
367 				rpnp->pn_path[rpnp->pn_pathlen++] = '/';
368 			if (flags & FIGNORECASE) {
369 				/*
370 				 * Return the case-preserved name
371 				 * within the resolved path.
372 				 */
373 				error = copystr(pp->pn_buf,
374 				    rpnp->pn_path + rpnp->pn_pathlen,
375 				    rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
376 			} else {
377 				error = copystr(component,
378 				    rpnp->pn_path + rpnp->pn_pathlen,
379 				    rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
380 			}
381 			if (error)	/* copystr() returns ENAMETOOLONG */
382 				goto bad;
383 			rpnp->pn_pathlen += (len - 1);
384 			ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen);
385 		}
386 	}
387 
388 	/*
389 	 * If no more components, return last directory (if wanted) and
390 	 * last component (if wanted).
391 	 */
392 	if (pn_pathleft(pnp) == 0) {
393 		/*
394 		 * If there was a trailing slash in the pathname,
395 		 * make sure the last component is a directory.
396 		 */
397 		if (must_be_directory && cvp->v_type != VDIR) {
398 			error = ENOTDIR;
399 			goto bad;
400 		}
401 		if (dirvpp != NULL) {
402 			/*
403 			 * Check that we have the real parent and not
404 			 * an alias of the last component.
405 			 */
406 			if (vn_compare(vp, cvp)) {
407 				pn_setlast(pnp);
408 				VN_RELE(vp);
409 				VN_RELE(cvp);
410 				if (rootvp != rootdir)
411 					VN_RELE(rootvp);
412 				if (pp)
413 					pn_free(pp);
414 				return (EINVAL);
415 			}
416 			*dirvpp = vp;
417 		} else
418 			VN_RELE(vp);
419 		if (pnp->pn_path == pnp->pn_buf)
420 			(void) pn_set(pnp, ".");
421 		else
422 			pn_setlast(pnp);
423 		if (rpnp) {
424 			if (VN_CMP(cvp, rootvp))
425 				(void) pn_set(rpnp, "/");
426 			else if (rpnp->pn_pathlen == 0)
427 				(void) pn_set(rpnp, ".");
428 		}
429 
430 		if (compvpp != NULL)
431 			*compvpp = cvp;
432 		else
433 			VN_RELE(cvp);
434 		if (rootvp != rootdir)
435 			VN_RELE(rootvp);
436 		if (pp)
437 			pn_free(pp);
438 		return (0);
439 	}
440 
441 	/*
442 	 * Skip over slashes from end of last component.
443 	 */
444 	while (pnp->pn_path[0] == '/') {
445 		pnp->pn_path++;
446 		pnp->pn_pathlen--;
447 	}
448 
449 	/*
450 	 * Searched through another level of directory:
451 	 * release previous directory handle and save new (result
452 	 * of lookup) as current directory.
453 	 */
454 	VN_RELE(vp);
455 	vp = cvp;
456 	cvp = NULL;
457 	goto next;
458 
459 bad:
460 	/*
461 	 * Error.  Release vnodes and return.
462 	 */
463 	if (cvp)
464 		VN_RELE(cvp);
465 	/*
466 	 * If the error was ESTALE and the current directory to look in
467 	 * was the root for this lookup, the root for a mounted file
468 	 * system, or the starting directory for lookups, then
469 	 * return ENOENT instead of ESTALE.  In this case, no recovery
470 	 * is possible by the higher level.  If ESTALE was returned for
471 	 * some intermediate directory along the path, then recovery
472 	 * is potentially possible and retrying from the higher level
473 	 * will either correct the situation by purging stale cache
474 	 * entries or eventually get back to the point where no recovery
475 	 * is possible.
476 	 */
477 	if (error == ESTALE &&
478 	    (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp))
479 		error = ENOENT;
480 	VN_RELE(vp);
481 	if (rootvp != rootdir)
482 		VN_RELE(rootvp);
483 	if (pp)
484 		pn_free(pp);
485 	return (error);
486 }
487 
488 /*
489  * Traverse a mount point.  Routine accepts a vnode pointer as a reference
490  * parameter and performs the indirection, releasing the original vnode.
491  */
492 int
traverse(vnode_t ** cvpp)493 traverse(vnode_t **cvpp)
494 {
495 	int error = 0;
496 	vnode_t *cvp;
497 	vnode_t *tvp;
498 	vfs_t *vfsp;
499 
500 	cvp = *cvpp;
501 
502 	/*
503 	 * If this vnode is mounted on, then we transparently indirect
504 	 * to the vnode which is the root of the mounted file system.
505 	 * Before we do this we must check that an unmount is not in
506 	 * progress on this vnode.
507 	 */
508 
509 	for (;;) {
510 		/*
511 		 * Used to try to read lock the vnode here.
512 		 */
513 
514 		/*
515 		 * Reached the end of the mount chain?
516 		 */
517 		vfsp = vn_mountedvfs(cvp);
518 		if (vfsp == NULL) {
519 			break;
520 		}
521 
522 		/*
523 		 * The read lock must be held across the call to VFS_ROOT() to
524 		 * prevent a concurrent unmount from destroying the vfs.
525 		 */
526 		error = VFS_ROOT(vfsp, &tvp);
527 		if (error)
528 			break;
529 
530 		VN_RELE(cvp);
531 
532 		cvp = tvp;
533 	}
534 
535 	*cvpp = cvp;
536 	return (error);
537 }
538 
539 /*
540  * Get the vnode path, relative to the passed rootvp.
541  * Our vncache always fills in v_path, so this is easy.
542  */
543 /* ARGSUSED */
544 int
vnodetopath(vnode_t * vrootp,vnode_t * vp,char * buf,size_t buflen,cred_t * cr)545 vnodetopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr)
546 {
547 	int len, rvp_len = 0;
548 	const char *p = vp->v_path;
549 
550 	if (vrootp)
551 		rvp_len = strlen(vrootp->v_path);
552 	len = strlen(p);
553 	if (rvp_len < len)
554 		p += rvp_len;
555 	else
556 		p = "/";
557 
558 	(void) strlcpy(buf, p, buflen);
559 	return (0);
560 }
561