1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
25  * Copyright 2022 RackTop Systems, Inc.
26  */
27 
28 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved	*/
30 
31 /*
32  * University Copyright- Copyright (c) 1982, 1986, 1988
33  * The Regents of the University of California
34  * All Rights Reserved
35  *
36  * University Acknowledgment- Portions of this document are derived from
37  * software developed by the University of California, Berkeley, and its
38  * contributors.
39  */
40 
41 #include <sys/types.h>
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/file.h>
45 #include <sys/errno.h>
46 #include <sys/cred.h>
47 #include <sys/user.h>
48 #include <sys/uio.h>
49 #include <sys/vfs.h>
50 #include <sys/vnode.h>
51 #include <sys/pathname.h>
52 #include <sys/proc.h>
53 #include <sys/vtrace.h>
54 #include <sys/sysmacros.h>
55 #include <sys/debug.h>
56 #include <sys/dirent.h>
57 #include <sys/zone.h>
58 #include <sys/dnlc.h>
59 #include <sys/fs/snode.h>
60 
61 int
lookupname(const char * fnamep,enum uio_seg seg,int followlink,vnode_t ** dirvpp,vnode_t ** compvpp)62 lookupname(
63 	const char *fnamep,
64 	enum uio_seg seg,
65 	int followlink,
66 	vnode_t **dirvpp,
67 	vnode_t **compvpp)
68 {
69 	return (lookupnameatcred(fnamep, seg, followlink, dirvpp, compvpp, NULL,
70 	    CRED()));
71 }
72 
73 /*
74  * Lookup the user file name,
75  * Handle allocation and freeing of pathname buffer, return error.
76  */
77 int
lookupnameatcred(const char * fnamep,enum uio_seg seg,int followlink,vnode_t ** dirvpp,vnode_t ** compvpp,vnode_t * startvp,cred_t * cr)78 lookupnameatcred(
79 	const char *fnamep,		/* user pathname */
80 	enum uio_seg seg,		/* addr space that name is in */
81 	int followlink,			/* follow sym links */
82 	vnode_t **dirvpp,		/* ret for ptr to parent dir vnode */
83 	vnode_t **compvpp,		/* ret for ptr to component vnode */
84 	vnode_t *startvp,		/* start path search from vp */
85 	cred_t *cr)			/* credential */
86 {
87 	char namebuf[TYPICALMAXPATHLEN];
88 	struct pathname lookpn;
89 	int error;
90 
91 	error = pn_get_buf(fnamep, seg, &lookpn, namebuf, sizeof (namebuf));
92 	if (error == 0) {
93 		error = lookuppnatcred(&lookpn, NULL, followlink,
94 		    dirvpp, compvpp, startvp, cr);
95 	}
96 	if (error == ENAMETOOLONG) {
97 		/*
98 		 * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
99 		 */
100 		if ((error = pn_get(fnamep, seg, &lookpn)) != 0)
101 			return (error);
102 		error = lookuppnatcred(&lookpn, NULL, followlink,
103 		    dirvpp, compvpp, startvp, cr);
104 		pn_free(&lookpn);
105 	}
106 
107 	return (error);
108 }
109 
110 /*
111  * Lookup the user file name from a given vp, using a specific credential.
112  */
113 int
lookuppnatcred(struct pathname * pnp,struct pathname * rpnp,int followlink,vnode_t ** dirvpp,vnode_t ** compvpp,vnode_t * startvp,cred_t * cr)114 lookuppnatcred(
115 	struct pathname *pnp,		/* pathname to lookup */
116 	struct pathname *rpnp,		/* if non-NULL, return resolved path */
117 	int followlink,			/* (don't) follow sym links */
118 	vnode_t **dirvpp,		/* ptr for parent vnode */
119 	vnode_t **compvpp,		/* ptr for entry vnode */
120 	vnode_t *startvp,		/* start search from this vp */
121 	cred_t *cr)			/* user credential */
122 {
123 	vnode_t *vp;	/* current directory vp */
124 	vnode_t *rootvp;
125 
126 	if (pnp->pn_pathlen == 0)
127 		return (ENOENT);
128 
129 	/* Simplified for fake_... */
130 	vp = rootvp = rootdir;
131 
132 	/*
133 	 * Skip over leading slashes
134 	 */
135 	if (pnp->pn_path[0] == '/') {
136 		do {
137 			pnp->pn_path++;
138 			pnp->pn_pathlen--;
139 		} while (pnp->pn_path[0] == '/');
140 	}
141 
142 	return (lookuppnvp(pnp, rpnp, followlink, dirvpp,
143 	    compvpp, rootvp, vp, cr));
144 }
145 
146 /*
147  * Starting at current directory, translate pathname pnp to end.
148  * Leave pathname of final component in pnp, return the vnode
149  * for the final component in *compvpp, and return the vnode
150  * for the parent of the final component in dirvpp.
151  *
152  * This is the central routine in pathname translation and handles
153  * multiple components in pathnames, separating them at /'s.  It also
154  * implements mounted file systems and processes symbolic links.
155  *
156  * vp is the vnode where the directory search should start.
157  *
158  * Reference counts: vp must be held prior to calling this function.  rootvp
159  * should only be held if rootvp != rootdir.
160  */
161 int
lookuppnvp(struct pathname * pnp,struct pathname * rpnp,int flags,vnode_t ** dirvpp,vnode_t ** compvpp,vnode_t * rootvp,vnode_t * vp,cred_t * cr)162 lookuppnvp(
163 	struct pathname *pnp,		/* pathname to lookup */
164 	struct pathname *rpnp,		/* if non-NULL, return resolved path */
165 	int flags,			/* follow symlinks */
166 	vnode_t **dirvpp,		/* ptr for parent vnode */
167 	vnode_t **compvpp,		/* ptr for entry vnode */
168 	vnode_t *rootvp,		/* rootvp */
169 	vnode_t *vp,			/* directory to start search at */
170 	cred_t *cr)			/* user's credential */
171 {
172 	vnode_t *cvp;	/* current component vp */
173 	vnode_t *tvp;	/* addressable temp ptr */
174 	char component[MAXNAMELEN];	/* buffer for component (incl null) */
175 	int error;
176 	int nlink;
177 	int lookup_flags;
178 	struct pathname presrvd; /* case preserved name */
179 	struct pathname *pp = NULL;
180 	vnode_t *startvp;
181 	int must_be_directory = 0;
182 	boolean_t retry_with_kcred;
183 
184 	nlink = 0;
185 	cvp = NULL;
186 	if (rpnp)
187 		rpnp->pn_pathlen = 0;
188 
189 	lookup_flags = dirvpp ? LOOKUP_DIR : 0;
190 	if (flags & FIGNORECASE) {
191 		lookup_flags |= FIGNORECASE;
192 		pn_alloc(&presrvd);
193 		pp = &presrvd;
194 	}
195 
196 	/*
197 	 * Eliminate any trailing slashes in the pathname.
198 	 * If there are any, we must follow all symlinks.
199 	 * Also, we must guarantee that the last component is a directory.
200 	 */
201 	if (pn_fixslash(pnp)) {
202 		flags |= FOLLOW;
203 		must_be_directory = 1;
204 	}
205 
206 	startvp = vp;
207 next:
208 	retry_with_kcred = B_FALSE;
209 
210 	/*
211 	 * Make sure we have a directory.
212 	 */
213 	if (vp->v_type != VDIR) {
214 		error = ENOTDIR;
215 		goto bad;
216 	}
217 
218 	if (rpnp && VN_CMP(vp, rootvp))
219 		(void) pn_set(rpnp, "/");
220 
221 	/*
222 	 * Process the next component of the pathname.
223 	 */
224 	if ((error = pn_getcomponent(pnp, component)) != 0) {
225 		goto bad;
226 	}
227 
228 	/*
229 	 * Handle "..": two special cases.
230 	 * 1. If we're at the root directory (e.g. after chroot or
231 	 *    zone_enter) then change ".." to "." so we can't get
232 	 *    out of this subtree.
233 	 * 2. If this vnode is the root of a mounted file system,
234 	 *    then replace it with the vnode that was mounted on
235 	 *    so that we take the ".." in the other file system.
236 	 */
237 	if (component[0] == '.' && component[1] == '.' && component[2] == 0) {
238 checkforroot:
239 		if (VN_CMP(vp, rootvp)) {
240 			component[1] = '\0';
241 		} else if (vp->v_flag & VROOT) {
242 			vfs_t *vfsp;
243 			cvp = vp;
244 
245 			/*
246 			 * While we deal with the vfs pointer from the vnode
247 			 * the filesystem could have been forcefully unmounted
248 			 * and the vnode's v_vfsp could have been invalidated
249 			 * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it
250 			 * with vfs_rlock_wait/vfs_unlock.
251 			 * It is safe to use the v_vfsp even it is freed by
252 			 * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock
253 			 * do not dereference v_vfsp. It is just used as a
254 			 * magic cookie.
255 			 * One more corner case here is the memory getting
256 			 * reused for another vfs structure. In this case
257 			 * lookuppnvp's vfs_rlock_wait will succeed, domount's
258 			 * vfs_lock will fail and domount will bail out with an
259 			 * error (EBUSY).
260 			 */
261 			vfsp = cvp->v_vfsp;
262 
263 			/*
264 			 * This lock is used to synchronize
265 			 * mounts/unmounts and lookups.
266 			 * Threads doing mounts/unmounts hold the
267 			 * writers version vfs_lock_wait().
268 			 */
269 
270 			vfs_rlock_wait(vfsp);
271 
272 			/*
273 			 * If this vnode is on a file system that
274 			 * has been forcibly unmounted,
275 			 * we can't proceed. Cancel this operation
276 			 * and return EIO.
277 			 *
278 			 * vfs_vnodecovered is NULL if unmounted.
279 			 * Currently, nfs uses VFS_UNMOUNTED to
280 			 * check if it's a forced-umount. Keep the
281 			 * same checking here as well even though it
282 			 * may not be needed.
283 			 */
284 			if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) ||
285 			    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
286 				vfs_unlock(vfsp);
287 				VN_RELE(cvp);
288 				if (pp)
289 					pn_free(pp);
290 				return (EIO);
291 			}
292 			VN_HOLD(vp);
293 			vfs_unlock(vfsp);
294 			VN_RELE(cvp);
295 			cvp = NULL;
296 			/*
297 			 * Crossing mount points. For eg: We are doing
298 			 * a lookup of ".." for file systems root vnode
299 			 * mounted here, and VOP_LOOKUP() (with covered vnode)
300 			 * will be on underlying file systems mount point
301 			 * vnode. Set retry_with_kcred flag as we might end
302 			 * up doing VOP_LOOKUP() with kcred if required.
303 			 */
304 			retry_with_kcred = B_TRUE;
305 			goto checkforroot;
306 		}
307 	}
308 
309 	/*
310 	 * Perform a lookup in the current directory.
311 	 */
312 	error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags,
313 	    rootvp, cr, NULL, NULL, pp);
314 
315 	/*
316 	 * Retry with kcred - If crossing mount points & error is EACCES.
317 	 *
318 	 * If we are crossing mount points here and doing ".." lookup,
319 	 * VOP_LOOKUP() might fail if the underlying file systems
320 	 * mount point has no execute permission. In cases like these,
321 	 * we retry VOP_LOOKUP() by giving as much privilage as possible
322 	 * by passing kcred credentials.
323 	 *
324 	 * In case of hierarchical file systems, passing kcred still may
325 	 * or may not work.
326 	 * For eg: UFS FS --> Mount NFS FS --> Again mount UFS on some
327 	 *			directory inside NFS FS.
328 	 */
329 	if ((error == EACCES) && retry_with_kcred)
330 		error = VOP_LOOKUP(vp, component, &tvp, pnp, lookup_flags,
331 		    rootvp, zone_kcred(), NULL, NULL, pp);
332 
333 	cvp = tvp;
334 	if (error) {
335 		cvp = NULL;
336 		/*
337 		 * On error, return hard error if
338 		 * (a) we're not at the end of the pathname yet, or
339 		 * (b) the caller didn't want the parent directory, or
340 		 * (c) we failed for some reason other than a missing entry.
341 		 */
342 		if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT)
343 			goto bad;
344 
345 		pn_setlast(pnp);
346 		/*
347 		 * We inform the caller that the desired entry must be
348 		 * a directory by adding a '/' to the component name.
349 		 */
350 		if (must_be_directory && (error = pn_addslash(pnp)) != 0)
351 			goto bad;
352 		*dirvpp = vp;
353 		if (compvpp != NULL)
354 			*compvpp = NULL;
355 		if (rootvp != rootdir)
356 			VN_RELE(rootvp);
357 		if (pp)
358 			pn_free(pp);
359 		return (0);
360 	}
361 
362 	/*
363 	 * Traverse mount points.
364 	 */
365 	if (vn_mountedvfs(cvp) != NULL) {
366 		tvp = cvp;
367 		if ((error = traverse(&tvp)) != 0) {
368 			/*
369 			 * It is required to assign cvp here, because
370 			 * traverse() will return a held vnode which
371 			 * may different than the vnode that was passed
372 			 * in (even in the error case).  If traverse()
373 			 * changes the vnode it releases the original,
374 			 * and holds the new one.
375 			 */
376 			cvp = tvp;
377 			goto bad;
378 		}
379 		cvp = tvp;
380 	}
381 
382 	/*
383 	 * If we hit a symbolic link and there is more path to be
384 	 * translated or this operation does not wish to apply
385 	 * to a link, then place the contents of the link at the
386 	 * front of the remaining pathname.
387 	 */
388 	if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) {
389 		struct pathname linkpath;
390 
391 		if (++nlink > MAXSYMLINKS) {
392 			error = ELOOP;
393 			goto bad;
394 		}
395 		pn_alloc(&linkpath);
396 		if ((error = pn_getsymlink(cvp, &linkpath, cr)) != 0) {
397 			pn_free(&linkpath);
398 			goto bad;
399 		}
400 
401 		if (pn_pathleft(&linkpath) == 0)
402 			(void) pn_set(&linkpath, ".");
403 		error = pn_insert(pnp, &linkpath, strlen(component));
404 		pn_free(&linkpath);
405 		if (error)
406 			goto bad;
407 		VN_RELE(cvp);
408 		cvp = NULL;
409 		if (pnp->pn_pathlen == 0) {
410 			error = ENOENT;
411 			goto bad;
412 		}
413 		if (pnp->pn_path[0] == '/') {
414 			do {
415 				pnp->pn_path++;
416 				pnp->pn_pathlen--;
417 			} while (pnp->pn_path[0] == '/');
418 			VN_RELE(vp);
419 			vp = rootvp;
420 			VN_HOLD(vp);
421 		}
422 		if (pn_fixslash(pnp)) {
423 			flags |= FOLLOW;
424 			must_be_directory = 1;
425 		}
426 		goto next;
427 	}
428 
429 	/*
430 	 * If rpnp is non-NULL, remember the resolved path name therein.
431 	 * Do not include "." components.  Collapse occurrences of
432 	 * "previous/..", so long as "previous" is not itself "..".
433 	 * Exhausting rpnp results in error ENAMETOOLONG.
434 	 */
435 	if (rpnp && strcmp(component, ".") != 0) {
436 		size_t len;
437 
438 		if (strcmp(component, "..") == 0 &&
439 		    rpnp->pn_pathlen != 0 &&
440 		    !((rpnp->pn_pathlen > 2 &&
441 		    strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) ||
442 		    (rpnp->pn_pathlen == 2 &&
443 		    strncmp(rpnp->pn_path, "..", 2) == 0))) {
444 			while (rpnp->pn_pathlen &&
445 			    rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
446 				rpnp->pn_pathlen--;
447 			if (rpnp->pn_pathlen > 1)
448 				rpnp->pn_pathlen--;
449 			rpnp->pn_path[rpnp->pn_pathlen] = '\0';
450 		} else {
451 			if (rpnp->pn_pathlen != 0 &&
452 			    rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
453 				rpnp->pn_path[rpnp->pn_pathlen++] = '/';
454 			if (flags & FIGNORECASE) {
455 				/*
456 				 * Return the case-preserved name
457 				 * within the resolved path.
458 				 */
459 				error = copystr(pp->pn_buf,
460 				    rpnp->pn_path + rpnp->pn_pathlen,
461 				    rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
462 			} else {
463 				error = copystr(component,
464 				    rpnp->pn_path + rpnp->pn_pathlen,
465 				    rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
466 			}
467 			if (error)	/* copystr() returns ENAMETOOLONG */
468 				goto bad;
469 			rpnp->pn_pathlen += (len - 1);
470 			ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen);
471 		}
472 	}
473 
474 	/*
475 	 * If no more components, return last directory (if wanted) and
476 	 * last component (if wanted).
477 	 */
478 	if (pn_pathleft(pnp) == 0) {
479 		/*
480 		 * If there was a trailing slash in the pathname,
481 		 * make sure the last component is a directory.
482 		 */
483 		if (must_be_directory && cvp->v_type != VDIR) {
484 			error = ENOTDIR;
485 			goto bad;
486 		}
487 		if (dirvpp != NULL) {
488 			/*
489 			 * Check that we have the real parent and not
490 			 * an alias of the last component.
491 			 */
492 			if (vn_compare(vp, cvp)) {
493 				pn_setlast(pnp);
494 				VN_RELE(vp);
495 				VN_RELE(cvp);
496 				if (rootvp != rootdir)
497 					VN_RELE(rootvp);
498 				if (pp)
499 					pn_free(pp);
500 				return (EINVAL);
501 			}
502 			*dirvpp = vp;
503 		} else
504 			VN_RELE(vp);
505 		if (pnp->pn_path == pnp->pn_buf)
506 			(void) pn_set(pnp, ".");
507 		else
508 			pn_setlast(pnp);
509 		if (rpnp) {
510 			if (VN_CMP(cvp, rootvp))
511 				(void) pn_set(rpnp, "/");
512 			else if (rpnp->pn_pathlen == 0)
513 				(void) pn_set(rpnp, ".");
514 		}
515 
516 		if (compvpp != NULL)
517 			*compvpp = cvp;
518 		else
519 			VN_RELE(cvp);
520 		if (rootvp != rootdir)
521 			VN_RELE(rootvp);
522 		if (pp)
523 			pn_free(pp);
524 		return (0);
525 	}
526 
527 	/*
528 	 * Skip over slashes from end of last component.
529 	 */
530 	while (pnp->pn_path[0] == '/') {
531 		pnp->pn_path++;
532 		pnp->pn_pathlen--;
533 	}
534 
535 	/*
536 	 * Searched through another level of directory:
537 	 * release previous directory handle and save new (result
538 	 * of lookup) as current directory.
539 	 */
540 	VN_RELE(vp);
541 	vp = cvp;
542 	cvp = NULL;
543 	goto next;
544 
545 bad:
546 	/*
547 	 * Error.  Release vnodes and return.
548 	 */
549 	if (cvp)
550 		VN_RELE(cvp);
551 	/*
552 	 * If the error was ESTALE and the current directory to look in
553 	 * was the root for this lookup, the root for a mounted file
554 	 * system, or the starting directory for lookups, then
555 	 * return ENOENT instead of ESTALE.  In this case, no recovery
556 	 * is possible by the higher level.  If ESTALE was returned for
557 	 * some intermediate directory along the path, then recovery
558 	 * is potentially possible and retrying from the higher level
559 	 * will either correct the situation by purging stale cache
560 	 * entries or eventually get back to the point where no recovery
561 	 * is possible.
562 	 */
563 	if (error == ESTALE &&
564 	    (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp))
565 		error = ENOENT;
566 	VN_RELE(vp);
567 	if (rootvp != rootdir)
568 		VN_RELE(rootvp);
569 	if (pp)
570 		pn_free(pp);
571 	return (error);
572 }
573 
574 /*
575  * Traverse a mount point.  Routine accepts a vnode pointer as a reference
576  * parameter and performs the indirection, releasing the original vnode.
577  */
578 int
traverse(vnode_t ** cvpp)579 traverse(vnode_t **cvpp)
580 {
581 	int error = 0;
582 	vnode_t *cvp;
583 	vnode_t *tvp;
584 	vfs_t *vfsp;
585 
586 	cvp = *cvpp;
587 
588 	/*
589 	 * If this vnode is mounted on, then we transparently indirect
590 	 * to the vnode which is the root of the mounted file system.
591 	 * Before we do this we must check that an unmount is not in
592 	 * progress on this vnode.
593 	 */
594 
595 	for (;;) {
596 		/*
597 		 * Used to try to read lock the vnode here.
598 		 */
599 
600 		/*
601 		 * Reached the end of the mount chain?
602 		 */
603 		vfsp = vn_mountedvfs(cvp);
604 		if (vfsp == NULL) {
605 			break;
606 		}
607 
608 		/*
609 		 * The read lock must be held across the call to VFS_ROOT() to
610 		 * prevent a concurrent unmount from destroying the vfs.
611 		 */
612 		error = VFS_ROOT(vfsp, &tvp);
613 		if (error)
614 			break;
615 
616 		VN_RELE(cvp);
617 
618 		cvp = tvp;
619 	}
620 
621 	*cvpp = cvp;
622 	return (error);
623 }
624 
625 /*
626  * Get the vnode path, relative to the passed rootvp.
627  * Our vncache always fills in v_path, so this is easy.
628  */
629 /* ARGSUSED */
630 int
vnodetopath(vnode_t * vrootp,vnode_t * vp,char * buf,size_t buflen,cred_t * cr)631 vnodetopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr)
632 {
633 	int len, rvp_len = 0;
634 	const char *p = vp->v_path;
635 
636 	if (vrootp)
637 		rvp_len = strlen(vrootp->v_path);
638 	len = strlen(p);
639 	if (rvp_len < len)
640 		p += rvp_len;
641 	else
642 		p = "/";
643 
644 	(void) strlcpy(buf, p, buflen);
645 	return (0);
646 }
647