xref: /illumos-gate/usr/src/uts/common/fs/fs_subr.c (revision 87bfe94c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved  	*/
23 
24 
25 /*
26  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
27  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
28  * Copyright 2017 Joyent, Inc.
29  */
30 
31 /*
32  * Generic vnode operations.
33  */
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/errno.h>
38 #include <sys/fcntl.h>
39 #include <sys/flock.h>
40 #include <sys/statvfs.h>
41 #include <sys/vfs.h>
42 #include <sys/vnode.h>
43 #include <sys/proc.h>
44 #include <sys/user.h>
45 #include <sys/unistd.h>
46 #include <sys/cred.h>
47 #include <sys/poll.h>
48 #include <sys/debug.h>
49 #include <sys/cmn_err.h>
50 #include <sys/stream.h>
51 #include <fs/fs_subr.h>
52 #include <fs/fs_reparse.h>
53 #include <sys/door.h>
54 #include <sys/acl.h>
55 #include <sys/share.h>
56 #include <sys/file.h>
57 #include <sys/kmem.h>
58 #include <sys/file.h>
59 #include <sys/nbmlock.h>
60 #include <acl/acl_common.h>
61 #include <sys/pathname.h>
62 
63 /* required for fs_reject_epoll */
64 #include <sys/poll_impl.h>
65 
/*
 * Forward declaration of the lock-manager callback that fs_frlock() installs
 * when it serializes a blocking lock request (defined later in this file).
 */
static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);

/*
 * Tunable limiting the number of retries attempted to recover from a
 * STALE error.  fs_need_estale_retry() compares a caller's retry count
 * against this value.
 */
int fs_estale_retry = 5;

/*
 * Support for the reparse point door upcall: the cached door handle and
 * the mutex that is held whenever the cached handle is read or changed.
 */
static door_handle_t reparsed_door;
static kmutex_t reparsed_door_lock;
78 
/*
 * Generic "not supported" entry point: the file system does not implement
 * the associated operation.
 *
 * Returns ENOSYS unconditionally.  The parameter list is intentionally left
 * unspecified, as in the original definition.
 */
int
fs_nosys()
{
	const int unsupported = ENOSYS;

	return (unsupported);
}
87 
/*
 * Generic "invalid operation" entry point: the associated operation is not
 * valid on this vnode.
 *
 * Returns EINVAL unconditionally.  The parameter list is intentionally left
 * unspecified, as in the original definition.
 */
int
fs_inval()
{
	const int invalid = EINVAL;

	return (invalid);
}
96 
/*
 * Generic entry point for operations that are only valid on directories.
 *
 * Returns ENOTDIR unconditionally.  The parameter list is intentionally left
 * unspecified, as in the original definition.
 */
int
fs_notdir()
{
	const int not_a_directory = ENOTDIR;

	return (not_a_directory);
}
105 
106 /*
107  * Free the file system specific resources. For the file systems that
108  * do not support the forced unmount, it will be a nop function.
109  */
110 
111 /*ARGSUSED*/
112 void
fs_freevfs(vfs_t * vfsp)113 fs_freevfs(vfs_t *vfsp)
114 {
115 }
116 
117 /* ARGSUSED */
118 int
fs_nosys_map(struct vnode * vp,offset_t off,struct as * as,caddr_t * addrp,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,struct cred * cr,caller_context_t * ct)119 fs_nosys_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
120     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
121     caller_context_t *ct)
122 {
123 	return (ENOSYS);
124 }
125 
126 /* ARGSUSED */
127 int
fs_nosys_addmap(struct vnode * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,struct cred * cr,caller_context_t * ct)128 fs_nosys_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
129     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
130     caller_context_t *ct)
131 {
132 	return (ENOSYS);
133 }
134 
135 /* ARGSUSED */
136 int
fs_nosys_poll(vnode_t * vp,short events,int anyyet,short * reventsp,struct pollhead ** phpp,caller_context_t * ct)137 fs_nosys_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
138     struct pollhead **phpp, caller_context_t *ct)
139 {
140 	return (ENOSYS);
141 }
142 
143 
144 /*
145  * The file system has nothing to sync to disk.  However, the
146  * VFS_SYNC operation must not fail.
147  */
148 /* ARGSUSED */
149 int
fs_sync(struct vfs * vfspp,short flag,cred_t * cr)150 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
151 {
152 	return (0);
153 }
154 
155 /*
156  * Does nothing but VOP_FSYNC must not fail.
157  */
158 /* ARGSUSED */
159 int
fs_fsync(vnode_t * vp,int syncflag,cred_t * cr,caller_context_t * ct)160 fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
161 {
162 	return (0);
163 }
164 
165 /*
166  * Does nothing but VOP_PUTPAGE must not fail.
167  */
168 /* ARGSUSED */
169 int
fs_putpage(vnode_t * vp,offset_t off,size_t len,int flags,cred_t * cr,caller_context_t * ctp)170 fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
171     caller_context_t *ctp)
172 {
173 	return (0);
174 }
175 
176 /*
177  * Does nothing but VOP_IOCTL must not fail.
178  */
179 /* ARGSUSED */
180 int
fs_ioctl(vnode_t * vp,int com,intptr_t data,int flag,cred_t * cred,int * rvalp)181 fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
182     int *rvalp)
183 {
184 	return (0);
185 }
186 
187 /*
188  * Read/write lock/unlock.  Does nothing.
189  */
190 /* ARGSUSED */
191 int
fs_rwlock(vnode_t * vp,int write_lock,caller_context_t * ctp)192 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
193 {
194 	return (-1);
195 }
196 
197 /* ARGSUSED */
198 void
fs_rwunlock(vnode_t * vp,int write_lock,caller_context_t * ctp)199 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
200 {
201 }
202 
203 /*
204  * Compare two vnodes.
205  */
206 /*ARGSUSED2*/
207 int
fs_cmp(vnode_t * vp1,vnode_t * vp2,caller_context_t * ct)208 fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
209 {
210 	return (vp1 == vp2);
211 }
212 
213 /*
214  * No-op seek operation.
215  */
216 /* ARGSUSED */
217 int
fs_seek(vnode_t * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)218 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
219 {
220 	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
221 }
222 
223 /*
224  * File and record locking.
225  */
226 /* ARGSUSED */
227 int
fs_frlock(vnode_t * vp,int cmd,struct flock64 * bfp,int flag,offset_t offset,flk_callback_t * flk_cbp,cred_t * cr,caller_context_t * ct)228 fs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
229     flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct)
230 {
231 	int frcmd;
232 	int nlmid;
233 	int error = 0;
234 	boolean_t skip_lock = B_FALSE;
235 	flk_callback_t serialize_callback;
236 	int serialize = 0;
237 	v_mode_t mode;
238 
239 	switch (cmd) {
240 
241 	case F_GETLK:
242 	case F_O_GETLK:
243 		if (flag & F_REMOTELOCK) {
244 			frcmd = RCMDLCK;
245 		} else if (flag & F_PXFSLOCK) {
246 			frcmd = PCMDLCK;
247 		} else {
248 			frcmd = 0;
249 			bfp->l_pid = ttoproc(curthread)->p_pid;
250 			bfp->l_sysid = 0;
251 		}
252 		break;
253 
254 	case F_OFD_GETLK:
255 		/*
256 		 * TBD we do not support remote OFD locks at this time.
257 		 */
258 		if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
259 			error = EINVAL;
260 			goto done;
261 		}
262 		skip_lock = B_TRUE;
263 		break;
264 
265 	case F_SETLK_NBMAND:
266 		/*
267 		 * Are NBMAND locks allowed on this file?
268 		 */
269 		if (!vp->v_vfsp ||
270 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
271 			error = EINVAL;
272 			goto done;
273 		}
274 		if (vp->v_type != VREG) {
275 			error = EINVAL;
276 			goto done;
277 		}
278 		/*FALLTHROUGH*/
279 
280 	case F_SETLK:
281 		if (flag & F_REMOTELOCK) {
282 			frcmd = SETFLCK|RCMDLCK;
283 		} else if (flag & F_PXFSLOCK) {
284 			frcmd = SETFLCK|PCMDLCK;
285 		} else {
286 			frcmd = SETFLCK;
287 			bfp->l_pid = ttoproc(curthread)->p_pid;
288 			bfp->l_sysid = 0;
289 		}
290 		if (cmd == F_SETLK_NBMAND &&
291 		    (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
292 			frcmd |= NBMLCK;
293 		}
294 
295 		if (nbl_need_check(vp)) {
296 			nbl_start_crit(vp, RW_WRITER);
297 			serialize = 1;
298 			if (frcmd & NBMLCK) {
299 				mode = (bfp->l_type == F_RDLCK) ?
300 				    V_READ : V_RDANDWR;
301 				if (vn_is_mapped(vp, mode)) {
302 					error = EAGAIN;
303 					goto done;
304 				}
305 			}
306 		}
307 		break;
308 
309 	case F_SETLKW:
310 		if (flag & F_REMOTELOCK) {
311 			frcmd = SETFLCK|SLPFLCK|RCMDLCK;
312 		} else if (flag & F_PXFSLOCK) {
313 			frcmd = SETFLCK|SLPFLCK|PCMDLCK;
314 		} else {
315 			frcmd = SETFLCK|SLPFLCK;
316 			bfp->l_pid = ttoproc(curthread)->p_pid;
317 			bfp->l_sysid = 0;
318 		}
319 
320 		if (nbl_need_check(vp)) {
321 			nbl_start_crit(vp, RW_WRITER);
322 			serialize = 1;
323 		}
324 		break;
325 
326 	case F_OFD_SETLK:
327 	case F_OFD_SETLKW:
328 	case F_FLOCK:
329 	case F_FLOCKW:
330 		/*
331 		 * TBD we do not support remote OFD locks at this time.
332 		 */
333 		if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
334 			error = EINVAL;
335 			goto done;
336 		}
337 		skip_lock = B_TRUE;
338 		break;
339 
340 	case F_HASREMOTELOCKS:
341 		nlmid = GETNLMID(bfp->l_sysid);
342 		if (nlmid != 0) {	/* booted as a cluster */
343 			l_has_rmt(bfp) =
344 			    cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
345 		} else {		/* not booted as a cluster */
346 			l_has_rmt(bfp) = flk_has_remote_locks(vp);
347 		}
348 
349 		goto done;
350 
351 	default:
352 		error = EINVAL;
353 		goto done;
354 	}
355 
356 	/*
357 	 * If this is a blocking lock request and we're serializing lock
358 	 * requests, modify the callback list to leave the critical region
359 	 * while we're waiting for the lock.
360 	 */
361 
362 	if (serialize && (frcmd & SLPFLCK) != 0) {
363 		flk_add_callback(&serialize_callback,
364 		    frlock_serialize_blocked, vp, flk_cbp);
365 		flk_cbp = &serialize_callback;
366 	}
367 
368 	if (!skip_lock)
369 		error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);
370 
371 	if (serialize && (frcmd & SLPFLCK) != 0)
372 		flk_del_callback(&serialize_callback);
373 
374 done:
375 	if (serialize)
376 		nbl_end_crit(vp);
377 
378 	return (error);
379 }
380 
381 /*
382  * Callback when a lock request blocks and we are serializing requests.  If
383  * before sleeping, leave the critical region.  If after wakeup, reenter
384  * the critical region.
385  */
386 
387 static callb_cpr_t *
frlock_serialize_blocked(flk_cb_when_t when,void * infop)388 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
389 {
390 	vnode_t *vp = (vnode_t *)infop;
391 
392 	if (when == FLK_BEFORE_SLEEP)
393 		nbl_end_crit(vp);
394 	else {
395 		nbl_start_crit(vp, RW_WRITER);
396 	}
397 
398 	return (NULL);
399 }
400 
401 /*
402  * Allow any flags.
403  */
404 /* ARGSUSED */
405 int
fs_setfl(vnode_t * vp,int oflags,int nflags,cred_t * cr,caller_context_t * ct)406 fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
407 {
408 	return (0);
409 }
410 
411 /*
412  * Unlike poll(2), epoll should reject attempts to add normal files or
413  * directories to a given handle.  Most non-pseudo filesystems rely on
414  * fs_poll() as their implementation of polling behavior.  Exceptions to that
415  * rule (ufs) can use fs_reject_epoll(), so they don't require access to the
416  * inner details of poll.  Potential race conditions related to the poll module
417  * being loaded are avoided by implementing the check here in genunix.
418  */
419 boolean_t
fs_reject_epoll()420 fs_reject_epoll()
421 {
422 	/* Check if the currently-active pollcache is epoll-enabled. */
423 	return (curthread->t_pollcache != NULL &&
424 	    (curthread->t_pollcache->pc_flag & PC_EPOLL) != 0);
425 }
426 
427 /* ARGSUSED */
428 int
fs_poll(vnode_t * vp,short events,int anyyet,short * reventsp,struct pollhead ** phpp,caller_context_t * ct)429 fs_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
430     struct pollhead **phpp, caller_context_t *ct)
431 {
432 	/*
433 	 * Regular filesystems should reject epollers.  On the off chance that
434 	 * a non-epoll consumer expresses the desire for edge-triggered
435 	 * polling, we reject them too.  Yes, the expected error for this
436 	 * really is EPERM.
437 	 */
438 	if (fs_reject_epoll() || (events & POLLET) != 0) {
439 		return (EPERM);
440 	}
441 
442 	*reventsp = 0;
443 	if (events & POLLIN)
444 		*reventsp |= POLLIN;
445 	if (events & POLLRDNORM)
446 		*reventsp |= POLLRDNORM;
447 	if (events & POLLRDBAND)
448 		*reventsp |= POLLRDBAND;
449 	if (events & POLLOUT)
450 		*reventsp |= POLLOUT;
451 	if (events & POLLWRBAND)
452 		*reventsp |= POLLWRBAND;
453 
454 	return (0);
455 }
456 
457 /*
458  * POSIX pathconf() support.
459  */
460 /* ARGSUSED */
461 int
fs_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)462 fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
463     caller_context_t *ct)
464 {
465 	ulong_t val;
466 	int error = 0;
467 	struct statvfs64 vfsbuf;
468 
469 	switch (cmd) {
470 
471 	case _PC_LINK_MAX:
472 		val = MAXLINK;
473 		break;
474 
475 	case _PC_MAX_CANON:
476 		val = MAX_CANON;
477 		break;
478 
479 	case _PC_MAX_INPUT:
480 		val = MAX_INPUT;
481 		break;
482 
483 	case _PC_NAME_MAX:
484 		bzero(&vfsbuf, sizeof (vfsbuf));
485 		if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
486 			break;
487 		val = vfsbuf.f_namemax;
488 		break;
489 
490 	case _PC_PATH_MAX:
491 	case _PC_SYMLINK_MAX:
492 		val = MAXPATHLEN;
493 		break;
494 
495 	case _PC_PIPE_BUF:
496 		val = PIPE_BUF;
497 		break;
498 
499 	case _PC_NO_TRUNC:
500 		if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
501 			val = 1;	/* NOTRUNC is enabled for vp */
502 		else
503 			val = (ulong_t)-1;
504 		break;
505 
506 	case _PC_VDISABLE:
507 		val = _POSIX_VDISABLE;
508 		break;
509 
510 	case _PC_CHOWN_RESTRICTED:
511 		if (rstchown)
512 			val = rstchown; /* chown restricted enabled */
513 		else
514 			val = (ulong_t)-1;
515 		break;
516 
517 	case _PC_FILESIZEBITS:
518 
519 		/*
520 		 * If ever we come here it means that underlying file system
521 		 * does not recognise the command and therefore this
522 		 * configurable limit cannot be determined. We return -1
523 		 * and don't change errno.
524 		 */
525 
526 		val = (ulong_t)-1;    /* large file support */
527 		break;
528 
529 	case _PC_ACL_ENABLED:
530 		val = 0;
531 		break;
532 
533 	case _PC_CASE_BEHAVIOR:
534 		val = _CASE_SENSITIVE;
535 		if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
536 			val |= _CASE_INSENSITIVE;
537 		if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
538 			val &= ~_CASE_SENSITIVE;
539 		break;
540 
541 	case _PC_SATTR_ENABLED:
542 	case _PC_SATTR_EXISTS:
543 		val = 0;
544 		break;
545 
546 	case _PC_ACCESS_FILTERING:
547 		val = 0;
548 		break;
549 
550 	default:
551 		error = EINVAL;
552 		break;
553 	}
554 
555 	if (error == 0)
556 		*valp = val;
557 	return (error);
558 }
559 
560 /*
561  * Dispose of a page.
562  */
563 /* ARGSUSED */
564 void
fs_dispose(struct vnode * vp,page_t * pp,int fl,int dn,struct cred * cr,caller_context_t * ct)565 fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
566     caller_context_t *ct)
567 {
568 
569 	ASSERT(fl == B_FREE || fl == B_INVAL);
570 
571 	if (fl == B_FREE)
572 		page_free(pp, dn);
573 	else
574 		page_destroy(pp, dn);
575 }
576 
577 /* ARGSUSED */
578 void
fs_nodispose(struct vnode * vp,page_t * pp,int fl,int dn,struct cred * cr,caller_context_t * ct)579 fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
580     caller_context_t *ct)
581 {
582 	cmn_err(CE_PANIC, "fs_nodispose invoked");
583 }
584 
585 /*
586  * fabricate acls for file systems that do not support acls.
587  */
588 /* ARGSUSED */
589 int
fs_fab_acl(vnode_t * vp,vsecattr_t * vsecattr,int flag,cred_t * cr,caller_context_t * ct)590 fs_fab_acl(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr,
591     caller_context_t *ct)
592 {
593 	aclent_t	*aclentp;
594 	struct vattr	vattr;
595 	int		error;
596 	size_t		aclsize;
597 
598 	vsecattr->vsa_aclcnt	= 0;
599 	vsecattr->vsa_aclentsz	= 0;
600 	vsecattr->vsa_aclentp	= NULL;
601 	vsecattr->vsa_dfaclcnt	= 0;	/* Default ACLs are not fabricated */
602 	vsecattr->vsa_dfaclentp	= NULL;
603 
604 	vattr.va_mask = AT_MODE | AT_UID | AT_GID;
605 	if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
606 		return (error);
607 
608 	if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
609 		aclsize = 4 * sizeof (aclent_t);
610 		vsecattr->vsa_aclcnt	= 4; /* USER, GROUP, OTHER, and CLASS */
611 		vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
612 		aclentp = vsecattr->vsa_aclentp;
613 
614 		aclentp->a_type = USER_OBJ;	/* Owner */
615 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
616 		aclentp->a_id = vattr.va_uid;   /* Really undefined */
617 		aclentp++;
618 
619 		aclentp->a_type = GROUP_OBJ;    /* Group */
620 		aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
621 		aclentp->a_id = vattr.va_gid;   /* Really undefined */
622 		aclentp++;
623 
624 		aclentp->a_type = OTHER_OBJ;    /* Other */
625 		aclentp->a_perm = vattr.va_mode & 0007;
626 		aclentp->a_id = (gid_t)-1;	/* Really undefined */
627 		aclentp++;
628 
629 		aclentp->a_type = CLASS_OBJ;    /* Class */
630 		aclentp->a_perm = (ushort_t)(0007);
631 		aclentp->a_id = (gid_t)-1;	/* Really undefined */
632 	} else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
633 		VERIFY(0 == acl_trivial_create(vattr.va_mode,
634 		    (vp->v_type == VDIR), (ace_t **)&vsecattr->vsa_aclentp,
635 		    &vsecattr->vsa_aclcnt));
636 		vsecattr->vsa_aclentsz = vsecattr->vsa_aclcnt * sizeof (ace_t);
637 	}
638 
639 	return (error);
640 }
641 
642 /*
643  * Common code for implementing DOS share reservations
644  */
645 /* ARGSUSED4 */
646 int
fs_shrlock(struct vnode * vp,int cmd,struct shrlock * shr,int flag,cred_t * cr,caller_context_t * ct)647 fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
648     caller_context_t *ct)
649 {
650 	int error;
651 
652 	/*
653 	 * Make sure that the file was opened with permissions appropriate
654 	 * for the request, and make sure the caller isn't trying to sneak
655 	 * in an NBMAND request.
656 	 */
657 	if (cmd == F_SHARE) {
658 		if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
659 		    ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
660 			return (EBADF);
661 		if (shr->s_access & (F_RMACC | F_MDACC))
662 			return (EINVAL);
663 		if (shr->s_deny & (F_MANDDNY | F_RMDNY))
664 			return (EINVAL);
665 	}
666 	if (cmd == F_SHARE_NBMAND) {
667 		/* make sure nbmand is allowed on the file */
668 		if (!vp->v_vfsp ||
669 		    !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
670 			return (EINVAL);
671 		}
672 		if (vp->v_type != VREG) {
673 			return (EINVAL);
674 		}
675 	}
676 
677 	nbl_start_crit(vp, RW_WRITER);
678 
679 	switch (cmd) {
680 
681 	case F_SHARE_NBMAND:
682 		shr->s_deny |= F_MANDDNY;
683 		/*FALLTHROUGH*/
684 	case F_SHARE:
685 		error = add_share(vp, shr);
686 		break;
687 
688 	case F_UNSHARE:
689 		error = del_share(vp, shr);
690 		break;
691 
692 	case F_HASREMOTELOCKS:
693 		/*
694 		 * We are overloading this command to refer to remote
695 		 * shares as well as remote locks, despite its name.
696 		 */
697 		shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
698 		error = 0;
699 		break;
700 
701 	default:
702 		error = EINVAL;
703 		break;
704 	}
705 
706 	nbl_end_crit(vp);
707 	return (error);
708 }
709 
710 /*ARGSUSED1*/
711 int
fs_vnevent_nosupport(vnode_t * vp,vnevent_t e,vnode_t * dvp,char * fnm,caller_context_t * ct)712 fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
713     caller_context_t *ct)
714 {
715 	ASSERT(vp != NULL);
716 	return (ENOTSUP);
717 }
718 
719 /*ARGSUSED1*/
720 int
fs_vnevent_support(vnode_t * vp,vnevent_t e,vnode_t * dvp,char * fnm,caller_context_t * ct)721 fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
722     caller_context_t *ct)
723 {
724 	ASSERT(vp != NULL);
725 	return (0);
726 }
727 
728 /*
729  * return 1 for non-trivial ACL.
730  *
731  * NB: It is not necessary for the caller to VOP_RWLOCK since
732  *	we only issue VOP_GETSECATTR.
733  *
734  * Returns 0 == trivial
735  *         1 == NOT Trivial
736  *	   <0 could not determine.
737  */
738 int
fs_acl_nontrivial(vnode_t * vp,cred_t * cr)739 fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
740 {
741 	ulong_t		acl_styles;
742 	ulong_t		acl_flavor;
743 	vsecattr_t 	vsecattr;
744 	int 		error;
745 	int		isnontrivial;
746 
747 	/* determine the forms of ACLs maintained */
748 	error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);
749 
750 	/* clear bits we don't understand and establish default acl_style */
751 	acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
752 	if (error || (acl_styles == 0))
753 		acl_styles = _ACL_ACLENT_ENABLED;
754 
755 	vsecattr.vsa_aclentp = NULL;
756 	vsecattr.vsa_dfaclentp = NULL;
757 	vsecattr.vsa_aclcnt = 0;
758 	vsecattr.vsa_dfaclcnt = 0;
759 
760 	while (acl_styles) {
761 		/* select one of the styles as current flavor */
762 		acl_flavor = 0;
763 		if (acl_styles & _ACL_ACLENT_ENABLED) {
764 			acl_flavor = _ACL_ACLENT_ENABLED;
765 			vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
766 		} else if (acl_styles & _ACL_ACE_ENABLED) {
767 			acl_flavor = _ACL_ACE_ENABLED;
768 			vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
769 		}
770 
771 		ASSERT(vsecattr.vsa_mask && acl_flavor);
772 		error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
773 		if (error == 0)
774 			break;
775 
776 		/* that flavor failed */
777 		acl_styles &= ~acl_flavor;
778 	}
779 
780 	/* if all styles fail then assume trivial */
781 	if (acl_styles == 0)
782 		return (0);
783 
784 	/* process the flavor that worked */
785 	isnontrivial = 0;
786 	if (acl_flavor & _ACL_ACLENT_ENABLED) {
787 		if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
788 			isnontrivial = 1;
789 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
790 			kmem_free(vsecattr.vsa_aclentp,
791 			    vsecattr.vsa_aclcnt * sizeof (aclent_t));
792 		if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
793 			kmem_free(vsecattr.vsa_dfaclentp,
794 			    vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
795 	}
796 	if (acl_flavor & _ACL_ACE_ENABLED) {
797 		isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
798 		    vsecattr.vsa_aclcnt);
799 
800 		if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
801 			kmem_free(vsecattr.vsa_aclentp,
802 			    vsecattr.vsa_aclcnt * sizeof (ace_t));
803 		/* ACE has no vsecattr.vsa_dfaclcnt */
804 	}
805 	return (isnontrivial);
806 }
807 
808 /*
809  * Check whether we need a retry to recover from STALE error.
810  */
811 int
fs_need_estale_retry(int retry_count)812 fs_need_estale_retry(int retry_count)
813 {
814 	if (retry_count < fs_estale_retry)
815 		return (1);
816 	else
817 		return (0);
818 }
819 
820 
821 static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;
822 
823 /*
824  * Routine for anti-virus scanner to call to register its scanning routine.
825  */
826 void
fs_vscan_register(int (* av_scan)(vnode_t *,cred_t *,int))827 fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
828 {
829 	fs_av_scan = av_scan;
830 }
831 
832 /*
833  * Routine for file systems to call to initiate anti-virus scanning.
834  * Scanning will only be done on REGular files (currently).
835  */
836 int
fs_vscan(vnode_t * vp,cred_t * cr,int async)837 fs_vscan(vnode_t *vp, cred_t *cr, int async)
838 {
839 	int ret = 0;
840 
841 	if (fs_av_scan && vp->v_type == VREG)
842 		ret = (*fs_av_scan)(vp, cr, async);
843 
844 	return (ret);
845 }
846 
847 /*
848  * support functions for reparse point
849  */
850 /*
851  * reparse_vnode_parse
852  *
853  * Read the symlink data of a reparse point specified by the vnode
854  * and return the reparse data as name-value pair in the nvlist.
855  */
856 int
reparse_vnode_parse(vnode_t * vp,nvlist_t * nvl)857 reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
858 {
859 	int err;
860 	char *lkdata;
861 	struct uio uio;
862 	struct iovec iov;
863 
864 	if (vp == NULL || nvl == NULL)
865 		return (EINVAL);
866 
867 	lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);
868 
869 	/*
870 	 * Set up io vector to read sym link data
871 	 */
872 	iov.iov_base = lkdata;
873 	iov.iov_len = MAXREPARSELEN;
874 	uio.uio_iov = &iov;
875 	uio.uio_iovcnt = 1;
876 	uio.uio_segflg = UIO_SYSSPACE;
877 	uio.uio_extflg = UIO_COPY_CACHED;
878 	uio.uio_loffset = (offset_t)0;
879 	uio.uio_resid = MAXREPARSELEN;
880 
881 	if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
882 		*(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
883 		err = reparse_parse(lkdata, nvl);
884 	}
885 	kmem_free(lkdata, MAXREPARSELEN);	/* done with lkdata */
886 
887 	return (err);
888 }
889 
890 void
reparse_point_init()891 reparse_point_init()
892 {
893 	mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
894 }
895 
896 static door_handle_t
reparse_door_get_handle()897 reparse_door_get_handle()
898 {
899 	door_handle_t dh;
900 
901 	mutex_enter(&reparsed_door_lock);
902 	if ((dh = reparsed_door) == NULL) {
903 		if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
904 			reparsed_door = NULL;
905 			dh = NULL;
906 		} else
907 			dh = reparsed_door;
908 	}
909 	mutex_exit(&reparsed_door_lock);
910 	return (dh);
911 }
912 
913 static void
reparse_door_reset_handle()914 reparse_door_reset_handle()
915 {
916 	mutex_enter(&reparsed_door_lock);
917 	reparsed_door = NULL;
918 	mutex_exit(&reparsed_door_lock);
919 }
920 
921 /*
922  * reparse_kderef
923  *
924  * Accepts the service-specific item from the reparse point and returns
925  * the service-specific data requested.  The caller specifies the size of
926  * the buffer provided via *bufsz; the routine will fail with EOVERFLOW
927  * if the results will not fit in the buffer, in which case, *bufsz will
928  * contain the number of bytes needed to hold the results.
929  *
930  * if ok return 0 and update *bufsize with length of actual result
931  * else return error code.
932  */
933 int
reparse_kderef(const char * svc_type,const char * svc_data,char * buf,size_t * bufsize)934 reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
935     size_t *bufsize)
936 {
937 	int err, retries, need_free, retried_doorhd;
938 	size_t dlen, res_len;
939 	char *darg;
940 	door_arg_t door_args;
941 	reparsed_door_res_t *resp;
942 	door_handle_t rp_door;
943 
944 	if (svc_type == NULL || svc_data == NULL || buf == NULL ||
945 	    bufsize == NULL)
946 		return (EINVAL);
947 
948 	/* get reparsed's door handle */
949 	if ((rp_door = reparse_door_get_handle()) == NULL)
950 		return (EBADF);
951 
952 	/* setup buffer for door_call args and results */
953 	dlen = strlen(svc_type) + strlen(svc_data) + 2;
954 	if (*bufsize < dlen) {
955 		darg = kmem_alloc(dlen, KM_SLEEP);
956 		need_free = 1;
957 	} else {
958 		darg = buf;	/* use same buffer for door's args & results */
959 		need_free = 0;
960 	}
961 
962 	/* build argument string of door call */
963 	(void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);
964 
965 	/* setup args for door call */
966 	door_args.data_ptr = darg;
967 	door_args.data_size = dlen;
968 	door_args.desc_ptr = NULL;
969 	door_args.desc_num = 0;
970 	door_args.rbuf = buf;
971 	door_args.rsize = *bufsize;
972 
973 	/* do the door_call */
974 	retried_doorhd = 0;
975 	retries = 0;
976 	door_ki_hold(rp_door);
977 	while ((err = door_ki_upcall_limited(rp_door, &door_args,
978 	    NULL, SIZE_MAX, 0)) != 0) {
979 		if (err == EAGAIN || err == EINTR) {
980 			if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
981 				delay(SEC_TO_TICK(1));
982 				continue;
983 			}
984 		} else if (err == EBADF) {
985 			/* door server goes away... */
986 			reparse_door_reset_handle();
987 
988 			if (retried_doorhd == 0) {
989 				door_ki_rele(rp_door);
990 				retried_doorhd++;
991 				rp_door = reparse_door_get_handle();
992 				if (rp_door != NULL) {
993 					door_ki_hold(rp_door);
994 					continue;
995 				}
996 			}
997 		}
998 		break;
999 	}
1000 
1001 	if (rp_door)
1002 		door_ki_rele(rp_door);
1003 
1004 	if (need_free)
1005 		kmem_free(darg, dlen);		/* done with args buffer */
1006 
1007 	if (err != 0)
1008 		return (err);
1009 
1010 	resp = (reparsed_door_res_t *)door_args.rbuf;
1011 	if ((err = resp->res_status) == 0) {
1012 		/*
1013 		 * have to save the length of the results before the
1014 		 * bcopy below since it's can be an overlap copy that
1015 		 * overwrites the reparsed_door_res_t structure at
1016 		 * the beginning of the buffer.
1017 		 */
1018 		res_len = (size_t)resp->res_len;
1019 
1020 		/* deref call is ok */
1021 		if (res_len > *bufsize)
1022 			err = EOVERFLOW;
1023 		else
1024 			bcopy(resp->res_data, buf, res_len);
1025 		*bufsize = res_len;
1026 	}
1027 	if (door_args.rbuf != buf)
1028 		kmem_free(door_args.rbuf, door_args.rsize);
1029 
1030 	return (err);
1031 }
1032