1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
22 /* All Rights Reserved */
23
24
25 /*
26 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
27 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
28 * Copyright 2017 Joyent, Inc.
29 * Copyright 2022 Oxide Computer Company
30 */
31
32 /*
33 * Generic vnode operations.
34 */
35 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/errno.h>
39 #include <sys/fcntl.h>
40 #include <sys/flock.h>
41 #include <sys/statvfs.h>
42 #include <sys/vfs.h>
43 #include <sys/vnode.h>
44 #include <sys/proc.h>
45 #include <sys/user.h>
46 #include <sys/unistd.h>
47 #include <sys/cred.h>
48 #include <sys/poll.h>
49 #include <sys/debug.h>
50 #include <sys/cmn_err.h>
51 #include <sys/stream.h>
52 #include <fs/fs_subr.h>
53 #include <fs/fs_reparse.h>
54 #include <sys/door.h>
55 #include <sys/acl.h>
56 #include <sys/share.h>
57 #include <sys/file.h>
58 #include <sys/kmem.h>
59 #include <sys/file.h>
60 #include <sys/nbmlock.h>
61 #include <acl/acl_common.h>
62 #include <sys/pathname.h>
63
64 /* required for fs_reject_epoll */
65 #include <sys/poll_impl.h>
66
67 static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);
68
69 /*
70 * Tunable to limit the number of retry to recover from STALE error.
71 */
72 int fs_estale_retry = 5;
73
74 /*
75 * supports for reparse point door upcall
76 */
77 static door_handle_t reparsed_door;
78 static kmutex_t reparsed_door_lock;
79
80 /*
81 * The associated operation is not supported by the file system.
82 */
83 int
fs_nosys()84 fs_nosys()
85 {
86 return (ENOSYS);
87 }
88
89 /*
90 * The associated operation is invalid (on this vnode).
91 */
92 int
fs_inval()93 fs_inval()
94 {
95 return (EINVAL);
96 }
97
98 /*
99 * The associated operation is valid only for directories.
100 */
101 int
fs_notdir()102 fs_notdir()
103 {
104 return (ENOTDIR);
105 }
106
107 /*
108 * Free the file system specific resources. For the file systems that
109 * do not support the forced unmount, it will be a nop function.
110 */
111
112 /*ARGSUSED*/
113 void
fs_freevfs(vfs_t * vfsp)114 fs_freevfs(vfs_t *vfsp)
115 {
116 }
117
118 /* ARGSUSED */
119 int
fs_nosys_map(struct vnode * vp,offset_t off,struct as * as,caddr_t * addrp,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,struct cred * cr,caller_context_t * ct)120 fs_nosys_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
121 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
122 caller_context_t *ct)
123 {
124 return (ENOSYS);
125 }
126
127 /* ARGSUSED */
128 int
fs_nosys_addmap(struct vnode * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,struct cred * cr,caller_context_t * ct)129 fs_nosys_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
130 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
131 caller_context_t *ct)
132 {
133 return (ENOSYS);
134 }
135
136 /* ARGSUSED */
137 int
fs_nosys_poll(vnode_t * vp,short events,int anyyet,short * reventsp,struct pollhead ** phpp,caller_context_t * ct)138 fs_nosys_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
139 struct pollhead **phpp, caller_context_t *ct)
140 {
141 return (ENOSYS);
142 }
143
144
145 /*
146 * The file system has nothing to sync to disk. However, the
147 * VFS_SYNC operation must not fail.
148 */
149 /* ARGSUSED */
150 int
fs_sync(struct vfs * vfspp,short flag,cred_t * cr)151 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
152 {
153 return (0);
154 }
155
156 /*
157 * Does nothing but VOP_FSYNC must not fail.
158 */
159 /* ARGSUSED */
160 int
fs_fsync(vnode_t * vp,int syncflag,cred_t * cr,caller_context_t * ct)161 fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
162 {
163 return (0);
164 }
165
166 /*
167 * Does nothing but VOP_PUTPAGE must not fail.
168 */
169 /* ARGSUSED */
170 int
fs_putpage(vnode_t * vp,offset_t off,size_t len,int flags,cred_t * cr,caller_context_t * ctp)171 fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
172 caller_context_t *ctp)
173 {
174 return (0);
175 }
176
177 /*
178 * Does nothing but VOP_IOCTL must not fail.
179 */
180 /* ARGSUSED */
181 int
fs_ioctl(vnode_t * vp,int com,intptr_t data,int flag,cred_t * cred,int * rvalp)182 fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
183 int *rvalp)
184 {
185 return (0);
186 }
187
188 /*
189 * Read/write lock/unlock. Does nothing.
190 */
191 /* ARGSUSED */
192 int
fs_rwlock(vnode_t * vp,int write_lock,caller_context_t * ctp)193 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
194 {
195 return (-1);
196 }
197
198 /* ARGSUSED */
199 void
fs_rwunlock(vnode_t * vp,int write_lock,caller_context_t * ctp)200 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
201 {
202 }
203
204 /*
205 * Compare two vnodes.
206 */
207 /*ARGSUSED2*/
208 int
fs_cmp(vnode_t * vp1,vnode_t * vp2,caller_context_t * ct)209 fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
210 {
211 return (vp1 == vp2);
212 }
213
214 /*
215 * No-op seek operation.
216 */
217 /* ARGSUSED */
218 int
fs_seek(vnode_t * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)219 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
220 {
221 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
222 }
223
224 /*
225 * File and record locking.
226 */
227 /* ARGSUSED */
228 int
fs_frlock(vnode_t * vp,int cmd,struct flock64 * bfp,int flag,offset_t offset,flk_callback_t * flk_cbp,cred_t * cr,caller_context_t * ct)229 fs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
230 flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct)
231 {
232 int frcmd;
233 int nlmid;
234 int error = 0;
235 boolean_t skip_lock = B_FALSE;
236 flk_callback_t serialize_callback;
237 int serialize = 0;
238 v_mode_t mode;
239
240 switch (cmd) {
241
242 case F_GETLK:
243 case F_O_GETLK:
244 if (flag & F_REMOTELOCK) {
245 frcmd = RCMDLCK;
246 } else if (flag & F_PXFSLOCK) {
247 frcmd = PCMDLCK;
248 } else {
249 frcmd = 0;
250 bfp->l_pid = ttoproc(curthread)->p_pid;
251 bfp->l_sysid = 0;
252 }
253 break;
254
255 case F_OFD_GETLK:
256 /*
257 * TBD we do not support remote OFD locks at this time.
258 */
259 if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
260 error = EOPNOTSUPP;
261 goto done;
262 }
263 skip_lock = B_TRUE;
264 break;
265
266 case F_SETLK_NBMAND:
267 /*
268 * Are NBMAND locks allowed on this file?
269 */
270 if (!vp->v_vfsp ||
271 !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
272 error = EINVAL;
273 goto done;
274 }
275 if (vp->v_type != VREG) {
276 error = EINVAL;
277 goto done;
278 }
279 /*FALLTHROUGH*/
280
281 case F_SETLK:
282 if (flag & F_REMOTELOCK) {
283 frcmd = SETFLCK|RCMDLCK;
284 } else if (flag & F_PXFSLOCK) {
285 frcmd = SETFLCK|PCMDLCK;
286 } else {
287 frcmd = SETFLCK;
288 bfp->l_pid = ttoproc(curthread)->p_pid;
289 bfp->l_sysid = 0;
290 }
291 if (cmd == F_SETLK_NBMAND &&
292 (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
293 frcmd |= NBMLCK;
294 }
295
296 if (nbl_need_check(vp)) {
297 nbl_start_crit(vp, RW_WRITER);
298 serialize = 1;
299 if (frcmd & NBMLCK) {
300 mode = (bfp->l_type == F_RDLCK) ?
301 V_READ : V_RDANDWR;
302 if (vn_is_mapped(vp, mode)) {
303 error = EAGAIN;
304 goto done;
305 }
306 }
307 }
308 break;
309
310 case F_SETLKW:
311 if (flag & F_REMOTELOCK) {
312 frcmd = SETFLCK|SLPFLCK|RCMDLCK;
313 } else if (flag & F_PXFSLOCK) {
314 frcmd = SETFLCK|SLPFLCK|PCMDLCK;
315 } else {
316 frcmd = SETFLCK|SLPFLCK;
317 bfp->l_pid = ttoproc(curthread)->p_pid;
318 bfp->l_sysid = 0;
319 }
320
321 if (nbl_need_check(vp)) {
322 nbl_start_crit(vp, RW_WRITER);
323 serialize = 1;
324 }
325 break;
326
327 case F_OFD_SETLK:
328 case F_OFD_SETLKW:
329 case F_FLOCK:
330 case F_FLOCKW:
331 /*
332 * TBD we do not support remote OFD locks at this time.
333 */
334 if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
335 error = EOPNOTSUPP;
336 goto done;
337 }
338 skip_lock = B_TRUE;
339 break;
340
341 case F_HASREMOTELOCKS:
342 nlmid = GETNLMID(bfp->l_sysid);
343 if (nlmid != 0) { /* booted as a cluster */
344 l_has_rmt(bfp) =
345 cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
346 } else { /* not booted as a cluster */
347 l_has_rmt(bfp) = flk_has_remote_locks(vp);
348 }
349
350 goto done;
351
352 default:
353 error = EINVAL;
354 goto done;
355 }
356
357 /*
358 * If this is a blocking lock request and we're serializing lock
359 * requests, modify the callback list to leave the critical region
360 * while we're waiting for the lock.
361 */
362
363 if (serialize && (frcmd & SLPFLCK) != 0) {
364 flk_add_callback(&serialize_callback,
365 frlock_serialize_blocked, vp, flk_cbp);
366 flk_cbp = &serialize_callback;
367 }
368
369 if (!skip_lock)
370 error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);
371
372 if (serialize && (frcmd & SLPFLCK) != 0)
373 flk_del_callback(&serialize_callback);
374
375 done:
376 if (serialize)
377 nbl_end_crit(vp);
378
379 return (error);
380 }
381
382 /*
383 * Callback when a lock request blocks and we are serializing requests. If
384 * before sleeping, leave the critical region. If after wakeup, reenter
385 * the critical region.
386 */
387
388 static callb_cpr_t *
frlock_serialize_blocked(flk_cb_when_t when,void * infop)389 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
390 {
391 vnode_t *vp = (vnode_t *)infop;
392
393 if (when == FLK_BEFORE_SLEEP)
394 nbl_end_crit(vp);
395 else {
396 nbl_start_crit(vp, RW_WRITER);
397 }
398
399 return (NULL);
400 }
401
402 /*
403 * Allow any flags.
404 */
405 /* ARGSUSED */
406 int
fs_setfl(vnode_t * vp,int oflags,int nflags,cred_t * cr,caller_context_t * ct)407 fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
408 {
409 return (0);
410 }
411
412 /*
413 * Unlike poll(2), epoll should reject attempts to add normal files or
414 * directories to a given handle. Most non-pseudo filesystems rely on
415 * fs_poll() as their implementation of polling behavior. Exceptions to that
416 * rule (ufs) can use fs_reject_epoll(), so they don't require access to the
417 * inner details of poll. Potential race conditions related to the poll module
418 * being loaded are avoided by implementing the check here in genunix.
419 */
420 boolean_t
fs_reject_epoll()421 fs_reject_epoll()
422 {
423 /* Check if the currently-active pollcache is epoll-enabled. */
424 return (curthread->t_pollcache != NULL &&
425 (curthread->t_pollcache->pc_flag & PC_EPOLL) != 0);
426 }
427
428 /* ARGSUSED */
429 int
fs_poll(vnode_t * vp,short events,int anyyet,short * reventsp,struct pollhead ** phpp,caller_context_t * ct)430 fs_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
431 struct pollhead **phpp, caller_context_t *ct)
432 {
433 /*
434 * Regular filesystems should reject epollers. On the off chance that
435 * a non-epoll consumer expresses the desire for edge-triggered
436 * polling, we reject them too. Yes, the expected error for this
437 * really is EPERM.
438 */
439 if (fs_reject_epoll() || (events & POLLET) != 0) {
440 return (EPERM);
441 }
442
443 *reventsp = 0;
444 if (events & POLLIN)
445 *reventsp |= POLLIN;
446 if (events & POLLRDNORM)
447 *reventsp |= POLLRDNORM;
448 if (events & POLLRDBAND)
449 *reventsp |= POLLRDBAND;
450 if (events & POLLOUT)
451 *reventsp |= POLLOUT;
452 if (events & POLLWRBAND)
453 *reventsp |= POLLWRBAND;
454
455 return (0);
456 }
457
458 /*
459 * POSIX pathconf() support.
460 */
461 /* ARGSUSED */
462 int
fs_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)463 fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
464 caller_context_t *ct)
465 {
466 ulong_t val;
467 int error = 0;
468 struct statvfs64 vfsbuf;
469
470 switch (cmd) {
471
472 case _PC_LINK_MAX:
473 val = MAXLINK;
474 break;
475
476 case _PC_MAX_CANON:
477 val = MAX_CANON;
478 break;
479
480 case _PC_MAX_INPUT:
481 val = MAX_INPUT;
482 break;
483
484 case _PC_NAME_MAX:
485 bzero(&vfsbuf, sizeof (vfsbuf));
486 if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
487 break;
488 val = vfsbuf.f_namemax;
489 break;
490
491 case _PC_PATH_MAX:
492 case _PC_SYMLINK_MAX:
493 val = MAXPATHLEN;
494 break;
495
496 case _PC_PIPE_BUF:
497 val = PIPE_BUF;
498 break;
499
500 case _PC_NO_TRUNC:
501 if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
502 val = 1; /* NOTRUNC is enabled for vp */
503 else
504 val = (ulong_t)-1;
505 break;
506
507 case _PC_VDISABLE:
508 val = _POSIX_VDISABLE;
509 break;
510
511 case _PC_CHOWN_RESTRICTED:
512 if (rstchown)
513 val = rstchown; /* chown restricted enabled */
514 else
515 val = (ulong_t)-1;
516 break;
517
518 case _PC_FILESIZEBITS:
519
520 /*
521 * If ever we come here it means that underlying file system
522 * does not recognise the command and therefore this
523 * configurable limit cannot be determined. We return -1
524 * and don't change errno.
525 */
526
527 val = (ulong_t)-1; /* large file support */
528 break;
529
530 case _PC_ACL_ENABLED:
531 val = 0;
532 break;
533
534 case _PC_CASE_BEHAVIOR:
535 val = _CASE_SENSITIVE;
536 if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
537 val |= _CASE_INSENSITIVE;
538 if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
539 val &= ~_CASE_SENSITIVE;
540 break;
541
542 case _PC_SATTR_ENABLED:
543 case _PC_SATTR_EXISTS:
544 val = 0;
545 break;
546
547 case _PC_ACCESS_FILTERING:
548 val = 0;
549 break;
550
551 default:
552 error = EINVAL;
553 break;
554 }
555
556 if (error == 0)
557 *valp = val;
558 return (error);
559 }
560
561 /*
562 * Dispose of a page.
563 */
564 /* ARGSUSED */
565 void
fs_dispose(struct vnode * vp,page_t * pp,int fl,int dn,struct cred * cr,caller_context_t * ct)566 fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
567 caller_context_t *ct)
568 {
569
570 ASSERT(fl == B_FREE || fl == B_INVAL);
571
572 if (fl == B_FREE)
573 page_free(pp, dn);
574 else
575 page_destroy(pp, dn);
576 }
577
578 /* ARGSUSED */
579 void
fs_nodispose(struct vnode * vp,page_t * pp,int fl,int dn,struct cred * cr,caller_context_t * ct)580 fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
581 caller_context_t *ct)
582 {
583 cmn_err(CE_PANIC, "fs_nodispose invoked");
584 }
585
586 /*
587 * fabricate acls for file systems that do not support acls.
588 */
589 /* ARGSUSED */
590 int
fs_fab_acl(vnode_t * vp,vsecattr_t * vsecattr,int flag,cred_t * cr,caller_context_t * ct)591 fs_fab_acl(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr,
592 caller_context_t *ct)
593 {
594 aclent_t *aclentp;
595 struct vattr vattr;
596 int error;
597 size_t aclsize;
598
599 vsecattr->vsa_aclcnt = 0;
600 vsecattr->vsa_aclentsz = 0;
601 vsecattr->vsa_aclentp = NULL;
602 vsecattr->vsa_dfaclcnt = 0; /* Default ACLs are not fabricated */
603 vsecattr->vsa_dfaclentp = NULL;
604
605 vattr.va_mask = AT_MODE | AT_UID | AT_GID;
606 if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
607 return (error);
608
609 if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
610 aclsize = 4 * sizeof (aclent_t);
611 vsecattr->vsa_aclcnt = 4; /* USER, GROUP, OTHER, and CLASS */
612 vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
613 aclentp = vsecattr->vsa_aclentp;
614
615 aclentp->a_type = USER_OBJ; /* Owner */
616 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
617 aclentp->a_id = vattr.va_uid; /* Really undefined */
618 aclentp++;
619
620 aclentp->a_type = GROUP_OBJ; /* Group */
621 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
622 aclentp->a_id = vattr.va_gid; /* Really undefined */
623 aclentp++;
624
625 aclentp->a_type = OTHER_OBJ; /* Other */
626 aclentp->a_perm = vattr.va_mode & 0007;
627 aclentp->a_id = (gid_t)-1; /* Really undefined */
628 aclentp++;
629
630 aclentp->a_type = CLASS_OBJ; /* Class */
631 aclentp->a_perm = (ushort_t)(0007);
632 aclentp->a_id = (gid_t)-1; /* Really undefined */
633 } else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
634 VERIFY(0 == acl_trivial_create(vattr.va_mode,
635 (vp->v_type == VDIR), (ace_t **)&vsecattr->vsa_aclentp,
636 &vsecattr->vsa_aclcnt));
637 vsecattr->vsa_aclentsz = vsecattr->vsa_aclcnt * sizeof (ace_t);
638 }
639
640 return (error);
641 }
642
643 /*
644 * Common code for implementing DOS share reservations
645 */
646 /* ARGSUSED4 */
647 int
fs_shrlock(struct vnode * vp,int cmd,struct shrlock * shr,int flag,cred_t * cr,caller_context_t * ct)648 fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
649 caller_context_t *ct)
650 {
651 int error;
652
653 /*
654 * Make sure that the file was opened with permissions appropriate
655 * for the request, and make sure the caller isn't trying to sneak
656 * in an NBMAND request.
657 */
658 if (cmd == F_SHARE) {
659 if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
660 ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
661 return (EBADF);
662 if (shr->s_access & (F_RMACC | F_MDACC))
663 return (EINVAL);
664 if (shr->s_deny & (F_MANDDNY | F_RMDNY))
665 return (EINVAL);
666 }
667 if (cmd == F_SHARE_NBMAND) {
668 /* make sure nbmand is allowed on the file */
669 if (!vp->v_vfsp ||
670 !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
671 return (EINVAL);
672 }
673 if (vp->v_type != VREG) {
674 return (EINVAL);
675 }
676 }
677
678 nbl_start_crit(vp, RW_WRITER);
679
680 switch (cmd) {
681
682 case F_SHARE_NBMAND:
683 shr->s_deny |= F_MANDDNY;
684 /*FALLTHROUGH*/
685 case F_SHARE:
686 error = add_share(vp, shr);
687 break;
688
689 case F_UNSHARE:
690 error = del_share(vp, shr);
691 break;
692
693 case F_HASREMOTELOCKS:
694 /*
695 * We are overloading this command to refer to remote
696 * shares as well as remote locks, despite its name.
697 */
698 shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
699 error = 0;
700 break;
701
702 default:
703 error = EINVAL;
704 break;
705 }
706
707 nbl_end_crit(vp);
708 return (error);
709 }
710
711 /*ARGSUSED1*/
712 int
fs_vnevent_nosupport(vnode_t * vp,vnevent_t e,vnode_t * dvp,char * fnm,caller_context_t * ct)713 fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
714 caller_context_t *ct)
715 {
716 ASSERT(vp != NULL);
717 return (ENOTSUP);
718 }
719
720 /*ARGSUSED1*/
721 int
fs_vnevent_support(vnode_t * vp,vnevent_t e,vnode_t * dvp,char * fnm,caller_context_t * ct)722 fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
723 caller_context_t *ct)
724 {
725 ASSERT(vp != NULL);
726 return (0);
727 }
728
729 /*
730 * return 1 for non-trivial ACL.
731 *
732 * NB: It is not necessary for the caller to VOP_RWLOCK since
733 * we only issue VOP_GETSECATTR.
734 *
735 * Returns 0 == trivial
736 * 1 == NOT Trivial
737 * <0 could not determine.
738 */
739 int
fs_acl_nontrivial(vnode_t * vp,cred_t * cr)740 fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
741 {
742 ulong_t acl_styles;
743 ulong_t acl_flavor;
744 vsecattr_t vsecattr;
745 int error;
746 int isnontrivial;
747
748 /* determine the forms of ACLs maintained */
749 error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);
750
751 /* clear bits we don't understand and establish default acl_style */
752 acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
753 if (error || (acl_styles == 0))
754 acl_styles = _ACL_ACLENT_ENABLED;
755
756 vsecattr.vsa_aclentp = NULL;
757 vsecattr.vsa_dfaclentp = NULL;
758 vsecattr.vsa_aclcnt = 0;
759 vsecattr.vsa_dfaclcnt = 0;
760
761 while (acl_styles) {
762 /* select one of the styles as current flavor */
763 acl_flavor = 0;
764 if (acl_styles & _ACL_ACLENT_ENABLED) {
765 acl_flavor = _ACL_ACLENT_ENABLED;
766 vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
767 } else if (acl_styles & _ACL_ACE_ENABLED) {
768 acl_flavor = _ACL_ACE_ENABLED;
769 vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
770 }
771
772 ASSERT(vsecattr.vsa_mask && acl_flavor);
773 error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
774 if (error == 0)
775 break;
776
777 /* that flavor failed */
778 acl_styles &= ~acl_flavor;
779 }
780
781 /* if all styles fail then assume trivial */
782 if (acl_styles == 0)
783 return (0);
784
785 /* process the flavor that worked */
786 isnontrivial = 0;
787 if (acl_flavor & _ACL_ACLENT_ENABLED) {
788 if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
789 isnontrivial = 1;
790 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
791 kmem_free(vsecattr.vsa_aclentp,
792 vsecattr.vsa_aclcnt * sizeof (aclent_t));
793 if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
794 kmem_free(vsecattr.vsa_dfaclentp,
795 vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
796 }
797 if (acl_flavor & _ACL_ACE_ENABLED) {
798 isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
799 vsecattr.vsa_aclcnt);
800
801 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
802 kmem_free(vsecattr.vsa_aclentp,
803 vsecattr.vsa_aclcnt * sizeof (ace_t));
804 /* ACE has no vsecattr.vsa_dfaclcnt */
805 }
806 return (isnontrivial);
807 }
808
809 /*
810 * Check whether we need a retry to recover from STALE error.
811 */
812 int
fs_need_estale_retry(int retry_count)813 fs_need_estale_retry(int retry_count)
814 {
815 if (retry_count < fs_estale_retry)
816 return (1);
817 else
818 return (0);
819 }
820
821
/* Registered anti-virus scan routine; NULL until a scanner registers. */
static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;
823
824 /*
825 * Routine for anti-virus scanner to call to register its scanning routine.
826 */
827 void
fs_vscan_register(int (* av_scan)(vnode_t *,cred_t *,int))828 fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
829 {
830 fs_av_scan = av_scan;
831 }
832
833 /*
834 * Routine for file systems to call to initiate anti-virus scanning.
835 * Scanning will only be done on REGular files (currently).
836 */
837 int
fs_vscan(vnode_t * vp,cred_t * cr,int async)838 fs_vscan(vnode_t *vp, cred_t *cr, int async)
839 {
840 int ret = 0;
841
842 if (fs_av_scan && vp->v_type == VREG)
843 ret = (*fs_av_scan)(vp, cr, async);
844
845 return (ret);
846 }
847
848 /*
849 * support functions for reparse point
850 */
851 /*
852 * reparse_vnode_parse
853 *
854 * Read the symlink data of a reparse point specified by the vnode
855 * and return the reparse data as name-value pair in the nvlist.
856 */
857 int
reparse_vnode_parse(vnode_t * vp,nvlist_t * nvl)858 reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
859 {
860 int err;
861 char *lkdata;
862 struct uio uio;
863 struct iovec iov;
864
865 if (vp == NULL || nvl == NULL)
866 return (EINVAL);
867
868 lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);
869
870 /*
871 * Set up io vector to read sym link data
872 */
873 iov.iov_base = lkdata;
874 iov.iov_len = MAXREPARSELEN;
875 uio.uio_iov = &iov;
876 uio.uio_iovcnt = 1;
877 uio.uio_segflg = UIO_SYSSPACE;
878 uio.uio_extflg = UIO_COPY_CACHED;
879 uio.uio_loffset = (offset_t)0;
880 uio.uio_resid = MAXREPARSELEN;
881
882 if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
883 *(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
884 err = reparse_parse(lkdata, nvl);
885 }
886 kmem_free(lkdata, MAXREPARSELEN); /* done with lkdata */
887
888 return (err);
889 }
890
891 void
reparse_point_init()892 reparse_point_init()
893 {
894 mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
895 }
896
897 static door_handle_t
reparse_door_get_handle()898 reparse_door_get_handle()
899 {
900 door_handle_t dh;
901
902 mutex_enter(&reparsed_door_lock);
903 if ((dh = reparsed_door) == NULL) {
904 if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
905 reparsed_door = NULL;
906 dh = NULL;
907 } else
908 dh = reparsed_door;
909 }
910 mutex_exit(&reparsed_door_lock);
911 return (dh);
912 }
913
914 static void
reparse_door_reset_handle()915 reparse_door_reset_handle()
916 {
917 mutex_enter(&reparsed_door_lock);
918 reparsed_door = NULL;
919 mutex_exit(&reparsed_door_lock);
920 }
921
922 /*
923 * reparse_kderef
924 *
925 * Accepts the service-specific item from the reparse point and returns
926 * the service-specific data requested. The caller specifies the size of
927 * the buffer provided via *bufsz; the routine will fail with EOVERFLOW
928 * if the results will not fit in the buffer, in which case, *bufsz will
929 * contain the number of bytes needed to hold the results.
930 *
931 * if ok return 0 and update *bufsize with length of actual result
932 * else return error code.
933 */
934 int
reparse_kderef(const char * svc_type,const char * svc_data,char * buf,size_t * bufsize)935 reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
936 size_t *bufsize)
937 {
938 int err, retries, need_free, retried_doorhd;
939 size_t dlen, res_len;
940 char *darg;
941 door_arg_t door_args;
942 reparsed_door_res_t *resp;
943 door_handle_t rp_door;
944
945 if (svc_type == NULL || svc_data == NULL || buf == NULL ||
946 bufsize == NULL)
947 return (EINVAL);
948
949 /* get reparsed's door handle */
950 if ((rp_door = reparse_door_get_handle()) == NULL)
951 return (EBADF);
952
953 /* setup buffer for door_call args and results */
954 dlen = strlen(svc_type) + strlen(svc_data) + 2;
955 if (*bufsize < dlen) {
956 darg = kmem_alloc(dlen, KM_SLEEP);
957 need_free = 1;
958 } else {
959 darg = buf; /* use same buffer for door's args & results */
960 need_free = 0;
961 }
962
963 /* build argument string of door call */
964 (void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);
965
966 /* setup args for door call */
967 door_args.data_ptr = darg;
968 door_args.data_size = dlen;
969 door_args.desc_ptr = NULL;
970 door_args.desc_num = 0;
971 door_args.rbuf = buf;
972 door_args.rsize = *bufsize;
973
974 /* do the door_call */
975 retried_doorhd = 0;
976 retries = 0;
977 door_ki_hold(rp_door);
978 while ((err = door_ki_upcall_limited(rp_door, &door_args,
979 NULL, SIZE_MAX, 0)) != 0) {
980 if (err == EAGAIN || err == EINTR) {
981 if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
982 delay(SEC_TO_TICK(1));
983 continue;
984 }
985 } else if (err == EBADF) {
986 /* door server goes away... */
987 reparse_door_reset_handle();
988
989 if (retried_doorhd == 0) {
990 door_ki_rele(rp_door);
991 retried_doorhd++;
992 rp_door = reparse_door_get_handle();
993 if (rp_door != NULL) {
994 door_ki_hold(rp_door);
995 continue;
996 }
997 }
998 }
999 break;
1000 }
1001
1002 if (rp_door)
1003 door_ki_rele(rp_door);
1004
1005 if (need_free)
1006 kmem_free(darg, dlen); /* done with args buffer */
1007
1008 if (err != 0)
1009 return (err);
1010
1011 resp = (reparsed_door_res_t *)door_args.rbuf;
1012 if ((err = resp->res_status) == 0) {
1013 /*
1014 * have to save the length of the results before the
1015 * bcopy below since it's can be an overlap copy that
1016 * overwrites the reparsed_door_res_t structure at
1017 * the beginning of the buffer.
1018 */
1019 res_len = (size_t)resp->res_len;
1020
1021 /* deref call is ok */
1022 if (res_len > *bufsize)
1023 err = EOVERFLOW;
1024 else
1025 bcopy(resp->res_data, buf, res_len);
1026 *bufsize = res_len;
1027 }
1028 if (door_args.rbuf != buf)
1029 kmem_free(door_args.rbuf, door_args.rsize);
1030
1031 return (err);
1032 }
1033