1/*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source.  A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12/*
13 * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
14 */
15
16#include <sys/types.h>
17#include <sys/param.h>
18#include <sys/systm.h>
19#include <sys/t_lock.h>
20#include <sys/errno.h>
21#include <sys/cred.h>
22#include <sys/user.h>
23#include <sys/uio.h>
24#include <sys/file.h>
25#include <sys/pathname.h>
26#include <sys/vfs.h>
27#include <sys/vnode.h>
28#include <sys/stat.h>
29#include <sys/mode.h>
30#include <sys/kmem.h>
31#include <sys/cmn_err.h>
32#include <sys/debug.h>
33#include <sys/atomic.h>
34#include <sys/acl.h>
35#include <sys/filio.h>
36#include <sys/flock.h>
37#include <sys/nbmlock.h>
38#include <sys/fcntl.h>
39#include <sys/poll.h>
40#include <sys/time.h>
41
42#include <errno.h>
43#include <fcntl.h>
44#include <unistd.h>
45
46#include "vncache.h"
47
/* Mask covering the write-capable access-mode bits of open(2) flags. */
#define	O_RWMASK	(O_WRONLY | O_RDWR) /* == 3 */

/*
 * When zero, fop_shrlock() only validates its command and returns
 * success; when non-zero the request is passed through to fcntl(2).
 */
int fop_shrlock_enable = 0;

/* Defined near the end of this file. */
int stat_to_vattr(const struct stat *, vattr_t *);
/* xvattr get/set helpers used by fop_getattr() and fop_setattr(). */
int fop__getxvattr(vnode_t *, xvattr_t *);
int fop__setxvattr(vnode_t *, xvattr_t *);

/* Forward declaration; used by fake_lookup_xattrdir() and fop_inactive(). */
static void fake_inactive_xattrdir(vnode_t *);
57
58/* ARGSUSED */
59int
60fop_open(
61	vnode_t **vpp,
62	int mode,
63	cred_t *cr,
64	caller_context_t *ct)
65{
66
67	if ((*vpp)->v_type == VREG) {
68		if (mode & FREAD)
69			atomic_add_32(&((*vpp)->v_rdcnt), 1);
70		if (mode & FWRITE)
71			atomic_add_32(&((*vpp)->v_wrcnt), 1);
72	}
73
74	/* call to ->vop_open was here */
75
76	return (0);
77}
78
79/* ARGSUSED */
80int
81fop_close(
82	vnode_t *vp,
83	int flag,
84	int count,
85	offset_t offset,
86	cred_t *cr,
87	caller_context_t *ct)
88{
89
90	/* call to ->vop_close was here */
91
92	/*
93	 * Check passed in count to handle possible dups. Vnode counts are only
94	 * kept on regular files
95	 */
96	if ((vp->v_type == VREG) && (count == 1))  {
97		if (flag & FREAD) {
98			ASSERT(vp->v_rdcnt > 0);
99			atomic_add_32(&(vp->v_rdcnt), -1);
100		}
101		if (flag & FWRITE) {
102			ASSERT(vp->v_wrcnt > 0);
103			atomic_add_32(&(vp->v_wrcnt), -1);
104		}
105	}
106	return (0);
107}
108
109/* ARGSUSED */
110int
111fop_read(
112	vnode_t *vp,
113	uio_t *uio,
114	int ioflag,
115	cred_t *cr,
116	caller_context_t *ct)
117{
118	struct stat st;
119	struct iovec *iov;
120	ssize_t resid;
121	size_t cnt;
122	int n;
123
124	/*
125	 * If that caller asks for read beyond end of file,
126	 * that causes the pread call to block.  (Ugh!)
127	 * Get the file size and return what we can.
128	 */
129	(void) fstat(vp->v_fd, &st);
130	resid = uio->uio_resid;
131	if ((uio->uio_loffset + resid) > st.st_size)
132		resid = st.st_size - uio->uio_loffset;
133
134	while (resid > 0) {
135
136		ASSERT(uio->uio_iovcnt > 0);
137		iov = uio->uio_iov;
138
139		if (iov->iov_len == 0) {
140			uio->uio_iov++;
141			uio->uio_iovcnt--;
142			continue;
143		}
144		cnt = iov->iov_len;
145		if (cnt > resid)
146			cnt = resid;
147
148		n = pread(vp->v_fd, iov->iov_base, cnt, uio->uio_loffset);
149		if (n < 0)
150			return (errno);
151
152		iov->iov_base += n;
153		iov->iov_len -= n;
154
155		uio->uio_resid -= n;
156		uio->uio_loffset += n;
157
158		resid -= n;
159	}
160
161	return (0);
162}
163
164/* ARGSUSED */
165int
166fop_write(
167	vnode_t *vp,
168	uio_t *uio,
169	int ioflag,
170	cred_t *cr,
171	caller_context_t *ct)
172{
173	struct iovec *iov;
174	size_t cnt;
175	int n;
176
177	while (uio->uio_resid > 0) {
178
179		ASSERT(uio->uio_iovcnt > 0);
180		iov = uio->uio_iov;
181
182		if (iov->iov_len == 0) {
183			uio->uio_iov++;
184			uio->uio_iovcnt--;
185			continue;
186		}
187		cnt = iov->iov_len;
188		if (cnt > uio->uio_resid)
189			cnt = uio->uio_resid;
190
191		n = pwrite(vp->v_fd, iov->iov_base, iov->iov_len,
192		    uio->uio_loffset);
193		if (n < 0)
194			return (errno);
195
196		iov->iov_base += n;
197		iov->iov_len -= n;
198
199		uio->uio_resid -= n;
200		uio->uio_loffset += n;
201	}
202
203	if (ioflag == FSYNC) {
204		(void) fsync(vp->v_fd);
205	}
206
207	return (0);
208}
209
210/* ARGSUSED */
211int
212fop_ioctl(
213	vnode_t *vp,
214	int cmd,
215	intptr_t arg,
216	int flag,
217	cred_t *cr,
218	int *rvalp,
219	caller_context_t *ct)
220{
221	off64_t off;
222	int rv, whence;
223
224	switch (cmd) {
225	case _FIO_SEEK_DATA:
226	case _FIO_SEEK_HOLE:
227		whence = (cmd == _FIO_SEEK_DATA) ? SEEK_DATA : SEEK_HOLE;
228		bcopy((void *)arg, &off, sizeof (off));
229		off = lseek(vp->v_fd, off, whence);
230		if (off == (off64_t)-1) {
231			rv = errno;
232		} else {
233			bcopy(&off, (void *)arg, sizeof (off));
234			rv = 0;
235		}
236		break;
237
238	default:
239		rv = ENOTTY;
240		break;
241	}
242
243	return (rv);
244}
245
246/* ARGSUSED */
247int
248fop_setfl(
249	vnode_t *vp,
250	int oflags,
251	int nflags,
252	cred_t *cr,
253	caller_context_t *ct)
254{
255	/* allow any flags? See fs_setfl */
256	return (0);
257}
258
259/* ARGSUSED */
260int
261fop_getattr(
262	vnode_t *vp,
263	vattr_t *vap,
264	int flags,
265	cred_t *cr,
266	caller_context_t *ct)
267{
268	int error;
269	struct stat st;
270
271	if (fstat(vp->v_fd, &st) == -1)
272		return (errno);
273	error = stat_to_vattr(&st, vap);
274
275	if (vap->va_mask & AT_XVATTR)
276		(void) fop__getxvattr(vp, (xvattr_t *)vap);
277
278	return (error);
279}
280
281/* ARGSUSED */
282int
283fop_setattr(
284	vnode_t *vp,
285	vattr_t *vap,
286	int flags,
287	cred_t *cr,
288	caller_context_t *ct)
289{
290	timespec_t times[2];
291	int err;
292
293	if (vap->va_mask & AT_SIZE) {
294		if (ftruncate(vp->v_fd, vap->va_size) == -1) {
295			err = errno;
296			if (err == EBADF)
297				err = EACCES;
298			return (err);
299		}
300	}
301
302	/* AT_MODE or anything else? */
303
304	if (vap->va_mask & AT_XVATTR)
305		(void) fop__setxvattr(vp, (xvattr_t *)vap);
306
307	if (vap->va_mask & (AT_ATIME | AT_MTIME)) {
308		if (vap->va_mask & AT_ATIME) {
309			times[0] = vap->va_atime;
310		} else {
311			times[0].tv_sec = 0;
312			times[0].tv_nsec = UTIME_OMIT;
313		}
314		if (vap->va_mask & AT_MTIME) {
315			times[1] = vap->va_mtime;
316		} else {
317			times[1].tv_sec = 0;
318			times[1].tv_nsec = UTIME_OMIT;
319		}
320
321		(void) futimens(vp->v_fd, times);
322	}
323
324	return (0);
325}
326
327/* ARGSUSED */
328int
329fop_access(
330	vnode_t *vp,
331	int mode,
332	int flags,
333	cred_t *cr,
334	caller_context_t *ct)
335{
336	return (0);
337}
338
339/*
340 * Conceptually like xattr_dir_lookup()
341 */
342static int
343fake_lookup_xattrdir(
344	vnode_t *dvp,
345	vnode_t **vpp)
346{
347	int len, fd;
348	int omode = O_RDWR | O_NOFOLLOW;
349	vnode_t *vp;
350
351	*vpp = NULL;
352
353	if (dvp->v_type != VDIR && dvp->v_type != VREG)
354		return (EINVAL);
355
356	/*
357	 * If we're already in sysattr space, don't allow creation
358	 * of another level of sysattrs.
359	 */
360	if (dvp->v_flag & V_SYSATTR)
361		return (EINVAL);
362
363	mutex_enter(&dvp->v_lock);
364	if (dvp->v_xattrdir != NULL) {
365		*vpp = dvp->v_xattrdir;
366		VN_HOLD(*vpp);
367		mutex_exit(&dvp->v_lock);
368		return (0);
369	}
370	mutex_exit(&dvp->v_lock);
371
372	omode = O_RDONLY|O_XATTR;
373	fd = openat(dvp->v_fd, ".", omode);
374	if (fd < 0)
375		return (errno);
376
377	vp = vn_alloc(KM_SLEEP);
378	vp->v_fd = fd;
379	vp->v_flag = V_XATTRDIR|V_SYSATTR;
380	vp->v_type = VDIR;
381	vp->v_vfsp = dvp->v_vfsp;
382
383	/* Set v_path to parent path + "/@" (like NFS) */
384	len = strlen(dvp->v_path) + 3;
385	vp->v_path = kmem_alloc(len, KM_SLEEP);
386	(void) snprintf(vp->v_path, len, "%s/@", dvp->v_path);
387
388	/*
389	 * Keep a pointer to the parent and a hold on it.
390	 * Both are cleaned up in fake_inactive_xattrdir
391	 */
392	vp->v_data = dvp;
393	vn_hold(dvp);
394
395	mutex_enter(&dvp->v_lock);
396	if (dvp->v_xattrdir == NULL) {
397		*vpp = dvp->v_xattrdir = vp;
398		mutex_exit(&dvp->v_lock);
399	} else {
400		*vpp = dvp->v_xattrdir;
401		mutex_exit(&dvp->v_lock);
402		fake_inactive_xattrdir(vp);
403	}
404
405	return (0);
406}
407
408/* ARGSUSED */
409int
410fop_lookup(
411	vnode_t *dvp,
412	char *name,
413	vnode_t **vpp,
414	pathname_t *pnp,
415	int flags,
416	vnode_t *rdir,
417	cred_t *cr,
418	caller_context_t *ct,
419	int *deflags,		/* Returned per-dirent flags */
420	pathname_t *ppnp)	/* Returned case-preserved name in directory */
421{
422	int fd;
423	int omode = O_RDWR | O_NOFOLLOW;
424	vnode_t *vp;
425	struct stat st;
426
427	if (flags & LOOKUP_XATTR)
428		return (fake_lookup_xattrdir(dvp, vpp));
429
430	/*
431	 * If lookup is for "", just return dvp.
432	 */
433	if (name[0] == '\0') {
434		vn_hold(dvp);
435		*vpp = dvp;
436		return (0);
437	}
438
439	if (fstatat(dvp->v_fd, name, &st, AT_SYMLINK_NOFOLLOW) == -1)
440		return (errno);
441
442	vp = vncache_lookup(&st);
443	if (vp != NULL) {
444		/* lookup gave us a hold */
445		*vpp = vp;
446		return (0);
447	}
448
449	if (S_ISDIR(st.st_mode))
450		omode = O_RDONLY | O_NOFOLLOW;
451
452again:
453	fd = openat(dvp->v_fd, name, omode, 0);
454	if (fd < 0) {
455		if ((omode & O_RWMASK) == O_RDWR) {
456			omode &= ~O_RWMASK;
457			omode |= O_RDONLY;
458			goto again;
459		}
460		return (errno);
461	}
462
463	if (fstat(fd, &st) == -1) {
464		(void) close(fd);
465		return (errno);
466	}
467
468	vp = vncache_enter(&st, dvp, name, fd);
469
470	*vpp = vp;
471	return (0);
472}
473
474/* ARGSUSED */
475int
476fop_create(
477	vnode_t *dvp,
478	char *name,
479	vattr_t *vap,
480	vcexcl_t excl,
481	int mode,
482	vnode_t **vpp,
483	cred_t *cr,
484	int flags,
485	caller_context_t *ct,
486	vsecattr_t *vsecp)	/* ACL to set during create */
487{
488	struct stat st;
489	vnode_t *vp;
490	int err, fd, omode;
491
492	/*
493	 * If creating "", just return dvp.
494	 */
495	if (name[0] == '\0') {
496		vn_hold(dvp);
497		*vpp = dvp;
498		return (0);
499	}
500
501	err = fstatat(dvp->v_fd, name, &st, AT_SYMLINK_NOFOLLOW);
502	if (err != 0)
503		err = errno;
504
505	vp = NULL;
506	if (err == 0) {
507		/* The file already exists. */
508		if (excl == EXCL)
509			return (EEXIST);
510
511		vp = vncache_lookup(&st);
512		/* vp gained a hold */
513	}
514
515	if (vp == NULL) {
516		/*
517		 * Open it. (may or may not exist)
518		 */
519		omode = O_RDWR | O_CREAT | O_NOFOLLOW;
520		if (excl == EXCL)
521			omode |= O_EXCL;
522	open_again:
523		fd = openat(dvp->v_fd, name, omode, mode);
524		if (fd < 0) {
525			if ((omode & O_RWMASK) == O_RDWR) {
526				omode &= ~O_RWMASK;
527				omode |= O_RDONLY;
528				goto open_again;
529			}
530			return (errno);
531		}
532		(void) fstat(fd, &st);
533
534		vp = vncache_enter(&st, dvp, name, fd);
535		/* vp has its initial hold */
536	}
537
538	/* Should have the vp now. */
539	if (vp == NULL)
540		return (EFAULT);
541
542	if (vp->v_type == VDIR && vap->va_type != VDIR) {
543		vn_rele(vp);
544		return (EISDIR);
545	}
546	if (vp->v_type != VDIR && vap->va_type == VDIR) {
547		vn_rele(vp);
548		return (ENOTDIR);
549	}
550
551	/*
552	 * Might need to set attributes.
553	 */
554	(void) fop_setattr(vp, vap, 0, cr, ct);
555
556	*vpp = vp;
557	return (0);
558}
559
560/* ARGSUSED */
561int
562fop_remove(
563	vnode_t *dvp,
564	char *name,
565	cred_t *cr,
566	caller_context_t *ct,
567	int flags)
568{
569
570	if (unlinkat(dvp->v_fd, name, 0))
571		return (errno);
572
573	return (0);
574}
575
576/* ARGSUSED */
577int
578fop_link(
579	vnode_t *to_dvp,
580	vnode_t *fr_vp,
581	char *to_name,
582	cred_t *cr,
583	caller_context_t *ct,
584	int flags)
585{
586	int err;
587
588	/*
589	 * Would prefer to specify "from" as the combination:
590	 * (fr_vp->v_fd, NULL) but linkat does not permit it.
591	 */
592	err = linkat(AT_FDCWD, fr_vp->v_path, to_dvp->v_fd, to_name,
593	    AT_SYMLINK_FOLLOW);
594	if (err == -1)
595		err = errno;
596
597	return (err);
598}
599
600/* ARGSUSED */
601int
602fop_rename(
603	vnode_t *from_dvp,
604	char *from_name,
605	vnode_t *to_dvp,
606	char *to_name,
607	cred_t *cr,
608	caller_context_t *ct,
609	int flags)
610{
611	struct stat st;
612	vnode_t *vp;
613	int err;
614
615	if (fstatat(from_dvp->v_fd, from_name, &st,
616	    AT_SYMLINK_NOFOLLOW) == -1)
617		return (errno);
618
619	vp = vncache_lookup(&st);
620	if (vp == NULL)
621		return (ENOENT);
622
623	err = renameat(from_dvp->v_fd, from_name, to_dvp->v_fd, to_name);
624	if (err == -1)
625		err = errno;
626	else
627		vncache_renamed(vp, to_dvp, to_name);
628
629	vn_rele(vp);
630
631	return (err);
632}
633
634/* ARGSUSED */
635int
636fop_mkdir(
637	vnode_t *dvp,
638	char *name,
639	vattr_t *vap,
640	vnode_t **vpp,
641	cred_t *cr,
642	caller_context_t *ct,
643	int flags,
644	vsecattr_t *vsecp)	/* ACL to set during create */
645{
646	struct stat st;
647	int err, fd;
648
649	mode_t mode = vap->va_mode & 0777;
650
651	if (mkdirat(dvp->v_fd, name, mode) == -1)
652		return (errno);
653
654	if ((fd = openat(dvp->v_fd, name, O_RDONLY)) == -1)
655		return (errno);
656	if (fstat(fd, &st) == -1) {
657		err = errno;
658		(void) close(fd);
659		return (err);
660	}
661
662	*vpp = vncache_enter(&st, dvp, name, fd);
663
664	/*
665	 * Might need to set attributes.
666	 */
667	(void) fop_setattr(*vpp, vap, 0, cr, ct);
668
669	return (0);
670}
671
672/* ARGSUSED */
673int
674fop_rmdir(
675	vnode_t *dvp,
676	char *name,
677	vnode_t *cdir,
678	cred_t *cr,
679	caller_context_t *ct,
680	int flags)
681{
682
683	if (unlinkat(dvp->v_fd, name, AT_REMOVEDIR) == -1)
684		return (errno);
685
686	return (0);
687}
688
689/* ARGSUSED */
690int
691fop_readdir(
692	vnode_t *vp,
693	uio_t *uiop,
694	cred_t *cr,
695	int *eofp,
696	caller_context_t *ct,
697	int flags)
698{
699	struct iovec *iov;
700	int cnt;
701	int error = 0;
702	int fd = vp->v_fd;
703
704	if (eofp) {
705		*eofp = 0;
706	}
707
708	error = lseek(fd, uiop->uio_loffset, SEEK_SET);
709	if (error == -1)
710		return (errno);
711
712	ASSERT(uiop->uio_iovcnt > 0);
713	iov = uiop->uio_iov;
714	if (iov->iov_len < sizeof (struct dirent))
715		return (EINVAL);
716
717	/* LINTED E_BAD_PTR_CAST_ALIGN */
718	cnt = getdents(fd, (struct dirent *)(uiop->uio_iov->iov_base),
719	    uiop->uio_resid);
720	if (cnt == -1)
721		return (errno);
722	if (cnt == 0) {
723		if (eofp) {
724			*eofp = 1;
725		}
726		return (ENOENT);
727	}
728
729	iov->iov_base += cnt;
730	iov->iov_len  -= cnt;
731	uiop->uio_resid -= cnt;
732	uiop->uio_loffset = lseek(fd, 0LL, SEEK_CUR);
733
734	return (0);
735}
736
737/* ARGSUSED */
738int
739fop_symlink(
740	vnode_t *dvp,
741	char *linkname,
742	vattr_t *vap,
743	char *target,
744	cred_t *cr,
745	caller_context_t *ct,
746	int flags)
747{
748	return (ENOSYS);
749}
750
751/* ARGSUSED */
752int
753fop_readlink(
754	vnode_t *vp,
755	uio_t *uiop,
756	cred_t *cr,
757	caller_context_t *ct)
758{
759	return (ENOSYS);
760}
761
762/* ARGSUSED */
763int
764fop_fsync(
765	vnode_t *vp,
766	int syncflag,
767	cred_t *cr,
768	caller_context_t *ct)
769{
770
771	if (fsync(vp->v_fd) == -1)
772		return (errno);
773
774	return (0);
775}
776
777/* ARGSUSED */
778void
779fop_inactive(
780	vnode_t *vp,
781	cred_t *cr,
782	caller_context_t *ct)
783{
784	if (vp->v_flag & V_XATTRDIR) {
785		fake_inactive_xattrdir(vp);
786	} else {
787		vncache_inactive(vp);
788	}
789}
790
791/*
792 * The special xattr directories are not in the vncache AVL, but
793 * hang off the parent's v_xattrdir field.  When vn_rele finds
794 * an xattr dir at v_count == 1 it calls here, but until we
795 * take locks on both the parent and the xattrdir, we don't
796 * know if we're really at the last reference.  So in here we
797 * take both locks, re-check the count, and either bail out
798 * or proceed with "inactive" vnode cleanup.  Part of that
799 * cleanup includes releasing the hold on the parent and
800 * clearing the parent's v_xattrdir field, which were
801 * setup in fake_lookup_xattrdir()
802 */
803static void
804fake_inactive_xattrdir(vnode_t *vp)
805{
806	vnode_t *dvp = vp->v_data; /* parent */
807	mutex_enter(&dvp->v_lock);
808	mutex_enter(&vp->v_lock);
809	if (vp->v_count > 1) {
810		/* new ref. via v_xattrdir */
811		mutex_exit(&vp->v_lock);
812		mutex_exit(&dvp->v_lock);
813		return;
814	}
815	ASSERT(dvp->v_xattrdir == vp);
816	dvp->v_xattrdir = NULL;
817	mutex_exit(&vp->v_lock);
818	mutex_exit(&dvp->v_lock);
819	vn_rele(dvp);
820	vn_free(vp);
821}
822
823/* ARGSUSED */
824int
825fop_fid(
826	vnode_t *vp,
827	fid_t *fidp,
828	caller_context_t *ct)
829{
830	return (ENOSYS);
831}
832
833/* ARGSUSED */
834int
835fop_rwlock(
836	vnode_t *vp,
837	int write_lock,
838	caller_context_t *ct)
839{
840	/* See: fs_rwlock */
841	return (-1);
842}
843
844/* ARGSUSED */
845void
846fop_rwunlock(
847	vnode_t *vp,
848	int write_lock,
849	caller_context_t *ct)
850{
851	/* See: fs_rwunlock */
852}
853
854/* ARGSUSED */
855int
856fop_seek(
857	vnode_t *vp,
858	offset_t ooff,
859	offset_t *noffp,
860	caller_context_t *ct)
861{
862	return (ENOSYS);
863}
864
865/* ARGSUSED */
866int
867fop_cmp(
868	vnode_t *vp1,
869	vnode_t *vp2,
870	caller_context_t *ct)
871{
872	/* See fs_cmp */
873	return (vncache_cmp(vp1, vp2));
874}
875
876/* ARGSUSED */
877int
878fop_frlock(
879	vnode_t *vp,
880	int cmd,
881	flock64_t *bfp,
882	int flag,
883	offset_t offset,
884	struct flk_callback *flk_cbp,
885	cred_t *cr,
886	caller_context_t *ct)
887{
888#if defined(_LP64)
889	offset_t maxoffset = INT64_MAX;
890#elif defined(_ILP32)
891	/*
892	 * Sadly, the fcntl API enforces 32-bit offsets,
893	 * even though we have _FILE_OFFSET_BITS=64
894	 */
895	offset_t maxoffset = INT32_MAX;
896#else
897#error "unsupported env."
898#endif
899
900	/* See fs_frlock */
901
902	switch (cmd) {
903	case F_GETLK:
904	case F_SETLK_NBMAND:
905	case F_SETLK:
906	case F_SETLKW:
907		break;
908	default:
909		return (EINVAL);
910	}
911
912	/* We only get SEEK_SET ranges here. */
913	if (bfp->l_whence != 0)
914		return (EINVAL);
915
916	/*
917	 * One limitation of using fcntl(2) F_SETLK etc is that
918	 * the real kernel limits the offsets we can use.
919	 * (Maybe the fcntl API should loosen that up?)
920	 * See syscall/fcntl.c:flock_check()
921	 *
922	 * Here in libfksmbsrv we can just ignore such locks,
923	 * or ignore the part that extends beyond maxoffset.
924	 * The SMB layer still keeps track of such locks for
925	 * conflict detection, so not reflecting such locks
926	 * into the real FS layer is OK.  Note: this may
927	 * modify the pased bfp->l_len.
928	 */
929	if (bfp->l_start < 0 || bfp->l_start > maxoffset)
930		return (0);
931	if (bfp->l_len < 0 || bfp->l_len > maxoffset)
932		return (0);
933	if (bfp->l_len > (maxoffset - bfp->l_start + 1))
934		bfp->l_len = (maxoffset - bfp->l_start + 1);
935
936	if (fcntl(vp->v_fd, cmd, bfp) == -1)
937		return (errno);
938
939	return (0);
940}
941
942/* ARGSUSED */
943int
944fop_space(
945	vnode_t *vp,
946	int cmd,
947	flock64_t *bfp,
948	int flag,
949	offset_t offset,
950	cred_t *cr,
951	caller_context_t *ct)
952{
953	/* See fs_frlock */
954
955	switch (cmd) {
956	case F_ALLOCSP:
957	case F_FREESP:
958		break;
959	default:
960		return (EINVAL);
961	}
962
963	if (fcntl(vp->v_fd, cmd, bfp) == -1)
964		return (errno);
965
966	return (0);
967}
968
969/* ARGSUSED */
970int
971fop_realvp(
972	vnode_t *vp,
973	vnode_t **vpp,
974	caller_context_t *ct)
975{
976	return (ENOSYS);
977}
978
979/* ARGSUSED */
980int
981fop_getpage(
982	vnode_t *vp,
983	offset_t off,
984	size_t len,
985	uint_t *protp,
986	struct page **plarr,
987	size_t plsz,
988	struct seg *seg,
989	caddr_t addr,
990	enum seg_rw rw,
991	cred_t *cr,
992	caller_context_t *ct)
993{
994	return (ENOSYS);
995}
996
997/* ARGSUSED */
998int
999fop_putpage(
1000	vnode_t *vp,
1001	offset_t off,
1002	size_t len,
1003	int flags,
1004	cred_t *cr,
1005	caller_context_t *ct)
1006{
1007	return (ENOSYS);
1008}
1009
1010/* ARGSUSED */
1011int
1012fop_map(
1013	vnode_t *vp,
1014	offset_t off,
1015	struct as *as,
1016	caddr_t *addrp,
1017	size_t len,
1018	uchar_t prot,
1019	uchar_t maxprot,
1020	uint_t flags,
1021	cred_t *cr,
1022	caller_context_t *ct)
1023{
1024	return (ENOSYS);
1025}
1026
1027/* ARGSUSED */
1028int
1029fop_addmap(
1030	vnode_t *vp,
1031	offset_t off,
1032	struct as *as,
1033	caddr_t addr,
1034	size_t len,
1035	uchar_t prot,
1036	uchar_t maxprot,
1037	uint_t flags,
1038	cred_t *cr,
1039	caller_context_t *ct)
1040{
1041	return (ENOSYS);
1042}
1043
1044/* ARGSUSED */
1045int
1046fop_delmap(
1047	vnode_t *vp,
1048	offset_t off,
1049	struct as *as,
1050	caddr_t addr,
1051	size_t len,
1052	uint_t prot,
1053	uint_t maxprot,
1054	uint_t flags,
1055	cred_t *cr,
1056	caller_context_t *ct)
1057{
1058	return (ENOSYS);
1059}
1060
1061/* ARGSUSED */
1062int
1063fop_poll(
1064	vnode_t *vp,
1065	short events,
1066	int anyyet,
1067	short *reventsp,
1068	struct pollhead **phpp,
1069	caller_context_t *ct)
1070{
1071	*reventsp = 0;
1072	if (events & POLLIN)
1073		*reventsp |= POLLIN;
1074	if (events & POLLRDNORM)
1075		*reventsp |= POLLRDNORM;
1076	if (events & POLLRDBAND)
1077		*reventsp |= POLLRDBAND;
1078	if (events & POLLOUT)
1079		*reventsp |= POLLOUT;
1080	if (events & POLLWRBAND)
1081		*reventsp |= POLLWRBAND;
1082	*phpp = NULL; /* or fake_pollhead? */
1083
1084	return (0);
1085}
1086
1087/* ARGSUSED */
1088int
1089fop_dump(
1090	vnode_t *vp,
1091	caddr_t addr,
1092	offset_t lbdn,
1093	offset_t dblks,
1094	caller_context_t *ct)
1095{
1096	return (ENOSYS);
1097}
1098
1099/*
1100 * See fs_pathconf
1101 */
1102/* ARGSUSED */
1103int
1104fop_pathconf(
1105	vnode_t *vp,
1106	int cmd,
1107	ulong_t *valp,
1108	cred_t *cr,
1109	caller_context_t *ct)
1110{
1111	register ulong_t val;
1112	register int error = 0;
1113
1114	switch (cmd) {
1115
1116	case _PC_LINK_MAX:
1117		val = MAXLINK;
1118		break;
1119
1120	case _PC_MAX_CANON:
1121		val = MAX_CANON;
1122		break;
1123
1124	case _PC_MAX_INPUT:
1125		val = MAX_INPUT;
1126		break;
1127
1128	case _PC_NAME_MAX:
1129		val = MAXNAMELEN;
1130		break;
1131
1132	case _PC_PATH_MAX:
1133	case _PC_SYMLINK_MAX:
1134		val = MAXPATHLEN;
1135		break;
1136
1137	case _PC_PIPE_BUF:
1138		val = PIPE_BUF;
1139		break;
1140
1141	case _PC_NO_TRUNC:
1142		val = (ulong_t)-1;
1143		break;
1144
1145	case _PC_VDISABLE:
1146		val = _POSIX_VDISABLE;
1147		break;
1148
1149	case _PC_CHOWN_RESTRICTED:
1150		val = 1; /* chown restricted enabled */
1151		break;
1152
1153	case _PC_FILESIZEBITS:
1154		val = (ulong_t)-1;    /* large file support */
1155		break;
1156
1157	case _PC_ACL_ENABLED:
1158		val = _ACL_ACE_ENABLED;
1159		break;
1160
1161	case _PC_CASE_BEHAVIOR:
1162		val = _CASE_SENSITIVE;
1163		break;
1164
1165	case _PC_SATTR_ENABLED:
1166	case _PC_SATTR_EXISTS:
1167		val = 0;
1168		break;
1169
1170	case _PC_ACCESS_FILTERING:
1171		val = 0;
1172		break;
1173
1174	default:
1175		error = EINVAL;
1176		break;
1177	}
1178
1179	if (error == 0)
1180		*valp = val;
1181	return (error);
1182}
1183
1184/* ARGSUSED */
1185int
1186fop_pageio(
1187	vnode_t *vp,
1188	struct page *pp,
1189	u_offset_t io_off,
1190	size_t io_len,
1191	int flags,
1192	cred_t *cr,
1193	caller_context_t *ct)
1194{
1195	return (ENOSYS);
1196}
1197
1198/* ARGSUSED */
1199int
1200fop_dumpctl(
1201	vnode_t *vp,
1202	int action,
1203	offset_t *blkp,
1204	caller_context_t *ct)
1205{
1206	return (ENOSYS);
1207}
1208
1209/* ARGSUSED */
1210void
1211fop_dispose(
1212	vnode_t *vp,
1213	struct page *pp,
1214	int flag,
1215	int dn,
1216	cred_t *cr,
1217	caller_context_t *ct)
1218{
1219}
1220
1221/* ARGSUSED */
1222int
1223fop_setsecattr(
1224	vnode_t *vp,
1225	vsecattr_t *vsap,
1226	int flag,
1227	cred_t *cr,
1228	caller_context_t *ct)
1229{
1230	return (0);
1231}
1232
1233/*
1234 * Fake up just enough of this so we can test get/set SDs.
1235 */
1236/* ARGSUSED */
1237int
1238fop_getsecattr(
1239	vnode_t *vp,
1240	vsecattr_t *vsecattr,
1241	int flag,
1242	cred_t *cr,
1243	caller_context_t *ct)
1244{
1245
1246	vsecattr->vsa_aclcnt	= 0;
1247	vsecattr->vsa_aclentsz	= 0;
1248	vsecattr->vsa_aclentp	= NULL;
1249	vsecattr->vsa_dfaclcnt	= 0;	/* Default ACLs are not fabricated */
1250	vsecattr->vsa_dfaclentp	= NULL;
1251
1252	if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
1253		aclent_t *aclentp;
1254		size_t aclsize;
1255
1256		aclsize = sizeof (aclent_t);
1257		vsecattr->vsa_aclcnt = 1;
1258		vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
1259		aclentp = vsecattr->vsa_aclentp;
1260
1261		aclentp->a_type = OTHER_OBJ;
1262		aclentp->a_perm = 0777;
1263		aclentp->a_id = (gid_t)-1;
1264		aclentp++;
1265	} else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
1266		ace_t *acl;
1267
1268		acl = kmem_alloc(sizeof (ace_t), KM_SLEEP);
1269		acl->a_who = (uint32_t)-1;
1270		acl->a_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
1271		acl->a_flags = ACE_EVERYONE;
1272		acl->a_access_mask  = ACE_MODIFY_PERMS;
1273
1274		vsecattr->vsa_aclentp = (void *)acl;
1275		vsecattr->vsa_aclcnt = 1;
1276		vsecattr->vsa_aclentsz = sizeof (ace_t);
1277	}
1278
1279	return (0);
1280}
1281
1282/* ARGSUSED */
1283int
1284fop_shrlock(
1285	vnode_t *vp,
1286	int cmd,
1287	struct shrlock *shr,
1288	int flag,
1289	cred_t *cr,
1290	caller_context_t *ct)
1291{
1292
1293	switch (cmd) {
1294	case F_SHARE:
1295	case F_SHARE_NBMAND:
1296	case F_UNSHARE:
1297		break;
1298	default:
1299		return (EINVAL);
1300	}
1301
1302	if (!fop_shrlock_enable)
1303		return (0);
1304
1305	if (fcntl(vp->v_fd, cmd, shr) == -1)
1306		return (errno);
1307
1308	return (0);
1309}
1310
1311/* ARGSUSED */
1312int
1313fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm,
1314    caller_context_t *ct)
1315{
1316	return (ENOSYS);
1317}
1318
1319/* ARGSUSED */
1320int
1321fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr,
1322    caller_context_t *ct)
1323{
1324	return (ENOSYS);
1325}
1326
1327/* ARGSUSED */
1328int
1329fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct)
1330{
1331	return (ENOSYS);
1332}
1333
1334
1335/*
1336 * ***************************************************************
1337 * other VOP support
1338 */
1339
/*
 * Convert stat(2) formats to vnode types and vice versa.  (Knows about
 * numerical order of S_IFMT and vnode types.)
 *
 * iftovt_tab maps a file-format value to a vnode type.  It has 16
 * entries; presumably it is indexed by the S_IFMT bits of st_mode
 * shifted down to 0..15 by the IFTOVT() macro used in stat_to_vattr()
 * -- confirm against that macro's definition.  Slots with no
 * corresponding format hold VNON.
 */
enum vtype iftovt_tab[] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
};
1348
/*
 * vttoif_tab is the reverse mapping: S_IF* format bits for a vnode
 * type, with 0 where there is none.  Presumably indexed by enum vtype
 * value, so the entry order must match the enum's numerical order --
 * confirm against the enum vtype definition.
 */
ushort_t vttoif_tab[] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
	S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
};
1353
1354/*
1355 * stat_to_vattr()
1356 *
1357 * Convert from a stat structure to an vattr structure
1358 * Note: only set fields according to va_mask
1359 */
1360
1361int
1362stat_to_vattr(const struct stat *st, vattr_t *vap)
1363{
1364
1365	if (vap->va_mask & AT_TYPE)
1366		vap->va_type = IFTOVT(st->st_mode);
1367
1368	if (vap->va_mask & AT_MODE)
1369		vap->va_mode = st->st_mode;
1370
1371	if (vap->va_mask & AT_UID)
1372		vap->va_uid = st->st_uid;
1373
1374	if (vap->va_mask & AT_GID)
1375		vap->va_gid = st->st_gid;
1376
1377	if (vap->va_mask & AT_FSID)
1378		vap->va_fsid = st->st_dev;
1379
1380	if (vap->va_mask & AT_NODEID)
1381		vap->va_nodeid = st->st_ino;
1382
1383	if (vap->va_mask & AT_NLINK)
1384		vap->va_nlink = st->st_nlink;
1385
1386	if (vap->va_mask & AT_SIZE)
1387		vap->va_size = (u_offset_t)st->st_size;
1388
1389	if (vap->va_mask & AT_ATIME) {
1390		vap->va_atime.tv_sec  = st->st_atim.tv_sec;
1391		vap->va_atime.tv_nsec = st->st_atim.tv_nsec;
1392	}
1393
1394	if (vap->va_mask & AT_MTIME) {
1395		vap->va_mtime.tv_sec  = st->st_mtim.tv_sec;
1396		vap->va_mtime.tv_nsec = st->st_mtim.tv_nsec;
1397	}
1398
1399	if (vap->va_mask & AT_CTIME) {
1400		vap->va_ctime.tv_sec  = st->st_ctim.tv_sec;
1401		vap->va_ctime.tv_nsec = st->st_ctim.tv_nsec;
1402	}
1403
1404	if (vap->va_mask & AT_RDEV)
1405		vap->va_rdev = st->st_rdev;
1406
1407	if (vap->va_mask & AT_BLKSIZE)
1408		vap->va_blksize = (uint_t)st->st_blksize;
1409
1410
1411	if (vap->va_mask & AT_NBLOCKS)
1412		vap->va_nblocks = (u_longlong_t)st->st_blocks;
1413
1414	if (vap->va_mask & AT_SEQ)
1415		vap->va_seq = 0;
1416
1417	return (0);
1418}
1419
1420/* ARGSUSED */
1421void
1422flk_init_callback(flk_callback_t *flk_cb,
1423	callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
1424{
1425}
1426
1427void
1428vn_hold(vnode_t *vp)
1429{
1430	mutex_enter(&vp->v_lock);
1431	vp->v_count++;
1432	mutex_exit(&vp->v_lock);
1433}
1434
1435void
1436vn_rele(vnode_t *vp)
1437{
1438	VERIFY3U(vp->v_count, !=, 0);
1439	mutex_enter(&vp->v_lock);
1440	if (vp->v_count == 1) {
1441		mutex_exit(&vp->v_lock);
1442		fop_inactive(vp, NULL, NULL);
1443	} else {
1444		vp->v_count--;
1445		mutex_exit(&vp->v_lock);
1446	}
1447}
1448
1449int
1450vn_has_other_opens(
1451	vnode_t *vp,
1452	v_mode_t mode)
1453{
1454
1455	switch (mode) {
1456	case V_WRITE:
1457		if (vp->v_wrcnt > 1)
1458			return (V_TRUE);
1459		break;
1460	case V_RDORWR:
1461		if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1))
1462			return (V_TRUE);
1463		break;
1464	case V_RDANDWR:
1465		if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1))
1466			return (V_TRUE);
1467		break;
1468	case V_READ:
1469		if (vp->v_rdcnt > 1)
1470			return (V_TRUE);
1471		break;
1472	}
1473
1474	return (V_FALSE);
1475}
1476
1477/*
1478 * vn_is_opened() checks whether a particular file is opened and
1479 * whether the open is for read and/or write.
1480 *
1481 * Vnode counts are only kept on regular files (v_type=VREG).
1482 */
1483int
1484vn_is_opened(
1485	vnode_t *vp,
1486	v_mode_t mode)
1487{
1488
1489	ASSERT(vp != NULL);
1490
1491	switch (mode) {
1492	case V_WRITE:
1493		if (vp->v_wrcnt)
1494			return (V_TRUE);
1495		break;
1496	case V_RDANDWR:
1497		if (vp->v_rdcnt && vp->v_wrcnt)
1498			return (V_TRUE);
1499		break;
1500	case V_RDORWR:
1501		if (vp->v_rdcnt || vp->v_wrcnt)
1502			return (V_TRUE);
1503		break;
1504	case V_READ:
1505		if (vp->v_rdcnt)
1506			return (V_TRUE);
1507		break;
1508	}
1509
1510	return (V_FALSE);
1511}
1512
1513/*
1514 * vn_is_mapped() checks whether a particular file is mapped and whether
1515 * the file is mapped read and/or write.
1516 */
1517/* ARGSUSED */
1518int
1519vn_is_mapped(
1520	vnode_t *vp,
1521	v_mode_t mode)
1522{
1523	return (V_FALSE);
1524}
1525