17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5a5652762Spraks  * Common Development and Distribution License (the "License").
6a5652762Spraks  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22349dcea3SGarrett D'Amore  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
2406e6833aSJosef 'Jeff' Sipek  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
25ade42b55SSebastien Roy  * Copyright (c) 2017 by Delphix. All rights reserved.
26*78a2e113SAndy Fiddaman  * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
277c478bd9Sstevel@tonic-gate  */
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
30*78a2e113SAndy Fiddaman /*	  All Rights Reserved	*/
317c478bd9Sstevel@tonic-gate 
327c478bd9Sstevel@tonic-gate /*
337c478bd9Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
347c478bd9Sstevel@tonic-gate  * The Regents of the University of California
357c478bd9Sstevel@tonic-gate  * All Rights Reserved
367c478bd9Sstevel@tonic-gate  *
377c478bd9Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
387c478bd9Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
397c478bd9Sstevel@tonic-gate  * contributors.
407c478bd9Sstevel@tonic-gate  */
417c478bd9Sstevel@tonic-gate 
427c478bd9Sstevel@tonic-gate #include <sys/types.h>
437c478bd9Sstevel@tonic-gate #include <sys/thread.h>
447c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
457c478bd9Sstevel@tonic-gate #include <sys/param.h>
467c478bd9Sstevel@tonic-gate #include <sys/systm.h>
477c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
487c478bd9Sstevel@tonic-gate #include <sys/buf.h>
497c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
507c478bd9Sstevel@tonic-gate #include <sys/conf.h>
517c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
527c478bd9Sstevel@tonic-gate #include <sys/debug.h>
53feb08c6bSbillm #include <sys/dkio.h>
547c478bd9Sstevel@tonic-gate #include <sys/errno.h>
557c478bd9Sstevel@tonic-gate #include <sys/time.h>
567c478bd9Sstevel@tonic-gate #include <sys/fcntl.h>
577c478bd9Sstevel@tonic-gate #include <sys/flock.h>
587c478bd9Sstevel@tonic-gate #include <sys/file.h>
597c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
607c478bd9Sstevel@tonic-gate #include <sys/mman.h>
617c478bd9Sstevel@tonic-gate #include <sys/open.h>
627c478bd9Sstevel@tonic-gate #include <sys/swap.h>
637c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
647c478bd9Sstevel@tonic-gate #include <sys/uio.h>
657c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
66aa59c4cbSrsb #include <sys/vfs_opreg.h>
677c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
687c478bd9Sstevel@tonic-gate #include <sys/stat.h>
697c478bd9Sstevel@tonic-gate #include <sys/poll.h>
707c478bd9Sstevel@tonic-gate #include <sys/stream.h>
717c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
727c478bd9Sstevel@tonic-gate #include <sys/policy.h>
737c478bd9Sstevel@tonic-gate #include <sys/devpolicy.h>
747c478bd9Sstevel@tonic-gate 
757c478bd9Sstevel@tonic-gate #include <sys/proc.h>
767c478bd9Sstevel@tonic-gate #include <sys/user.h>
777c478bd9Sstevel@tonic-gate #include <sys/session.h>
787c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h>
797c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
807c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
817c478bd9Sstevel@tonic-gate 
827c478bd9Sstevel@tonic-gate #include <sys/fs/snode.h>
837c478bd9Sstevel@tonic-gate 
847c478bd9Sstevel@tonic-gate #include <vm/seg.h>
857c478bd9Sstevel@tonic-gate #include <vm/seg_map.h>
867c478bd9Sstevel@tonic-gate #include <vm/page.h>
877c478bd9Sstevel@tonic-gate #include <vm/pvn.h>
887c478bd9Sstevel@tonic-gate #include <vm/seg_dev.h>
897c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h>
907c478bd9Sstevel@tonic-gate 
917c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h>
927c478bd9Sstevel@tonic-gate 
937c478bd9Sstevel@tonic-gate #include <sys/esunddi.h>
947c478bd9Sstevel@tonic-gate #include <sys/autoconf.h>
957c478bd9Sstevel@tonic-gate #include <sys/sunndi.h>
9625e8c5aaSvikram #include <sys/contract/device_impl.h>
977c478bd9Sstevel@tonic-gate 
987c478bd9Sstevel@tonic-gate 
99da6c28aaSamw static int spec_open(struct vnode **, int, struct cred *, caller_context_t *);
100da6c28aaSamw static int spec_close(struct vnode *, int, int, offset_t, struct cred *,
101da6c28aaSamw 	caller_context_t *);
1027c478bd9Sstevel@tonic-gate static int spec_read(struct vnode *, struct uio *, int, struct cred *,
103da6c28aaSamw 	caller_context_t *);
1047c478bd9Sstevel@tonic-gate static int spec_write(struct vnode *, struct uio *, int, struct cred *,
105da6c28aaSamw 	caller_context_t *);
106da6c28aaSamw static int spec_ioctl(struct vnode *, int, intptr_t, int, struct cred *, int *,
107da6c28aaSamw 	caller_context_t *);
108da6c28aaSamw static int spec_getattr(struct vnode *, struct vattr *, int, struct cred *,
109da6c28aaSamw 	caller_context_t *);
1107c478bd9Sstevel@tonic-gate static int spec_setattr(struct vnode *, struct vattr *, int, struct cred *,
1117c478bd9Sstevel@tonic-gate 	caller_context_t *);
112da6c28aaSamw static int spec_access(struct vnode *, int, int, struct cred *,
113da6c28aaSamw 	caller_context_t *);
114da6c28aaSamw static int spec_create(struct vnode *, char *, vattr_t *, enum vcexcl, int,
115da6c28aaSamw 	struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *);
116da6c28aaSamw static int spec_fsync(struct vnode *, int, struct cred *, caller_context_t *);
117da6c28aaSamw static void spec_inactive(struct vnode *, struct cred *, caller_context_t *);
118da6c28aaSamw static int spec_fid(struct vnode *, struct fid *, caller_context_t *);
119da6c28aaSamw static int spec_seek(struct vnode *, offset_t, offset_t *, caller_context_t *);
1207c478bd9Sstevel@tonic-gate static int spec_frlock(struct vnode *, int, struct flock64 *, int, offset_t,
121da6c28aaSamw 	struct flk_callback *, struct cred *, caller_context_t *);
122da6c28aaSamw static int spec_realvp(struct vnode *, struct vnode **, caller_context_t *);
1237c478bd9Sstevel@tonic-gate 
1247c478bd9Sstevel@tonic-gate static int spec_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t **,
125da6c28aaSamw 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
126da6c28aaSamw 	caller_context_t *);
1277c478bd9Sstevel@tonic-gate static int spec_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
1287c478bd9Sstevel@tonic-gate 	struct cred *);
1297c478bd9Sstevel@tonic-gate static struct buf *spec_startio(struct vnode *, page_t *, u_offset_t, size_t,
1307c478bd9Sstevel@tonic-gate 	int);
1317c478bd9Sstevel@tonic-gate static int spec_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
132da6c28aaSamw 	page_t **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
1337c478bd9Sstevel@tonic-gate static int spec_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
134da6c28aaSamw 	uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
1357c478bd9Sstevel@tonic-gate static int spec_addmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
136da6c28aaSamw 	uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
1377c478bd9Sstevel@tonic-gate static int spec_delmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
138da6c28aaSamw 	uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
1397c478bd9Sstevel@tonic-gate 
140da6c28aaSamw static int spec_poll(struct vnode *, short, int, short *, struct pollhead **,
141da6c28aaSamw 	caller_context_t *);
142d7334e51Srm static int spec_dump(struct vnode *, caddr_t, offset_t, offset_t,
143d7334e51Srm     caller_context_t *);
1447c478bd9Sstevel@tonic-gate static int spec_pageio(struct vnode *, page_t *, u_offset_t, size_t, int,
145da6c28aaSamw     cred_t *, caller_context_t *);
1467c478bd9Sstevel@tonic-gate 
147da6c28aaSamw static int spec_getsecattr(struct vnode *, vsecattr_t *, int, struct cred *,
148da6c28aaSamw 	caller_context_t *);
149da6c28aaSamw static int spec_setsecattr(struct vnode *, vsecattr_t *, int, struct cred *,
150da6c28aaSamw 	caller_context_t *);
151da6c28aaSamw static int spec_pathconf(struct	vnode *, int, ulong_t *, struct cred *,
152da6c28aaSamw 	caller_context_t *);
1537c478bd9Sstevel@tonic-gate 
/*
 * SN_HOLD: take one open-count reference (s_count) on the common snode,
 * protected by the snode's s_lock.
 */
1547c478bd9Sstevel@tonic-gate #define	SN_HOLD(csp)	{ \
1557c478bd9Sstevel@tonic-gate 	mutex_enter(&csp->s_lock); \
1567c478bd9Sstevel@tonic-gate 	csp->s_count++; \
1577c478bd9Sstevel@tonic-gate 	mutex_exit(&csp->s_lock); \
1587c478bd9Sstevel@tonic-gate }
1597c478bd9Sstevel@tonic-gate 
/*
 * SN_RELE: drop one open-count reference.  The count must remain positive
 * while a stream is still attached to the snode's vnode (see the ASSERT).
 */
1607c478bd9Sstevel@tonic-gate #define	SN_RELE(csp)	{ \
1617c478bd9Sstevel@tonic-gate 	mutex_enter(&csp->s_lock); \
1627c478bd9Sstevel@tonic-gate 	csp->s_count--; \
163fbe27353Sedp 	ASSERT((csp->s_count > 0) || (csp->s_vnode->v_stream == NULL)); \
1647c478bd9Sstevel@tonic-gate 	mutex_exit(&csp->s_lock); \
1657c478bd9Sstevel@tonic-gate }
1667c478bd9Sstevel@tonic-gate 
/* S_ISFENCED: true if the common snode behind 'sp' has the SFENCED flag set */
16725e8c5aaSvikram #define	S_ISFENCED(sp)	((VTOS((sp)->s_commonvp))->s_flag & SFENCED)
16825e8c5aaSvikram 
1697c478bd9Sstevel@tonic-gate struct vnodeops *spec_vnodeops;
1707c478bd9Sstevel@tonic-gate 
17125e8c5aaSvikram /*
17225e8c5aaSvikram  * *PLEASE NOTE*: If you add new entry points to specfs, do
17325e8c5aaSvikram  * not forget to add support for fencing. A fenced snode
17425e8c5aaSvikram  * is indicated by the SFENCED flag in the common snode.
17525e8c5aaSvikram  * If a snode is fenced, determine if your entry point is
17625e8c5aaSvikram  * a configuration operation (Example: open), a detection
17725e8c5aaSvikram  * operation (Example: getattr), an I/O operation (Example: ioctl())
17825e8c5aaSvikram  * or an unconfiguration operation (Example: close). If it is
17925e8c5aaSvikram  * a configuration or detection operation, fail the operation
18025e8c5aaSvikram  * for a fenced snode with an ENXIO or EIO as appropriate. If
18125e8c5aaSvikram  * it is any other operation, let it through.
18225e8c5aaSvikram  */
18325e8c5aaSvikram 
/*
 * Table mapping vnode operation names to the specfs handlers declared
 * above; terminated by a NULL entry.
 */
1847c478bd9Sstevel@tonic-gate const fs_operation_def_t spec_vnodeops_template[] = {
185aa59c4cbSrsb 	VOPNAME_OPEN,		{ .vop_open = spec_open },
186aa59c4cbSrsb 	VOPNAME_CLOSE,		{ .vop_close = spec_close },
187aa59c4cbSrsb 	VOPNAME_READ,		{ .vop_read = spec_read },
188aa59c4cbSrsb 	VOPNAME_WRITE,		{ .vop_write = spec_write },
189aa59c4cbSrsb 	VOPNAME_IOCTL,		{ .vop_ioctl = spec_ioctl },
190aa59c4cbSrsb 	VOPNAME_GETATTR,	{ .vop_getattr = spec_getattr },
191aa59c4cbSrsb 	VOPNAME_SETATTR,	{ .vop_setattr = spec_setattr },
192aa59c4cbSrsb 	VOPNAME_ACCESS,		{ .vop_access = spec_access },
193aa59c4cbSrsb 	VOPNAME_CREATE,		{ .vop_create = spec_create },
194aa59c4cbSrsb 	VOPNAME_FSYNC,		{ .vop_fsync = spec_fsync },
195aa59c4cbSrsb 	VOPNAME_INACTIVE,	{ .vop_inactive = spec_inactive },
196aa59c4cbSrsb 	VOPNAME_FID,		{ .vop_fid = spec_fid },
197aa59c4cbSrsb 	VOPNAME_SEEK,		{ .vop_seek = spec_seek },
198aa59c4cbSrsb 	VOPNAME_PATHCONF,	{ .vop_pathconf = spec_pathconf },
199aa59c4cbSrsb 	VOPNAME_FRLOCK,		{ .vop_frlock = spec_frlock },
200aa59c4cbSrsb 	VOPNAME_REALVP,		{ .vop_realvp = spec_realvp },
201aa59c4cbSrsb 	VOPNAME_GETPAGE,	{ .vop_getpage = spec_getpage },
202aa59c4cbSrsb 	VOPNAME_PUTPAGE,	{ .vop_putpage = spec_putpage },
203aa59c4cbSrsb 	VOPNAME_MAP,		{ .vop_map = spec_map },
204aa59c4cbSrsb 	VOPNAME_ADDMAP,		{ .vop_addmap = spec_addmap },
205aa59c4cbSrsb 	VOPNAME_DELMAP,		{ .vop_delmap = spec_delmap },
206aa59c4cbSrsb 	VOPNAME_POLL,		{ .vop_poll = spec_poll },
207aa59c4cbSrsb 	VOPNAME_DUMP,		{ .vop_dump = spec_dump },
208aa59c4cbSrsb 	VOPNAME_PAGEIO,		{ .vop_pageio = spec_pageio },
209aa59c4cbSrsb 	VOPNAME_SETSECATTR,	{ .vop_setsecattr = spec_setsecattr },
210aa59c4cbSrsb 	VOPNAME_GETSECATTR,	{ .vop_getsecattr = spec_getsecattr },
211aa59c4cbSrsb 	NULL,			NULL
2127c478bd9Sstevel@tonic-gate };
2137c478bd9Sstevel@tonic-gate 
2147c478bd9Sstevel@tonic-gate /*
2157c478bd9Sstevel@tonic-gate  * Return address of spec_vnodeops
2167c478bd9Sstevel@tonic-gate  */
2177c478bd9Sstevel@tonic-gate struct vnodeops *
spec_getvnodeops(void)2187c478bd9Sstevel@tonic-gate spec_getvnodeops(void)
2197c478bd9Sstevel@tonic-gate {
2207c478bd9Sstevel@tonic-gate 	return (spec_vnodeops);	/* specfs operations vector declared above */
2217c478bd9Sstevel@tonic-gate }
2227c478bd9Sstevel@tonic-gate 
2237c478bd9Sstevel@tonic-gate extern vnode_t *rconsvp;
2247c478bd9Sstevel@tonic-gate 
2257c478bd9Sstevel@tonic-gate /*
2267c478bd9Sstevel@tonic-gate  * Acquire the serial lock on the common snode.
2277c478bd9Sstevel@tonic-gate  */
/*
 * Wrappers around spec_lockcsp(csp, intr, setlock, hold):
 *   LOCK_CSP          - uninterruptible wait; returns with SLOCKED set,
 *                       no open-count hold taken.
 *   LOCKHOLD_CSP_SIG  - interruptible; returns with SLOCKED set and an
 *                       SN_HOLD reference taken.
 *   SYNCHOLD_CSP_SIG  - waits out any SLOCKED holder (interruptibly when
 *                       'intr' is set), takes an SN_HOLD, does not set
 *                       SLOCKED itself.
 */
228e099bf07Scth #define	LOCK_CSP(csp)			(void) spec_lockcsp(csp, 0, 1, 0)
229e099bf07Scth #define	LOCKHOLD_CSP_SIG(csp)		spec_lockcsp(csp, 1, 1, 1)
230e099bf07Scth #define	SYNCHOLD_CSP_SIG(csp, intr)	spec_lockcsp(csp, intr, 0, 1)
2317c478bd9Sstevel@tonic-gate 
/* Return values of spec_lockcsp(); see the block comment below. */
2327f9b0c87Scg typedef enum {
2337f9b0c87Scg 	LOOP,		/* interrupted, no close in progress */
2347f9b0c87Scg 	INTR,		/* interrupted while another thread was closing */
2357f9b0c87Scg 	SUCCESS		/* synchronized (locked/held as requested) */
2367f9b0c87Scg } slock_ret_t;
2377f9b0c87Scg 
2387c478bd9Sstevel@tonic-gate /*
2397f9b0c87Scg  * Synchronize with active SLOCKED snode, optionally checking for a signal and
240e099bf07Scth  * optionally returning with SLOCKED set and SN_HOLD done.  The 'intr'
241e099bf07Scth  * argument determines if the thread is interruptible by a signal while
2427f9b0c87Scg  * waiting, the function returns INTR if interrupted while there is another
2437f9b0c87Scg  * thread closing this snode and LOOP if interrupted otherwise.
2447f9b0c87Scg  * When SUCCESS is returned the 'hold' argument determines if the open
2457f9b0c87Scg  * count (SN_HOLD) has been incremented and the 'setlock' argument
2467f9b0c87Scg  * determines if the function returns with SLOCKED set.
2477c478bd9Sstevel@tonic-gate  */
2487f9b0c87Scg static slock_ret_t
spec_lockcsp(struct snode * csp,int intr,int setlock,int hold)249e099bf07Scth spec_lockcsp(struct snode *csp, int intr, int setlock, int hold)
2507c478bd9Sstevel@tonic-gate {
2517f9b0c87Scg 	slock_ret_t ret = SUCCESS;
2527c478bd9Sstevel@tonic-gate 	mutex_enter(&csp->s_lock);
	/* wait until no other thread holds the serial lock (SLOCKED) */
2537c478bd9Sstevel@tonic-gate 	while (csp->s_flag & SLOCKED) {
		/* ask the current holder to cv_broadcast when it unlocks */
2547c478bd9Sstevel@tonic-gate 		csp->s_flag |= SWANT;
255e099bf07Scth 		if (intr) {
256e099bf07Scth 			if (!cv_wait_sig(&csp->s_cv, &csp->s_lock)) {
				/* signal taken: distinguish close-in-progress */
2577f9b0c87Scg 				if (csp->s_flag & SCLOSING)
2587f9b0c87Scg 					ret = INTR;
2597f9b0c87Scg 				else
2607f9b0c87Scg 					ret = LOOP;
261e099bf07Scth 				mutex_exit(&csp->s_lock);
2627f9b0c87Scg 				return (ret);		/* interrupted */
263e099bf07Scth 			}
264e099bf07Scth 		} else {
265e099bf07Scth 			cv_wait(&csp->s_cv, &csp->s_lock);
2667c478bd9Sstevel@tonic-gate 		}
2677c478bd9Sstevel@tonic-gate 	}
268e099bf07Scth 	if (setlock)
269e099bf07Scth 		csp->s_flag |= SLOCKED;
270e099bf07Scth 	if (hold)
271e099bf07Scth 		csp->s_count++;		/* one more open reference : SN_HOLD */
2727c478bd9Sstevel@tonic-gate 	mutex_exit(&csp->s_lock);
2737f9b0c87Scg 	return (ret);			/* serialized/locked */
2747c478bd9Sstevel@tonic-gate }
2757c478bd9Sstevel@tonic-gate 
2767c478bd9Sstevel@tonic-gate /*
2777c478bd9Sstevel@tonic-gate  * Unlock the serial lock on the common snode
2787c478bd9Sstevel@tonic-gate  */
/*
 * Release the serial lock with s_lock already held: wake any waiters
 * (SWANT was set in spec_lockcsp) and clear both wait and lock flags.
 */
2797c478bd9Sstevel@tonic-gate #define	UNLOCK_CSP_LOCK_HELD(csp)			\
2807c478bd9Sstevel@tonic-gate 	ASSERT(mutex_owned(&csp->s_lock));		\
2817c478bd9Sstevel@tonic-gate 	if (csp->s_flag & SWANT)			\
2827c478bd9Sstevel@tonic-gate 		cv_broadcast(&csp->s_cv);		\
2837c478bd9Sstevel@tonic-gate 	csp->s_flag &= ~(SWANT|SLOCKED);
2847c478bd9Sstevel@tonic-gate 
/* Same as UNLOCK_CSP_LOCK_HELD but acquires and drops s_lock itself. */
2857c478bd9Sstevel@tonic-gate #define	UNLOCK_CSP(csp)					\
2867c478bd9Sstevel@tonic-gate 	mutex_enter(&csp->s_lock);			\
2877c478bd9Sstevel@tonic-gate 	UNLOCK_CSP_LOCK_HELD(csp);			\
2887c478bd9Sstevel@tonic-gate 	mutex_exit(&csp->s_lock);
2897c478bd9Sstevel@tonic-gate 
2907c478bd9Sstevel@tonic-gate /*
2917c478bd9Sstevel@tonic-gate  * compute/return the size of the device
2927c478bd9Sstevel@tonic-gate  */
2937c478bd9Sstevel@tonic-gate #define	SPEC_SIZE(csp)	\
2947c478bd9Sstevel@tonic-gate 	(((csp)->s_flag & SSIZEVALID) ? (csp)->s_size : spec_size(csp))
2957c478bd9Sstevel@tonic-gate 
2967c478bd9Sstevel@tonic-gate /*
2977c478bd9Sstevel@tonic-gate  * Compute and return the size.  If the size in the common snode is valid then
2987c478bd9Sstevel@tonic-gate  * return it.  If not valid then get the size from the driver and set size in
2997c478bd9Sstevel@tonic-gate  * the common snode.  If the device has not been attached then we don't ask for
3007c478bd9Sstevel@tonic-gate  * an update from the driver- for non-streams SSIZEVALID stays unset until the
3017c478bd9Sstevel@tonic-gate  * device is attached. A stat of a mknod outside /devices (non-devfs) may
3027c478bd9Sstevel@tonic-gate  * report UNKNOWN_SIZE because the device may not be attached yet (SDIPSET not
3037c478bd9Sstevel@tonic-gate  * established in mknod until open time). A stat in /devices will report the
3047c478bd9Sstevel@tonic-gate  * size correctly.  Specfs should always call SPEC_SIZE instead of referring
3057c478bd9Sstevel@tonic-gate  * directly to s_size to initialize/retrieve the size of a device.
3067c478bd9Sstevel@tonic-gate  *
3077c478bd9Sstevel@tonic-gate  * XXX There is an inconsistency between block and raw - "unknown" is
3087c478bd9Sstevel@tonic-gate  * UNKNOWN_SIZE for VBLK and 0 for VCHR(raw).
3097c478bd9Sstevel@tonic-gate  */
3107c478bd9Sstevel@tonic-gate static u_offset_t
spec_size(struct snode * csp)3117c478bd9Sstevel@tonic-gate spec_size(struct snode *csp)
3127c478bd9Sstevel@tonic-gate {
3137c478bd9Sstevel@tonic-gate 	struct vnode	*cvp = STOV(csp);
3147c478bd9Sstevel@tonic-gate 	u_offset_t	size;
3157c478bd9Sstevel@tonic-gate 	int		plen;
3167c478bd9Sstevel@tonic-gate 	uint32_t	size32;
3177c478bd9Sstevel@tonic-gate 	dev_t		dev;
3187c478bd9Sstevel@tonic-gate 	dev_info_t	*devi;
3197c478bd9Sstevel@tonic-gate 	major_t		maj;
320184cd04cScth 	uint_t		blksize;
321184cd04cScth 	int		blkshift;
3227c478bd9Sstevel@tonic-gate 
3237c478bd9Sstevel@tonic-gate 	ASSERT((csp)->s_commonvp == cvp);	/* must be common node */
3247c478bd9Sstevel@tonic-gate 
3257c478bd9Sstevel@tonic-gate 	/* return cached value */
3267c478bd9Sstevel@tonic-gate 	mutex_enter(&csp->s_lock);
3277c478bd9Sstevel@tonic-gate 	if (csp->s_flag & SSIZEVALID) {
3287c478bd9Sstevel@tonic-gate 		mutex_exit(&csp->s_lock);
3297c478bd9Sstevel@tonic-gate 		return (csp->s_size);
3307c478bd9Sstevel@tonic-gate 	}
3317c478bd9Sstevel@tonic-gate 
3327c478bd9Sstevel@tonic-gate 	/* VOP_GETATTR of mknod has not had devcnt restriction applied */
3337c478bd9Sstevel@tonic-gate 	dev = cvp->v_rdev;
3347c478bd9Sstevel@tonic-gate 	maj = getmajor(dev);
3357c478bd9Sstevel@tonic-gate 	if (maj >= devcnt) {
3367c478bd9Sstevel@tonic-gate 		/* return non-cached UNKNOWN_SIZE */
3377c478bd9Sstevel@tonic-gate 		mutex_exit(&csp->s_lock);
3387c478bd9Sstevel@tonic-gate 		return ((cvp->v_type == VCHR) ? 0 : UNKNOWN_SIZE);
3397c478bd9Sstevel@tonic-gate 	}
3407c478bd9Sstevel@tonic-gate 
3417c478bd9Sstevel@tonic-gate 	/* establish cached zero size for streams */
3427c478bd9Sstevel@tonic-gate 	if (STREAMSTAB(maj)) {
3437c478bd9Sstevel@tonic-gate 		csp->s_size = 0;
3447c478bd9Sstevel@tonic-gate 		csp->s_flag |= SSIZEVALID;
3457c478bd9Sstevel@tonic-gate 		mutex_exit(&csp->s_lock);
3467c478bd9Sstevel@tonic-gate 		return (0);
3477c478bd9Sstevel@tonic-gate 	}
3487c478bd9Sstevel@tonic-gate 
3497c478bd9Sstevel@tonic-gate 	/*
3507c478bd9Sstevel@tonic-gate 	 * Return non-cached UNKNOWN_SIZE if not open.
3517c478bd9Sstevel@tonic-gate 	 *
3527c478bd9Sstevel@tonic-gate 	 * NB: This check is bogus, calling prop_op(9E) should be gated by
3537c478bd9Sstevel@tonic-gate 	 * attach, not open. Not having this check however opens up a new
3547c478bd9Sstevel@tonic-gate 	 * context under which a driver's prop_op(9E) could be called. Calling
3557c478bd9Sstevel@tonic-gate 	 * prop_op(9E) in this new context has been shown to expose latent
3567c478bd9Sstevel@tonic-gate 	 * driver bugs (insufficient NULL pointer checks that lead to panic).
3577c478bd9Sstevel@tonic-gate 	 * We are keeping this open check for now to avoid these panics.
3587c478bd9Sstevel@tonic-gate 	 */
3597c478bd9Sstevel@tonic-gate 	if (csp->s_count == 0) {
3607c478bd9Sstevel@tonic-gate 		mutex_exit(&csp->s_lock);
3617c478bd9Sstevel@tonic-gate 		return ((cvp->v_type == VCHR) ? 0 : UNKNOWN_SIZE);
3627c478bd9Sstevel@tonic-gate 	}
3637c478bd9Sstevel@tonic-gate 
3647c478bd9Sstevel@tonic-gate 	/* Return non-cached UNKNOWN_SIZE if not attached. */
3657c478bd9Sstevel@tonic-gate 	if (((csp->s_flag & SDIPSET) == 0) || (csp->s_dip == NULL) ||
366737d277aScth 	    !i_ddi_devi_attached(csp->s_dip)) {
3677c478bd9Sstevel@tonic-gate 		mutex_exit(&csp->s_lock);
3687c478bd9Sstevel@tonic-gate 		return ((cvp->v_type == VCHR) ? 0 : UNKNOWN_SIZE);
3697c478bd9Sstevel@tonic-gate 	}
3707c478bd9Sstevel@tonic-gate 
3717c478bd9Sstevel@tonic-gate 	devi = csp->s_dip;
3727c478bd9Sstevel@tonic-gate 
3737c478bd9Sstevel@tonic-gate 	/*
3747c478bd9Sstevel@tonic-gate 	 * Establish the cached size obtained from the attached driver. Since
3757c478bd9Sstevel@tonic-gate 	 * we know the devinfo node, for efficiency we use cdev_prop_op
3767c478bd9Sstevel@tonic-gate 	 * directly instead of [cb]dev_[Ss]size.
3777c478bd9Sstevel@tonic-gate 	 */
3787c478bd9Sstevel@tonic-gate 	if (cvp->v_type == VCHR) {
3797c478bd9Sstevel@tonic-gate 		size = 0;
3807c478bd9Sstevel@tonic-gate 		plen = sizeof (size);
3817c478bd9Sstevel@tonic-gate 		if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
3827c478bd9Sstevel@tonic-gate 		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS |
3837c478bd9Sstevel@tonic-gate 		    DDI_PROP_CONSUMER_TYPED, "Size", (caddr_t)&size,
3847c478bd9Sstevel@tonic-gate 		    &plen) != DDI_PROP_SUCCESS) {
			/* fall back to the 32-bit "size" property */
3857c478bd9Sstevel@tonic-gate 			plen = sizeof (size32);
3867c478bd9Sstevel@tonic-gate 			if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
3877c478bd9Sstevel@tonic-gate 			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
3887c478bd9Sstevel@tonic-gate 			    "size", (caddr_t)&size32, &plen) ==
3897c478bd9Sstevel@tonic-gate 			    DDI_PROP_SUCCESS)
3907c478bd9Sstevel@tonic-gate 				size = size32;
3917c478bd9Sstevel@tonic-gate 		}
3927c478bd9Sstevel@tonic-gate 	} else {
3937c478bd9Sstevel@tonic-gate 		size = UNKNOWN_SIZE;
3947c478bd9Sstevel@tonic-gate 		plen = sizeof (size);
3957c478bd9Sstevel@tonic-gate 		if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
3967c478bd9Sstevel@tonic-gate 		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS |
3977c478bd9Sstevel@tonic-gate 		    DDI_PROP_CONSUMER_TYPED, "Nblocks", (caddr_t)&size,
3987c478bd9Sstevel@tonic-gate 		    &plen) != DDI_PROP_SUCCESS) {
			/* fall back to the 32-bit "nblocks" property */
3997c478bd9Sstevel@tonic-gate 			plen = sizeof (size32);
4007c478bd9Sstevel@tonic-gate 			if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
4017c478bd9Sstevel@tonic-gate 			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
4027c478bd9Sstevel@tonic-gate 			    "nblocks", (caddr_t)&size32, &plen) ==
4037c478bd9Sstevel@tonic-gate 			    DDI_PROP_SUCCESS)
4047c478bd9Sstevel@tonic-gate 				size = size32;
4057c478bd9Sstevel@tonic-gate 		}
4067c478bd9Sstevel@tonic-gate 
4077c478bd9Sstevel@tonic-gate 		if (size != UNKNOWN_SIZE) {
408184cd04cScth 			blksize = DEV_BSIZE;		/* default */
409184cd04cScth 			plen = sizeof (blksize);
410184cd04cScth 
411184cd04cScth 			/* try to get dev_t specific "blksize" */
412184cd04cScth 			if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
413184cd04cScth 			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
414184cd04cScth 			    "blksize", (caddr_t)&blksize, &plen) !=
415184cd04cScth 			    DDI_PROP_SUCCESS) {
416184cd04cScth 				/*
417184cd04cScth 				 * Try for dev_info node "device-blksize".
418184cd04cScth 				 * If this fails then blksize will still be
419184cd04cScth 				 * DEV_BSIZE default value.
420184cd04cScth 				 */
421184cd04cScth 				(void) cdev_prop_op(DDI_DEV_T_ANY, devi,
422184cd04cScth 				    PROP_LEN_AND_VAL_BUF,
423184cd04cScth 				    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
424184cd04cScth 				    "device-blksize", (caddr_t)&blksize, &plen);
425184cd04cScth 			}
426184cd04cScth 
427184cd04cScth 			/* blksize must be a power of two */
428184cd04cScth 			ASSERT(BIT_ONLYONESET(blksize));
429184cd04cScth 			blkshift = highbit(blksize) - 1;
430184cd04cScth 
4317c478bd9Sstevel@tonic-gate 			/* convert from block size to byte size */
432184cd04cScth 			if (size < (MAXOFFSET_T >> blkshift))
433184cd04cScth 				size = size << blkshift;
4347c478bd9Sstevel@tonic-gate 			else
4357c478bd9Sstevel@tonic-gate 				size = UNKNOWN_SIZE;
4367c478bd9Sstevel@tonic-gate 		}
4377c478bd9Sstevel@tonic-gate 	}
4387c478bd9Sstevel@tonic-gate 
	/* cache the result so later calls return it directly */
4397c478bd9Sstevel@tonic-gate 	csp->s_size = size;
4407c478bd9Sstevel@tonic-gate 	csp->s_flag |= SSIZEVALID;
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate 	mutex_exit(&csp->s_lock);
4437c478bd9Sstevel@tonic-gate 	return (size);
4447c478bd9Sstevel@tonic-gate }
4457c478bd9Sstevel@tonic-gate 
4467c478bd9Sstevel@tonic-gate /*
447*78a2e113SAndy Fiddaman  * This function deals with vnode substitution in the case of
4487c478bd9Sstevel@tonic-gate  * device cloning.
4497c478bd9Sstevel@tonic-gate  */
4507c478bd9Sstevel@tonic-gate static int
spec_clone(struct vnode ** vpp,dev_t newdev,int vtype,struct stdata * stp)4517c478bd9Sstevel@tonic-gate spec_clone(struct vnode **vpp, dev_t newdev, int vtype, struct stdata *stp)
4527c478bd9Sstevel@tonic-gate {
4537c478bd9Sstevel@tonic-gate 	dev_t		dev = (*vpp)->v_rdev;
4547c478bd9Sstevel@tonic-gate 	major_t		maj = getmajor(dev);
455*78a2e113SAndy Fiddaman 	major_t		newmaj = getmajor(newdev);
4567c478bd9Sstevel@tonic-gate 	int		sysclone = (maj == clone_major);
4577c478bd9Sstevel@tonic-gate 	int		qassociate_used = 0;
4587c478bd9Sstevel@tonic-gate 	struct snode	*oldsp, *oldcsp;
4597c478bd9Sstevel@tonic-gate 	struct snode	*newsp, *newcsp;
4607c478bd9Sstevel@tonic-gate 	struct vnode	*newvp, *newcvp;
4617c478bd9Sstevel@tonic-gate 	dev_info_t	*dip;
4627c478bd9Sstevel@tonic-gate 	queue_t		*dq;
4637c478bd9Sstevel@tonic-gate 
4647c478bd9Sstevel@tonic-gate 	ASSERT(dev != newdev);
4657c478bd9Sstevel@tonic-gate 
4667c478bd9Sstevel@tonic-gate 	/*
4677c478bd9Sstevel@tonic-gate 	 * Check for cloning across different drivers.
4687c478bd9Sstevel@tonic-gate 	 * We only support this under the system provided clone driver
4697c478bd9Sstevel@tonic-gate 	 */
4707c478bd9Sstevel@tonic-gate 	if ((maj != newmaj) && !sysclone) {
4717c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE,
4727c478bd9Sstevel@tonic-gate 		    "unsupported clone open maj = %u, newmaj = %u",
4737c478bd9Sstevel@tonic-gate 		    maj, newmaj);
4747c478bd9Sstevel@tonic-gate 		return (ENXIO);
4757c478bd9Sstevel@tonic-gate 	}
4767c478bd9Sstevel@tonic-gate 
4777c478bd9Sstevel@tonic-gate 	/* old */
4787c478bd9Sstevel@tonic-gate 	oldsp = VTOS(*vpp);
4797c478bd9Sstevel@tonic-gate 	oldcsp = VTOS(oldsp->s_commonvp);
4807c478bd9Sstevel@tonic-gate 
4817c478bd9Sstevel@tonic-gate 	/* new */
4827c478bd9Sstevel@tonic-gate 	newvp = makespecvp(newdev, vtype);
4837c478bd9Sstevel@tonic-gate 	ASSERT(newvp != NULL);
4847c478bd9Sstevel@tonic-gate 	newsp = VTOS(newvp);
4857c478bd9Sstevel@tonic-gate 	newcvp = newsp->s_commonvp;
4867c478bd9Sstevel@tonic-gate 	newcsp = VTOS(newcvp);
4877c478bd9Sstevel@tonic-gate 
4887c478bd9Sstevel@tonic-gate 	/*
4897c478bd9Sstevel@tonic-gate 	 * Clones inherit fsid, realvp, and dip.
4907c478bd9Sstevel@tonic-gate 	 * XXX realvp inherit is not occurring, does fstat of clone work?
4917c478bd9Sstevel@tonic-gate 	 */
4927c478bd9Sstevel@tonic-gate 	newsp->s_fsid = oldsp->s_fsid;
4937c478bd9Sstevel@tonic-gate 	if (sysclone) {
4947c478bd9Sstevel@tonic-gate 		newsp->s_flag |= SCLONE;
4957c478bd9Sstevel@tonic-gate 		dip = NULL;
4967c478bd9Sstevel@tonic-gate 	} else {
4977c478bd9Sstevel@tonic-gate 		newsp->s_flag |= SSELFCLONE;
4987c478bd9Sstevel@tonic-gate 		dip = oldcsp->s_dip;
4997c478bd9Sstevel@tonic-gate 	}
5007c478bd9Sstevel@tonic-gate 
5017c478bd9Sstevel@tonic-gate 	/*
5027c478bd9Sstevel@tonic-gate 	 * If we cloned to an opened newdev that already has called
5037c478bd9Sstevel@tonic-gate 	 * spec_assoc_vp_with_devi (SDIPSET set) then the association is
5047c478bd9Sstevel@tonic-gate 	 * already established.
5057c478bd9Sstevel@tonic-gate 	 */
5067c478bd9Sstevel@tonic-gate 	if (!(newcsp->s_flag & SDIPSET)) {
5077c478bd9Sstevel@tonic-gate 		/*
5087c478bd9Sstevel@tonic-gate 		 * Establish s_dip association for newdev.
5097c478bd9Sstevel@tonic-gate 		 *
5107c478bd9Sstevel@tonic-gate 		 * If we trusted the getinfo(9E) DDI_INFO_DEVT2INSTANCE
5117c478bd9Sstevel@tonic-gate 		 * implementation of all cloning drivers  (SCLONE and SELFCLONE)
5127c478bd9Sstevel@tonic-gate 		 * we would always use e_ddi_hold_devi_by_dev().  We know that
5137c478bd9Sstevel@tonic-gate 		 * many drivers have had (still have?) problems with
5147c478bd9Sstevel@tonic-gate 		 * DDI_INFO_DEVT2INSTANCE, so we try to minimize reliance by
5157c478bd9Sstevel@tonic-gate 		 * detecting drivers that use QASSOCIATE (by looking down the
5167c478bd9Sstevel@tonic-gate 		 * stream) and setting their s_dip association to NULL.
5177c478bd9Sstevel@tonic-gate 		 */
5187c478bd9Sstevel@tonic-gate 		qassociate_used = 0;
5197c478bd9Sstevel@tonic-gate 		if (stp) {
			/* scan the write-side queue chain for _QASSOCIATED */
5207c478bd9Sstevel@tonic-gate 			for (dq = stp->sd_wrq; dq; dq = dq->q_next) {
5217c478bd9Sstevel@tonic-gate 				if (_RD(dq)->q_flag & _QASSOCIATED) {
5227c478bd9Sstevel@tonic-gate 					qassociate_used = 1;
5237c478bd9Sstevel@tonic-gate 					dip = NULL;
5247c478bd9Sstevel@tonic-gate 					break;
5257c478bd9Sstevel@tonic-gate 				}
5267c478bd9Sstevel@tonic-gate 			}
5277c478bd9Sstevel@tonic-gate 		}
5287c478bd9Sstevel@tonic-gate 
5297c478bd9Sstevel@tonic-gate 		if (dip || qassociate_used) {
5307c478bd9Sstevel@tonic-gate 			spec_assoc_vp_with_devi(newvp, dip);
5317c478bd9Sstevel@tonic-gate 		} else {
5327c478bd9Sstevel@tonic-gate 			/* derive association from newdev */
5337c478bd9Sstevel@tonic-gate 			dip = e_ddi_hold_devi_by_dev(newdev, 0);
5347c478bd9Sstevel@tonic-gate 			spec_assoc_vp_with_devi(newvp, dip);
5357c478bd9Sstevel@tonic-gate 			if (dip)
5367c478bd9Sstevel@tonic-gate 				ddi_release_devi(dip);
5377c478bd9Sstevel@tonic-gate 		}
5387c478bd9Sstevel@tonic-gate 	}
5397c478bd9Sstevel@tonic-gate 
	/* hold the new common snode before the old one is released below */
5407c478bd9Sstevel@tonic-gate 	SN_HOLD(newcsp);
5417c478bd9Sstevel@tonic-gate 
5427c478bd9Sstevel@tonic-gate 	/* deal with stream stuff */
5437c478bd9Sstevel@tonic-gate 	if (stp != NULL) {
5447c478bd9Sstevel@tonic-gate 		LOCK_CSP(newcsp);	/* synchronize stream open/close */
5457c478bd9Sstevel@tonic-gate 		mutex_enter(&newcsp->s_lock);
5467c478bd9Sstevel@tonic-gate 		newcvp->v_stream = newvp->v_stream = stp;
5477c478bd9Sstevel@tonic-gate 		stp->sd_vnode = newcvp;
548*78a2e113SAndy Fiddaman 		stp->sd_pvnode = newvp;
5497c478bd9Sstevel@tonic-gate 		stp->sd_strtab = STREAMSTAB(newmaj);
5507c478bd9Sstevel@tonic-gate 		mutex_exit(&newcsp->s_lock);
5517c478bd9Sstevel@tonic-gate 		UNLOCK_CSP(newcsp);
5527c478bd9Sstevel@tonic-gate 	}
5537c478bd9Sstevel@tonic-gate 
5547c478bd9Sstevel@tonic-gate 	/* substitute the vnode */
5557c478bd9Sstevel@tonic-gate 	SN_RELE(oldcsp);
5567c478bd9Sstevel@tonic-gate 	VN_RELE(*vpp);
5577c478bd9Sstevel@tonic-gate 	*vpp = newvp;
5587c478bd9Sstevel@tonic-gate 
5597c478bd9Sstevel@tonic-gate 	return (0);
5607c478bd9Sstevel@tonic-gate }
5617c478bd9Sstevel@tonic-gate 
/*
 * spec_open: VOP_OPEN(9E) entry point for special files (VCHR/VBLK).
 *
 * On entry *vpp is the shadow snode vnode being opened.  The open may
 * "clone": if the driver's open(9E) hands back a different dev_t, the
 * shadow vnode is replaced via spec_clone() and *vpp refers to the new
 * snode on return.  Dispatches to the STREAMS open path (stropen) for
 * character devices with a STREAMS table entry, and to the ordinary
 * cb_open path otherwise; a STREAMS open failing with ENOSTR is retried
 * as a regular device open (dual-personality devices).
 *
 * Returns 0 on success or an errno (ENXIO, EINTR, EIO, ...).  On any
 * failure the open count taken on the common snode is dropped again.
 */
static int
spec_open(struct vnode **vpp, int flag, struct cred *cr, caller_context_t *cc)
{
	major_t maj;
	dev_t dev, newdev;
	struct vnode *vp, *cvp;
	struct snode *sp, *csp;
	struct stdata *stp;
	dev_info_t *dip;
	int error, type;
	contract_t *ct = NULL;
	int open_returns_eintr;
	slock_ret_t spec_locksp_ret;

	flag &= ~FCREAT;		/* paranoia */

	vp = *vpp;
	sp = VTOS(vp);
	ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK));
	if ((vp->v_type != VCHR) && (vp->v_type != VBLK))
		return (ENXIO);

	/*
	 * If the VFS_NODEVICES bit was set for the mount,
	 * do not allow opens of special devices.
	 */
	if (sp->s_realvp && (sp->s_realvp->v_vfsp->vfs_flag & VFS_NODEVICES))
		return (ENXIO);

	newdev = dev = vp->v_rdev;

	/*
	 * If we are opening a node that has not had spec_assoc_vp_with_devi
	 * called against it (mknod outside /devices or a non-dacf makespecvp
	 * node) then SDIPSET will not be set. In this case we call an
	 * interface which will reconstruct the path and lookup (drive attach)
	 * through devfs (e_ddi_hold_devi_by_dev -> e_ddi_hold_devi_by_path ->
	 * devfs_lookupname).  For support of broken drivers that don't call
	 * ddi_create_minor_node for all minor nodes in their instance space,
	 * we call interfaces that operates at the directory/devinfo
	 * (major/instance) level instead of to the leaf/minor node level.
	 * After finding and attaching the dip we associate it with the
	 * common specfs vnode (s_dip), which sets SDIPSET.  A DL_DETACH_REQ
	 * to style-2 stream driver may set s_dip to NULL with SDIPSET set.
	 *
	 * NOTE: Although e_ddi_hold_devi_by_dev takes a dev_t argument, its
	 * implementation operates at the major/instance level since it only
	 * need to return a dip.
	 */
	cvp = sp->s_commonvp;
	csp = VTOS(cvp);
	if (!(csp->s_flag & SDIPSET)) {
		/* try to attach, return error if we fail */
		if ((dip = e_ddi_hold_devi_by_dev(dev, 0)) == NULL)
			return (ENXIO);

		/* associate dip with the common snode s_dip */
		spec_assoc_vp_with_devi(vp, dip);
		ddi_release_devi(dip);	/* from e_ddi_hold_devi_by_dev */
	}

	/* check if device fenced off */
	if (S_ISFENCED(sp))
		return (ENXIO);

#ifdef  DEBUG
	/* verify attach/open exclusion guarantee */
	dip = csp->s_dip;
	ASSERT((dip == NULL) || i_ddi_devi_attached(dip));
#endif  /* DEBUG */

	if ((error = secpolicy_spec_open(cr, vp, flag)) != 0)
		return (error);

	/* Verify existence of open(9E) implementation. */
	maj = getmajor(dev);
	if ((maj >= devcnt) ||
	    (devopsp[maj]->devo_cb_ops == NULL) ||
	    (devopsp[maj]->devo_cb_ops->cb_open == NULL))
		return (ENXIO);

	/*
	 * split STREAMS vs. non-STREAMS
	 *
	 * If the device is a dual-personality device, then we might want
	 * to allow for a regular OTYP_BLK open.  If however it's strictly
	 * a pure STREAMS device, the cb_open entry point will be
	 * nodev() which returns ENXIO.  This does make this failure path
	 * somewhat longer, but such attempts to use OTYP_BLK with STREAMS
	 * devices should be exceedingly rare.  (Most of the time they will
	 * be due to programmer error.)
	 */
	if ((vp->v_type == VCHR) && (STREAMSTAB(maj)))
		goto streams_open;

not_streams:
	/*
	 * Wait for in progress last close to complete. This guarantees
	 * to the driver writer that we will never be in the drivers
	 * open and close on the same (dev_t, otype) at the same time.
	 * Open count already incremented (SN_HOLD) on non-zero return.
	 * The wait is interruptible by a signal if the driver sets the
	 * D_OPEN_RETURNS_EINTR cb_ops(9S) cb_flag or sets the
	 * ddi-open-returns-eintr(9P) property in its driver.conf.
	 */
	if ((devopsp[maj]->devo_cb_ops->cb_flag & D_OPEN_RETURNS_EINTR) ||
	    (devnamesp[maj].dn_flags & DN_OPEN_RETURNS_EINTR))
		open_returns_eintr = 1;
	else
		open_returns_eintr = 0;
	while ((spec_locksp_ret = SYNCHOLD_CSP_SIG(csp, open_returns_eintr)) !=
	    SUCCESS) {
		if (spec_locksp_ret == INTR)
			return (EINTR);
	}

	/* non streams open */
	type = (vp->v_type == VBLK ? OTYP_BLK : OTYP_CHR);
	error = dev_open(&newdev, flag, type, cr);

	/* deal with clone case */
	if (error == 0 && dev != newdev) {
		error = spec_clone(vpp, newdev, vp->v_type, NULL);
		/*
		 * bail on clone failure, further processing
		 * results in undefined behaviors.
		 */
		if (error != 0)
			return (error);
		/* *vpp now refers to the cloned snode; refresh sp/csp */
		sp = VTOS(*vpp);
		csp = VTOS(sp->s_commonvp);
	}

	/*
	 * create contracts only for userland opens
	 * Successful open and cloning is done at this point.
	 */
	if (error == 0 && !(flag & FKLYR)) {
		int spec_type;
		spec_type = (STOV(csp)->v_type == VCHR) ? S_IFCHR : S_IFBLK;
		if (contract_device_open(newdev, spec_type, NULL) != 0) {
			error = EIO;
		}
	}

	if (error == 0) {
		sp->s_size = SPEC_SIZE(csp);

		if ((csp->s_flag & SNEEDCLOSE) == 0) {
			int nmaj = getmajor(newdev);
			mutex_enter(&csp->s_lock);
			/* successful open needs a close later */
			csp->s_flag |= SNEEDCLOSE;

			/*
			 * Invalidate possible cached "unknown" size
			 * established by a VOP_GETATTR while open was in
			 * progress, and the driver might fail prop_op(9E).
			 */
			if (((cvp->v_type == VCHR) && (csp->s_size == 0)) ||
			    ((cvp->v_type == VBLK) &&
			    (csp->s_size == UNKNOWN_SIZE)))
				csp->s_flag &= ~SSIZEVALID;

			/* propagate the driver's offset capabilities */
			if (devopsp[nmaj]->devo_cb_ops->cb_flag & D_64BIT)
				csp->s_flag |= SLOFFSET;
			if (devopsp[nmaj]->devo_cb_ops->cb_flag & D_U64BIT)
				csp->s_flag |= SLOFFSET | SANYOFFSET;
			mutex_exit(&csp->s_lock);
		}
		return (0);
	}

	/*
	 * Open failed. If we missed a close operation because
	 * we were trying to get the device open and it is the
	 * last in progress open that is failing then call close.
	 *
	 * NOTE: Only non-streams open has this race condition.
	 */
	mutex_enter(&csp->s_lock);
	csp->s_count--;			/* decrement open count : SN_RELE */
	if ((csp->s_count == 0) &&	/* no outstanding open */
	    (csp->s_mapcnt == 0) &&	/* no mapping */
	    (csp->s_flag & SNEEDCLOSE)) { /* need a close */
		csp->s_flag &= ~(SNEEDCLOSE | SSIZEVALID);

		/* See comment in spec_close() */
		if (csp->s_flag & (SCLONE | SSELFCLONE))
			csp->s_flag &= ~SDIPSET;

		csp->s_flag |= SCLOSING;
		mutex_exit(&csp->s_lock);

		ASSERT(*vpp != NULL);
		(void) device_close(*vpp, flag, cr);

		mutex_enter(&csp->s_lock);
		csp->s_flag &= ~SCLOSING;
		mutex_exit(&csp->s_lock);
	} else {
		mutex_exit(&csp->s_lock);
	}
	return (error);

streams_open:
	/*
	 * Lock common snode to prevent any new clone opens on this
	 * stream while one is in progress. This is necessary since
	 * the stream currently associated with the clone device will
	 * not be part of it after the clone open completes. Unfortunately
	 * we don't know in advance if this is a clone
	 * device so we have to lock all opens.
	 *
	 * If we fail, it's because of an interrupt - EINTR return is an
	 * expected aspect of opening a stream so we don't need to check
	 * D_OPEN_RETURNS_EINTR. Open count already incremented (SN_HOLD)
	 * on non-zero return.
	 */
	if (LOCKHOLD_CSP_SIG(csp) != SUCCESS)
		return (EINTR);

	error = stropen(cvp, &newdev, flag, cr);
	stp = cvp->v_stream;

	/* deal with the clone case */
	if ((error == 0) && (dev != newdev)) {
		vp->v_stream = cvp->v_stream = NULL;
		UNLOCK_CSP(csp);
		error = spec_clone(vpp, newdev, vp->v_type, stp);
		/*
		 * bail on clone failure, further processing
		 * results in undefined behaviors.
		 */
		if (error != 0)
			return (error);
		sp = VTOS(*vpp);
		csp = VTOS(sp->s_commonvp);
	} else if (error == 0) {
		vp->v_stream = stp;
		UNLOCK_CSP(csp);
	}

	/*
	 * create contracts only for userland opens
	 * Successful open and cloning is done at this point.
	 */
	if (error == 0 && !(flag & FKLYR)) {
		/* STREAM is of type S_IFCHR */
		if (contract_device_open(newdev, S_IFCHR, &ct) != 0) {
			UNLOCK_CSP(csp);
			(void) spec_close(vp, flag, 1, 0, cr, cc);
			return (EIO);
		}
	}

	if (error == 0) {
		/* STREAMS devices don't have a size */
		sp->s_size = csp->s_size = 0;

		if (!(stp->sd_flag & STRISTTY) || (flag & FNOCTTY))
			return (0);

		/* try to allocate it as a controlling terminal */
		if (strctty(stp) != EINTR)
			return (0);

		/* strctty() was interrupted by a signal */
		if (ct) {
			/* we only create contracts for userland opens */
			ASSERT(ttoproc(curthread));
			(void) contract_abandon(ct, ttoproc(curthread), 0);
		}
		(void) spec_close(vp, flag, 1, 0, cr, cc);
		return (EINTR);
	}

	/*
	 * Deal with stropen failure.
	 *
	 * sd_flag in the stream head cannot change since the
	 * common snode is locked before the call to stropen().
	 */
	if ((stp != NULL) && (stp->sd_flag & STREOPENFAIL)) {
		/*
		 * Open failed part way through.
		 */
		mutex_enter(&stp->sd_lock);
		stp->sd_flag &= ~STREOPENFAIL;
		mutex_exit(&stp->sd_lock);

		UNLOCK_CSP(csp);
		(void) spec_close(vp, flag, 1, 0, cr, cc);
	} else {
		UNLOCK_CSP(csp);
		SN_RELE(csp);
	}

	/*
	 * Resolution for STREAMS vs. regular character device: If the
	 * STREAMS open(9e) returns ENOSTR, then try an ordinary device
	 * open instead.
	 */
	if (error == ENOSTR) {
		goto not_streams;
	}
	return (error);
}
8717c478bd9Sstevel@tonic-gate 
/*
 * spec_close: VOP_CLOSE(9E) entry point for special files.
 *
 * Closes are serialized on the common snode (LOCK_CSP) so they cannot
 * race with each other or with clone opens.  The driver's close routine
 * (device_close) is invoked only when the last open reference through
 * any snode sharing this common snode goes away: csp->s_count drops to
 * zero and there are no outstanding mappings.  For non-final closes
 * (count > 1) only the per-process lock/share cleanup is performed.
 *
 * Returns 0, or the errno from device_close() on the last close.
 */
/*ARGSUSED2*/
static int
spec_close(
	struct vnode	*vp,
	int		flag,
	int		count,
	offset_t	offset,
	struct cred	*cr,
	caller_context_t *ct)
{
	struct vnode *cvp;
	struct snode *sp, *csp;
	enum vtype type;
	dev_t dev;
	int error = 0;
	int sysclone;

	if (!(flag & FKLYR)) {
		/* this only applies to closes of devices from userland */
		cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
		cleanshares(vp, ttoproc(curthread)->p_pid);
		if (vp->v_stream)
			strclean(vp);
	}
	/* not the last reference from this file descriptor: nothing more */
	if (count > 1)
		return (0);

	/* we allow close to succeed even if device is fenced off */
	sp = VTOS(vp);
	cvp = sp->s_commonvp;

	dev = sp->s_dev;
	type = vp->v_type;

	ASSERT(type == VCHR || type == VBLK);

	/*
	 * Prevent close/close and close/open races by serializing closes
	 * on this common snode. Clone opens are held up until after
	 * we have closed this device so the streams linkage is maintained
	 */
	csp = VTOS(cvp);

	LOCK_CSP(csp);
	mutex_enter(&csp->s_lock);

	csp->s_count--;			/* one fewer open reference : SN_RELE */
	sysclone = sp->s_flag & SCLONE;

	/*
	 * Invalidate size on each close.
	 *
	 * XXX We do this on each close because we don't have interfaces that
	 * allow a driver to invalidate the size.  Since clearing this on each
	 * close this causes property overhead we skip /dev/null and
	 * /dev/zero to avoid degrading kenbus performance.
	 */
	if (getmajor(dev) != mm_major)
		csp->s_flag &= ~SSIZEVALID;

	/*
	 * Only call the close routine when the last open reference through
	 * any [s, v]node goes away.  This can be checked by looking at
	 * s_count on the common vnode.
	 */
	if ((csp->s_count == 0) && (csp->s_mapcnt == 0)) {
		/* we don't need a close */
		csp->s_flag &= ~(SNEEDCLOSE | SSIZEVALID);

		/*
		 * A cloning driver may open-clone to the same dev_t that we
		 * are closing before spec_inactive destroys the common snode.
		 * If this occurs the s_dip association needs to be reevaluated.
		 * We clear SDIPSET to force reevaluation in this case.  When
		 * reevaluation occurs (by spec_clone after open), if the
		 * devinfo association has changed then the old association
		 * will be released as the new association is established by
		 * spec_assoc_vp_with_devi().
		 */
		if (csp->s_flag & (SCLONE | SSELFCLONE))
			csp->s_flag &= ~SDIPSET;

		/* SCLOSING marks the close-in-progress window for opens */
		csp->s_flag |= SCLOSING;
		mutex_exit(&csp->s_lock);
		error = device_close(vp, flag, cr);

		/*
		 * Decrement the devops held in clnopen()
		 */
		if (sysclone) {
			ddi_rele_driver(getmajor(dev));
		}
		mutex_enter(&csp->s_lock);
		csp->s_flag &= ~SCLOSING;
	}

	UNLOCK_CSP_LOCK_HELD(csp);
	mutex_exit(&csp->s_lock);

	return (error);
}
9737c478bd9Sstevel@tonic-gate 
/*
 * spec_read: VOP_READ(9E) entry point for special files.
 *
 * Dispatch: a vnode with a stream head goes through strread(); other
 * character devices go straight to the driver via cdev_read(); block
 * devices are read through the page cache (vpm or segmap, whichever is
 * enabled) in at most MAXBSIZE-aligned chunks, bounded by the cached
 * device size.
 *
 * Returns 0 on success (including a short read at end of device) or an
 * errno from the driver/copy path.
 */
/*ARGSUSED2*/
static int
spec_read(
	struct vnode	*vp,
	struct uio	*uiop,
	int		ioflag,
	struct cred	*cr,
	caller_context_t *ct)
{
	int error;
	struct snode *sp = VTOS(vp);
	dev_t dev = sp->s_dev;
	size_t n;
	ulong_t on;
	u_offset_t bdevsize;
	offset_t maxoff;
	offset_t off;
	struct vnode *blkvp;

	ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);

	if (vp->v_stream) {
		ASSERT(vp->v_type == VCHR);
		smark(sp, SACC);	/* mark access time update needed */
		return (strread(vp, uiop, cr));
	}

	if (uiop->uio_resid == 0)
		return (0);

	/*
	 * Plain old character devices that set D_U64BIT can have
	 * unrestricted offsets.
	 */
	maxoff = spec_maxoffset(vp);
	ASSERT(maxoff != -1 || vp->v_type == VCHR);

	if (maxoff != -1 && (uiop->uio_loffset < 0 ||
	    uiop->uio_loffset + uiop->uio_resid > maxoff))
		return (EINVAL);

	if (vp->v_type == VCHR) {
		smark(sp, SACC);
		ASSERT(vp->v_stream == NULL);
		return (cdev_read(dev, uiop, cr));
	}

	/*
	 * Block device.  Copy through the page cache, one MAXBSIZE
	 * window at a time, clipped to the device size.
	 */
	error = 0;
	blkvp = sp->s_commonvp;
	bdevsize = SPEC_SIZE(VTOS(blkvp));

	do {
		caddr_t base;
		offset_t diff;

		/* off = window-aligned offset, on = offset within window */
		off = uiop->uio_loffset & (offset_t)MAXBMASK;
		on = (size_t)(uiop->uio_loffset & MAXBOFFSET);
		n = (size_t)MIN(MAXBSIZE - on, uiop->uio_resid);
		diff = bdevsize - uiop->uio_loffset;

		if (diff <= 0)		/* at or past end of device */
			break;
		if (diff < n)
			n = (size_t)diff;

		if (vpm_enable) {
			error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
			    n, uiop, 1, NULL, 0, S_READ);
		} else {
			base = segmap_getmapflt(segkmap, blkvp,
			    (u_offset_t)(off + on), n, 1, S_READ);

			error = uiomove(base + on, n, UIO_READ, uiop);
		}
		if (!error) {
			int flags = 0;
			/*
			 * If we read a whole block, we won't need this
			 * buffer again soon.
			 */
			if (n + on == MAXBSIZE)
				flags = SM_DONTNEED | SM_FREE;
			if (vpm_enable) {
				error = vpm_sync_pages(blkvp, off, n, flags);
			} else {
				error = segmap_release(segkmap, base, flags);
			}
		} else {
			if (vpm_enable) {
				(void) vpm_sync_pages(blkvp, off, n, 0);
			} else {
				(void) segmap_release(segkmap, base, 0);
			}
			/*
			 * With an unknown device size, a failed copy past
			 * the real end is treated as EOF, not an error.
			 */
			if (bdevsize == UNKNOWN_SIZE) {
				error = 0;
				break;
			}
		}
	} while (error == 0 && uiop->uio_resid > 0 && n != 0);

	return (error);
}
10797c478bd9Sstevel@tonic-gate 
10807c478bd9Sstevel@tonic-gate /*ARGSUSED*/
10817c478bd9Sstevel@tonic-gate static int
spec_write(struct vnode * vp,struct uio * uiop,int ioflag,struct cred * cr,caller_context_t * ct)10827c478bd9Sstevel@tonic-gate spec_write(
10837c478bd9Sstevel@tonic-gate 	struct vnode *vp,
10847c478bd9Sstevel@tonic-gate 	struct uio *uiop,
10857c478bd9Sstevel@tonic-gate 	int ioflag,
10867c478bd9Sstevel@tonic-gate 	struct cred *cr,
1087da6c28aaSamw 	caller_context_t *ct)
10887c478bd9Sstevel@tonic-gate {
10897c478bd9Sstevel@tonic-gate 	int error;
10907c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
10917c478bd9Sstevel@tonic-gate 	dev_t dev = sp->s_dev;
10927c478bd9Sstevel@tonic-gate 	size_t n;
10937c478bd9Sstevel@tonic-gate 	ulong_t on;
10947c478bd9Sstevel@tonic-gate 	u_offset_t bdevsize;
10957c478bd9Sstevel@tonic-gate 	offset_t maxoff;
10967c478bd9Sstevel@tonic-gate 	offset_t off;
10977c478bd9Sstevel@tonic-gate 	struct vnode *blkvp;
10987c478bd9Sstevel@tonic-gate 
10997c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);
11007c478bd9Sstevel@tonic-gate 
1101349dcea3SGarrett D'Amore 	if (vp->v_stream) {
11027c478bd9Sstevel@tonic-gate 		ASSERT(vp->v_type == VCHR);
11037c478bd9Sstevel@tonic-gate 		smark(sp, SUPD);
11047c478bd9Sstevel@tonic-gate 		return (strwrite(vp, uiop, cr));
11057c478bd9Sstevel@tonic-gate 	}
11067c478bd9Sstevel@tonic-gate 
11077c478bd9Sstevel@tonic-gate 	/*
11087c478bd9Sstevel@tonic-gate 	 * Plain old character devices that set D_U64BIT can have
11097c478bd9Sstevel@tonic-gate 	 * unrestricted offsets.
11107c478bd9Sstevel@tonic-gate 	 */
11117c478bd9Sstevel@tonic-gate 	maxoff = spec_maxoffset(vp);
11127c478bd9Sstevel@tonic-gate 	ASSERT(maxoff != -1 || vp->v_type == VCHR);
11137c478bd9Sstevel@tonic-gate 
11147c478bd9Sstevel@tonic-gate 	if (maxoff != -1 && (uiop->uio_loffset < 0 ||
11157c478bd9Sstevel@tonic-gate 	    uiop->uio_loffset + uiop->uio_resid > maxoff))
11167c478bd9Sstevel@tonic-gate 		return (EINVAL);
11177c478bd9Sstevel@tonic-gate 
11187c478bd9Sstevel@tonic-gate 	if (vp->v_type == VCHR) {
11197c478bd9Sstevel@tonic-gate 		smark(sp, SUPD);
1120349dcea3SGarrett D'Amore 		ASSERT(vp->v_stream == NULL);
11217c478bd9Sstevel@tonic-gate 		return (cdev_write(dev, uiop, cr));
11227c478bd9Sstevel@tonic-gate 	}
11237c478bd9Sstevel@tonic-gate 
11247c478bd9Sstevel@tonic-gate 	if (uiop->uio_resid == 0)
11257c478bd9Sstevel@tonic-gate 		return (0);
11267c478bd9Sstevel@tonic-gate 
11277c478bd9Sstevel@tonic-gate 	error = 0;
11287c478bd9Sstevel@tonic-gate 	blkvp = sp->s_commonvp;
11297c478bd9Sstevel@tonic-gate 	bdevsize = SPEC_SIZE(VTOS(blkvp));
11307c478bd9Sstevel@tonic-gate 
11317c478bd9Sstevel@tonic-gate 	do {
11327c478bd9Sstevel@tonic-gate 		int pagecreate;
11337c478bd9Sstevel@tonic-gate 		int newpage;
11347c478bd9Sstevel@tonic-gate 		caddr_t base;
11357c478bd9Sstevel@tonic-gate 		offset_t diff;
11367c478bd9Sstevel@tonic-gate 
11377c478bd9Sstevel@tonic-gate 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
11387c478bd9Sstevel@tonic-gate 		on = (ulong_t)(uiop->uio_loffset & MAXBOFFSET);
11397c478bd9Sstevel@tonic-gate 		n = (size_t)MIN(MAXBSIZE - on, uiop->uio_resid);
11407c478bd9Sstevel@tonic-gate 		pagecreate = 0;
11417c478bd9Sstevel@tonic-gate 
11427c478bd9Sstevel@tonic-gate 		diff = bdevsize - uiop->uio_loffset;
11437c478bd9Sstevel@tonic-gate 		if (diff <= 0) {
11447c478bd9Sstevel@tonic-gate 			error = ENXIO;
11457c478bd9Sstevel@tonic-gate 			break;
11467c478bd9Sstevel@tonic-gate 		}
11477c478bd9Sstevel@tonic-gate 		if (diff < n)
11487c478bd9Sstevel@tonic-gate 			n = (size_t)diff;
11497c478bd9Sstevel@tonic-gate 
11507c478bd9Sstevel@tonic-gate 		/*
11517c478bd9Sstevel@tonic-gate 		 * Check to see if we can skip reading in the page
11527c478bd9Sstevel@tonic-gate 		 * and just allocate the memory.  We can do this
11537c478bd9Sstevel@tonic-gate 		 * if we are going to rewrite the entire mapping
11547c478bd9Sstevel@tonic-gate 		 * or if we are going to write to end of the device
11557c478bd9Sstevel@tonic-gate 		 * from the beginning of the mapping.
11567c478bd9Sstevel@tonic-gate 		 */
11577c478bd9Sstevel@tonic-gate 		if (n == MAXBSIZE || (on == 0 && (off + n) == bdevsize))
11587c478bd9Sstevel@tonic-gate 			pagecreate = 1;
11597c478bd9Sstevel@tonic-gate 
11607c478bd9Sstevel@tonic-gate 		newpage = 0;
11616f5f1c63SDonghai Qiao 
11626f5f1c63SDonghai Qiao 		/*
11636f5f1c63SDonghai Qiao 		 * Touch the page and fault it in if it is not in core
11646f5f1c63SDonghai Qiao 		 * before segmap_getmapflt or vpm_data_copy can lock it.
11656f5f1c63SDonghai Qiao 		 * This is to avoid the deadlock if the buffer is mapped
11666f5f1c63SDonghai Qiao 		 * to the same file through mmap which we want to write.
11676f5f1c63SDonghai Qiao 		 */
11686f5f1c63SDonghai Qiao 		uio_prefaultpages((long)n, uiop);
11696f5f1c63SDonghai Qiao 
1170a5652762Spraks 		if (vpm_enable) {
1171a5652762Spraks 			error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
1172e099bf07Scth 			    n, uiop, !pagecreate, NULL, 0, S_WRITE);
1173a5652762Spraks 		} else {
1174a5652762Spraks 			base = segmap_getmapflt(segkmap, blkvp,
1175a5652762Spraks 			    (u_offset_t)(off + on), n, !pagecreate, S_WRITE);
11767c478bd9Sstevel@tonic-gate 
1177a5652762Spraks 			/*
1178a5652762Spraks 			 * segmap_pagecreate() returns 1 if it calls
1179a5652762Spraks 			 * page_create_va() to allocate any pages.
1180a5652762Spraks 			 */
11817c478bd9Sstevel@tonic-gate 
1182a5652762Spraks 			if (pagecreate)
1183a5652762Spraks 				newpage = segmap_pagecreate(segkmap, base + on,
1184e099bf07Scth 				    n, 0);
1185a5652762Spraks 
1186a5652762Spraks 			error = uiomove(base + on, n, UIO_WRITE, uiop);
1187a5652762Spraks 		}
11887c478bd9Sstevel@tonic-gate 
1189a5652762Spraks 		if (!vpm_enable && pagecreate &&
11907c478bd9Sstevel@tonic-gate 		    uiop->uio_loffset <
11917c478bd9Sstevel@tonic-gate 		    P2ROUNDUP_TYPED(off + on + n, PAGESIZE, offset_t)) {
11927c478bd9Sstevel@tonic-gate 			/*
11937c478bd9Sstevel@tonic-gate 			 * We created pages w/o initializing them completely,
11947c478bd9Sstevel@tonic-gate 			 * thus we need to zero the part that wasn't set up.
11957c478bd9Sstevel@tonic-gate 			 * This can happen if we write to the end of the device
11967c478bd9Sstevel@tonic-gate 			 * or if we had some sort of error during the uiomove.
11977c478bd9Sstevel@tonic-gate 			 */
11987c478bd9Sstevel@tonic-gate 			long nzero;
11997c478bd9Sstevel@tonic-gate 			offset_t nmoved;
12007c478bd9Sstevel@tonic-gate 
12017c478bd9Sstevel@tonic-gate 			nmoved = (uiop->uio_loffset - (off + on));
12027c478bd9Sstevel@tonic-gate 			if (nmoved < 0 || nmoved > n) {
12037c478bd9Sstevel@tonic-gate 				panic("spec_write: nmoved bogus");
12047c478bd9Sstevel@tonic-gate 				/*NOTREACHED*/
12057c478bd9Sstevel@tonic-gate 			}
12067c478bd9Sstevel@tonic-gate 			nzero = (long)P2ROUNDUP(on + n, PAGESIZE) -
12077c478bd9Sstevel@tonic-gate 			    (on + nmoved);
12087c478bd9Sstevel@tonic-gate 			if (nzero < 0 || (on + nmoved + nzero > MAXBSIZE)) {
12097c478bd9Sstevel@tonic-gate 				panic("spec_write: nzero bogus");
12107c478bd9Sstevel@tonic-gate 				/*NOTREACHED*/
12117c478bd9Sstevel@tonic-gate 			}
12127c478bd9Sstevel@tonic-gate 			(void) kzero(base + on + nmoved, (size_t)nzero);
12137c478bd9Sstevel@tonic-gate 		}
12147c478bd9Sstevel@tonic-gate 
12157c478bd9Sstevel@tonic-gate 		/*
12167c478bd9Sstevel@tonic-gate 		 * Unlock the pages which have been allocated by
12177c478bd9Sstevel@tonic-gate 		 * page_create_va() in segmap_pagecreate().
12187c478bd9Sstevel@tonic-gate 		 */
1219a5652762Spraks 		if (!vpm_enable && newpage)
12207c478bd9Sstevel@tonic-gate 			segmap_pageunlock(segkmap, base + on,
1221e099bf07Scth 			    (size_t)n, S_WRITE);
12227c478bd9Sstevel@tonic-gate 
12237c478bd9Sstevel@tonic-gate 		if (error == 0) {
12247c478bd9Sstevel@tonic-gate 			int flags = 0;
12257c478bd9Sstevel@tonic-gate 
12267c478bd9Sstevel@tonic-gate 			/*
12277c478bd9Sstevel@tonic-gate 			 * Force write back for synchronous write cases.
12287c478bd9Sstevel@tonic-gate 			 */
12297c478bd9Sstevel@tonic-gate 			if (ioflag & (FSYNC|FDSYNC))
12307c478bd9Sstevel@tonic-gate 				flags = SM_WRITE;
12317c478bd9Sstevel@tonic-gate 			else if (n + on == MAXBSIZE || IS_SWAPVP(vp)) {
12327c478bd9Sstevel@tonic-gate 				/*
12337c478bd9Sstevel@tonic-gate 				 * Have written a whole block.
12347c478bd9Sstevel@tonic-gate 				 * Start an asynchronous write and
12357c478bd9Sstevel@tonic-gate 				 * mark the buffer to indicate that
12367c478bd9Sstevel@tonic-gate 				 * it won't be needed again soon.
12377c478bd9Sstevel@tonic-gate 				 * Push swap files here, since it
12387c478bd9Sstevel@tonic-gate 				 * won't happen anywhere else.
12397c478bd9Sstevel@tonic-gate 				 */
12407c478bd9Sstevel@tonic-gate 				flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
12417c478bd9Sstevel@tonic-gate 			}
12427c478bd9Sstevel@tonic-gate 			smark(sp, SUPD|SCHG);
1243a5652762Spraks 			if (vpm_enable) {
1244a5652762Spraks 				error = vpm_sync_pages(blkvp, off, n, flags);
1245a5652762Spraks 			} else {
1246a5652762Spraks 				error = segmap_release(segkmap, base, flags);
1247a5652762Spraks 			}
1248a5652762Spraks 		} else {
1249a5652762Spraks 			if (vpm_enable) {
1250a5652762Spraks 				(void) vpm_sync_pages(blkvp, off, n, SM_INVAL);
1251a5652762Spraks 			} else {
1252a5652762Spraks 				(void) segmap_release(segkmap, base, SM_INVAL);
1253a5652762Spraks 			}
1254a5652762Spraks 		}
12557c478bd9Sstevel@tonic-gate 
12567c478bd9Sstevel@tonic-gate 	} while (error == 0 && uiop->uio_resid > 0 && n != 0);
12577c478bd9Sstevel@tonic-gate 
12587c478bd9Sstevel@tonic-gate 	return (error);
12597c478bd9Sstevel@tonic-gate }
12607c478bd9Sstevel@tonic-gate 
1261da6c28aaSamw /*ARGSUSED6*/
12627c478bd9Sstevel@tonic-gate static int
spec_ioctl(struct vnode * vp,int cmd,intptr_t arg,int mode,struct cred * cr,int * rvalp,caller_context_t * ct)12637c478bd9Sstevel@tonic-gate spec_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, struct cred *cr,
1264da6c28aaSamw     int *rvalp, caller_context_t *ct)
12657c478bd9Sstevel@tonic-gate {
12667c478bd9Sstevel@tonic-gate 	struct snode *sp;
12677c478bd9Sstevel@tonic-gate 	dev_t dev;
12687c478bd9Sstevel@tonic-gate 	int error;
12697c478bd9Sstevel@tonic-gate 
12707c478bd9Sstevel@tonic-gate 	if (vp->v_type != VCHR)
12717c478bd9Sstevel@tonic-gate 		return (ENOTTY);
127225e8c5aaSvikram 
127325e8c5aaSvikram 	/*
127425e8c5aaSvikram 	 * allow ioctls() to go through even for fenced snodes, as they
127525e8c5aaSvikram 	 * may include unconfiguration operation - for example popping of
127625e8c5aaSvikram 	 * streams modules.
127725e8c5aaSvikram 	 */
127825e8c5aaSvikram 
12797c478bd9Sstevel@tonic-gate 	sp = VTOS(vp);
12807c478bd9Sstevel@tonic-gate 	dev = sp->s_dev;
1281349dcea3SGarrett D'Amore 	if (vp->v_stream) {
12827c478bd9Sstevel@tonic-gate 		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
12837c478bd9Sstevel@tonic-gate 	} else {
12847c478bd9Sstevel@tonic-gate 		error = cdev_ioctl(dev, cmd, arg, mode, cr, rvalp);
12857c478bd9Sstevel@tonic-gate 	}
12867c478bd9Sstevel@tonic-gate 	return (error);
12877c478bd9Sstevel@tonic-gate }
12887c478bd9Sstevel@tonic-gate 
12897c478bd9Sstevel@tonic-gate static int
spec_getattr(struct vnode * vp,struct vattr * vap,int flags,struct cred * cr,caller_context_t * ct)1290da6c28aaSamw spec_getattr(
1291da6c28aaSamw 	struct vnode *vp,
1292da6c28aaSamw 	struct vattr *vap,
1293da6c28aaSamw 	int flags,
1294da6c28aaSamw 	struct cred *cr,
1295da6c28aaSamw 	caller_context_t *ct)
12967c478bd9Sstevel@tonic-gate {
12977c478bd9Sstevel@tonic-gate 	int error;
12987c478bd9Sstevel@tonic-gate 	struct snode *sp;
12997c478bd9Sstevel@tonic-gate 	struct vnode *realvp;
13007c478bd9Sstevel@tonic-gate 
13017c478bd9Sstevel@tonic-gate 	/* With ATTR_COMM we will not get attributes from realvp */
13027c478bd9Sstevel@tonic-gate 	if (flags & ATTR_COMM) {
13037c478bd9Sstevel@tonic-gate 		sp = VTOS(vp);
13047c478bd9Sstevel@tonic-gate 		vp = sp->s_commonvp;
13057c478bd9Sstevel@tonic-gate 	}
13067c478bd9Sstevel@tonic-gate 	sp = VTOS(vp);
130725e8c5aaSvikram 
130825e8c5aaSvikram 	/* we want stat() to fail with ENXIO if the device is fenced off */
130925e8c5aaSvikram 	if (S_ISFENCED(sp))
131025e8c5aaSvikram 		return (ENXIO);
131125e8c5aaSvikram 
13127c478bd9Sstevel@tonic-gate 	realvp = sp->s_realvp;
13137c478bd9Sstevel@tonic-gate 
13147c478bd9Sstevel@tonic-gate 	if (realvp == NULL) {
13157c478bd9Sstevel@tonic-gate 		static int snode_shift	= 0;
13167c478bd9Sstevel@tonic-gate 
13177c478bd9Sstevel@tonic-gate 		/*
13187c478bd9Sstevel@tonic-gate 		 * Calculate the amount of bitshift to a snode pointer which
13197c478bd9Sstevel@tonic-gate 		 * will still keep it unique.  See below.
13207c478bd9Sstevel@tonic-gate 		 */
13217c478bd9Sstevel@tonic-gate 		if (snode_shift == 0)
13227c478bd9Sstevel@tonic-gate 			snode_shift = highbit(sizeof (struct snode));
13237c478bd9Sstevel@tonic-gate 		ASSERT(snode_shift > 0);
13247c478bd9Sstevel@tonic-gate 
13257c478bd9Sstevel@tonic-gate 		/*
13267c478bd9Sstevel@tonic-gate 		 * No real vnode behind this one.  Fill in the fields
13277c478bd9Sstevel@tonic-gate 		 * from the snode.
13287c478bd9Sstevel@tonic-gate 		 *
13297c478bd9Sstevel@tonic-gate 		 * This code should be refined to return only the
13307c478bd9Sstevel@tonic-gate 		 * attributes asked for instead of all of them.
13317c478bd9Sstevel@tonic-gate 		 */
13327c478bd9Sstevel@tonic-gate 		vap->va_type = vp->v_type;
13337c478bd9Sstevel@tonic-gate 		vap->va_mode = 0;
13347c478bd9Sstevel@tonic-gate 		vap->va_uid = vap->va_gid = 0;
13357c478bd9Sstevel@tonic-gate 		vap->va_fsid = sp->s_fsid;
13367c478bd9Sstevel@tonic-gate 
13377c478bd9Sstevel@tonic-gate 		/*
13387c478bd9Sstevel@tonic-gate 		 * If the va_nodeid is > MAX_USHORT, then i386 stats might
13397c478bd9Sstevel@tonic-gate 		 * fail. So we shift down the snode pointer to try and get
13407c478bd9Sstevel@tonic-gate 		 * the most uniqueness into 16-bits.
13417c478bd9Sstevel@tonic-gate 		 */
13427c478bd9Sstevel@tonic-gate 		vap->va_nodeid = ((ino64_t)(uintptr_t)sp >> snode_shift) &
13437c478bd9Sstevel@tonic-gate 		    0xFFFF;
13447c478bd9Sstevel@tonic-gate 		vap->va_nlink = 0;
13457c478bd9Sstevel@tonic-gate 		vap->va_rdev = sp->s_dev;
13467c478bd9Sstevel@tonic-gate 
13477c478bd9Sstevel@tonic-gate 		/*
13487c478bd9Sstevel@tonic-gate 		 * va_nblocks is the number of 512 byte blocks used to store
13497c478bd9Sstevel@tonic-gate 		 * the mknod for the device, not the number of blocks on the
13507c478bd9Sstevel@tonic-gate 		 * device itself.  This is typically zero since the mknod is
13517c478bd9Sstevel@tonic-gate 		 * represented directly in the inode itself.
13527c478bd9Sstevel@tonic-gate 		 */
13537c478bd9Sstevel@tonic-gate 		vap->va_nblocks = 0;
13547c478bd9Sstevel@tonic-gate 	} else {
1355da6c28aaSamw 		error = VOP_GETATTR(realvp, vap, flags, cr, ct);
13567c478bd9Sstevel@tonic-gate 		if (error != 0)
13577c478bd9Sstevel@tonic-gate 			return (error);
13587c478bd9Sstevel@tonic-gate 	}
13597c478bd9Sstevel@tonic-gate 
13607c478bd9Sstevel@tonic-gate 	/* set the size from the snode */
13617c478bd9Sstevel@tonic-gate 	vap->va_size = SPEC_SIZE(VTOS(sp->s_commonvp));
13627c478bd9Sstevel@tonic-gate 	vap->va_blksize = MAXBSIZE;
13637c478bd9Sstevel@tonic-gate 
13647c478bd9Sstevel@tonic-gate 	mutex_enter(&sp->s_lock);
13657c478bd9Sstevel@tonic-gate 	vap->va_atime.tv_sec = sp->s_atime;
13667c478bd9Sstevel@tonic-gate 	vap->va_mtime.tv_sec = sp->s_mtime;
13677c478bd9Sstevel@tonic-gate 	vap->va_ctime.tv_sec = sp->s_ctime;
13687c478bd9Sstevel@tonic-gate 	mutex_exit(&sp->s_lock);
13697c478bd9Sstevel@tonic-gate 
13707c478bd9Sstevel@tonic-gate 	vap->va_atime.tv_nsec = 0;
13717c478bd9Sstevel@tonic-gate 	vap->va_mtime.tv_nsec = 0;
13727c478bd9Sstevel@tonic-gate 	vap->va_ctime.tv_nsec = 0;
13737c478bd9Sstevel@tonic-gate 	vap->va_seq = 0;
13747c478bd9Sstevel@tonic-gate 
13757c478bd9Sstevel@tonic-gate 	return (0);
13767c478bd9Sstevel@tonic-gate }
13777c478bd9Sstevel@tonic-gate 
13787c478bd9Sstevel@tonic-gate static int
spec_setattr(struct vnode * vp,struct vattr * vap,int flags,struct cred * cr,caller_context_t * ct)13797c478bd9Sstevel@tonic-gate spec_setattr(
13807c478bd9Sstevel@tonic-gate 	struct vnode *vp,
13817c478bd9Sstevel@tonic-gate 	struct vattr *vap,
13827c478bd9Sstevel@tonic-gate 	int flags,
13837c478bd9Sstevel@tonic-gate 	struct cred *cr,
1384da6c28aaSamw 	caller_context_t *ct)
13857c478bd9Sstevel@tonic-gate {
13867c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
13877c478bd9Sstevel@tonic-gate 	struct vnode *realvp;
13887c478bd9Sstevel@tonic-gate 	int error;
13897c478bd9Sstevel@tonic-gate 
139025e8c5aaSvikram 	/* fail with ENXIO if the device is fenced off */
139125e8c5aaSvikram 	if (S_ISFENCED(sp))
139225e8c5aaSvikram 		return (ENXIO);
139325e8c5aaSvikram 
13947c478bd9Sstevel@tonic-gate 	if (vp->v_type == VCHR && vp->v_stream && (vap->va_mask & AT_SIZE)) {
13957c478bd9Sstevel@tonic-gate 		/*
13967c478bd9Sstevel@tonic-gate 		 * 1135080:	O_TRUNC should have no effect on
13977c478bd9Sstevel@tonic-gate 		 *		named pipes and terminal devices.
13987c478bd9Sstevel@tonic-gate 		 */
13997c478bd9Sstevel@tonic-gate 		ASSERT(vap->va_mask == AT_SIZE);
14007c478bd9Sstevel@tonic-gate 		return (0);
14017c478bd9Sstevel@tonic-gate 	}
14027c478bd9Sstevel@tonic-gate 
14037c478bd9Sstevel@tonic-gate 	if ((realvp = sp->s_realvp) == NULL)
14047c478bd9Sstevel@tonic-gate 		error = 0;	/* no real vnode to update */
14057c478bd9Sstevel@tonic-gate 	else
1406da6c28aaSamw 		error = VOP_SETATTR(realvp, vap, flags, cr, ct);
14077c478bd9Sstevel@tonic-gate 	if (error == 0) {
14087c478bd9Sstevel@tonic-gate 		/*
14097c478bd9Sstevel@tonic-gate 		 * If times were changed, update snode.
14107c478bd9Sstevel@tonic-gate 		 */
14117c478bd9Sstevel@tonic-gate 		mutex_enter(&sp->s_lock);
14127c478bd9Sstevel@tonic-gate 		if (vap->va_mask & AT_ATIME)
14137c478bd9Sstevel@tonic-gate 			sp->s_atime = vap->va_atime.tv_sec;
14147c478bd9Sstevel@tonic-gate 		if (vap->va_mask & AT_MTIME) {
14157c478bd9Sstevel@tonic-gate 			sp->s_mtime = vap->va_mtime.tv_sec;
14167c478bd9Sstevel@tonic-gate 			sp->s_ctime = gethrestime_sec();
14177c478bd9Sstevel@tonic-gate 		}
14187c478bd9Sstevel@tonic-gate 		mutex_exit(&sp->s_lock);
14197c478bd9Sstevel@tonic-gate 	}
14207c478bd9Sstevel@tonic-gate 	return (error);
14217c478bd9Sstevel@tonic-gate }
14227c478bd9Sstevel@tonic-gate 
14237c478bd9Sstevel@tonic-gate static int
spec_access(struct vnode * vp,int mode,int flags,struct cred * cr,caller_context_t * ct)1424da6c28aaSamw spec_access(
1425da6c28aaSamw 	struct vnode *vp,
1426da6c28aaSamw 	int mode,
1427da6c28aaSamw 	int flags,
1428da6c28aaSamw 	struct cred *cr,
1429da6c28aaSamw 	caller_context_t *ct)
14307c478bd9Sstevel@tonic-gate {
14317c478bd9Sstevel@tonic-gate 	struct vnode *realvp;
14327c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
14337c478bd9Sstevel@tonic-gate 
143425e8c5aaSvikram 	/* fail with ENXIO if the device is fenced off */
143525e8c5aaSvikram 	if (S_ISFENCED(sp))
143625e8c5aaSvikram 		return (ENXIO);
143725e8c5aaSvikram 
14387c478bd9Sstevel@tonic-gate 	if ((realvp = sp->s_realvp) != NULL)
1439da6c28aaSamw 		return (VOP_ACCESS(realvp, mode, flags, cr, ct));
14407c478bd9Sstevel@tonic-gate 	else
14417c478bd9Sstevel@tonic-gate 		return (0);	/* Allow all access. */
14427c478bd9Sstevel@tonic-gate }
14437c478bd9Sstevel@tonic-gate 
14447c478bd9Sstevel@tonic-gate /*
14457c478bd9Sstevel@tonic-gate  * This can be called if creat or an open with O_CREAT is done on the root
14467c478bd9Sstevel@tonic-gate  * of a lofs mount where the mounted entity is a special file.
14477c478bd9Sstevel@tonic-gate  */
14487c478bd9Sstevel@tonic-gate /*ARGSUSED*/
14497c478bd9Sstevel@tonic-gate static int
spec_create(struct vnode * dvp,char * name,vattr_t * vap,enum vcexcl excl,int mode,struct vnode ** vpp,struct cred * cr,int flag,caller_context_t * ct,vsecattr_t * vsecp)1450da6c28aaSamw spec_create(
1451da6c28aaSamw 	struct vnode *dvp,
1452da6c28aaSamw 	char *name,
1453da6c28aaSamw 	vattr_t *vap,
1454da6c28aaSamw 	enum vcexcl excl,
1455da6c28aaSamw 	int mode,
1456da6c28aaSamw 	struct vnode **vpp,
1457da6c28aaSamw 	struct cred *cr,
1458da6c28aaSamw 	int flag,
1459da6c28aaSamw 	caller_context_t *ct,
1460da6c28aaSamw 	vsecattr_t *vsecp)
14617c478bd9Sstevel@tonic-gate {
14627c478bd9Sstevel@tonic-gate 	int error;
146325e8c5aaSvikram 	struct snode *sp = VTOS(dvp);
146425e8c5aaSvikram 
146525e8c5aaSvikram 	/* fail with ENXIO if the device is fenced off */
146625e8c5aaSvikram 	if (S_ISFENCED(sp))
146725e8c5aaSvikram 		return (ENXIO);
14687c478bd9Sstevel@tonic-gate 
14697c478bd9Sstevel@tonic-gate 	ASSERT(dvp && (dvp->v_flag & VROOT) && *name == '\0');
14707c478bd9Sstevel@tonic-gate 	if (excl == NONEXCL) {
1471da6c28aaSamw 		if (mode && (error = spec_access(dvp, mode, 0, cr, ct)))
14727c478bd9Sstevel@tonic-gate 			return (error);
14737c478bd9Sstevel@tonic-gate 		VN_HOLD(dvp);
14747c478bd9Sstevel@tonic-gate 		return (0);
14757c478bd9Sstevel@tonic-gate 	}
14767c478bd9Sstevel@tonic-gate 	return (EEXIST);
14777c478bd9Sstevel@tonic-gate }
14787c478bd9Sstevel@tonic-gate 
14797c478bd9Sstevel@tonic-gate /*
14807c478bd9Sstevel@tonic-gate  * In order to sync out the snode times without multi-client problems,
14817c478bd9Sstevel@tonic-gate  * make sure the times written out are never earlier than the times
14827c478bd9Sstevel@tonic-gate  * already set in the vnode.
14837c478bd9Sstevel@tonic-gate  */
14847c478bd9Sstevel@tonic-gate static int
spec_fsync(struct vnode * vp,int syncflag,struct cred * cr,caller_context_t * ct)1485da6c28aaSamw spec_fsync(
1486da6c28aaSamw 	struct vnode *vp,
1487da6c28aaSamw 	int syncflag,
1488da6c28aaSamw 	struct cred *cr,
1489da6c28aaSamw 	caller_context_t *ct)
14907c478bd9Sstevel@tonic-gate {
14917c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
14927c478bd9Sstevel@tonic-gate 	struct vnode *realvp;
14937c478bd9Sstevel@tonic-gate 	struct vnode *cvp;
14947c478bd9Sstevel@tonic-gate 	struct vattr va, vatmp;
14957c478bd9Sstevel@tonic-gate 
149625e8c5aaSvikram 	/* allow syncing even if device is fenced off */
149725e8c5aaSvikram 
14987c478bd9Sstevel@tonic-gate 	/* If times didn't change, don't flush anything. */
14997c478bd9Sstevel@tonic-gate 	mutex_enter(&sp->s_lock);
15007c478bd9Sstevel@tonic-gate 	if ((sp->s_flag & (SACC|SUPD|SCHG)) == 0 && vp->v_type != VBLK) {
15017c478bd9Sstevel@tonic-gate 		mutex_exit(&sp->s_lock);
15027c478bd9Sstevel@tonic-gate 		return (0);
15037c478bd9Sstevel@tonic-gate 	}
15047c478bd9Sstevel@tonic-gate 	sp->s_flag &= ~(SACC|SUPD|SCHG);
15057c478bd9Sstevel@tonic-gate 	mutex_exit(&sp->s_lock);
15067c478bd9Sstevel@tonic-gate 	cvp = sp->s_commonvp;
15077c478bd9Sstevel@tonic-gate 	realvp = sp->s_realvp;
15087c478bd9Sstevel@tonic-gate 
15097c478bd9Sstevel@tonic-gate 	if (vp->v_type == VBLK && cvp != vp && vn_has_cached_data(cvp) &&
15107c478bd9Sstevel@tonic-gate 	    (cvp->v_flag & VISSWAP) == 0)
1511da6c28aaSamw 		(void) VOP_PUTPAGE(cvp, (offset_t)0, 0, 0, cr, ct);
15127c478bd9Sstevel@tonic-gate 
1513feb08c6bSbillm 	/*
1514feb08c6bSbillm 	 * For devices that support it, force write cache to stable storage.
1515feb08c6bSbillm 	 * We don't need the lock to check s_flags since we can treat
1516feb08c6bSbillm 	 * SNOFLUSH as a hint.
1517feb08c6bSbillm 	 */
1518feb08c6bSbillm 	if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
1519feb08c6bSbillm 	    !(sp->s_flag & SNOFLUSH)) {
1520feb08c6bSbillm 		int rval, rc;
1521a84224b3Sgz 		struct dk_callback spec_callback;
1522a84224b3Sgz 
1523a84224b3Sgz 		spec_callback.dkc_flag = FLUSH_VOLATILE;
1524a84224b3Sgz 		spec_callback.dkc_callback = NULL;
1525a84224b3Sgz 
1526a84224b3Sgz 		/* synchronous flush on volatile cache */
1527feb08c6bSbillm 		rc = cdev_ioctl(vp->v_rdev, DKIOCFLUSHWRITECACHE,
1528a84224b3Sgz 		    (intptr_t)&spec_callback, FNATIVE|FKIOCTL, cr, &rval);
1529a84224b3Sgz 
1530feb08c6bSbillm 		if (rc == ENOTSUP || rc == ENOTTY) {
1531feb08c6bSbillm 			mutex_enter(&sp->s_lock);
1532feb08c6bSbillm 			sp->s_flag |= SNOFLUSH;
1533feb08c6bSbillm 			mutex_exit(&sp->s_lock);
1534feb08c6bSbillm 		}
1535feb08c6bSbillm 	}
1536feb08c6bSbillm 
15377c478bd9Sstevel@tonic-gate 	/*
15387c478bd9Sstevel@tonic-gate 	 * If no real vnode to update, don't flush anything.
15397c478bd9Sstevel@tonic-gate 	 */
15407c478bd9Sstevel@tonic-gate 	if (realvp == NULL)
15417c478bd9Sstevel@tonic-gate 		return (0);
15427c478bd9Sstevel@tonic-gate 
15437c478bd9Sstevel@tonic-gate 	vatmp.va_mask = AT_ATIME|AT_MTIME;
1544da6c28aaSamw 	if (VOP_GETATTR(realvp, &vatmp, 0, cr, ct) == 0) {
15457c478bd9Sstevel@tonic-gate 
15467c478bd9Sstevel@tonic-gate 		mutex_enter(&sp->s_lock);
15477c478bd9Sstevel@tonic-gate 		if (vatmp.va_atime.tv_sec > sp->s_atime)
15487c478bd9Sstevel@tonic-gate 			va.va_atime = vatmp.va_atime;
15497c478bd9Sstevel@tonic-gate 		else {
15507c478bd9Sstevel@tonic-gate 			va.va_atime.tv_sec = sp->s_atime;
15517c478bd9Sstevel@tonic-gate 			va.va_atime.tv_nsec = 0;
15527c478bd9Sstevel@tonic-gate 		}
15537c478bd9Sstevel@tonic-gate 		if (vatmp.va_mtime.tv_sec > sp->s_mtime)
15547c478bd9Sstevel@tonic-gate 			va.va_mtime = vatmp.va_mtime;
15557c478bd9Sstevel@tonic-gate 		else {
15567c478bd9Sstevel@tonic-gate 			va.va_mtime.tv_sec = sp->s_mtime;
15577c478bd9Sstevel@tonic-gate 			va.va_mtime.tv_nsec = 0;
15587c478bd9Sstevel@tonic-gate 		}
15597c478bd9Sstevel@tonic-gate 		mutex_exit(&sp->s_lock);
15607c478bd9Sstevel@tonic-gate 
15617c478bd9Sstevel@tonic-gate 		va.va_mask = AT_ATIME|AT_MTIME;
1562da6c28aaSamw 		(void) VOP_SETATTR(realvp, &va, 0, cr, ct);
15637c478bd9Sstevel@tonic-gate 	}
1564da6c28aaSamw 	(void) VOP_FSYNC(realvp, syncflag, cr, ct);
15657c478bd9Sstevel@tonic-gate 	return (0);
15667c478bd9Sstevel@tonic-gate }
15677c478bd9Sstevel@tonic-gate 
15687c478bd9Sstevel@tonic-gate /*ARGSUSED*/
15697c478bd9Sstevel@tonic-gate static void
spec_inactive(struct vnode * vp,struct cred * cr,caller_context_t * ct)1570da6c28aaSamw spec_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
15717c478bd9Sstevel@tonic-gate {
15727c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
15737c478bd9Sstevel@tonic-gate 	struct vnode *cvp;
15747c478bd9Sstevel@tonic-gate 	struct vnode *rvp;
15757c478bd9Sstevel@tonic-gate 
15767c478bd9Sstevel@tonic-gate 	/*
15777c478bd9Sstevel@tonic-gate 	 * If no one has reclaimed the vnode, remove from the
15787c478bd9Sstevel@tonic-gate 	 * cache now.
15797c478bd9Sstevel@tonic-gate 	 */
15807c478bd9Sstevel@tonic-gate 	if (vp->v_count < 1) {
15817c478bd9Sstevel@tonic-gate 		panic("spec_inactive: Bad v_count");
15827c478bd9Sstevel@tonic-gate 		/*NOTREACHED*/
15837c478bd9Sstevel@tonic-gate 	}
15847c478bd9Sstevel@tonic-gate 	mutex_enter(&stable_lock);
15857c478bd9Sstevel@tonic-gate 
15867c478bd9Sstevel@tonic-gate 	mutex_enter(&vp->v_lock);
1587ade42b55SSebastien Roy 	VN_RELE_LOCKED(vp);
1588ade42b55SSebastien Roy 	if (vp->v_count != 0) {
15897c478bd9Sstevel@tonic-gate 		mutex_exit(&vp->v_lock);
15907c478bd9Sstevel@tonic-gate 		mutex_exit(&stable_lock);
15917c478bd9Sstevel@tonic-gate 		return;
15927c478bd9Sstevel@tonic-gate 	}
15937c478bd9Sstevel@tonic-gate 	mutex_exit(&vp->v_lock);
15947c478bd9Sstevel@tonic-gate 
15957c478bd9Sstevel@tonic-gate 	sdelete(sp);
15967c478bd9Sstevel@tonic-gate 	mutex_exit(&stable_lock);
15977c478bd9Sstevel@tonic-gate 
15987c478bd9Sstevel@tonic-gate 	/* We are the sole owner of sp now */
15997c478bd9Sstevel@tonic-gate 	cvp = sp->s_commonvp;
16007c478bd9Sstevel@tonic-gate 	rvp = sp->s_realvp;
16017c478bd9Sstevel@tonic-gate 
16027c478bd9Sstevel@tonic-gate 	if (rvp) {
16037c478bd9Sstevel@tonic-gate 		/*
16047c478bd9Sstevel@tonic-gate 		 * If the snode times changed, then update the times
16057c478bd9Sstevel@tonic-gate 		 * associated with the "realvp".
16067c478bd9Sstevel@tonic-gate 		 */
16077c478bd9Sstevel@tonic-gate 		if ((sp->s_flag & (SACC|SUPD|SCHG)) != 0) {
16087c478bd9Sstevel@tonic-gate 
16097c478bd9Sstevel@tonic-gate 			struct vattr va, vatmp;
16107c478bd9Sstevel@tonic-gate 
16117c478bd9Sstevel@tonic-gate 			mutex_enter(&sp->s_lock);
16127c478bd9Sstevel@tonic-gate 			sp->s_flag &= ~(SACC|SUPD|SCHG);
16137c478bd9Sstevel@tonic-gate 			mutex_exit(&sp->s_lock);
16147c478bd9Sstevel@tonic-gate 			vatmp.va_mask = AT_ATIME|AT_MTIME;
16157c478bd9Sstevel@tonic-gate 			/*
16167c478bd9Sstevel@tonic-gate 			 * The user may not own the device, but we
16177c478bd9Sstevel@tonic-gate 			 * want to update the attributes anyway.
16187c478bd9Sstevel@tonic-gate 			 */
1619da6c28aaSamw 			if (VOP_GETATTR(rvp, &vatmp, 0, kcred, ct) == 0) {
16207c478bd9Sstevel@tonic-gate 				if (vatmp.va_atime.tv_sec > sp->s_atime)
16217c478bd9Sstevel@tonic-gate 					va.va_atime = vatmp.va_atime;
16227c478bd9Sstevel@tonic-gate 				else {
16237c478bd9Sstevel@tonic-gate 					va.va_atime.tv_sec = sp->s_atime;
16247c478bd9Sstevel@tonic-gate 					va.va_atime.tv_nsec = 0;
16257c478bd9Sstevel@tonic-gate 				}
16267c478bd9Sstevel@tonic-gate 				if (vatmp.va_mtime.tv_sec > sp->s_mtime)
16277c478bd9Sstevel@tonic-gate 					va.va_mtime = vatmp.va_mtime;
16287c478bd9Sstevel@tonic-gate 				else {
16297c478bd9Sstevel@tonic-gate 					va.va_mtime.tv_sec = sp->s_mtime;
16307c478bd9Sstevel@tonic-gate 					va.va_mtime.tv_nsec = 0;
16317c478bd9Sstevel@tonic-gate 				}
16327c478bd9Sstevel@tonic-gate 
16337c478bd9Sstevel@tonic-gate 				va.va_mask = AT_ATIME|AT_MTIME;
1634da6c28aaSamw 				(void) VOP_SETATTR(rvp, &va, 0, kcred, ct);
16357c478bd9Sstevel@tonic-gate 			}
16367c478bd9Sstevel@tonic-gate 		}
16377c478bd9Sstevel@tonic-gate 	}
16387c478bd9Sstevel@tonic-gate 	ASSERT(!vn_has_cached_data(vp));
16397c478bd9Sstevel@tonic-gate 	vn_invalid(vp);
16407c478bd9Sstevel@tonic-gate 
16417c478bd9Sstevel@tonic-gate 	/* if we are sharing another file systems vfs, release it */
16427c478bd9Sstevel@tonic-gate 	if (vp->v_vfsp && (vp->v_vfsp != &spec_vfs))
16437c478bd9Sstevel@tonic-gate 		VFS_RELE(vp->v_vfsp);
16447c478bd9Sstevel@tonic-gate 
16457c478bd9Sstevel@tonic-gate 	/* if we have a realvp, release the realvp */
16467c478bd9Sstevel@tonic-gate 	if (rvp)
16477c478bd9Sstevel@tonic-gate 		VN_RELE(rvp);
16487c478bd9Sstevel@tonic-gate 
16497c478bd9Sstevel@tonic-gate 	/* if we have a common, release the common */
1650fbe27353Sedp 	if (cvp && (cvp != vp)) {
16517c478bd9Sstevel@tonic-gate 		VN_RELE(cvp);
1652fbe27353Sedp #ifdef DEBUG
1653fbe27353Sedp 	} else if (cvp) {
1654fbe27353Sedp 		/*
1655fbe27353Sedp 		 * if this is the last reference to a common vnode, any
1656fbe27353Sedp 		 * associated stream had better have been closed
1657fbe27353Sedp 		 */
1658fbe27353Sedp 		ASSERT(cvp == vp);
1659fbe27353Sedp 		ASSERT(cvp->v_stream == NULL);
1660fbe27353Sedp #endif /* DEBUG */
1661fbe27353Sedp 	}
16627c478bd9Sstevel@tonic-gate 
16637c478bd9Sstevel@tonic-gate 	/*
16647c478bd9Sstevel@tonic-gate 	 * if we have a hold on a devinfo node (established by
16657c478bd9Sstevel@tonic-gate 	 * spec_assoc_vp_with_devi), release the hold
16667c478bd9Sstevel@tonic-gate 	 */
16677c478bd9Sstevel@tonic-gate 	if (sp->s_dip)
16687c478bd9Sstevel@tonic-gate 		ddi_release_devi(sp->s_dip);
16697c478bd9Sstevel@tonic-gate 
16707c478bd9Sstevel@tonic-gate 	/*
16717c478bd9Sstevel@tonic-gate 	 * If we have an associated device policy, release it.
16727c478bd9Sstevel@tonic-gate 	 */
16737c478bd9Sstevel@tonic-gate 	if (sp->s_plcy != NULL)
16747c478bd9Sstevel@tonic-gate 		dpfree(sp->s_plcy);
16757c478bd9Sstevel@tonic-gate 
16767c478bd9Sstevel@tonic-gate 	/*
16777c478bd9Sstevel@tonic-gate 	 * If all holds on the devinfo node are through specfs/devfs
16787c478bd9Sstevel@tonic-gate 	 * and we just destroyed the last specfs node associated with the
16797c478bd9Sstevel@tonic-gate 	 * device, then the devinfo node reference count should now be
16807c478bd9Sstevel@tonic-gate 	 * zero.  We can't check this because there may be other holds
16817c478bd9Sstevel@tonic-gate 	 * on the node from non file system sources: ddi_hold_devi_by_instance
16827c478bd9Sstevel@tonic-gate 	 * for example.
16837c478bd9Sstevel@tonic-gate 	 */
16847c478bd9Sstevel@tonic-gate 	kmem_cache_free(snode_cache, sp);
16857c478bd9Sstevel@tonic-gate }
16867c478bd9Sstevel@tonic-gate 
16877c478bd9Sstevel@tonic-gate static int
spec_fid(struct vnode * vp,struct fid * fidp,caller_context_t * ct)1688da6c28aaSamw spec_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
16897c478bd9Sstevel@tonic-gate {
16907c478bd9Sstevel@tonic-gate 	struct vnode *realvp;
16917c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
16927c478bd9Sstevel@tonic-gate 
16937c478bd9Sstevel@tonic-gate 	if ((realvp = sp->s_realvp) != NULL)
1694da6c28aaSamw 		return (VOP_FID(realvp, fidp, ct));
16957c478bd9Sstevel@tonic-gate 	else
16967c478bd9Sstevel@tonic-gate 		return (EINVAL);
16977c478bd9Sstevel@tonic-gate }
16987c478bd9Sstevel@tonic-gate 
16997c478bd9Sstevel@tonic-gate /*ARGSUSED1*/
17007c478bd9Sstevel@tonic-gate static int
spec_seek(struct vnode * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)1701da6c28aaSamw spec_seek(
1702da6c28aaSamw 	struct vnode *vp,
1703da6c28aaSamw 	offset_t ooff,
1704da6c28aaSamw 	offset_t *noffp,
1705da6c28aaSamw 	caller_context_t *ct)
17067c478bd9Sstevel@tonic-gate {
17077c478bd9Sstevel@tonic-gate 	offset_t maxoff = spec_maxoffset(vp);
17087c478bd9Sstevel@tonic-gate 
17097c478bd9Sstevel@tonic-gate 	if (maxoff == -1 || *noffp <= maxoff)
17107c478bd9Sstevel@tonic-gate 		return (0);
17117c478bd9Sstevel@tonic-gate 	else
17127c478bd9Sstevel@tonic-gate 		return (EINVAL);
17137c478bd9Sstevel@tonic-gate }
17147c478bd9Sstevel@tonic-gate 
17157c478bd9Sstevel@tonic-gate static int
spec_frlock(struct vnode * vp,int cmd,struct flock64 * bfp,int flag,offset_t offset,struct flk_callback * flk_cbp,struct cred * cr,caller_context_t * ct)17167c478bd9Sstevel@tonic-gate spec_frlock(
17177c478bd9Sstevel@tonic-gate 	struct vnode *vp,
17187c478bd9Sstevel@tonic-gate 	int		cmd,
17197c478bd9Sstevel@tonic-gate 	struct flock64	*bfp,
17207c478bd9Sstevel@tonic-gate 	int		flag,
17217c478bd9Sstevel@tonic-gate 	offset_t	offset,
17227c478bd9Sstevel@tonic-gate 	struct flk_callback *flk_cbp,
1723da6c28aaSamw 	struct cred	*cr,
1724da6c28aaSamw 	caller_context_t *ct)
17257c478bd9Sstevel@tonic-gate {
17267c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
17277c478bd9Sstevel@tonic-gate 	struct snode *csp;
17287c478bd9Sstevel@tonic-gate 
17297c478bd9Sstevel@tonic-gate 	csp = VTOS(sp->s_commonvp);
17307c478bd9Sstevel@tonic-gate 	/*
17317c478bd9Sstevel@tonic-gate 	 * If file is being mapped, disallow frlock.
17327c478bd9Sstevel@tonic-gate 	 */
17337c478bd9Sstevel@tonic-gate 	if (csp->s_mapcnt > 0)
17347c478bd9Sstevel@tonic-gate 		return (EAGAIN);
17357c478bd9Sstevel@tonic-gate 
1736da6c28aaSamw 	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
17377c478bd9Sstevel@tonic-gate }
17387c478bd9Sstevel@tonic-gate 
17397c478bd9Sstevel@tonic-gate static int
spec_realvp(struct vnode * vp,struct vnode ** vpp,caller_context_t * ct)1740da6c28aaSamw spec_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
17417c478bd9Sstevel@tonic-gate {
17427c478bd9Sstevel@tonic-gate 	struct vnode *rvp;
17437c478bd9Sstevel@tonic-gate 
17447c478bd9Sstevel@tonic-gate 	if ((rvp = VTOS(vp)->s_realvp) != NULL) {
17457c478bd9Sstevel@tonic-gate 		vp = rvp;
1746da6c28aaSamw 		if (VOP_REALVP(vp, &rvp, ct) == 0)
17477c478bd9Sstevel@tonic-gate 			vp = rvp;
17487c478bd9Sstevel@tonic-gate 	}
17497c478bd9Sstevel@tonic-gate 
17507c478bd9Sstevel@tonic-gate 	*vpp = vp;
17517c478bd9Sstevel@tonic-gate 	return (0);
17527c478bd9Sstevel@tonic-gate }
17537c478bd9Sstevel@tonic-gate 
17547c478bd9Sstevel@tonic-gate /*
17557c478bd9Sstevel@tonic-gate  * Return all the pages from [off..off + len] in block
17567c478bd9Sstevel@tonic-gate  * or character device.
17577c478bd9Sstevel@tonic-gate  */
1758da6c28aaSamw /*ARGSUSED*/
17597c478bd9Sstevel@tonic-gate static int
spec_getpage(struct vnode * vp,offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cr,caller_context_t * ct)17607c478bd9Sstevel@tonic-gate spec_getpage(
17617c478bd9Sstevel@tonic-gate 	struct vnode	*vp,
17627c478bd9Sstevel@tonic-gate 	offset_t	off,
17637c478bd9Sstevel@tonic-gate 	size_t		len,
17647c478bd9Sstevel@tonic-gate 	uint_t		*protp,
17657c478bd9Sstevel@tonic-gate 	page_t		*pl[],
17667c478bd9Sstevel@tonic-gate 	size_t		plsz,
17677c478bd9Sstevel@tonic-gate 	struct seg	*seg,
17687c478bd9Sstevel@tonic-gate 	caddr_t		addr,
17697c478bd9Sstevel@tonic-gate 	enum seg_rw	rw,
1770da6c28aaSamw 	struct cred	*cr,
1771da6c28aaSamw 	caller_context_t *ct)
17727c478bd9Sstevel@tonic-gate {
17737c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
17747c478bd9Sstevel@tonic-gate 	int err;
17757c478bd9Sstevel@tonic-gate 
17767c478bd9Sstevel@tonic-gate 	ASSERT(sp->s_commonvp == vp);
17777c478bd9Sstevel@tonic-gate 
17787c478bd9Sstevel@tonic-gate 	/*
17797c478bd9Sstevel@tonic-gate 	 * XXX	Given the above assertion, this might not do
17807c478bd9Sstevel@tonic-gate 	 *	what is wanted here.
17817c478bd9Sstevel@tonic-gate 	 */
17827c478bd9Sstevel@tonic-gate 	if (vp->v_flag & VNOMAP)
17837c478bd9Sstevel@tonic-gate 		return (ENOSYS);
17847c478bd9Sstevel@tonic-gate 	TRACE_4(TR_FAC_SPECFS, TR_SPECFS_GETPAGE,
1785e099bf07Scth 	    "specfs getpage:vp %p off %llx len %ld snode %p",
1786e099bf07Scth 	    vp, off, len, sp);
17877c478bd9Sstevel@tonic-gate 
17887c478bd9Sstevel@tonic-gate 	switch (vp->v_type) {
17897c478bd9Sstevel@tonic-gate 	case VBLK:
17907c478bd9Sstevel@tonic-gate 		if (protp != NULL)
17917c478bd9Sstevel@tonic-gate 			*protp = PROT_ALL;
17927c478bd9Sstevel@tonic-gate 
17937c478bd9Sstevel@tonic-gate 		if (((u_offset_t)off + len) > (SPEC_SIZE(sp) + PAGEOFFSET))
17947c478bd9Sstevel@tonic-gate 			return (EFAULT);	/* beyond EOF */
17957c478bd9Sstevel@tonic-gate 
179606e6833aSJosef 'Jeff' Sipek 		err = pvn_getpages(spec_getapage, vp, (u_offset_t)off, len,
179706e6833aSJosef 'Jeff' Sipek 		    protp, pl, plsz, seg, addr, rw, cr);
17987c478bd9Sstevel@tonic-gate 		break;
17997c478bd9Sstevel@tonic-gate 
18007c478bd9Sstevel@tonic-gate 	case VCHR:
18017c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "spec_getpage called for character device. "
18027c478bd9Sstevel@tonic-gate 		    "Check any non-ON consolidation drivers");
18037c478bd9Sstevel@tonic-gate 		err = 0;
18047c478bd9Sstevel@tonic-gate 		pl[0] = (page_t *)0;
18057c478bd9Sstevel@tonic-gate 		break;
18067c478bd9Sstevel@tonic-gate 
18077c478bd9Sstevel@tonic-gate 	default:
18087c478bd9Sstevel@tonic-gate 		panic("spec_getpage: bad v_type 0x%x", vp->v_type);
18097c478bd9Sstevel@tonic-gate 		/*NOTREACHED*/
18107c478bd9Sstevel@tonic-gate 	}
18117c478bd9Sstevel@tonic-gate 
18127c478bd9Sstevel@tonic-gate 	return (err);
18137c478bd9Sstevel@tonic-gate }
18147c478bd9Sstevel@tonic-gate 
extern int klustsize;	/* set in machdep.c */

int spec_ra = 1;	/* tunable: enable read-ahead in spec_getapage() */
int spec_lostpage;	/* number of times we lost original page */
18197c478bd9Sstevel@tonic-gate 
/*
 * Read in one page of a block special file, klustering up to
 * adj_klustsize worth of adjacent pages into a single I/O, and
 * optionally kicking off an asynchronous read-ahead kluster when the
 * request continues sequentially from the previous one (s_nextr) and
 * the spec_ra tunable is set.  Called back from pvn_getpages() via
 * spec_getpage().
 *
 * If pl == NULL the read is fire-and-forget: the I/O is issued B_ASYNC
 * and no page list is returned.  Otherwise pl[] is filled in with the
 * pages covering the request.
 */
/*ARGSUSED2*/
static int
spec_getapage(
	struct vnode *vp,
	u_offset_t	off,
	size_t		len,
	uint_t		*protp,
	page_t		*pl[],
	size_t		plsz,
	struct seg	*seg,
	caddr_t		addr,
	enum seg_rw	rw,
	struct cred	*cr)
{
	struct snode *sp;
	struct buf *bp;
	page_t *pp, *pp2;
	u_offset_t io_off1, io_off2;
	size_t io_len1;
	size_t io_len2;
	size_t blksz;
	u_offset_t blkoff;
	int dora, err;
	page_t *pagefound;
	uint_t xlen;
	size_t adj_klustsize;
	u_offset_t size;
	u_offset_t tmpoff;

	sp = VTOS(vp);
	TRACE_3(TR_FAC_SPECFS, TR_SPECFS_GETAPAGE,
	    "specfs getapage:vp %p off %llx snode %p", vp, off, sp);
reread:

	err = 0;
	bp = NULL;
	pp = NULL;
	pp2 = NULL;

	if (pl != NULL)
		pl[0] = NULL;

	/* The common snode carries the device size. */
	size = SPEC_SIZE(VTOS(sp->s_commonvp));

	/* Sequential access detected: consider read-ahead klustering. */
	if (spec_ra && sp->s_nextr == off)
		dora = 1;
	else
		dora = 0;

	if (size == UNKNOWN_SIZE) {
		/* Size unknown: one page at a time, no read-ahead. */
		dora = 0;
		adj_klustsize = PAGESIZE;
	} else {
		adj_klustsize = dora ? klustsize : PAGESIZE;
	}

again:
	if ((pagefound = page_exists(vp, off)) == NULL) {
		if (rw == S_CREATE) {
			/*
			 * We're allocating a swap slot and it's
			 * associated page was not found, so allocate
			 * and return it.
			 */
			if ((pp = page_create_va(vp, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
				panic("spec_getapage: page_create");
				/*NOTREACHED*/
			}
			io_len1 = PAGESIZE;
			sp->s_nextr = off + PAGESIZE;
		} else {
			/*
			 * Need to really do disk I/O to get the page(s).
			 */
			blkoff = (off / adj_klustsize) * adj_klustsize;
			if (size == UNKNOWN_SIZE) {
				blksz = PAGESIZE;
			} else {
				/* Clamp the kluster to the device size. */
				if (blkoff + adj_klustsize <= size)
					blksz = adj_klustsize;
				else
					blksz =
					    MIN(size - blkoff, adj_klustsize);
			}

			pp = pvn_read_kluster(vp, off, seg, addr, &tmpoff,
			    &io_len1, blkoff, blksz, 0);
			io_off1 = tmpoff;
			/*
			 * Make sure the page didn't sneak into the
			 * cache while we blocked in pvn_read_kluster.
			 */
			if (pp == NULL)
				goto again;

			/*
			 * Zero part of page which we are not
			 * going to be reading from disk now.
			 */
			xlen = (uint_t)(io_len1 & PAGEOFFSET);
			if (xlen != 0)
				pagezero(pp->p_prev, xlen, PAGESIZE - xlen);

			/* Async when no page list is wanted back. */
			bp = spec_startio(vp, pp, io_off1, io_len1,
			    pl == NULL ? (B_ASYNC | B_READ) : B_READ);
			sp->s_nextr = io_off1 + io_len1;
		}
	}

	if (dora && rw != S_CREATE) {
		u_offset_t off2;
		caddr_t addr2;

		/* Read-ahead starts at the next kluster boundary. */
		off2 = ((off / adj_klustsize) + 1) * adj_klustsize;
		addr2 = addr + (off2 - off);

		pp2 = NULL;
		/*
		 * If we are past EOF then don't bother trying
		 * with read-ahead.
		 */
		if (off2 >= size)
			pp2 = NULL;
		else {
			if (off2 + adj_klustsize <= size)
				blksz = adj_klustsize;
			else
				blksz = MIN(size - off2, adj_klustsize);

			pp2 = pvn_read_kluster(vp, off2, seg, addr2, &tmpoff,
			    &io_len2, off2, blksz, 1);
			io_off2 = tmpoff;
		}

		if (pp2 != NULL) {
			/*
			 * Zero part of page which we are not
			 * going to be reading from disk now.
			 */
			xlen = (uint_t)(io_len2 & PAGEOFFSET);
			if (xlen != 0)
				pagezero(pp2->p_prev, xlen, PAGESIZE - xlen);

			/* Read-ahead is always fire-and-forget. */
			(void) spec_startio(vp, pp2, io_off2, io_len2,
			    B_READ | B_ASYNC);
		}
	}

	if (pl == NULL)
		return (err);

	if (bp != NULL) {
		/* Synchronous read: wait for the main kluster I/O. */
		err = biowait(bp);
		pageio_done(bp);

		if (err) {
			if (pp != NULL)
				pvn_read_done(pp, B_ERROR);
			return (err);
		}
	}

	if (pagefound) {
		se_t se = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
		/*
		 * Page exists in the cache, acquire the appropriate
		 * lock.  If this fails, start all over again.
		 */

		if ((pp = page_lookup(vp, off, se)) == NULL) {
			spec_lostpage++;
			goto reread;
		}
		pl[0] = pp;
		pl[1] = NULL;

		sp->s_nextr = off + PAGESIZE;
		return (0);
	}

	if (pp != NULL)
		pvn_plist_init(pp, pl, plsz, off, io_len1, rw);
	return (0);
}
20057c478bd9Sstevel@tonic-gate 
20067c478bd9Sstevel@tonic-gate /*
20077c478bd9Sstevel@tonic-gate  * Flags are composed of {B_INVAL, B_DIRTY B_FREE, B_DONTNEED, B_FORCE}.
20087c478bd9Sstevel@tonic-gate  * If len == 0, do from off to EOF.
20097c478bd9Sstevel@tonic-gate  *
20107c478bd9Sstevel@tonic-gate  * The normal cases should be len == 0 & off == 0 (entire vp list),
20117c478bd9Sstevel@tonic-gate  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
20127c478bd9Sstevel@tonic-gate  * (from pageout).
20137c478bd9Sstevel@tonic-gate  */
/*ARGSUSED5*/
int
spec_putpage(
	struct vnode *vp,
	offset_t	off,
	size_t		len,
	int		flags,
	struct cred	*cr,
	caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);
	struct vnode *cvp;
	page_t *pp;
	u_offset_t io_off;
	size_t io_len = 0;	/* for lint */
	int err = 0;
	u_offset_t size;
	u_offset_t tmpoff;

	ASSERT(vp->v_count != 0);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	cvp = sp->s_commonvp;
	size = SPEC_SIZE(VTOS(cvp));

	/* No cached pages, or range begins beyond the device: done. */
	if (!vn_has_cached_data(vp) || off >= size)
		return (0);

	/* Only block devices cache pages, and only on the common vnode. */
	ASSERT(vp->v_type == VBLK && cvp == vp);
	TRACE_4(TR_FAC_SPECFS, TR_SPECFS_PUTPAGE,
	    "specfs putpage:vp %p off %llx len %ld snode %p",
	    vp, off, len, sp);

	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off.
		 */
		err = pvn_vplist_dirty(vp, off, spec_putapage,
		    flags, cr);
	} else {
		u_offset_t eoff;

		/*
		 * Loop over all offsets in the range [off...off + len]
		 * looking for pages to deal with.  We set limits so
		 * that we kluster to klustsize boundaries.
		 */
		eoff = off + len;
		for (io_off = off; io_off < eoff && io_off < size;
		    io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			/* Clean (or missing) page: just step one page. */
			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = spec_putapage(vp, pp, &tmpoff, &io_len,
				    flags, cr);
				io_off = tmpoff;
				if (err != 0)
					break;
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}
	return (err);
}
21017c478bd9Sstevel@tonic-gate 
21027c478bd9Sstevel@tonic-gate 
21037c478bd9Sstevel@tonic-gate /*
21047c478bd9Sstevel@tonic-gate  * Write out a single page, possibly klustering adjacent
21057c478bd9Sstevel@tonic-gate  * dirty pages.
21067c478bd9Sstevel@tonic-gate  */
/*ARGSUSED5*/
static int
spec_putapage(
	struct vnode	*vp,
	page_t		*pp,
	u_offset_t	*offp,		/* return value */
	size_t		*lenp,		/* return value */
	int		flags,
	struct cred	*cr)
{
	struct snode *sp = VTOS(vp);
	u_offset_t io_off;
	size_t io_len;
	size_t blksz;
	u_offset_t blkoff;
	int err = 0;
	struct buf *bp;
	u_offset_t size;
	size_t adj_klustsize;
	u_offset_t tmpoff;

	/*
	 * Destroy read ahead value since we are really going to write.
	 */
	sp->s_nextr = 0;
	size = SPEC_SIZE(VTOS(sp->s_commonvp));

	adj_klustsize = klustsize;

	/* Round the page's offset down to a kluster boundary. */
	blkoff = (pp->p_offset / adj_klustsize) * adj_klustsize;

	/* Clamp the kluster so it does not extend past the device size. */
	if (blkoff + adj_klustsize <= size)
		blksz = adj_klustsize;
	else
		blksz = size - blkoff;

	/*
	 * Find a kluster that fits in one contiguous chunk.
	 */
	pp = pvn_write_kluster(vp, pp, &tmpoff, &io_len, blkoff,
	    blksz, flags);
	io_off = tmpoff;

	/*
	 * Check for page length rounding problems
	 * XXX - Is this necessary?
	 */
	if (io_off + io_len > size) {
		ASSERT((io_off + io_len) - size < PAGESIZE);
		io_len = size - io_off;
	}

	bp = spec_startio(vp, pp, io_off, io_len, B_WRITE | flags);

	/*
	 * Wait for i/o to complete if the request is not B_ASYNC.
	 */
	if ((flags & B_ASYNC) == 0) {
		err = biowait(bp);
		pageio_done(bp);
		pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
	}

	/* Report the range actually written so the caller can skip it. */
	if (offp)
		*offp = io_off;
	if (lenp)
		*lenp = io_len;
	TRACE_4(TR_FAC_SPECFS, TR_SPECFS_PUTAPAGE,
	    "specfs putapage:vp %p offp %p snode %p err %d",
	    vp, offp, sp, err);
	return (err);
}
21797c478bd9Sstevel@tonic-gate 
21807c478bd9Sstevel@tonic-gate /*
21817c478bd9Sstevel@tonic-gate  * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
21827c478bd9Sstevel@tonic-gate  */
21837c478bd9Sstevel@tonic-gate static struct buf *
spec_startio(struct vnode * vp,page_t * pp,u_offset_t io_off,size_t io_len,int flags)21847c478bd9Sstevel@tonic-gate spec_startio(
21857c478bd9Sstevel@tonic-gate 	struct vnode *vp,
21867c478bd9Sstevel@tonic-gate 	page_t		*pp,
21877c478bd9Sstevel@tonic-gate 	u_offset_t	io_off,
21887c478bd9Sstevel@tonic-gate 	size_t		io_len,
21897c478bd9Sstevel@tonic-gate 	int		flags)
21907c478bd9Sstevel@tonic-gate {
21917c478bd9Sstevel@tonic-gate 	struct buf *bp;
21927c478bd9Sstevel@tonic-gate 
21937c478bd9Sstevel@tonic-gate 	bp = pageio_setup(pp, io_len, vp, flags);
21947c478bd9Sstevel@tonic-gate 
21957c478bd9Sstevel@tonic-gate 	bp->b_edev = vp->v_rdev;
21967c478bd9Sstevel@tonic-gate 	bp->b_dev = cmpdev(vp->v_rdev);
21977c478bd9Sstevel@tonic-gate 	bp->b_blkno = btodt(io_off);
21987c478bd9Sstevel@tonic-gate 	bp->b_un.b_addr = (caddr_t)0;
21997c478bd9Sstevel@tonic-gate 
22007c478bd9Sstevel@tonic-gate 	(void) bdev_strategy(bp);
22017c478bd9Sstevel@tonic-gate 
22027c478bd9Sstevel@tonic-gate 	if (flags & B_READ)
22037c478bd9Sstevel@tonic-gate 		lwp_stat_update(LWP_STAT_INBLK, 1);
22047c478bd9Sstevel@tonic-gate 	else
22057c478bd9Sstevel@tonic-gate 		lwp_stat_update(LWP_STAT_OUBLK, 1);
22067c478bd9Sstevel@tonic-gate 
22077c478bd9Sstevel@tonic-gate 	return (bp);
22087c478bd9Sstevel@tonic-gate }
22097c478bd9Sstevel@tonic-gate 
22107c478bd9Sstevel@tonic-gate static int
spec_poll(struct vnode * vp,short events,int anyyet,short * reventsp,struct pollhead ** phpp,caller_context_t * ct)22117c478bd9Sstevel@tonic-gate spec_poll(
22127c478bd9Sstevel@tonic-gate 	struct vnode	*vp,
22137c478bd9Sstevel@tonic-gate 	short		events,
22147c478bd9Sstevel@tonic-gate 	int		anyyet,
22157c478bd9Sstevel@tonic-gate 	short		*reventsp,
2216da6c28aaSamw 	struct pollhead **phpp,
2217da6c28aaSamw 	caller_context_t *ct)
22187c478bd9Sstevel@tonic-gate {
22197c478bd9Sstevel@tonic-gate 	dev_t dev;
22207c478bd9Sstevel@tonic-gate 	int error;
22217c478bd9Sstevel@tonic-gate 
22227c478bd9Sstevel@tonic-gate 	if (vp->v_type == VBLK)
2223da6c28aaSamw 		error = fs_poll(vp, events, anyyet, reventsp, phpp, ct);
22247c478bd9Sstevel@tonic-gate 	else {
22257c478bd9Sstevel@tonic-gate 		ASSERT(vp->v_type == VCHR);
22267c478bd9Sstevel@tonic-gate 		dev = vp->v_rdev;
2227349dcea3SGarrett D'Amore 		if (vp->v_stream) {
22287c478bd9Sstevel@tonic-gate 			ASSERT(vp->v_stream != NULL);
22297c478bd9Sstevel@tonic-gate 			error = strpoll(vp->v_stream, events, anyyet,
22307c478bd9Sstevel@tonic-gate 			    reventsp, phpp);
22317c478bd9Sstevel@tonic-gate 		} else if (devopsp[getmajor(dev)]->devo_cb_ops->cb_chpoll) {
22327c478bd9Sstevel@tonic-gate 			error = cdev_poll(dev, events, anyyet, reventsp, phpp);
22337c478bd9Sstevel@tonic-gate 		} else {
2234da6c28aaSamw 			error = fs_poll(vp, events, anyyet, reventsp, phpp, ct);
22357c478bd9Sstevel@tonic-gate 		}
22367c478bd9Sstevel@tonic-gate 	}
22377c478bd9Sstevel@tonic-gate 	return (error);
22387c478bd9Sstevel@tonic-gate }
22397c478bd9Sstevel@tonic-gate 
22407c478bd9Sstevel@tonic-gate /*
22417c478bd9Sstevel@tonic-gate  * This routine is called through the cdevsw[] table to handle
22427c478bd9Sstevel@tonic-gate  * traditional mmap'able devices that support a d_mmap function.
22437c478bd9Sstevel@tonic-gate  */
22447c478bd9Sstevel@tonic-gate /*ARGSUSED8*/
22457c478bd9Sstevel@tonic-gate int
spec_segmap(dev_t dev,off_t off,struct as * as,caddr_t * addrp,off_t len,uint_t prot,uint_t maxprot,uint_t flags,struct cred * cred)22467c478bd9Sstevel@tonic-gate spec_segmap(
22477c478bd9Sstevel@tonic-gate 	dev_t dev,
22487c478bd9Sstevel@tonic-gate 	off_t off,
22497c478bd9Sstevel@tonic-gate 	struct as *as,
22507c478bd9Sstevel@tonic-gate 	caddr_t *addrp,
22517c478bd9Sstevel@tonic-gate 	off_t len,
22527c478bd9Sstevel@tonic-gate 	uint_t prot,
22537c478bd9Sstevel@tonic-gate 	uint_t maxprot,
22547c478bd9Sstevel@tonic-gate 	uint_t flags,
22557c478bd9Sstevel@tonic-gate 	struct cred *cred)
22567c478bd9Sstevel@tonic-gate {
22577c478bd9Sstevel@tonic-gate 	struct segdev_crargs dev_a;
22587c478bd9Sstevel@tonic-gate 	int (*mapfunc)(dev_t dev, off_t off, int prot);
22597c478bd9Sstevel@tonic-gate 	size_t i;
22607c478bd9Sstevel@tonic-gate 	int	error;
22617c478bd9Sstevel@tonic-gate 
22627c478bd9Sstevel@tonic-gate 	if ((mapfunc = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap) == nodev)
22637c478bd9Sstevel@tonic-gate 		return (ENODEV);
22647c478bd9Sstevel@tonic-gate 	TRACE_4(TR_FAC_SPECFS, TR_SPECFS_SEGMAP,
2265e099bf07Scth 	    "specfs segmap:dev %x as %p len %lx prot %x",
2266e099bf07Scth 	    dev, as, len, prot);
22677c478bd9Sstevel@tonic-gate 
22687c478bd9Sstevel@tonic-gate 	/*
22697c478bd9Sstevel@tonic-gate 	 * Character devices that support the d_mmap
22707c478bd9Sstevel@tonic-gate 	 * interface can only be mmap'ed shared.
22717c478bd9Sstevel@tonic-gate 	 */
22727c478bd9Sstevel@tonic-gate 	if ((flags & MAP_TYPE) != MAP_SHARED)
22737c478bd9Sstevel@tonic-gate 		return (EINVAL);
22747c478bd9Sstevel@tonic-gate 
22757c478bd9Sstevel@tonic-gate 	/*
22767c478bd9Sstevel@tonic-gate 	 * Check to ensure that the entire range is
22777c478bd9Sstevel@tonic-gate 	 * legal and we are not trying to map in
22787c478bd9Sstevel@tonic-gate 	 * more than the device will let us.
22797c478bd9Sstevel@tonic-gate 	 */
22807c478bd9Sstevel@tonic-gate 	for (i = 0; i < len; i += PAGESIZE) {
22817c478bd9Sstevel@tonic-gate 		if (cdev_mmap(mapfunc, dev, off + i, maxprot) == -1)
22827c478bd9Sstevel@tonic-gate 			return (ENXIO);
22837c478bd9Sstevel@tonic-gate 	}
22847c478bd9Sstevel@tonic-gate 
22857c478bd9Sstevel@tonic-gate 	as_rangelock(as);
228660946fe0Smec 	/* Pick an address w/o worrying about any vac alignment constraints. */
228760946fe0Smec 	error = choose_addr(as, addrp, len, off, ADDR_NOVACALIGN, flags);
228860946fe0Smec 	if (error != 0) {
228960946fe0Smec 		as_rangeunlock(as);
229060946fe0Smec 		return (error);
22917c478bd9Sstevel@tonic-gate 	}
22927c478bd9Sstevel@tonic-gate 
22937c478bd9Sstevel@tonic-gate 	dev_a.mapfunc = mapfunc;
22947c478bd9Sstevel@tonic-gate 	dev_a.dev = dev;
22957c478bd9Sstevel@tonic-gate 	dev_a.offset = off;
22967c478bd9Sstevel@tonic-gate 	dev_a.prot = (uchar_t)prot;
22977c478bd9Sstevel@tonic-gate 	dev_a.maxprot = (uchar_t)maxprot;
22987c478bd9Sstevel@tonic-gate 	dev_a.hat_flags = 0;
22997c478bd9Sstevel@tonic-gate 	dev_a.hat_attr = 0;
23007c478bd9Sstevel@tonic-gate 	dev_a.devmap_data = NULL;
23017c478bd9Sstevel@tonic-gate 
23027c478bd9Sstevel@tonic-gate 	error = as_map(as, *addrp, len, segdev_create, &dev_a);
23037c478bd9Sstevel@tonic-gate 	as_rangeunlock(as);
23047c478bd9Sstevel@tonic-gate 	return (error);
23057c478bd9Sstevel@tonic-gate }
23067c478bd9Sstevel@tonic-gate 
/*
 * Establish a mapping for a character special file, letting the driver
 * choose the segment mechanism.  Preference order: the driver's
 * cb_segmap entry point; else, if the driver only provides devmap
 * (or flags D_DEVMAP), devmap_setup() with default callback settings;
 * else a traditional d_mmap entry handled by spec_segmap().
 */
int
spec_char_map(
	dev_t dev,
	offset_t off,
	struct as *as,
	caddr_t *addrp,
	size_t len,
	uchar_t prot,
	uchar_t maxprot,
	uint_t flags,
	struct cred *cred)
{
	int error = 0;
	major_t maj = getmajor(dev);
	int map_flag;
	int (*segmap)(dev_t, off_t, struct as *,
	    caddr_t *, off_t, uint_t, uint_t, uint_t, cred_t *);
	int (*devmap)(dev_t, devmap_cookie_t, offset_t,
	    size_t, size_t *, uint_t);
	int (*mmap)(dev_t dev, off_t off, int prot);

	/*
	 * Character device: let the device driver
	 * pick the appropriate segment driver.
	 *
	 * 4.x compat.: allow 'NULL' cb_segmap => spec_segmap
	 * Kindness: allow 'nulldev' cb_segmap => spec_segmap
	 */
	segmap = devopsp[maj]->devo_cb_ops->cb_segmap;
	if (segmap == NULL || segmap == nulldev || segmap == nodev) {
		mmap = devopsp[maj]->devo_cb_ops->cb_mmap;
		map_flag = devopsp[maj]->devo_cb_ops->cb_flag;

		/*
		 * Use old mmap framework if the driver has both mmap
		 * and devmap entry points.  This is to prevent the
		 * system from calling invalid devmap entry point
		 * for some drivers that might have put garbage in the
		 * devmap entry point.
		 */
		if ((map_flag & D_DEVMAP) || mmap == NULL ||
		    mmap == nulldev || mmap == nodev) {
			devmap = devopsp[maj]->devo_cb_ops->cb_devmap;

			/*
			 * If driver provides devmap entry point in
			 * cb_ops but not xx_segmap(9E), call
			 * devmap_setup with default settings
			 * (NULL) for callback_ops and driver
			 * callback private data
			 */
			if (devmap == nodev || devmap == NULL ||
			    devmap == nulldev)
				return (ENODEV);

			error = devmap_setup(dev, off, as, addrp,
			    len, prot, maxprot, flags, cred);

			return (error);
		} else
			segmap = spec_segmap;
	} else
		segmap = cdev_segmap;

	/* Dispatch through whichever segmap routine was selected above. */
	return ((*segmap)(dev, (off_t)off, as, addrp, len, prot,
	    maxprot, flags, cred));
}
23747c478bd9Sstevel@tonic-gate 
2375da6c28aaSamw /*ARGSUSED9*/
23767c478bd9Sstevel@tonic-gate static int
spec_map(struct vnode * vp,offset_t off,struct as * as,caddr_t * addrp,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,struct cred * cred,caller_context_t * ct)23777c478bd9Sstevel@tonic-gate spec_map(
23787c478bd9Sstevel@tonic-gate 	struct vnode *vp,
23797c478bd9Sstevel@tonic-gate 	offset_t off,
23807c478bd9Sstevel@tonic-gate 	struct as *as,
23817c478bd9Sstevel@tonic-gate 	caddr_t *addrp,
23827c478bd9Sstevel@tonic-gate 	size_t len,
23837c478bd9Sstevel@tonic-gate 	uchar_t prot,
23847c478bd9Sstevel@tonic-gate 	uchar_t maxprot,
23857c478bd9Sstevel@tonic-gate 	uint_t flags,
2386da6c28aaSamw 	struct cred *cred,
2387da6c28aaSamw 	caller_context_t *ct)
23887c478bd9Sstevel@tonic-gate {
23897c478bd9Sstevel@tonic-gate 	int error = 0;
239025e8c5aaSvikram 	struct snode *sp = VTOS(vp);
23917c478bd9Sstevel@tonic-gate 
23927c478bd9Sstevel@tonic-gate 	if (vp->v_flag & VNOMAP)
23937c478bd9Sstevel@tonic-gate 		return (ENOSYS);
23947c478bd9Sstevel@tonic-gate 
239525e8c5aaSvikram 	/* fail map with ENXIO if the device is fenced off */
239625e8c5aaSvikram 	if (S_ISFENCED(sp))
239725e8c5aaSvikram 		return (ENXIO);
239825e8c5aaSvikram 
23997c478bd9Sstevel@tonic-gate 	/*
24007c478bd9Sstevel@tonic-gate 	 * If file is locked, fail mapping attempt.
24017c478bd9Sstevel@tonic-gate 	 */
24027c478bd9Sstevel@tonic-gate 	if (vn_has_flocks(vp))
24037c478bd9Sstevel@tonic-gate 		return (EAGAIN);
24047c478bd9Sstevel@tonic-gate 
24057c478bd9Sstevel@tonic-gate 	if (vp->v_type == VCHR) {
24067c478bd9Sstevel@tonic-gate 		return (spec_char_map(vp->v_rdev, off, as, addrp, len, prot,
24077c478bd9Sstevel@tonic-gate 		    maxprot, flags, cred));
24087c478bd9Sstevel@tonic-gate 	} else if (vp->v_type == VBLK) {
24097c478bd9Sstevel@tonic-gate 		struct segvn_crargs vn_a;
24107c478bd9Sstevel@tonic-gate 		struct vnode *cvp;
24117c478bd9Sstevel@tonic-gate 		struct snode *sp;
24127c478bd9Sstevel@tonic-gate 
24137c478bd9Sstevel@tonic-gate 		/*
24147c478bd9Sstevel@tonic-gate 		 * Block device, use segvn mapping to the underlying commonvp
24157c478bd9Sstevel@tonic-gate 		 * for pages.
24167c478bd9Sstevel@tonic-gate 		 */
24177c478bd9Sstevel@tonic-gate 		if (off > spec_maxoffset(vp))
24180dee76a0Speterte 			return (ENXIO);
24197c478bd9Sstevel@tonic-gate 
24207c478bd9Sstevel@tonic-gate 		sp = VTOS(vp);
24217c478bd9Sstevel@tonic-gate 		cvp = sp->s_commonvp;
24227c478bd9Sstevel@tonic-gate 		ASSERT(cvp != NULL);
24237c478bd9Sstevel@tonic-gate 
2424ae115bc7Smrj 		if (off < 0 || ((offset_t)(off + len) < 0))
24250dee76a0Speterte 			return (ENXIO);
24267c478bd9Sstevel@tonic-gate 
24277c478bd9Sstevel@tonic-gate 		as_rangelock(as);
242860946fe0Smec 		error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
242960946fe0Smec 		if (error != 0) {
243060946fe0Smec 			as_rangeunlock(as);
243160946fe0Smec 			return (error);
24327c478bd9Sstevel@tonic-gate 		}
24337c478bd9Sstevel@tonic-gate 
24347c478bd9Sstevel@tonic-gate 		vn_a.vp = cvp;
24357c478bd9Sstevel@tonic-gate 		vn_a.offset = off;
24367c478bd9Sstevel@tonic-gate 		vn_a.type = flags & MAP_TYPE;
24377c478bd9Sstevel@tonic-gate 		vn_a.prot = (uchar_t)prot;
24387c478bd9Sstevel@tonic-gate 		vn_a.maxprot = (uchar_t)maxprot;
24397c478bd9Sstevel@tonic-gate 		vn_a.flags = flags & ~MAP_TYPE;
24407c478bd9Sstevel@tonic-gate 		vn_a.cred = cred;
24417c478bd9Sstevel@tonic-gate 		vn_a.amp = NULL;
24427c478bd9Sstevel@tonic-gate 		vn_a.szc = 0;
24437c478bd9Sstevel@tonic-gate 		vn_a.lgrp_mem_policy_flags = 0;
24447c478bd9Sstevel@tonic-gate 
24457c478bd9Sstevel@tonic-gate 		error = as_map(as, *addrp, len, segvn_create, &vn_a);
24467c478bd9Sstevel@tonic-gate 		as_rangeunlock(as);
24477c478bd9Sstevel@tonic-gate 	} else
24487c478bd9Sstevel@tonic-gate 		return (ENODEV);
24497c478bd9Sstevel@tonic-gate 
24507c478bd9Sstevel@tonic-gate 	return (error);
24517c478bd9Sstevel@tonic-gate }
24527c478bd9Sstevel@tonic-gate 
24537c478bd9Sstevel@tonic-gate /*ARGSUSED1*/
24547c478bd9Sstevel@tonic-gate static int
spec_addmap(struct vnode * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,struct cred * cred,caller_context_t * ct)24557c478bd9Sstevel@tonic-gate spec_addmap(
24567c478bd9Sstevel@tonic-gate 	struct vnode *vp,	/* the common vnode */
24577c478bd9Sstevel@tonic-gate 	offset_t off,
24587c478bd9Sstevel@tonic-gate 	struct as *as,
24597c478bd9Sstevel@tonic-gate 	caddr_t addr,
24607c478bd9Sstevel@tonic-gate 	size_t len,		/* how many bytes to add */
24617c478bd9Sstevel@tonic-gate 	uchar_t prot,
24627c478bd9Sstevel@tonic-gate 	uchar_t maxprot,
24637c478bd9Sstevel@tonic-gate 	uint_t flags,
2464da6c28aaSamw 	struct cred *cred,
2465da6c28aaSamw 	caller_context_t *ct)
24667c478bd9Sstevel@tonic-gate {
24677c478bd9Sstevel@tonic-gate 	int error = 0;
24687c478bd9Sstevel@tonic-gate 	struct snode *csp = VTOS(vp);
24697c478bd9Sstevel@tonic-gate 	ulong_t npages;
24707c478bd9Sstevel@tonic-gate 
24717c478bd9Sstevel@tonic-gate 	ASSERT(vp != NULL && VTOS(vp)->s_commonvp == vp);
24727c478bd9Sstevel@tonic-gate 
24737c478bd9Sstevel@tonic-gate 	/*
24747c478bd9Sstevel@tonic-gate 	 * XXX	Given the above assertion, this might not
24757c478bd9Sstevel@tonic-gate 	 *	be a particularly sensible thing to test.
24767c478bd9Sstevel@tonic-gate 	 */
24777c478bd9Sstevel@tonic-gate 	if (vp->v_flag & VNOMAP)
24787c478bd9Sstevel@tonic-gate 		return (ENOSYS);
24797c478bd9Sstevel@tonic-gate 
248025e8c5aaSvikram 	/* fail with EIO if the device is fenced off */
248125e8c5aaSvikram 	if (S_ISFENCED(csp))
248225e8c5aaSvikram 		return (EIO);
248325e8c5aaSvikram 
24847c478bd9Sstevel@tonic-gate 	npages = btopr(len);
24857c478bd9Sstevel@tonic-gate 	LOCK_CSP(csp);
24867c478bd9Sstevel@tonic-gate 	csp->s_mapcnt += npages;
24877c478bd9Sstevel@tonic-gate 
24887c478bd9Sstevel@tonic-gate 	UNLOCK_CSP(csp);
24897c478bd9Sstevel@tonic-gate 	return (error);
24907c478bd9Sstevel@tonic-gate }
24917c478bd9Sstevel@tonic-gate 
/*
 * VOP_DELMAP() for specfs: undo spec_addmap() accounting.  Decrements
 * s_mapcnt by the number of pages being unmapped and, when the mapping
 * count reaches zero and no other opens remain (s_count == 0), performs
 * the deferred last close of the device via device_close().
 *
 * The locking below is order-sensitive: s_lock is dropped around
 * device_close() and reacquired afterwards, and UNLOCK_CSP_LOCK_HELD()
 * is used because s_lock is still held at that point.  Do not reorder.
 */
/*ARGSUSED1*/
static int
spec_delmap(
	struct vnode *vp,	/* the common vnode */
	offset_t off,
	struct as *as,
	caddr_t addr,
	size_t len,		/* how many bytes to take away */
	uint_t prot,
	uint_t maxprot,
	uint_t flags,
	struct cred *cred,
	caller_context_t *ct)
{
	struct snode *csp = VTOS(vp);
	ulong_t npages;
	long mcnt;

	/* segdev passes us the common vp */

	ASSERT(vp != NULL && VTOS(vp)->s_commonvp == vp);

	/* allow delmap to succeed even if device fenced off */

	/*
	 * XXX	Given the above assertion, this might not
	 *	be a particularly sensible thing to test..
	 */
	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	npages = btopr(len);

	/* NOTE(review): LOCK_CSP appears distinct from s_lock — confirm */
	LOCK_CSP(csp);
	mutex_enter(&csp->s_lock);
	mcnt = (csp->s_mapcnt -= npages);

	if (mcnt == 0) {
		/*
		 * Call the close routine when the last reference of any
		 * kind through any [s, v]node goes away.  The s_dip hold
		 * on the devinfo node is released when the vnode is
		 * destroyed.
		 */
		if (csp->s_count == 0) {
			csp->s_flag &= ~(SNEEDCLOSE | SSIZEVALID);

			/* See comment in spec_close() */
			if (csp->s_flag & (SCLONE | SSELFCLONE))
				csp->s_flag &= ~SDIPSET;

			/* drop s_lock across the (possibly blocking) close */
			mutex_exit(&csp->s_lock);

			/* best-effort close; the return value is ignored */
			(void) device_close(vp, 0, cred);
		} else
			mutex_exit(&csp->s_lock);

		/* reacquire: UNLOCK_CSP_LOCK_HELD below expects s_lock held */
		mutex_enter(&csp->s_lock);
	}
	/* the map count must never go negative */
	ASSERT(mcnt >= 0);

	UNLOCK_CSP_LOCK_HELD(csp);
	mutex_exit(&csp->s_lock);

	return (0);
}
25587c478bd9Sstevel@tonic-gate 
2559da6c28aaSamw /*ARGSUSED4*/
25607c478bd9Sstevel@tonic-gate static int
spec_dump(struct vnode * vp,caddr_t addr,offset_t bn,offset_t count,caller_context_t * ct)2561da6c28aaSamw spec_dump(
2562da6c28aaSamw 	struct vnode *vp,
2563da6c28aaSamw 	caddr_t addr,
2564d7334e51Srm 	offset_t bn,
2565d7334e51Srm 	offset_t count,
2566da6c28aaSamw 	caller_context_t *ct)
25677c478bd9Sstevel@tonic-gate {
256825e8c5aaSvikram 	/* allow dump to succeed even if device fenced off */
256925e8c5aaSvikram 
25707c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_type == VBLK);
2571d7334e51Srm 	return (bdev_dump(vp->v_rdev, addr, (daddr_t)bn, (int)count));
25727c478bd9Sstevel@tonic-gate }
25737c478bd9Sstevel@tonic-gate 
25747c478bd9Sstevel@tonic-gate 
25757c478bd9Sstevel@tonic-gate /*
25767c478bd9Sstevel@tonic-gate  * Do i/o on the given page list from/to vp, io_off for io_len.
25777c478bd9Sstevel@tonic-gate  * Flags are composed of:
2578*78a2e113SAndy Fiddaman  *	{B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_READ, B_WRITE}
25797c478bd9Sstevel@tonic-gate  * If B_ASYNC is not set i/o is waited for.
25807c478bd9Sstevel@tonic-gate  */
25817c478bd9Sstevel@tonic-gate /*ARGSUSED5*/
25827c478bd9Sstevel@tonic-gate static int
spec_pageio(struct vnode * vp,page_t * pp,u_offset_t io_off,size_t io_len,int flags,cred_t * cr,caller_context_t * ct)25837c478bd9Sstevel@tonic-gate spec_pageio(
25847c478bd9Sstevel@tonic-gate 	struct vnode *vp,
25857c478bd9Sstevel@tonic-gate 	page_t	*pp,
25867c478bd9Sstevel@tonic-gate 	u_offset_t io_off,
25877c478bd9Sstevel@tonic-gate 	size_t	io_len,
25887c478bd9Sstevel@tonic-gate 	int	flags,
2589da6c28aaSamw 	cred_t	*cr,
2590da6c28aaSamw 	caller_context_t *ct)
25917c478bd9Sstevel@tonic-gate {
25927c478bd9Sstevel@tonic-gate 	struct buf *bp = NULL;
25937c478bd9Sstevel@tonic-gate 	int err = 0;
25947c478bd9Sstevel@tonic-gate 
25957c478bd9Sstevel@tonic-gate 	if (pp == NULL)
25967c478bd9Sstevel@tonic-gate 		return (EINVAL);
25977c478bd9Sstevel@tonic-gate 
25987c478bd9Sstevel@tonic-gate 	bp = spec_startio(vp, pp, io_off, io_len, flags);
25997c478bd9Sstevel@tonic-gate 
26007c478bd9Sstevel@tonic-gate 	/*
26017c478bd9Sstevel@tonic-gate 	 * Wait for i/o to complete if the request is not B_ASYNC.
26027c478bd9Sstevel@tonic-gate 	 */
26037c478bd9Sstevel@tonic-gate 	if ((flags & B_ASYNC) == 0) {
26047c478bd9Sstevel@tonic-gate 		err = biowait(bp);
26057c478bd9Sstevel@tonic-gate 		pageio_done(bp);
26067c478bd9Sstevel@tonic-gate 	}
26077c478bd9Sstevel@tonic-gate 	return (err);
26087c478bd9Sstevel@tonic-gate }
26097c478bd9Sstevel@tonic-gate 
26107c478bd9Sstevel@tonic-gate /*
26117c478bd9Sstevel@tonic-gate  * Set ACL on underlying vnode if one exists, or return ENOSYS otherwise.
26127c478bd9Sstevel@tonic-gate  */
26137c478bd9Sstevel@tonic-gate int
spec_setsecattr(struct vnode * vp,vsecattr_t * vsap,int flag,struct cred * cr,caller_context_t * ct)2614da6c28aaSamw spec_setsecattr(
2615da6c28aaSamw 	struct vnode *vp,
2616da6c28aaSamw 	vsecattr_t *vsap,
2617da6c28aaSamw 	int flag,
2618da6c28aaSamw 	struct cred *cr,
2619da6c28aaSamw 	caller_context_t *ct)
26207c478bd9Sstevel@tonic-gate {
26217c478bd9Sstevel@tonic-gate 	struct vnode *realvp;
26227c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
26237c478bd9Sstevel@tonic-gate 	int error;
26247c478bd9Sstevel@tonic-gate 
262525e8c5aaSvikram 	/* fail with ENXIO if the device is fenced off */
262625e8c5aaSvikram 	if (S_ISFENCED(sp))
262725e8c5aaSvikram 		return (ENXIO);
262825e8c5aaSvikram 
26297c478bd9Sstevel@tonic-gate 	/*
26307c478bd9Sstevel@tonic-gate 	 * The acl(2) system calls VOP_RWLOCK on the file before setting an
26317c478bd9Sstevel@tonic-gate 	 * ACL, but since specfs does not serialize reads and writes, this
26327c478bd9Sstevel@tonic-gate 	 * VOP does not do anything.  However, some backing file systems may
26337c478bd9Sstevel@tonic-gate 	 * expect the lock to be held before setting an ACL, so it is taken
26347c478bd9Sstevel@tonic-gate 	 * here privately to avoid serializing specfs reads and writes.
26357c478bd9Sstevel@tonic-gate 	 */
26367c478bd9Sstevel@tonic-gate 	if ((realvp = sp->s_realvp) != NULL) {
2637da6c28aaSamw 		(void) VOP_RWLOCK(realvp, V_WRITELOCK_TRUE, ct);
2638da6c28aaSamw 		error = VOP_SETSECATTR(realvp, vsap, flag, cr, ct);
2639da6c28aaSamw 		(void) VOP_RWUNLOCK(realvp, V_WRITELOCK_TRUE, ct);
26407c478bd9Sstevel@tonic-gate 		return (error);
26417c478bd9Sstevel@tonic-gate 	} else
26427c478bd9Sstevel@tonic-gate 		return (fs_nosys());
26437c478bd9Sstevel@tonic-gate }
26447c478bd9Sstevel@tonic-gate 
26457c478bd9Sstevel@tonic-gate /*
26467c478bd9Sstevel@tonic-gate  * Get ACL from underlying vnode if one exists, or fabricate it from
26477c478bd9Sstevel@tonic-gate  * the permissions returned by spec_getattr() otherwise.
26487c478bd9Sstevel@tonic-gate  */
26497c478bd9Sstevel@tonic-gate int
spec_getsecattr(struct vnode * vp,vsecattr_t * vsap,int flag,struct cred * cr,caller_context_t * ct)2650da6c28aaSamw spec_getsecattr(
2651da6c28aaSamw 	struct vnode *vp,
2652da6c28aaSamw 	vsecattr_t *vsap,
2653da6c28aaSamw 	int flag,
2654da6c28aaSamw 	struct cred *cr,
2655da6c28aaSamw 	caller_context_t *ct)
26567c478bd9Sstevel@tonic-gate {
26577c478bd9Sstevel@tonic-gate 	struct vnode *realvp;
26587c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
26597c478bd9Sstevel@tonic-gate 
266025e8c5aaSvikram 	/* fail with ENXIO if the device is fenced off */
266125e8c5aaSvikram 	if (S_ISFENCED(sp))
266225e8c5aaSvikram 		return (ENXIO);
266325e8c5aaSvikram 
26647c478bd9Sstevel@tonic-gate 	if ((realvp = sp->s_realvp) != NULL)
2665da6c28aaSamw 		return (VOP_GETSECATTR(realvp, vsap, flag, cr, ct));
26667c478bd9Sstevel@tonic-gate 	else
2667da6c28aaSamw 		return (fs_fab_acl(vp, vsap, flag, cr, ct));
26687c478bd9Sstevel@tonic-gate }
26697c478bd9Sstevel@tonic-gate 
26707c478bd9Sstevel@tonic-gate int
spec_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)2671da6c28aaSamw spec_pathconf(
2672da6c28aaSamw 	vnode_t *vp,
2673da6c28aaSamw 	int cmd,
2674da6c28aaSamw 	ulong_t *valp,
2675da6c28aaSamw 	cred_t *cr,
2676da6c28aaSamw 	caller_context_t *ct)
26777c478bd9Sstevel@tonic-gate {
26787c478bd9Sstevel@tonic-gate 	vnode_t *realvp;
26797c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
26807c478bd9Sstevel@tonic-gate 
268125e8c5aaSvikram 	/* fail with ENXIO if the device is fenced off */
268225e8c5aaSvikram 	if (S_ISFENCED(sp))
268325e8c5aaSvikram 		return (ENXIO);
268425e8c5aaSvikram 
26857c478bd9Sstevel@tonic-gate 	if ((realvp = sp->s_realvp) != NULL)
2686da6c28aaSamw 		return (VOP_PATHCONF(realvp, cmd, valp, cr, ct));
26877c478bd9Sstevel@tonic-gate 	else
2688da6c28aaSamw 		return (fs_pathconf(vp, cmd, valp, cr, ct));
26897c478bd9Sstevel@tonic-gate }
2690