1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2012 by Delphix. All rights reserved.
27  */
28 
29 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
30 /*	  All Rights Reserved  	*/
31 
32 /*
33  * University Copyright- Copyright (c) 1982, 1986, 1988
34  * The Regents of the University of California
35  * All Rights Reserved
36  *
37  * University Acknowledgment- Portions of this document are derived from
38  * software developed by the University of California, Berkeley, and its
39  * contributors.
40  */
41 
42 
43 #include <sys/types.h>
44 #include <sys/t_lock.h>
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/buf.h>
48 #include <sys/conf.h>
49 #include <sys/cred.h>
50 #include <sys/kmem.h>
51 #include <sys/sysmacros.h>
52 #include <sys/vfs.h>
53 #include <sys/vfs_opreg.h>
54 #include <sys/vnode.h>
55 #include <sys/fs/snode.h>
56 #include <sys/fs/fifonode.h>
57 #include <sys/debug.h>
58 #include <sys/errno.h>
59 #include <sys/time.h>
60 #include <sys/file.h>
61 #include <sys/open.h>
62 #include <sys/user.h>
63 #include <sys/termios.h>
64 #include <sys/stream.h>
65 #include <sys/strsubr.h>
66 #include <sys/autoconf.h>
67 #include <sys/esunddi.h>
68 #include <sys/flock.h>
69 #include <sys/modctl.h>
70 
/* vfs assigned as v_vfsp for common snodes and makespecvp() nodes. */
struct vfs spec_vfs;
/* Device number built in specinit(); used as the default s_fsid. */
static dev_t specdev;
/* kmem cache that all snodes are allocated from; created in specinit(). */
struct kmem_cache *snode_cache;
/* NOTE(review): not referenced in the visible code; presumably a debug knob. */
int spec_debug = 0;

/* Hash table helpers; each asserts that stable_lock is held. */
static struct snode *sfind(dev_t, vtype_t, struct vnode *);
static struct vnode *get_cvp(dev_t, vtype_t, struct snode *, int *);
static void sinsert(struct snode *);
79 
80 struct vnode *
81 specvp_devfs(
82 	struct vnode	*realvp,
83 	dev_t		dev,
84 	vtype_t		vtyp,
85 	struct cred	*cr,
86 	dev_info_t	*dip)
87 {
88 	struct vnode	*vp;
89 
90 	ASSERT(realvp && dip);
91 	vp = specvp(realvp, dev, vtyp, cr);
92 	ASSERT(vp);
93 
94 	/* associate a dip hold with the common snode's s_dip pointer */
95 	spec_assoc_vp_with_devi(vp, dip);
96 	return (vp);
97 }
98 
99 /*
100  * Return a shadow special vnode for the given dev.
101  * If no snode exists for this dev create one and put it
102  * in a table hashed by <dev, realvp>.  If the snode for
103  * this dev is already in the table return it (ref count is
104  * incremented by sfind).  The snode will be flushed from the
105  * table when spec_inactive calls sdelete.
106  *
107  * The fsid is inherited from the real vnode so that clones
108  * can be found.
109  *
110  */
111 struct vnode *
112 specvp(
113 	struct vnode	*vp,
114 	dev_t		dev,
115 	vtype_t		type,
116 	struct cred	*cr)
117 {
118 	struct snode *sp;
119 	struct snode *nsp;
120 	struct snode *csp;
121 	struct vnode *svp;
122 	struct vattr va;
123 	int	rc;
124 	int	used_csp = 0;		/* Did we use pre-allocated csp */
125 
126 	if (vp == NULL)
127 		return (NULL);
128 	if (vp->v_type == VFIFO)
129 		return (fifovp(vp, cr));
130 
131 	ASSERT(vp->v_type == type);
132 	ASSERT(vp->v_rdev == dev);
133 
134 	/*
135 	 * Pre-allocate snodes before holding any locks in case we block
136 	 */
137 	nsp = kmem_cache_alloc(snode_cache, KM_SLEEP);
138 	csp = kmem_cache_alloc(snode_cache, KM_SLEEP);
139 
140 	/*
141 	 * Get the time attributes outside of the stable lock since
142 	 * this operation may block. Unfortunately, it may not have
143 	 * been required if the snode is in the cache.
144 	 */
145 	va.va_mask = AT_FSID | AT_TIMES;
146 	rc = VOP_GETATTR(vp, &va, 0, cr, NULL);	/* XXX may block! */
147 
148 	mutex_enter(&stable_lock);
149 	if ((sp = sfind(dev, type, vp)) == NULL) {
150 		struct vnode *cvp;
151 
152 		sp = nsp;	/* Use pre-allocated snode */
153 		svp = STOV(sp);
154 
155 		sp->s_realvp	= vp;
156 		VN_HOLD(vp);
157 		sp->s_commonvp	= NULL;
158 		sp->s_dev	= dev;
159 		sp->s_dip	= NULL;
160 		sp->s_nextr	= NULL;
161 		sp->s_list	= NULL;
162 		sp->s_plcy	= NULL;
163 		sp->s_size	= 0;
164 		sp->s_flag	= 0;
165 		if (rc == 0) {
166 			/*
167 			 * Set times in snode to those in the vnode.
168 			 */
169 			sp->s_fsid = va.va_fsid;
170 			sp->s_atime = va.va_atime.tv_sec;
171 			sp->s_mtime = va.va_mtime.tv_sec;
172 			sp->s_ctime = va.va_ctime.tv_sec;
173 		} else {
174 			sp->s_fsid = specdev;
175 			sp->s_atime = 0;
176 			sp->s_mtime = 0;
177 			sp->s_ctime = 0;
178 		}
179 		sp->s_count	= 0;
180 		sp->s_mapcnt	= 0;
181 
182 		vn_reinit(svp);
183 		svp->v_flag	= (vp->v_flag & VROOT);
184 		svp->v_vfsp	= vp->v_vfsp;
185 		VFS_HOLD(svp->v_vfsp);
186 		svp->v_type	= type;
187 		svp->v_rdev	= dev;
188 		(void) vn_copypath(vp, svp);
189 		if (type == VBLK || type == VCHR) {
190 			cvp = get_cvp(dev, type, csp, &used_csp);
191 			svp->v_stream = cvp->v_stream;
192 
193 			sp->s_commonvp = cvp;
194 		}
195 		vn_exists(svp);
196 		sinsert(sp);
197 		mutex_exit(&stable_lock);
198 		if (used_csp == 0) {
199 			/* Didn't use pre-allocated snode so free it */
200 			kmem_cache_free(snode_cache, csp);
201 		}
202 	} else {
203 		mutex_exit(&stable_lock);
204 		/* free unused snode memory */
205 		kmem_cache_free(snode_cache, nsp);
206 		kmem_cache_free(snode_cache, csp);
207 	}
208 	return (STOV(sp));
209 }
210 
211 /*
212  * Return a special vnode for the given dev; no vnode is supplied
213  * for it to shadow.  Always create a new snode and put it in the
214  * table hashed by <dev, NULL>.  The snode will be flushed from the
215  * table when spec_inactive() calls sdelete().  The association of
 * this node with an attached instance of hardware is not made until
217  * spec_open time.
218  *
219  * N.B. Assumes caller takes on responsibility of making sure no one
220  * else is creating a snode for (dev, type) at this time.
221  */
222 struct vnode *
223 makespecvp(dev_t dev, vtype_t type)
224 {
225 	struct snode *sp;
226 	struct vnode *svp, *cvp;
227 	time_t now;
228 
229 	sp = kmem_cache_alloc(snode_cache, KM_SLEEP);
230 	svp = STOV(sp);
231 	cvp = commonvp(dev, type);
232 	now = gethrestime_sec();
233 
234 	sp->s_realvp	= NULL;
235 	sp->s_commonvp	= cvp;
236 	sp->s_dev	= dev;
237 	sp->s_dip	= NULL;
238 	sp->s_nextr	= NULL;
239 	sp->s_list	= NULL;
240 	sp->s_plcy	= NULL;
241 	sp->s_size	= 0;
242 	sp->s_flag	= 0;
243 	sp->s_fsid	= specdev;
244 	sp->s_atime	= now;
245 	sp->s_mtime	= now;
246 	sp->s_ctime	= now;
247 	sp->s_count	= 0;
248 	sp->s_mapcnt	= 0;
249 
250 	vn_reinit(svp);
251 	svp->v_vfsp	= &spec_vfs;
252 	svp->v_stream	= cvp->v_stream;
253 	svp->v_type	= type;
254 	svp->v_rdev	= dev;
255 
256 	vn_exists(svp);
257 	mutex_enter(&stable_lock);
258 	sinsert(sp);
259 	mutex_exit(&stable_lock);
260 
261 	return (svp);
262 }
263 
264 
265 /*
266  * This function is called from spec_assoc_vp_with_devi(). That function
267  * associates a "new" dip with a common snode, releasing (any) old dip
268  * in the process. This function (spec_assoc_fence()) looks at the "new dip"
 * and determines whether the snode should be fenced off or not. As the table
270  * below indicates, the value of old-dip is a don't care for all cases.
271  *
272  * old-dip	new-dip		common-snode
273  * =========================================
274  * Don't care	NULL		unfence
275  * Don't care	retired		fence
276  * Don't care	not-retired	unfence
277  *
278  * Since old-dip value is a "don't care", it is not passed into this function.
279  */
280 static void
281 spec_assoc_fence(dev_info_t *ndip, vnode_t *vp)
282 {
283 	int		fence;
284 	struct snode	*csp;
285 
286 	ASSERT(vp);
287 	ASSERT(vn_matchops(vp, spec_getvnodeops()));
288 
289 	fence = 0;
290 	if (ndip != NULL) {
291 		mutex_enter(&DEVI(ndip)->devi_lock);
292 		if (DEVI(ndip)->devi_flags & DEVI_RETIRED)
293 			fence = 1;
294 		mutex_exit(&DEVI(ndip)->devi_lock);
295 	}
296 
297 	csp = VTOCS(vp);
298 	ASSERT(csp);
299 
300 	/* SFENCED flag only set on common snode */
301 	mutex_enter(&csp->s_lock);
302 	if (fence)
303 		csp->s_flag |= SFENCED;
304 	else
305 		csp->s_flag &= ~SFENCED;
306 	mutex_exit(&csp->s_lock);
307 
308 	FENDBG((CE_NOTE, "%sfenced common snode (%p) for new dip=%p",
309 	    fence ? "" : "un", (void *)csp, (void *)ndip));
310 }
311 
312 /*
313  * Associate the common snode with a devinfo node.  This is called from:
314  *
315  *   1) specvp_devfs to associate a specfs node with the dip attached
316  *	by devfs.
317  *
318  *   2) spec_open after path reconstruction and attach.
319  *
320  *   3) From dacf processing to associate a makespecvp node with
321  *	the dip that dacf postattach processing is being performed on.
322  *	This association is made prior to open to avoid recursion issues.
323  *
324  *   4) From ddi_assoc_queue_with_devi to change vnode association as part of
325  *	DL_ATTACH/DL_DETACH processing (SDIPSET already set).  The call
326  *	from ddi_assoc_queue_with_devi may specify a NULL dip.
327  *
328  * We put an extra hold on the devinfo node passed in as we establish it as
329  * the new s_dip pointer.  Any hold associated with the prior s_dip pointer
330  * is released. The new hold will stay active until another call to
331  * spec_assoc_vp_with_devi or until the common snode is destroyed by
332  * spec_inactive after the last VN_RELE of the common node. This devinfo hold
333  * transfers across a clone open except in the clone_dev case, where the clone
334  * driver is no longer required after open.
335  *
336  * When SDIPSET is set and s_dip is NULL, the vnode has an association with
337  * the driver even though there is currently no association with a specific
338  * hardware instance.
339  */
340 void
341 spec_assoc_vp_with_devi(struct vnode *vp, dev_info_t *dip)
342 {
343 	struct snode	*csp;
344 	dev_info_t	*olddip;
345 
346 	ASSERT(vp);
347 
348 	/*
349 	 * Don't establish a NULL association for a vnode associated with the
350 	 * clone driver.  The qassociate(, -1) call from a streams driver's
351 	 * open implementation to indicate support for qassociate has the
352 	 * side-effect of this type of spec_assoc_vp_with_devi call. This
353 	 * call should not change the the association of the pre-clone
354 	 * vnode associated with the clone driver, the post-clone newdev
355 	 * association will be established later by spec_clone().
356 	 */
357 	if ((dip == NULL) && (getmajor(vp->v_rdev) == clone_major))
358 		return;
359 
360 	/* hold the new */
361 	if (dip)
362 		e_ddi_hold_devi(dip);
363 
364 	csp = VTOS(VTOS(vp)->s_commonvp);
365 	mutex_enter(&csp->s_lock);
366 	olddip = csp->s_dip;
367 	csp->s_dip = dip;
368 	csp->s_flag |= SDIPSET;
369 
370 	/* If association changes then invalidate cached size */
371 	if (olddip != dip)
372 		csp->s_flag &= ~SSIZEVALID;
373 	mutex_exit(&csp->s_lock);
374 
375 	spec_assoc_fence(dip, vp);
376 
377 	/* release the old */
378 	if (olddip)
379 		ddi_release_devi(olddip);
380 }
381 
382 /*
383  * Return the held dip associated with the specified snode.
384  */
385 dev_info_t *
386 spec_hold_devi_by_vp(struct vnode *vp)
387 {
388 	struct snode	*csp;
389 	dev_info_t	*dip;
390 
391 	ASSERT(vn_matchops(vp, spec_getvnodeops()));
392 
393 	csp = VTOS(VTOS(vp)->s_commonvp);
394 	dip = csp->s_dip;
395 	if (dip)
396 		e_ddi_hold_devi(dip);
397 	return (dip);
398 }
399 
400 /*
401  * Find a special vnode that refers to the given device
402  * of the given type.  Never return a "common" vnode.
403  * Return NULL if a special vnode does not exist.
404  * HOLD the vnode before returning it.
405  */
406 struct vnode *
407 specfind(dev_t dev, vtype_t type)
408 {
409 	struct snode *st;
410 	struct vnode *nvp;
411 
412 	mutex_enter(&stable_lock);
413 	st = stable[STABLEHASH(dev)];
414 	while (st != NULL) {
415 		if (st->s_dev == dev) {
416 			nvp = STOV(st);
417 			if (nvp->v_type == type && st->s_commonvp != nvp) {
418 				VN_HOLD(nvp);
419 				mutex_exit(&stable_lock);
420 				return (nvp);
421 			}
422 		}
423 		st = st->s_next;
424 	}
425 	mutex_exit(&stable_lock);
426 	return (NULL);
427 }
428 
429 /*
430  * Loop through the snode cache looking for snodes referencing dip.
431  *
432  * This function determines if a devinfo node is "BUSY" from the perspective
433  * of having an active vnode associated with the device, which represents a
434  * dependency on the device's services.  This function is needed because a
435  * devinfo node can have a non-zero devi_ref and still NOT be "BUSY" when,
436  * for instance, the framework is manipulating the node (has an open
437  * ndi_hold_devi).
438  *
439  * Returns:
440  *	DEVI_REFERENCED		- if dip is referenced
441  *	DEVI_NOT_REFERENCED	- if dip is not referenced
442  */
443 int
444 devi_stillreferenced(dev_info_t *dip)
445 {
446 	struct snode	*sp;
447 	int		i;
448 
449 	/* if no hold then there can't be an snode with s_dip == dip */
450 	if (e_ddi_devi_holdcnt(dip) == 0)
451 		return (DEVI_NOT_REFERENCED);
452 
453 	mutex_enter(&stable_lock);
454 	for (i = 0; i < STABLESIZE; i++) {
455 		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
456 			if (sp->s_dip == dip) {
457 				mutex_exit(&stable_lock);
458 				return (DEVI_REFERENCED);
459 			}
460 		}
461 	}
462 	mutex_exit(&stable_lock);
463 	return (DEVI_NOT_REFERENCED);
464 }
465 
466 /*
467  * Given an snode, returns the open count and the dip
468  * associated with that snode
469  * Assumes the caller holds the appropriate locks
470  * to prevent snode and/or dip from going away.
471  * Returns:
472  *	-1	No associated dip
473  *	>= 0	Number of opens.
474  */
475 int
476 spec_devi_open_count(struct snode *sp, dev_info_t **dipp)
477 {
478 	dev_info_t *dip;
479 	uint_t count;
480 	struct vnode *vp;
481 
482 	ASSERT(sp);
483 	ASSERT(dipp);
484 
485 	vp = STOV(sp);
486 
487 	*dipp = NULL;
488 
489 	/*
490 	 * We are only interested in common snodes. Only common snodes
491 	 * get their s_count fields bumped up on opens.
492 	 */
493 	if (sp->s_commonvp != vp || (dip = sp->s_dip) == NULL)
494 		return (-1);
495 
496 	mutex_enter(&sp->s_lock);
497 	count = sp->s_count + sp->s_mapcnt;
498 	if (sp->s_flag & SLOCKED)
499 		count++;
500 	mutex_exit(&sp->s_lock);
501 
502 	*dipp = dip;
503 
504 	return (count);
505 }
506 
507 /*
508  * Given a device vnode, return the common
509  * vnode associated with it.
510  */
511 struct vnode *
512 common_specvp(struct vnode *vp)
513 {
514 	struct snode *sp;
515 
516 	if ((vp->v_type != VBLK) && (vp->v_type != VCHR) ||
517 	    !vn_matchops(vp, spec_getvnodeops()))
518 		return (vp);
519 	sp = VTOS(vp);
520 	return (sp->s_commonvp);
521 }
522 
523 /*
524  * Returns a special vnode for the given dev.  The vnode is the
525  * one which is "common" to all the snodes which represent the
526  * same device.
527  * Similar to commonvp() but doesn't acquire the stable_lock, and
528  * may use a pre-allocated snode provided by caller.
529  */
530 static struct vnode *
531 get_cvp(
532 	dev_t		dev,
533 	vtype_t		type,
534 	struct snode	*nsp,		/* pre-allocated snode */
535 	int		*used_nsp)	/* flag indicating if we use nsp */
536 {
537 	struct snode *sp;
538 	struct vnode *svp;
539 
540 	ASSERT(MUTEX_HELD(&stable_lock));
541 	if ((sp = sfind(dev, type, NULL)) == NULL) {
542 		sp = nsp;		/* Use pre-allocated snode */
543 		*used_nsp = 1;		/* return value */
544 		svp = STOV(sp);
545 
546 		sp->s_realvp	= NULL;
547 		sp->s_commonvp	= svp;		/* points to itself */
548 		sp->s_dev	= dev;
549 		sp->s_dip	= NULL;
550 		sp->s_nextr	= NULL;
551 		sp->s_list	= NULL;
552 		sp->s_plcy	= NULL;
553 		sp->s_size	= UNKNOWN_SIZE;
554 		sp->s_flag	= 0;
555 		sp->s_fsid	= specdev;
556 		sp->s_atime	= 0;
557 		sp->s_mtime	= 0;
558 		sp->s_ctime	= 0;
559 		sp->s_count	= 0;
560 		sp->s_mapcnt	= 0;
561 
562 		vn_reinit(svp);
563 		svp->v_vfsp	= &spec_vfs;
564 		svp->v_type	= type;
565 		svp->v_rdev	= dev;
566 		vn_exists(svp);
567 		sinsert(sp);
568 	} else
569 		*used_nsp = 0;
570 	return (STOV(sp));
571 }
572 
573 /*
574  * Returns a special vnode for the given dev.  The vnode is the
575  * one which is "common" to all the snodes which represent the
576  * same device.  For use ONLY by SPECFS.
577  */
578 struct vnode *
579 commonvp(dev_t dev, vtype_t type)
580 {
581 	struct snode *sp, *nsp;
582 	struct vnode *svp;
583 
584 	/* Pre-allocate snode in case we might block */
585 	nsp = kmem_cache_alloc(snode_cache, KM_SLEEP);
586 
587 	mutex_enter(&stable_lock);
588 	if ((sp = sfind(dev, type, NULL)) == NULL) {
589 		sp = nsp;		/* Use pre-alloced snode */
590 		svp = STOV(sp);
591 
592 		sp->s_realvp	= NULL;
593 		sp->s_commonvp	= svp;		/* points to itself */
594 		sp->s_dev	= dev;
595 		sp->s_dip	= NULL;
596 		sp->s_nextr	= NULL;
597 		sp->s_list	= NULL;
598 		sp->s_plcy	= NULL;
599 		sp->s_size	= UNKNOWN_SIZE;
600 		sp->s_flag	= 0;
601 		sp->s_fsid	= specdev;
602 		sp->s_atime	= 0;
603 		sp->s_mtime	= 0;
604 		sp->s_ctime	= 0;
605 		sp->s_count	= 0;
606 		sp->s_mapcnt	= 0;
607 
608 		vn_reinit(svp);
609 		svp->v_vfsp	= &spec_vfs;
610 		svp->v_type	= type;
611 		svp->v_rdev	= dev;
612 		vn_exists(svp);
613 		sinsert(sp);
614 		mutex_exit(&stable_lock);
615 	} else {
616 		mutex_exit(&stable_lock);
617 		/* Didn't need the pre-allocated snode */
618 		kmem_cache_free(snode_cache, nsp);
619 	}
620 	return (STOV(sp));
621 }
622 
623 /*
624  * Snode lookup stuff.
625  * These routines maintain a table of snodes hashed by dev so
 * that the snode for a dev can be found if it already exists.
627  */
/* Hash buckets: snode chains linked through s_next, indexed by STABLEHASH(). */
struct snode *stable[STABLESIZE];
/* Number of buckets in stable[]. */
int		stablesz = STABLESIZE;
/* Held while walking or modifying stable[] and its chains. */
kmutex_t	stable_lock;
631 
632 /*
633  * Put a snode in the table.
634  */
635 static void
636 sinsert(struct snode *sp)
637 {
638 	ASSERT(MUTEX_HELD(&stable_lock));
639 	sp->s_next = stable[STABLEHASH(sp->s_dev)];
640 	stable[STABLEHASH(sp->s_dev)] = sp;
641 }
642 
643 /*
644  * Remove an snode from the hash table.
645  * The realvp is not released here because spec_inactive() still
646  * needs it to do a spec_fsync().
647  */
648 void
649 sdelete(struct snode *sp)
650 {
651 	struct snode *st;
652 	struct snode *stprev = NULL;
653 
654 	ASSERT(MUTEX_HELD(&stable_lock));
655 	st = stable[STABLEHASH(sp->s_dev)];
656 	while (st != NULL) {
657 		if (st == sp) {
658 			if (stprev == NULL)
659 				stable[STABLEHASH(sp->s_dev)] = st->s_next;
660 			else
661 				stprev->s_next = st->s_next;
662 			break;
663 		}
664 		stprev = st;
665 		st = st->s_next;
666 	}
667 }
668 
669 /*
670  * Lookup an snode by <dev, type, vp>.
671  * ONLY looks for snodes with non-NULL s_realvp members and
672  * common snodes (with s_commonvp pointing to its vnode).
673  *
674  * If vp is NULL, only return commonvp. Otherwise return
675  * shadow vp with both shadow and common vp's VN_HELD.
676  */
677 static struct snode *
678 sfind(
679 	dev_t	dev,
680 	vtype_t	type,
681 	struct vnode *vp)
682 {
683 	struct snode *st;
684 	struct vnode *svp;
685 
686 	ASSERT(MUTEX_HELD(&stable_lock));
687 	st = stable[STABLEHASH(dev)];
688 	while (st != NULL) {
689 		svp = STOV(st);
690 		if (st->s_dev == dev && svp->v_type == type &&
691 		    VN_CMP(st->s_realvp, vp) &&
692 		    (vp != NULL || st->s_commonvp == svp) &&
693 		    (vp == NULL || st->s_realvp->v_vfsp == vp->v_vfsp)) {
694 			VN_HOLD(svp);
695 			return (st);
696 		}
697 		st = st->s_next;
698 	}
699 	return (NULL);
700 }
701 
702 /*
703  * Mark the accessed, updated, or changed times in an snode
704  * with the current time.
705  */
706 void
707 smark(struct snode *sp, int flag)
708 {
709 	time_t	now = gethrestime_sec();
710 
711 	/* check for change to avoid unnecessary locking */
712 	ASSERT((flag & ~(SACC|SUPD|SCHG)) == 0);
713 	if (((flag & sp->s_flag) != flag) ||
714 	    ((flag & SACC) && (sp->s_atime != now)) ||
715 	    ((flag & SUPD) && (sp->s_mtime != now)) ||
716 	    ((flag & SCHG) && (sp->s_ctime != now))) {
717 		/* lock and update */
718 		mutex_enter(&sp->s_lock);
719 		sp->s_flag |= flag;
720 		if (flag & SACC)
721 			sp->s_atime = now;
722 		if (flag & SUPD)
723 			sp->s_mtime = now;
724 		if (flag & SCHG)
725 			sp->s_ctime = now;
726 		mutex_exit(&sp->s_lock);
727 	}
728 }
729 
730 /*
731  * Return the maximum file offset permitted for this device.
732  * -1 means unrestricted.  SLOFFSET is associated with D_64BIT.
733  *
734  * On a 32-bit kernel this will limit:
735  *   o	D_64BIT devices to SPEC_MAXOFFSET_T.
736  *   o	non-D_64BIT character drivers to a 32-bit offset (MAXOFF_T).
737  */
738 offset_t
739 spec_maxoffset(struct vnode *vp)
740 {
741 	struct snode *sp = VTOS(vp);
742 	struct snode *csp = VTOS(sp->s_commonvp);
743 
744 	if (vp->v_stream)
745 		return ((offset_t)-1);
746 	else if (csp->s_flag & SANYOFFSET)	/* D_U64BIT */
747 		return ((offset_t)-1);
748 #ifdef _ILP32
749 	if (csp->s_flag & SLOFFSET)		/* D_64BIT */
750 		return (SPEC_MAXOFFSET_T);
751 #endif	/* _ILP32 */
752 	return (MAXOFF_T);
753 }
754 
755 /*ARGSUSED*/
756 static int
757 snode_constructor(void *buf, void *cdrarg, int kmflags)
758 {
759 	struct snode *sp = buf;
760 	struct vnode *vp;
761 
762 	vp = sp->s_vnode = vn_alloc(kmflags);
763 	if (vp == NULL) {
764 		return (-1);
765 	}
766 	vn_setops(vp, spec_getvnodeops());
767 	vp->v_data = sp;
768 
769 	mutex_init(&sp->s_lock, NULL, MUTEX_DEFAULT, NULL);
770 	cv_init(&sp->s_cv, NULL, CV_DEFAULT, NULL);
771 	return (0);
772 }
773 
774 /*ARGSUSED1*/
775 static void
776 snode_destructor(void *buf, void *cdrarg)
777 {
778 	struct snode *sp = buf;
779 	struct vnode *vp = STOV(sp);
780 
781 	mutex_destroy(&sp->s_lock);
782 	cv_destroy(&sp->s_cv);
783 
784 	vn_free(vp);
785 }
786 
787 
788 int
789 specinit(int fstype, char *name)
790 {
791 	static const fs_operation_def_t spec_vfsops_template[] = {
792 		VFSNAME_SYNC, { .vfs_sync = spec_sync },
793 		NULL, NULL
794 	};
795 	extern struct vnodeops *spec_vnodeops;
796 	extern const fs_operation_def_t spec_vnodeops_template[];
797 	struct vfsops *spec_vfsops;
798 	int error;
799 	dev_t dev;
800 
801 	/*
802 	 * Associate vfs and vnode operations.
803 	 */
804 	error = vfs_setfsops(fstype, spec_vfsops_template, &spec_vfsops);
805 	if (error != 0) {
806 		cmn_err(CE_WARN, "specinit: bad vfs ops template");
807 		return (error);
808 	}
809 
810 	error = vn_make_ops(name, spec_vnodeops_template, &spec_vnodeops);
811 	if (error != 0) {
812 		(void) vfs_freevfsops_by_type(fstype);
813 		cmn_err(CE_WARN, "specinit: bad vnode ops template");
814 		return (error);
815 	}
816 
817 	mutex_init(&stable_lock, NULL, MUTEX_DEFAULT, NULL);
818 	mutex_init(&spec_syncbusy, NULL, MUTEX_DEFAULT, NULL);
819 
820 	/*
821 	 * Create snode cache
822 	 */
823 	snode_cache = kmem_cache_create("snode_cache", sizeof (struct snode),
824 	    0, snode_constructor, snode_destructor, NULL, NULL, NULL, 0);
825 
826 	/*
827 	 * Associate vfs operations with spec_vfs
828 	 */
829 	VFS_INIT(&spec_vfs, spec_vfsops, (caddr_t)NULL);
830 	if ((dev = getudev()) == -1)
831 		dev = 0;
832 	specdev = makedevice(dev, 0);
833 	return (0);
834 }
835 
836 int
837 device_close(struct vnode *vp, int flag, struct cred *cr)
838 {
839 	struct snode *sp = VTOS(vp);
840 	enum vtype type = vp->v_type;
841 	struct vnode *cvp;
842 	dev_t dev;
843 	int error;
844 
845 	dev = sp->s_dev;
846 	cvp = sp->s_commonvp;
847 
848 	switch (type) {
849 
850 	case VCHR:
851 		if (vp->v_stream) {
852 			if (cvp->v_stream != NULL)
853 				error = strclose(cvp, flag, cr);
854 			vp->v_stream = NULL;
855 		} else
856 			error = dev_close(dev, flag, OTYP_CHR, cr);
857 		break;
858 
859 	case VBLK:
860 		/*
861 		 * On last close a block device we must
862 		 * invalidate any in-core blocks so that we
863 		 * can, for example, change floppy disks.
864 		 */
865 		(void) spec_putpage(cvp, (offset_t)0,
866 		    (size_t)0, B_INVAL|B_FORCE, cr, NULL);
867 		bflush(dev);
868 		binval(dev);
869 		error = dev_close(dev, flag, OTYP_BLK, cr);
870 		break;
871 	default:
872 		panic("device_close: not a device");
873 		/*NOTREACHED*/
874 	}
875 
876 	return (error);
877 }
878 
879 struct vnode *
880 makectty(vnode_t *ovp)
881 {
882 	vnode_t *vp;
883 
884 	if (vp = makespecvp(ovp->v_rdev, VCHR)) {
885 		struct snode *sp;
886 		struct snode *csp;
887 		struct vnode *cvp;
888 
889 		sp = VTOS(vp);
890 		cvp = sp->s_commonvp;
891 		csp = VTOS(cvp);
892 		mutex_enter(&csp->s_lock);
893 		csp->s_count++;
894 		mutex_exit(&csp->s_lock);
895 	}
896 
897 	return (vp);
898 }
899 
900 void
901 spec_snode_walk(int (*callback)(struct snode *sp, void *arg), void *arg)
902 {
903 	struct snode	*sp;
904 	int		i;
905 
906 	ASSERT(callback);
907 
908 	mutex_enter(&stable_lock);
909 	for (i = 0; i < STABLESIZE; i++) {
910 		for (sp = stable[i]; sp; sp = sp->s_next) {
911 			if (callback(sp, arg) != DDI_WALK_CONTINUE)
912 				goto out;
913 		}
914 	}
915 out:
916 	mutex_exit(&stable_lock);
917 }
918 
919 int
920 spec_is_clone(vnode_t *vp)
921 {
922 	struct snode *sp;
923 
924 	if (vn_matchops(vp, spec_getvnodeops())) {
925 		sp = VTOS(vp);
926 		return ((sp->s_flag & SCLONE) ? 1 : 0);
927 	}
928 
929 	return (0);
930 }
931 
932 int
933 spec_is_selfclone(vnode_t *vp)
934 {
935 	struct snode *sp;
936 
937 	if (vn_matchops(vp, spec_getvnodeops())) {
938 		sp = VTOS(vp);
939 		return ((sp->s_flag & SSELFCLONE) ? 1 : 0);
940 	}
941 
942 	return (0);
943 }
944 
945 /*
946  * We may be invoked with a NULL vp in which case we fence off
947  * all snodes associated with dip
948  */
949 int
950 spec_fence_snode(dev_info_t *dip, struct vnode *vp)
951 {
952 	struct snode	*sp;
953 	struct snode	*csp;
954 	int		retired;
955 	int		i;
956 	char		*path;
957 	int		emitted;
958 
959 	ASSERT(dip);
960 
961 	retired = 0;
962 	mutex_enter(&DEVI(dip)->devi_lock);
963 	if (DEVI(dip)->devi_flags & DEVI_RETIRED)
964 		retired = 1;
965 	mutex_exit(&DEVI(dip)->devi_lock);
966 
967 	if (!retired)
968 		return (0);
969 
970 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
971 	(void) ddi_pathname(dip, path);
972 
973 
974 	if (vp != NULL) {
975 		ASSERT(vn_matchops(vp, spec_getvnodeops()));
976 		csp = VTOCS(vp);
977 		ASSERT(csp);
978 		mutex_enter(&csp->s_lock);
979 		csp->s_flag |= SFENCED;
980 		mutex_exit(&csp->s_lock);
981 		FENDBG((CE_NOTE, "fenced off snode(%p) for dip: %s",
982 		    (void *)csp, path));
983 		kmem_free(path, MAXPATHLEN);
984 		return (0);
985 	}
986 
987 	emitted = 0;
988 	mutex_enter(&stable_lock);
989 	for (i = 0; i < STABLESIZE; i++) {
990 		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
991 			ASSERT(sp->s_commonvp);
992 			csp = VTOS(sp->s_commonvp);
993 			if (csp->s_dip == dip) {
994 				/* fence off the common snode */
995 				mutex_enter(&csp->s_lock);
996 				csp->s_flag |= SFENCED;
997 				mutex_exit(&csp->s_lock);
998 				if (!emitted) {
999 					FENDBG((CE_NOTE, "fenced 1 of N"));
1000 					emitted++;
1001 				}
1002 			}
1003 		}
1004 	}
1005 	mutex_exit(&stable_lock);
1006 
1007 	FENDBG((CE_NOTE, "fenced off all snodes for dip: %s", path));
1008 	kmem_free(path, MAXPATHLEN);
1009 
1010 	return (0);
1011 }
1012 
1013 
1014 int
1015 spec_unfence_snode(dev_info_t *dip)
1016 {
1017 	struct snode	*sp;
1018 	struct snode	*csp;
1019 	int		i;
1020 	char		*path;
1021 	int		emitted;
1022 
1023 	ASSERT(dip);
1024 
1025 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1026 	(void) ddi_pathname(dip, path);
1027 
1028 	emitted = 0;
1029 	mutex_enter(&stable_lock);
1030 	for (i = 0; i < STABLESIZE; i++) {
1031 		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
1032 			ASSERT(sp->s_commonvp);
1033 			csp = VTOS(sp->s_commonvp);
1034 			ASSERT(csp);
1035 			if (csp->s_dip == dip) {
1036 				/* unfence the common snode */
1037 				mutex_enter(&csp->s_lock);
1038 				csp->s_flag &= ~SFENCED;
1039 				mutex_exit(&csp->s_lock);
1040 				if (!emitted) {
1041 					FENDBG((CE_NOTE, "unfenced 1 of N"));
1042 					emitted++;
1043 				}
1044 			}
1045 		}
1046 	}
1047 	mutex_exit(&stable_lock);
1048 
1049 	FENDBG((CE_NOTE, "unfenced all snodes for dip: %s", path));
1050 	kmem_free(path, MAXPATHLEN);
1051 
1052 	return (0);
1053 }
1054 
1055 void
1056 spec_size_invalidate(dev_t dev, vtype_t type)
1057 {
1058 
1059 	struct snode *csp;
1060 
1061 	mutex_enter(&stable_lock);
1062 	if ((csp = sfind(dev, type, NULL)) != NULL) {
1063 		mutex_enter(&csp->s_lock);
1064 		csp->s_flag &= ~SSIZEVALID;
1065 		VN_RELE_ASYNC(STOV(csp), system_taskq);
1066 		mutex_exit(&csp->s_lock);
1067 	}
1068 	mutex_exit(&stable_lock);
1069 }
1070