xref: /illumos-gate/usr/src/uts/common/sys/vnode.h (revision 04909c8c)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
55a59a8b3Srsb  * Common Development and Distribution License (the "License").
65a59a8b3Srsb  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
21794f0adbSRoger A. Faulkner 
227c478bd9Sstevel@tonic-gate /*
23cb15d5d9SPeter Rival  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24*04909c8cSJohn Levon  * Copyright (c) 2018, Joyent, Inc.
25ade42b55SSebastien Roy  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
26f06dce2cSAndrew Stormont  * Copyright 2017 RackTop Systems.
277c478bd9Sstevel@tonic-gate  */
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
30b4203d75SMarcel Telka /*	  All Rights Reserved	*/
317c478bd9Sstevel@tonic-gate 
327c478bd9Sstevel@tonic-gate /*
337c478bd9Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
347c478bd9Sstevel@tonic-gate  * The Regents of the University of California
357c478bd9Sstevel@tonic-gate  * All Rights Reserved
367c478bd9Sstevel@tonic-gate  *
377c478bd9Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
387c478bd9Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
397c478bd9Sstevel@tonic-gate  * contributors.
407c478bd9Sstevel@tonic-gate  */
417c478bd9Sstevel@tonic-gate 
427c478bd9Sstevel@tonic-gate #ifndef _SYS_VNODE_H
437c478bd9Sstevel@tonic-gate #define	_SYS_VNODE_H
447c478bd9Sstevel@tonic-gate 
457c478bd9Sstevel@tonic-gate #include <sys/types.h>
467c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
477c478bd9Sstevel@tonic-gate #include <sys/rwstlock.h>
487c478bd9Sstevel@tonic-gate #include <sys/time_impl.h>
497c478bd9Sstevel@tonic-gate #include <sys/cred.h>
507c478bd9Sstevel@tonic-gate #include <sys/uio.h>
517c478bd9Sstevel@tonic-gate #include <sys/resource.h>
527c478bd9Sstevel@tonic-gate #include <vm/seg_enum.h>
535a59a8b3Srsb #include <sys/kstat.h>
545a59a8b3Srsb #include <sys/kmem.h>
551b300de9Sjwahlig #include <sys/list.h>
567c478bd9Sstevel@tonic-gate #ifdef	_KERNEL
577c478bd9Sstevel@tonic-gate #include <sys/buf.h>
58ade42b55SSebastien Roy #include <sys/sdt.h>
597c478bd9Sstevel@tonic-gate #endif	/* _KERNEL */
607c478bd9Sstevel@tonic-gate 
617c478bd9Sstevel@tonic-gate #ifdef	__cplusplus
627c478bd9Sstevel@tonic-gate extern "C" {
637c478bd9Sstevel@tonic-gate #endif
647c478bd9Sstevel@tonic-gate 
655a59a8b3Srsb /*
665a59a8b3Srsb  * Statistics for all vnode operations.
675a59a8b3Srsb  * All operations record number of ops (since boot/mount/zero'ed).
685a59a8b3Srsb  * Certain I/O operations (read, write, readdir) also record number
695a59a8b3Srsb  * of bytes transferred.
705a59a8b3Srsb  * This appears in two places in the system: one is embedded in each
715a59a8b3Srsb  * vfs_t.  There is also an array of vopstats_t structures allocated
725a59a8b3Srsb  * on a per-fstype basis.
735a59a8b3Srsb  */
745a59a8b3Srsb 
755a59a8b3Srsb #define	VOPSTATS_STR	"vopstats_"	/* Initial string for vopstat kstats */
765a59a8b3Srsb 
/*
 * One named kstat per vnode operation.  Each n<op> member counts calls to
 * the VOP named in its comment; read, write and readdir additionally carry
 * a *_bytes member for the number of bytes transferred.
 */
775a59a8b3Srsb typedef struct vopstats {
785a59a8b3Srsb 	kstat_named_t	nopen;		/* VOP_OPEN */
795a59a8b3Srsb 	kstat_named_t	nclose;		/* VOP_CLOSE */
805a59a8b3Srsb 	kstat_named_t	nread;		/* VOP_READ */
815a59a8b3Srsb 	kstat_named_t	read_bytes;	/* bytes transferred by VOP_READ */
825a59a8b3Srsb 	kstat_named_t	nwrite;		/* VOP_WRITE */
835a59a8b3Srsb 	kstat_named_t	write_bytes;	/* bytes transferred by VOP_WRITE */
845a59a8b3Srsb 	kstat_named_t	nioctl;		/* VOP_IOCTL */
855a59a8b3Srsb 	kstat_named_t	nsetfl;		/* VOP_SETFL */
865a59a8b3Srsb 	kstat_named_t	ngetattr;	/* VOP_GETATTR */
875a59a8b3Srsb 	kstat_named_t	nsetattr;	/* VOP_SETATTR */
885a59a8b3Srsb 	kstat_named_t	naccess;	/* VOP_ACCESS */
895a59a8b3Srsb 	kstat_named_t	nlookup;	/* VOP_LOOKUP */
905a59a8b3Srsb 	kstat_named_t	ncreate;	/* VOP_CREATE */
915a59a8b3Srsb 	kstat_named_t	nremove;	/* VOP_REMOVE */
925a59a8b3Srsb 	kstat_named_t	nlink;		/* VOP_LINK */
935a59a8b3Srsb 	kstat_named_t	nrename;	/* VOP_RENAME */
945a59a8b3Srsb 	kstat_named_t	nmkdir;		/* VOP_MKDIR */
955a59a8b3Srsb 	kstat_named_t	nrmdir;		/* VOP_RMDIR */
965a59a8b3Srsb 	kstat_named_t	nreaddir;	/* VOP_READDIR */
975a59a8b3Srsb 	kstat_named_t	readdir_bytes;	/* bytes transferred by VOP_READDIR */
985a59a8b3Srsb 	kstat_named_t	nsymlink;	/* VOP_SYMLINK */
995a59a8b3Srsb 	kstat_named_t	nreadlink;	/* VOP_READLINK */
1005a59a8b3Srsb 	kstat_named_t	nfsync;		/* VOP_FSYNC */
1015a59a8b3Srsb 	kstat_named_t	ninactive;	/* VOP_INACTIVE */
1025a59a8b3Srsb 	kstat_named_t	nfid;		/* VOP_FID */
1035a59a8b3Srsb 	kstat_named_t	nrwlock;	/* VOP_RWLOCK */
1045a59a8b3Srsb 	kstat_named_t	nrwunlock;	/* VOP_RWUNLOCK */
1055a59a8b3Srsb 	kstat_named_t	nseek;		/* VOP_SEEK */
1065a59a8b3Srsb 	kstat_named_t	ncmp;		/* VOP_CMP */
1075a59a8b3Srsb 	kstat_named_t	nfrlock;	/* VOP_FRLOCK */
1085a59a8b3Srsb 	kstat_named_t	nspace;		/* VOP_SPACE */
1095a59a8b3Srsb 	kstat_named_t	nrealvp;	/* VOP_REALVP */
1105a59a8b3Srsb 	kstat_named_t	ngetpage;	/* VOP_GETPAGE */
1115a59a8b3Srsb 	kstat_named_t	nputpage;	/* VOP_PUTPAGE */
1125a59a8b3Srsb 	kstat_named_t	nmap;		/* VOP_MAP */
1135a59a8b3Srsb 	kstat_named_t	naddmap;	/* VOP_ADDMAP */
1145a59a8b3Srsb 	kstat_named_t	ndelmap;	/* VOP_DELMAP */
1155a59a8b3Srsb 	kstat_named_t	npoll;		/* VOP_POLL */
1165a59a8b3Srsb 	kstat_named_t	ndump;		/* VOP_DUMP */
1175a59a8b3Srsb 	kstat_named_t	npathconf;	/* VOP_PATHCONF */
1185a59a8b3Srsb 	kstat_named_t	npageio;	/* VOP_PAGEIO */
1195a59a8b3Srsb 	kstat_named_t	ndumpctl;	/* VOP_DUMPCTL */
1205a59a8b3Srsb 	kstat_named_t	ndispose;	/* VOP_DISPOSE */
1215a59a8b3Srsb 	kstat_named_t	nsetsecattr;	/* VOP_SETSECATTR */
1225a59a8b3Srsb 	kstat_named_t	ngetsecattr;	/* VOP_GETSECATTR */
1235a59a8b3Srsb 	kstat_named_t	nshrlock;	/* VOP_SHRLOCK */
1245a59a8b3Srsb 	kstat_named_t	nvnevent;	/* VOP_VNEVENT */
125c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 	kstat_named_t	nreqzcbuf;	/* VOP_REQZCBUF */
126c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 	kstat_named_t	nretzcbuf;	/* VOP_RETZCBUF */
1275a59a8b3Srsb } vopstats_t;
1285a59a8b3Srsb 
1297c478bd9Sstevel@tonic-gate /*
1307c478bd9Sstevel@tonic-gate  * The vnode is the focus of all file activity in UNIX.
1317c478bd9Sstevel@tonic-gate  * A vnode is allocated for each active file, each current
1327c478bd9Sstevel@tonic-gate  * directory, each mounted-on file, and the root.
1337c478bd9Sstevel@tonic-gate  *
1347c478bd9Sstevel@tonic-gate  * Each vnode is usually associated with a file-system-specific node (for
1357c478bd9Sstevel@tonic-gate  * UFS, this is the in-memory inode).  Generally, a vnode and an fs-node
1367c478bd9Sstevel@tonic-gate  * should be created and destroyed together as a pair.
1377c478bd9Sstevel@tonic-gate  *
1387c478bd9Sstevel@tonic-gate  * If a vnode is reused for a new file, it should be reinitialized by calling
1397c478bd9Sstevel@tonic-gate  * either vn_reinit() or vn_recycle().
1407c478bd9Sstevel@tonic-gate  *
1417c478bd9Sstevel@tonic-gate  * vn_reinit() resets the entire vnode as if it was returned by vn_alloc().
1427c478bd9Sstevel@tonic-gate  * The caller is responsible for setting up the entire vnode after calling
1437c478bd9Sstevel@tonic-gate  * vn_reinit().  This is important when using kmem caching where the vnode is
1447c478bd9Sstevel@tonic-gate  * allocated by a constructor, for instance.
1457c478bd9Sstevel@tonic-gate  *
1467c478bd9Sstevel@tonic-gate  * vn_recycle() is used when the file system keeps some state around in both
1477c478bd9Sstevel@tonic-gate  * the vnode and the associated FS-node.  In UFS, for example, the inode of
1487c478bd9Sstevel@tonic-gate  * a deleted file can be reused immediately.  The v_data, v_vfsp, v_op, etc.
1497c478bd9Sstevel@tonic-gate  * remains the same but certain fields related to the previous instance need
1507c478bd9Sstevel@tonic-gate  * to be reset.  In particular:
1517c478bd9Sstevel@tonic-gate  *	v_femhead
1527c478bd9Sstevel@tonic-gate  *	v_path
1537c478bd9Sstevel@tonic-gate  *	v_rdcnt, v_wrcnt
1547c478bd9Sstevel@tonic-gate  *	v_mmap_read, v_mmap_write
1557c478bd9Sstevel@tonic-gate  */
1567c478bd9Sstevel@tonic-gate 
1577c478bd9Sstevel@tonic-gate /*
1587c478bd9Sstevel@tonic-gate  * vnode types.  VNON means no type.  These values are unrelated to
1597c478bd9Sstevel@tonic-gate  * values in on-disk inodes.
1607c478bd9Sstevel@tonic-gate  */
/*
 * Every enumerator is given an explicit value (0 through 11), so the
 * numbering does not depend on declaration order.
 */
1617c478bd9Sstevel@tonic-gate typedef enum vtype {
1627c478bd9Sstevel@tonic-gate 	VNON	= 0,	/* no type */
1637c478bd9Sstevel@tonic-gate 	VREG	= 1,	/* v_rdcnt/v_wrcnt are kept for this type only */
1647c478bd9Sstevel@tonic-gate 	VDIR	= 2,
1657c478bd9Sstevel@tonic-gate 	VBLK	= 3,	/* uses v_rdev; matched by IS_DEVVP() */
1667c478bd9Sstevel@tonic-gate 	VCHR	= 4,	/* uses v_rdev; matched by IS_DEVVP() */
1677c478bd9Sstevel@tonic-gate 	VLNK	= 5,
1687c478bd9Sstevel@tonic-gate 	VFIFO	= 6,	/* matched by IS_DEVVP() */
1697c478bd9Sstevel@tonic-gate 	VDOOR	= 7,
1707c478bd9Sstevel@tonic-gate 	VPROC	= 8,
1717c478bd9Sstevel@tonic-gate 	VSOCK	= 9,
1727c478bd9Sstevel@tonic-gate 	VPORT	= 10,
1737c478bd9Sstevel@tonic-gate 	VBAD	= 11
1747c478bd9Sstevel@tonic-gate } vtype_t;
1757c478bd9Sstevel@tonic-gate 
1761b300de9Sjwahlig /*
1771b300de9Sjwahlig  * VSD - Vnode Specific Data
1781b300de9Sjwahlig  * Used to associate additional private data with a vnode.
1791b300de9Sjwahlig  */
/* Per-vnode VSD record; a vnode points at one of these through v_vsd. */
1801b300de9Sjwahlig struct vsd_node {
1811b300de9Sjwahlig 	list_node_t vs_nodes;		/* linkage on the list of all VSD nodes */
1821b300de9Sjwahlig 	uint_t vs_nkeys;		/* number of entries in vs_value[] */
1831b300de9Sjwahlig 	void **vs_value;		/* array of values (value/key) */
1841b300de9Sjwahlig };
1851b300de9Sjwahlig 
1867c478bd9Sstevel@tonic-gate /*
1877c478bd9Sstevel@tonic-gate  * Many of the fields in the vnode are read-only once they are initialized
1887c478bd9Sstevel@tonic-gate  * at vnode creation time.  Other fields are protected by locks.
1897c478bd9Sstevel@tonic-gate  *
1907c478bd9Sstevel@tonic-gate  * IMPORTANT: vnodes should be created ONLY by calls to vn_alloc().  They
1917c478bd9Sstevel@tonic-gate  * may not be embedded into the file-system specific node (inode).  The
1927c478bd9Sstevel@tonic-gate  * size of vnodes may change.
1937c478bd9Sstevel@tonic-gate  *
1947c478bd9Sstevel@tonic-gate  * The v_lock protects:
1957c478bd9Sstevel@tonic-gate  *   v_flag
1967c478bd9Sstevel@tonic-gate  *   v_stream
1977c478bd9Sstevel@tonic-gate  *   v_count
1987c478bd9Sstevel@tonic-gate  *   v_shrlocks
1997c478bd9Sstevel@tonic-gate  *   v_path
2001b300de9Sjwahlig  *   v_vsd
201da6c28aaSamw  *   v_xattrdir
2027c478bd9Sstevel@tonic-gate  *
2037c478bd9Sstevel@tonic-gate  * A special lock (implemented by vn_vfswlock in vnode.c) protects:
2047c478bd9Sstevel@tonic-gate  *   v_vfsmountedhere
2057c478bd9Sstevel@tonic-gate  *
2067c478bd9Sstevel@tonic-gate  * The global flock_lock mutex (in flock.c) protects:
2077c478bd9Sstevel@tonic-gate  *   v_filocks
2087c478bd9Sstevel@tonic-gate  *
2097c478bd9Sstevel@tonic-gate  * IMPORTANT NOTE:
2107c478bd9Sstevel@tonic-gate  *
2117c478bd9Sstevel@tonic-gate  *   The following vnode fields are considered public and may safely be
2127c478bd9Sstevel@tonic-gate  *   accessed by file systems or other consumers:
2137c478bd9Sstevel@tonic-gate  *
2147c478bd9Sstevel@tonic-gate  *     v_lock
2157c478bd9Sstevel@tonic-gate  *     v_flag
2167c478bd9Sstevel@tonic-gate  *     v_count
2177c478bd9Sstevel@tonic-gate  *     v_data
2187c478bd9Sstevel@tonic-gate  *     v_vfsp
2197c478bd9Sstevel@tonic-gate  *     v_stream
2207c478bd9Sstevel@tonic-gate  *     v_type
2217c478bd9Sstevel@tonic-gate  *     v_rdev
2227c478bd9Sstevel@tonic-gate  *
2237c478bd9Sstevel@tonic-gate  * ALL OTHER FIELDS SHOULD BE ACCESSED ONLY BY THE OWNER OF THAT FIELD.
2247c478bd9Sstevel@tonic-gate  * In particular, file systems should not access other fields; they may
2257c478bd9Sstevel@tonic-gate  * change or even be removed.  The functionality which was once provided
2267c478bd9Sstevel@tonic-gate  * by these fields is available through vn_* functions.
227e2fc3408SPatrick Mooney  *
228e2fc3408SPatrick Mooney  * VNODE PATH THEORY:
229e2fc3408SPatrick Mooney  * In each vnode, the v_path field holds a cached version of the canonical
230e2fc3408SPatrick Mooney  * filesystem path which that node represents.  Because vnodes lack contextual
231e2fc3408SPatrick Mooney  * information about their own name or position in the VFS hierarchy, this path
232e2fc3408SPatrick Mooney  * must be calculated when the vnode is instantiated by operations such as
233e2fc3408SPatrick Mooney  * fop_create, fop_lookup, or fop_mkdir.  During said operations, both the
234e2fc3408SPatrick Mooney  * parent vnode (and its cached v_path) and future name are known, so the
235e2fc3408SPatrick Mooney  * v_path of the resulting object can easily be set.
236e2fc3408SPatrick Mooney  *
237e2fc3408SPatrick Mooney  * The caching nature of v_path is complicated in the face of directory
238e2fc3408SPatrick Mooney  * renames.  Filesystem drivers are responsible for calling vn_renamepath when
239e2fc3408SPatrick Mooney  * a fop_rename operation succeeds.  While the v_path on the renamed vnode will
240e2fc3408SPatrick Mooney  * be updated, existing children of the directory (direct, or at deeper levels)
241e2fc3408SPatrick Mooney  * will now possess v_path caches which are stale.
242e2fc3408SPatrick Mooney  *
243e2fc3408SPatrick Mooney  * It is expensive (and for non-directories, impossible) to recalculate stale
244e2fc3408SPatrick Mooney  * v_path entries during operations such as vnodetopath.  The best time during
245e2fc3408SPatrick Mooney  * which to correct such wrongs is the same as when v_path is first
246e2fc3408SPatrick Mooney  * initialized: during fop_create/fop_lookup/fop_mkdir/etc, where adequate
247e2fc3408SPatrick Mooney  * context is available to generate the current path.
248e2fc3408SPatrick Mooney  *
249e2fc3408SPatrick Mooney  * In order to quickly detect stale v_path entries (without full lookup
250e2fc3408SPatrick Mooney  * verification) to trigger a v_path update, the v_path_stamp field has been
251e2fc3408SPatrick Mooney  * added to vnode_t.  As part of successful fop_create/fop_lookup/fop_mkdir
252e2fc3408SPatrick Mooney  * operations, where the name and parent vnode are available, the following
253e2fc3408SPatrick Mooney  * rules are used to determine updates to the child:
254e2fc3408SPatrick Mooney  *
255e2fc3408SPatrick Mooney  * 1. If the parent lacks a v_path, clear any existing v_path and v_path_stamp
256e2fc3408SPatrick Mooney  *    on the child.  Until the parent v_path is refreshed to a valid state, the
257e2fc3408SPatrick Mooney  *    child v_path must be considered invalid too.
258e2fc3408SPatrick Mooney  *
259e2fc3408SPatrick Mooney  * 2. If the child lacks a v_path (implying v_path_stamp == 0), it inherits the
260e2fc3408SPatrick Mooney  *    v_path_stamp value from its parent and its v_path is updated.
261e2fc3408SPatrick Mooney  *
262e2fc3408SPatrick Mooney  * 3. If the child v_path_stamp is less than v_path_stamp in the parent, it is
263e2fc3408SPatrick Mooney  *    an indication that the child v_path is stale.  The v_path is updated and
264e2fc3408SPatrick Mooney  *    v_path_stamp in the child is set to the current hrtime().
265e2fc3408SPatrick Mooney  *
266e2fc3408SPatrick Mooney  *    It does _not_ inherit the parent v_path_stamp in order to propagate the
267e2fc3408SPatrick Mooney  *    time of v_path invalidation through the directory structure.  This
268e2fc3408SPatrick Mooney  *    prevents concurrent invalidations (operating with a now-incorrect v_path)
269e2fc3408SPatrick Mooney  *    at deeper levels in the tree from persisting.
270e2fc3408SPatrick Mooney  *
271e2fc3408SPatrick Mooney  * 4. If the child v_path_stamp is greater or equal to the parent, no action
272e2fc3408SPatrick Mooney  *    needs to be taken.
273e2fc3408SPatrick Mooney  *
274e2fc3408SPatrick Mooney  * Note that fop_rename operations do not follow this ruleset.  They perform an
275e2fc3408SPatrick Mooney  * explicit update of v_path and v_path_stamp (setting it to the current time).
276e2fc3408SPatrick Mooney  *
277e2fc3408SPatrick Mooney  * With these constraints in place, v_path invalidations and updates should
278e2fc3408SPatrick Mooney  * proceed in a timely manner as vnodes are accessed.  While there still are
279e2fc3408SPatrick Mooney  * limited cases where vnodetopath operations will fail, the risk is minimized.
2807c478bd9Sstevel@tonic-gate  */
2817c478bd9Sstevel@tonic-gate 
2827c478bd9Sstevel@tonic-gate struct fem_head;	/* from fem.h */
2837c478bd9Sstevel@tonic-gate 
/*
 * In-core vnode.  The first eight members (v_lock through v_rdev) are the
 * public fields that file systems and other consumers may access; every
 * member after the PRIVATE marker belongs to the owner of that field.
 *
 * NOTE(review): the locking comment preceding this structure lists v_vsd
 * among the fields protected by v_lock, while the v_vsd_lock member below
 * is commented as protecting v_vsd -- confirm which statement is current.
 */
2847c478bd9Sstevel@tonic-gate typedef struct vnode {
2857c478bd9Sstevel@tonic-gate 	kmutex_t	v_lock;		/* protects vnode fields */
2867c478bd9Sstevel@tonic-gate 	uint_t		v_flag;		/* vnode flags (see below) */
2877c478bd9Sstevel@tonic-gate 	uint_t		v_count;	/* reference count */
2887c478bd9Sstevel@tonic-gate 	void		*v_data;	/* private data for fs */
2897c478bd9Sstevel@tonic-gate 	struct vfs	*v_vfsp;	/* ptr to containing VFS */
2907c478bd9Sstevel@tonic-gate 	struct stdata	*v_stream;	/* associated stream */
2917c478bd9Sstevel@tonic-gate 	enum vtype	v_type;		/* vnode type */
2927c478bd9Sstevel@tonic-gate 	dev_t		v_rdev;		/* device (VCHR, VBLK) */
2937c478bd9Sstevel@tonic-gate 
2947c478bd9Sstevel@tonic-gate 	/* PRIVATE FIELDS BELOW - DO NOT USE */
2957c478bd9Sstevel@tonic-gate 
2967c478bd9Sstevel@tonic-gate 	struct vfs	*v_vfsmountedhere; /* ptr to vfs mounted here */
2977c478bd9Sstevel@tonic-gate 	struct vnodeops	*v_op;		/* vnode operations */
2987c478bd9Sstevel@tonic-gate 	struct page	*v_pages;	/* vnode pages list */
2997c478bd9Sstevel@tonic-gate 	struct filock	*v_filocks;	/* ptr to filock list */
3007c478bd9Sstevel@tonic-gate 	struct shrlocklist *v_shrlocks;	/* ptr to shrlock list */
3017c478bd9Sstevel@tonic-gate 	krwlock_t	v_nbllock;	/* sync for NBMAND locks */
3027c478bd9Sstevel@tonic-gate 	kcondvar_t	v_cv;		/* synchronize locking */
3037c478bd9Sstevel@tonic-gate 	void		*v_locality;	/* hook for locality info */
3047c478bd9Sstevel@tonic-gate 	struct fem_head	*v_femhead;	/* fs monitoring */
3057c478bd9Sstevel@tonic-gate 	char		*v_path;	/* cached path */
306e2fc3408SPatrick Mooney 	hrtime_t	v_path_stamp;	/* timestamp for cached path */
3077c478bd9Sstevel@tonic-gate 	uint_t		v_rdcnt;	/* open for read count  (VREG only) */
3087c478bd9Sstevel@tonic-gate 	uint_t		v_wrcnt;	/* open for write count (VREG only) */
3097c478bd9Sstevel@tonic-gate 	u_longlong_t	v_mmap_read;	/* mmap read count */
3107c478bd9Sstevel@tonic-gate 	u_longlong_t	v_mmap_write;	/* mmap write count */
3117c478bd9Sstevel@tonic-gate 	void		*v_mpssdata;	/* info for large page mappings */
312df2381bfSpraks 	void		*v_fopdata;	/* list of file ops event watches */
313d216dff5SRobert Mastors 	kmutex_t	v_vsd_lock;	/* protects v_vsd field */
3141b300de9Sjwahlig 	struct vsd_node *v_vsd;		/* vnode specific data */
315da6c28aaSamw 	struct vnode	*v_xattrdir;	/* unnamed extended attr dir (GFS) */
316b5fca8f8Stomee 	uint_t		v_count_dnlc;	/* dnlc reference count */
3177c478bd9Sstevel@tonic-gate } vnode_t;
3187c478bd9Sstevel@tonic-gate 
/*
 * True when the vnode's v_type is VCHR, VBLK or VFIFO.
 * The argument is expanded up to three times, so it must be free of
 * side effects.
 */
3197c478bd9Sstevel@tonic-gate #define	IS_DEVVP(vp)	\
3207c478bd9Sstevel@tonic-gate 	((vp)->v_type == VCHR || (vp)->v_type == VBLK || (vp)->v_type == VFIFO)
3217c478bd9Sstevel@tonic-gate 
322cb15d5d9SPeter Rival #define	VNODE_ALIGN	64
323cb15d5d9SPeter Rival /*
 * Count of low-order 0 bits in a vnode *, based on size and alignment.
 * NOTE(review): this is not log2(VNODE_ALIGN), which would be 6.  The
 * 8 (LP64) and 7 (otherwise) values presumably also fold in the size of
 * vnode_t under each data model -- confirm before changing either one.
 */
3241ab248cfSPeter Rival #if defined(_LP64)
325cb15d5d9SPeter Rival #define	VNODE_ALIGN_LOG2	8
3261ab248cfSPeter Rival #else
3271ab248cfSPeter Rival #define	VNODE_ALIGN_LOG2	7
3281ab248cfSPeter Rival #endif
329cb15d5d9SPeter Rival 
3307c478bd9Sstevel@tonic-gate /*
3317c478bd9Sstevel@tonic-gate  * vnode flags.
3327c478bd9Sstevel@tonic-gate  */
3337c478bd9Sstevel@tonic-gate #define	VROOT		0x01	/* root of its file system */
3347c478bd9Sstevel@tonic-gate #define	VNOCACHE	0x02	/* don't keep cache pages on vnode */
3357c478bd9Sstevel@tonic-gate #define	VNOMAP		0x04	/* file cannot be mapped/faulted */
3367c478bd9Sstevel@tonic-gate #define	VDUP		0x08	/* file should be dup'ed rather than opened */
3377c478bd9Sstevel@tonic-gate #define	VNOSWAP		0x10	/* file cannot be used as virtual swap device */
3387c478bd9Sstevel@tonic-gate #define	VNOMOUNT	0x20	/* file cannot be covered by mount */
3397c478bd9Sstevel@tonic-gate #define	VISSWAP		0x40	/* vnode is being used for swap */
3407c478bd9Sstevel@tonic-gate #define	VSWAPLIKE	0x80	/* vnode acts like swap (but may not be) */
3417c478bd9Sstevel@tonic-gate 
3427c478bd9Sstevel@tonic-gate #define	IS_SWAPVP(vp)	(((vp)->v_flag & (VISSWAP | VSWAPLIKE)) != 0)
3437c478bd9Sstevel@tonic-gate 
344f06dce2cSAndrew Stormont #ifdef _KERNEL
/*
 * Singly linked (ve_next), reference-counted (ve_refcnt) lock entry.
 * NOTE(review): presumably ve_vpvfs is the vnode or vfs pointer the lock
 * stands for, and this is the entry type behind vn_vfswlock -- confirm
 * against vnode.c.
 *
 * The trailing pad is sized as 64 minus the sizes of the four members
 * before it (the rwstlock_t, the two pointers, and the uint32_t), so the
 * structure comes to 64 bytes provided the compiler inserts no padding of
 * its own between members.
 */
3457c478bd9Sstevel@tonic-gate typedef struct vn_vfslocks_entry {
3467c478bd9Sstevel@tonic-gate 	rwstlock_t ve_lock;
3477c478bd9Sstevel@tonic-gate 	void *ve_vpvfs;
3487c478bd9Sstevel@tonic-gate 	struct vn_vfslocks_entry *ve_next;
3497c478bd9Sstevel@tonic-gate 	uint32_t ve_refcnt;
3507c478bd9Sstevel@tonic-gate 	char pad[64 - sizeof (rwstlock_t) - 2 * sizeof (void *) - \
3517c478bd9Sstevel@tonic-gate 	    sizeof (uint32_t)];
3527c478bd9Sstevel@tonic-gate } vn_vfslocks_entry_t;
353f06dce2cSAndrew Stormont #endif
3547c478bd9Sstevel@tonic-gate 
3557c478bd9Sstevel@tonic-gate /*
3567c478bd9Sstevel@tonic-gate  * The following two flags are used to lock the v_vfsmountedhere field
3577c478bd9Sstevel@tonic-gate  */
3587c478bd9Sstevel@tonic-gate #define	VVFSLOCK	0x100
3597c478bd9Sstevel@tonic-gate #define	VVFSWAIT	0x200
3607c478bd9Sstevel@tonic-gate 
3617c478bd9Sstevel@tonic-gate /*
3627c478bd9Sstevel@tonic-gate  * Used to serialize VM operations on a vnode
3637c478bd9Sstevel@tonic-gate  */
3647c478bd9Sstevel@tonic-gate #define	VVMLOCK		0x400
3657c478bd9Sstevel@tonic-gate 
3667c478bd9Sstevel@tonic-gate /*
3677c478bd9Sstevel@tonic-gate  * Tell vn_open() not to fail a directory open for writing but
3687c478bd9Sstevel@tonic-gate  * to go ahead and call VOP_OPEN() to let the filesystem check.
3697c478bd9Sstevel@tonic-gate  */
3707c478bd9Sstevel@tonic-gate #define	VDIROPEN	0x800
3717c478bd9Sstevel@tonic-gate 
3727c478bd9Sstevel@tonic-gate /*
3737c478bd9Sstevel@tonic-gate  * Flag to let the VM system know that this file is most likely a binary
3747c478bd9Sstevel@tonic-gate  * or shared library since it has been mmap()ed EXEC at some time.
3757c478bd9Sstevel@tonic-gate  */
3767c478bd9Sstevel@tonic-gate #define	VVMEXEC		0x1000
3777c478bd9Sstevel@tonic-gate 
3787c478bd9Sstevel@tonic-gate #define	VPXFS		0x2000  /* clustering: global fs proxy vnode */
3797c478bd9Sstevel@tonic-gate 
3807c478bd9Sstevel@tonic-gate #define	IS_PXFSVP(vp)	((vp)->v_flag & VPXFS)
3817c478bd9Sstevel@tonic-gate 
3827c478bd9Sstevel@tonic-gate #define	V_XATTRDIR	0x4000	/* attribute unnamed directory */
3837c478bd9Sstevel@tonic-gate 
384ab04eb8eStimh #define	IS_XATTRDIR(vp)	((vp)->v_flag & V_XATTRDIR)
385ab04eb8eStimh 
3867c478bd9Sstevel@tonic-gate #define	V_LOCALITY	0x8000	/* whether locality aware */
3877c478bd9Sstevel@tonic-gate 
3887c478bd9Sstevel@tonic-gate /*
3897c478bd9Sstevel@tonic-gate  * Flag that indicates the VM should maintain the v_pages list with all modified
3907c478bd9Sstevel@tonic-gate  * pages on one end and unmodified pages at the other. This makes finding dirty
3917c478bd9Sstevel@tonic-gate  * pages to write back to disk much faster at the expense of taking a minor
3927c478bd9Sstevel@tonic-gate  * fault on the first store instruction which touches a writable page.
3937c478bd9Sstevel@tonic-gate  */
3947c478bd9Sstevel@tonic-gate #define	VMODSORT	(0x10000)
3957c478bd9Sstevel@tonic-gate #define	IS_VMODSORT(vp) \
3967c478bd9Sstevel@tonic-gate 	(pvn_vmodsort_supported != 0 && ((vp)->v_flag  & VMODSORT) != 0)
3977c478bd9Sstevel@tonic-gate 
3987c478bd9Sstevel@tonic-gate #define	VISSWAPFS	0x20000	/* vnode is being used for swapfs */
399d20abfaaSPavel Tatashin 
400d20abfaaSPavel Tatashin /*
401d20abfaaSPavel Tatashin  * The mdb memstat command assumes that IS_SWAPFSVP only uses the
402d20abfaaSPavel Tatashin  * vnode's v_flag field.  If this changes, cache the additional
403d20abfaaSPavel Tatashin  * fields in mdb; see vn_get in mdb/common/modules/genunix/memory.c
404d20abfaaSPavel Tatashin  */
4057c478bd9Sstevel@tonic-gate #define	IS_SWAPFSVP(vp)	(((vp)->v_flag & VISSWAPFS) != 0)
4067c478bd9Sstevel@tonic-gate 
407da6c28aaSamw #define	V_SYSATTR	0x40000	/* vnode is a GFS system attribute */
408da6c28aaSamw 
409e2fc3408SPatrick Mooney /*
410e2fc3408SPatrick Mooney  * Indication that VOP_LOOKUP operations on this vnode may yield results from a
411e2fc3408SPatrick Mooney  * different VFS instance.  The main use of this is to suppress v_path
412e2fc3408SPatrick Mooney  * calculation logic when filesystems such as procfs emit results which defy
413e2fc3408SPatrick Mooney  * expectations about normal VFS behavior.
414e2fc3408SPatrick Mooney  */
415e2fc3408SPatrick Mooney #define	VTRAVERSE	0x80000
416e2fc3408SPatrick Mooney 
4177c478bd9Sstevel@tonic-gate /*
4187c478bd9Sstevel@tonic-gate  * Vnode attributes.  A bit-mask is supplied as part of the
4197c478bd9Sstevel@tonic-gate  * structure to indicate the attributes the caller wants to
4207c478bd9Sstevel@tonic-gate  * set (setattr) or extract (getattr).
4217c478bd9Sstevel@tonic-gate  */
4227c478bd9Sstevel@tonic-gate 
4237c478bd9Sstevel@tonic-gate /*
4247c478bd9Sstevel@tonic-gate  * Note that va_nodeid and va_nblocks are 64bit data type.
4257c478bd9Sstevel@tonic-gate  * We support large files over NFSV3. With Solaris client and
4267c478bd9Sstevel@tonic-gate  * Server that generates 64bit ino's and sizes these fields
4277c478bd9Sstevel@tonic-gate  * will overflow if they are 32 bit sizes.
4287c478bd9Sstevel@tonic-gate  */
4297c478bd9Sstevel@tonic-gate 
/*
 * Attribute set passed to getattr/setattr.  va_mask is the bit-mask that
 * says which of the remaining members the caller wants set or extracted.
 */
4307c478bd9Sstevel@tonic-gate typedef struct vattr {
4317c478bd9Sstevel@tonic-gate 	uint_t		va_mask;	/* bit-mask of attributes */
4327c478bd9Sstevel@tonic-gate 	vtype_t		va_type;	/* vnode type (for create) */
4337c478bd9Sstevel@tonic-gate 	mode_t		va_mode;	/* file access mode */
4347c478bd9Sstevel@tonic-gate 	uid_t		va_uid;		/* owner user id */
4357c478bd9Sstevel@tonic-gate 	gid_t		va_gid;		/* owner group id */
4367c478bd9Sstevel@tonic-gate 	dev_t		va_fsid;	/* file system id (dev for now) */
4377c478bd9Sstevel@tonic-gate 	u_longlong_t	va_nodeid;	/* node id (kept 64-bit) */
4387c478bd9Sstevel@tonic-gate 	nlink_t		va_nlink;	/* number of references to file */
4397c478bd9Sstevel@tonic-gate 	u_offset_t	va_size;	/* file size in bytes */
4407c478bd9Sstevel@tonic-gate 	timestruc_t	va_atime;	/* time of last access */
4417c478bd9Sstevel@tonic-gate 	timestruc_t	va_mtime;	/* time of last modification */
4427c478bd9Sstevel@tonic-gate 	timestruc_t	va_ctime;	/* time of last status change */
4437c478bd9Sstevel@tonic-gate 	dev_t		va_rdev;	/* device the file represents */
4447c478bd9Sstevel@tonic-gate 	uint_t		va_blksize;	/* fundamental block size */
4457c478bd9Sstevel@tonic-gate 	u_longlong_t	va_nblocks;	/* # of blocks allocated (kept 64-bit) */
4467c478bd9Sstevel@tonic-gate 	uint_t		va_seq;		/* sequence number */
4477c478bd9Sstevel@tonic-gate } vattr_t;
4487c478bd9Sstevel@tonic-gate 
449da6c28aaSamw #define	AV_SCANSTAMP_SZ	32		/* length of anti-virus scanstamp */
450da6c28aaSamw 
451da6c28aaSamw /*
452da6c28aaSamw  * Structure of all optional attributes.
453da6c28aaSamw  */
/*
 * Values of the optional attributes.
 * NOTE(review): the single-byte uint8_t members look like per-attribute
 * on/off flags -- confirm against the consumers before relying on that.
 */
454da6c28aaSamw typedef struct xoptattr {
455da6c28aaSamw 	timestruc_t	xoa_createtime;	/* Create time of file */
456da6c28aaSamw 	uint8_t		xoa_archive;
457da6c28aaSamw 	uint8_t		xoa_system;
458da6c28aaSamw 	uint8_t		xoa_readonly;
459da6c28aaSamw 	uint8_t		xoa_hidden;
460da6c28aaSamw 	uint8_t		xoa_nounlink;
461da6c28aaSamw 	uint8_t		xoa_immutable;
462da6c28aaSamw 	uint8_t		xoa_appendonly;
463da6c28aaSamw 	uint8_t		xoa_nodump;
464da6c28aaSamw 	uint8_t		xoa_opaque;
465da6c28aaSamw 	uint8_t		xoa_av_quarantined;
466da6c28aaSamw 	uint8_t		xoa_av_modified;
	/* anti-virus scanstamp, AV_SCANSTAMP_SZ bytes long */
467da6c28aaSamw 	uint8_t		xoa_av_scanstamp[AV_SCANSTAMP_SZ];
4687a286c47SDai Ngo 	uint8_t		xoa_reparse;
46999d5e173STim Haley 	uint64_t	xoa_generation;
470fd9ee8b5Sjoyce mcintosh 	uint8_t		xoa_offline;
471fd9ee8b5Sjoyce mcintosh 	uint8_t		xoa_sparse;
472f67950b2SNasf-Fan 	uint8_t		xoa_projinherit;
473f67950b2SNasf-Fan 	uint64_t	xoa_projid;
474da6c28aaSamw } xoptattr_t;
475da6c28aaSamw 
476da6c28aaSamw /*
477da6c28aaSamw  * The xvattr structure is really a variable length structure that
478da6c28aaSamw  * is made up of:
479da6c28aaSamw  * - The classic vattr_t (xva_vattr)
480da6c28aaSamw  * - a 32 bit quantity (xva_mapsize) that specifies the size of the
481da6c28aaSamw  *   attribute bitmaps in 32 bit words.
482da6c28aaSamw  * - A pointer to the returned attribute bitmap (needed because the
483da6c28aaSamw  *   previous element, the requested attribute bitmap, is variable length).
484da6c28aaSamw  * - The requested attribute bitmap, which is an array of 32 bit words.
485da6c28aaSamw  *   Callers use the XVA_SET_REQ() macro to set the bits corresponding to
486da6c28aaSamw  *   the attributes that are being requested.
487da6c28aaSamw  * - The returned attribute bitmap, which is an array of 32 bit words.
488da6c28aaSamw  *   File systems that support optional attributes use the XVA_SET_RTN()
489da6c28aaSamw  *   macro to set the bits corresponding to the attributes that are being
490da6c28aaSamw  *   returned.
491da6c28aaSamw  * - The xoptattr_t structure which contains the attribute values
492da6c28aaSamw  *
493da6c28aaSamw  * xva_mapsize determines how many words in the attribute bitmaps.
494da6c28aaSamw  * Immediately following the attribute bitmaps is the xoptattr_t.
495da6c28aaSamw  * xva_getxoptattr() is used to get the pointer to the xoptattr_t
496da6c28aaSamw  * section.
497da6c28aaSamw  */
498da6c28aaSamw 
499da6c28aaSamw #define	XVA_MAPSIZE	3		/* Size of attr bitmaps */
500da6c28aaSamw #define	XVA_MAGIC	0x78766174	/* Magic # for verification */
501da6c28aaSamw 
502da6c28aaSamw /*
503da6c28aaSamw  * The xvattr structure is an extensible structure which permits optional
504da6c28aaSamw  * attributes to be requested/returned.  File systems may or may not support
505da6c28aaSamw  * optional attributes.  They do so at their own discretion but if they do
506da6c28aaSamw  * support optional attributes, they must register the VFSFT_XVATTR feature
507da6c28aaSamw  * so that the optional attributes can be set/retrieved.
508da6c28aaSamw  *
509da6c28aaSamw  * The fields of the xvattr structure are:
510da6c28aaSamw  *
511da6c28aaSamw  * xva_vattr - The first element of an xvattr is a legacy vattr structure
512da6c28aaSamw  * which includes the common attributes.  If AT_XVATTR is set in the va_mask
513da6c28aaSamw  * then the entire structure is treated as an xvattr.  If AT_XVATTR is not
514da6c28aaSamw  * set, then only the xva_vattr structure can be used.
515da6c28aaSamw  *
516da6c28aaSamw  * xva_magic - 0x78766174 (hex for "xvat"). Magic number for verification.
517da6c28aaSamw  *
518da6c28aaSamw  * xva_mapsize - Size of requested and returned attribute bitmaps.
519da6c28aaSamw  *
520da6c28aaSamw  * xva_rtnattrmapp - Pointer to xva_rtnattrmap[].  We need this since the
521da6c28aaSamw  * size of the array before it, xva_reqattrmap[], could change which means
522da6c28aaSamw  * the location of xva_rtnattrmap[] could change.  This will allow unbundled
523da6c28aaSamw  * file systems to find the location of xva_rtnattrmap[] when the sizes change.
524da6c28aaSamw  *
525da6c28aaSamw  * xva_reqattrmap[] - Array of requested attributes.  Attributes are
526da6c28aaSamw  * represented by a specific bit in a specific element of the attribute
527da6c28aaSamw  * map array.  Callers set the bits corresponding to the attributes
528da6c28aaSamw  * that the caller wants to get/set.
529da6c28aaSamw  *
530da6c28aaSamw  * xva_rtnattrmap[] - Array of attributes that the file system was able to
531da6c28aaSamw  * process.  Not all file systems support all optional attributes.  This map
532da6c28aaSamw  * informs the caller which attributes the underlying file system was able
533da6c28aaSamw  * to set/get.  (Same structure as the requested attributes array in terms
534da6c28aaSamw  * of each attribute  corresponding to specific bits and array elements.)
535da6c28aaSamw  *
536da6c28aaSamw  * xva_xoptattrs - Structure containing values of optional attributes.
537da6c28aaSamw  * These values are only valid if the corresponding bits in xva_reqattrmap
538da6c28aaSamw  * are set and the underlying file system supports those attributes.
539da6c28aaSamw  */
540da6c28aaSamw typedef struct xvattr {
541