17c478bdstevel@tonic-gate/*
27c478bdstevel@tonic-gate * CDDL HEADER START
37c478bdstevel@tonic-gate *
47c478bdstevel@tonic-gate * The contents of this file are subject to the terms of the
55a59a8brsb * Common Development and Distribution License (the "License").
65a59a8brsb * You may not use this file except in compliance with the License.
77c478bdstevel@tonic-gate *
87c478bdstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bdstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bdstevel@tonic-gate * See the License for the specific language governing permissions
117c478bdstevel@tonic-gate * and limitations under the License.
127c478bdstevel@tonic-gate *
137c478bdstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bdstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bdstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bdstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bdstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bdstevel@tonic-gate *
197c478bdstevel@tonic-gate * CDDL HEADER END
207c478bdstevel@tonic-gate */
21794f0adRoger A. Faulkner
227c478bdstevel@tonic-gate/*
23cb15d5dPeter Rival * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24e2fc340Patrick Mooney * Copyright (c) 2017, Joyent, Inc.
25ade42b5Sebastien Roy * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
26f06dce2Andrew Stormont * Copyright 2017 RackTop Systems.
277c478bdstevel@tonic-gate */
287c478bdstevel@tonic-gate
297c478bdstevel@tonic-gate/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
30b4203d7Marcel Telka/*	  All Rights Reserved	*/
317c478bdstevel@tonic-gate
327c478bdstevel@tonic-gate/*
337c478bdstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988
347c478bdstevel@tonic-gate * The Regents of the University of California
357c478bdstevel@tonic-gate * All Rights Reserved
367c478bdstevel@tonic-gate *
377c478bdstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from
387c478bdstevel@tonic-gate * software developed by the University of California, Berkeley, and its
397c478bdstevel@tonic-gate * contributors.
407c478bdstevel@tonic-gate */
417c478bdstevel@tonic-gate
427c478bdstevel@tonic-gate#ifndef _SYS_VNODE_H
437c478bdstevel@tonic-gate#define	_SYS_VNODE_H
447c478bdstevel@tonic-gate
457c478bdstevel@tonic-gate#include <sys/types.h>
467c478bdstevel@tonic-gate#include <sys/t_lock.h>
477c478bdstevel@tonic-gate#include <sys/rwstlock.h>
487c478bdstevel@tonic-gate#include <sys/time_impl.h>
497c478bdstevel@tonic-gate#include <sys/cred.h>
507c478bdstevel@tonic-gate#include <sys/uio.h>
517c478bdstevel@tonic-gate#include <sys/resource.h>
527c478bdstevel@tonic-gate#include <vm/seg_enum.h>
535a59a8brsb#include <sys/kstat.h>
545a59a8brsb#include <sys/kmem.h>
551b300dejwahlig#include <sys/list.h>
567c478bdstevel@tonic-gate#ifdef	_KERNEL
577c478bdstevel@tonic-gate#include <sys/buf.h>
58ade42b5Sebastien Roy#include <sys/sdt.h>
597c478bdstevel@tonic-gate#endif	/* _KERNEL */
607c478bdstevel@tonic-gate
617c478bdstevel@tonic-gate#ifdef	__cplusplus
627c478bdstevel@tonic-gateextern "C" {
637c478bdstevel@tonic-gate#endif
647c478bdstevel@tonic-gate
657c478bdstevel@tonic-gate/*
665a59a8brsb * Statistics for all vnode operations.
675a59a8brsb * All operations record number of ops (since boot/mount/zero'ed).
685a59a8brsb * Certain I/O operations (read, write, readdir) also record number
695a59a8brsb * of bytes transferred.
705a59a8brsb * This appears in two places in the system: one is embedded in each
715a59a8brsb * vfs_t.  There is also an array of vopstats_t structures allocated
725a59a8brsb * on a per-fstype basis.
735a59a8brsb */
745a59a8brsb
#define	VOPSTATS_STR	"vopstats_"	/* Initial string for vopstat kstats */
765a59a8brsb
775a59a8brsbtypedef struct vopstats {
785a59a8brsb	kstat_named_t	nopen;		/* VOP_OPEN */
795a59a8brsb	kstat_named_t	nclose;		/* VOP_CLOSE */
805a59a8brsb	kstat_named_t	nread;		/* VOP_READ */
815a59a8brsb	kstat_named_t	read_bytes;
825a59a8brsb	kstat_named_t	nwrite;		/* VOP_WRITE */
835a59a8brsb	kstat_named_t	write_bytes;
845a59a8brsb	kstat_named_t	nioctl;		/* VOP_IOCTL */
855a59a8brsb	kstat_named_t	nsetfl;		/* VOP_SETFL */
865a59a8brsb	kstat_named_t	ngetattr;	/* VOP_GETATTR */
875a59a8brsb	kstat_named_t	nsetattr;	/* VOP_SETATTR */
885a59a8brsb	kstat_named_t	naccess;	/* VOP_ACCESS */
895a59a8brsb	kstat_named_t	nlookup;	/* VOP_LOOKUP */
905a59a8brsb	kstat_named_t	ncreate;	/* VOP_CREATE */
915a59a8brsb	kstat_named_t	nremove;	/* VOP_REMOVE */
925a59a8brsb	kstat_named_t	nlink;		/* VOP_LINK */
935a59a8brsb	kstat_named_t	nrename;	/* VOP_RENAME */
945a59a8brsb	kstat_named_t	nmkdir;		/* VOP_MKDIR */
955a59a8brsb	kstat_named_t	nrmdir;		/* VOP_RMDIR */
965a59a8brsb	kstat_named_t	nreaddir;	/* VOP_READDIR */
975a59a8brsb	kstat_named_t	readdir_bytes;
985a59a8brsb	kstat_named_t	nsymlink;	/* VOP_SYMLINK */
995a59a8brsb	kstat_named_t	nreadlink;	/* VOP_READLINK */
1005a59a8brsb	kstat_named_t	nfsync;		/* VOP_FSYNC */
1015a59a8brsb	kstat_named_t	ninactive;	/* VOP_INACTIVE */
1025a59a8brsb	kstat_named_t	nfid;		/* VOP_FID */
1035a59a8brsb	kstat_named_t	nrwlock;	/* VOP_RWLOCK */
1045a59a8brsb	kstat_named_t	nrwunlock;	/* VOP_RWUNLOCK */
1055a59a8brsb	kstat_named_t	nseek;		/* VOP_SEEK */
1065a59a8brsb	kstat_named_t	ncmp;		/* VOP_CMP */
1075a59a8brsb	kstat_named_t	nfrlock;	/* VOP_FRLOCK */
1085a59a8brsb	kstat_named_t	nspace;		/* VOP_SPACE */
1095a59a8brsb	kstat_named_t	nrealvp;	/* VOP_REALVP */
1105a59a8brsb	kstat_named_t	ngetpage;	/* VOP_GETPAGE */
1115a59a8brsb	kstat_named_t	nputpage;	/* VOP_PUTPAGE */
1125a59a8brsb	kstat_named_t	nmap;		/* VOP_MAP */
1135a59a8brsb	kstat_named_t	naddmap;	/* VOP_ADDMAP */
1145a59a8brsb	kstat_named_t	ndelmap;	/* VOP_DELMAP */
1155a59a8brsb	kstat_named_t	npoll;		/* VOP_POLL */
1165a59a8brsb	kstat_named_t	ndump;		/* VOP_DUMP */
1175a59a8brsb	kstat_named_t	npathconf;	/* VOP_PATHCONF */
1185a59a8brsb	kstat_named_t	npageio;	/* VOP_PAGEIO */
1195a59a8brsb	kstat_named_t	ndumpctl;	/* VOP_DUMPCTL */
1205a59a8brsb	kstat_named_t	ndispose;	/* VOP_DISPOSE */
1215a59a8brsb	kstat_named_t	nsetsecattr;	/* VOP_SETSECATTR */
1225a59a8brsb	kstat_named_t	ngetsecattr;	/* VOP_GETSECATTR */
1235a59a8brsb	kstat_named_t	nshrlock;	/* VOP_SHRLOCK */
1245a59a8brsb	kstat_named_t	nvnevent;	/* VOP_VNEVENT */
125c242f9achunli zhang - Sun Microsystems - Irvine United States	kstat_named_t	nreqzcbuf;	/* VOP_REQZCBUF */
126c242f9achunli zhang - Sun Microsystems - Irvine United States	kstat_named_t	nretzcbuf;	/* VOP_RETZCBUF */
1275a59a8brsb} vopstats_t;
1285a59a8brsb
1295a59a8brsb/*
1307c478bdstevel@tonic-gate * The vnode is the focus of all file activity in UNIX.
1317c478bdstevel@tonic-gate * A vnode is allocated for each active file, each current
1327c478bdstevel@tonic-gate * directory, each mounted-on file, and the root.
1337c478bdstevel@tonic-gate *
1347c478bdstevel@tonic-gate * Each vnode is usually associated with a file-system-specific node (for
1357c478bdstevel@tonic-gate * UFS, this is the in-memory inode).  Generally, a vnode and an fs-node
1367c478bdstevel@tonic-gate * should be created and destroyed together as a pair.
1377c478bdstevel@tonic-gate *
1387c478bdstevel@tonic-gate * If a vnode is reused for a new file, it should be reinitialized by calling
1397c478bdstevel@tonic-gate * either vn_reinit() or vn_recycle().
1407c478bdstevel@tonic-gate *
1417c478bdstevel@tonic-gate * vn_reinit() resets the entire vnode as if it was returned by vn_alloc().
1427c478bdstevel@tonic-gate * The caller is responsible for setting up the entire vnode after calling
1437c478bdstevel@tonic-gate * vn_reinit().  This is important when using kmem caching where the vnode is
1447c478bdstevel@tonic-gate * allocated by a constructor, for instance.
1457c478bdstevel@tonic-gate *
1467c478bdstevel@tonic-gate * vn_recycle() is used when the file system keeps some state around in both
1477c478bdstevel@tonic-gate * the vnode and the associated FS-node.  In UFS, for example, the inode of
1487c478bdstevel@tonic-gate * a deleted file can be reused immediately.  The v_data, v_vfsp, v_op, etc.
1497c478bdstevel@tonic-gate * remains the same but certain fields related to the previous instance need
1507c478bdstevel@tonic-gate * to be reset.  In particular:
1517c478bdstevel@tonic-gate *	v_femhead
1527c478bdstevel@tonic-gate *	v_path
1537c478bdstevel@tonic-gate *	v_rdcnt, v_wrcnt
1547c478bdstevel@tonic-gate *	v_mmap_read, v_mmap_write
1557c478bdstevel@tonic-gate */
1567c478bdstevel@tonic-gate
/*
 * vnode types.  VNON means no type.  These values are unrelated to
 * values in on-disk inodes.
 */
typedef enum vtype {
	VNON	= 0,
	VREG	= 1,
	VDIR	= 2,
	VBLK	= 3,
	VCHR	= 4,
	VLNK	= 5,
	VFIFO	= 6,
	VDOOR	= 7,
	VPROC	= 8,
	VSOCK	= 9,
	VPORT	= 10,
	VBAD	= 11
} vtype_t;
1757c478bdstevel@tonic-gate
1767c478bdstevel@tonic-gate/*
1771b300dejwahlig * VSD - Vnode Specific Data
1781b300dejwahlig * Used to associate additional private data with a vnode.
1791b300dejwahlig */
1801b300dejwahligstruct vsd_node {
1811b300dejwahlig	list_node_t vs_nodes;		/* list of all VSD nodes */
1821b300dejwahlig	uint_t vs_nkeys;		/* entries in value array */
1831b300dejwahlig	void **vs_value;		/* array of value/key */
1841b300dejwahlig};
1851b300dejwahlig
1861b300dejwahlig/*
1877c478bdstevel@tonic-gate * Many of the fields in the vnode are read-only once they are initialized
1887c478bdstevel@tonic-gate * at vnode creation time.  Other fields are protected by locks.
1897c478bdstevel@tonic-gate *
1907c478bdstevel@tonic-gate * IMPORTANT: vnodes should be created ONLY by calls to vn_alloc().  They
1917c478bdstevel@tonic-gate * may not be embedded into the file-system specific node (inode).  The
1927c478bdstevel@tonic-gate * size of vnodes may change.
1937c478bdstevel@tonic-gate *
 * The v_lock protects:
 *   v_flag
 *   v_stream
 *   v_count
 *   v_shrlocks
 *   v_path
 *   v_xattrdir
 *
 * The v_vsd_lock protects:
 *   v_vsd
2027c478bdstevel@tonic-gate *
2037c478bdstevel@tonic-gate * A special lock (implemented by vn_vfswlock in vnode.c) protects:
2047c478bdstevel@tonic-gate *   v_vfsmountedhere
2057c478bdstevel@tonic-gate *
2067c478bdstevel@tonic-gate * The global flock_lock mutex (in flock.c) protects:
2077c478bdstevel@tonic-gate *   v_filocks
2087c478bdstevel@tonic-gate *
2097c478bdstevel@tonic-gate * IMPORTANT NOTE:
2107c478bdstevel@tonic-gate *
2117c478bdstevel@tonic-gate *   The following vnode fields are considered public and may safely be
2127c478bdstevel@tonic-gate *   accessed by file systems or other consumers:
2137c478bdstevel@tonic-gate *
2147c478bdstevel@tonic-gate *     v_lock
2157c478bdstevel@tonic-gate *     v_flag
2167c478bdstevel@tonic-gate *     v_count
2177c478bdstevel@tonic-gate *     v_data
2187c478bdstevel@tonic-gate *     v_vfsp
2197c478bdstevel@tonic-gate *     v_stream
2207c478bdstevel@tonic-gate *     v_type
2217c478bdstevel@tonic-gate *     v_rdev
2227c478bdstevel@tonic-gate *
2237c478bdstevel@tonic-gate * ALL OTHER FIELDS SHOULD BE ACCESSED ONLY BY THE OWNER OF THAT FIELD.
2247c478bdstevel@tonic-gate * In particular, file systems should not access other fields; they may
2257c478bdstevel@tonic-gate * change or even be removed.  The functionality which was once provided
2267c478bdstevel@tonic-gate * by these fields is available through vn_* functions.
227e2fc340Patrick Mooney *
228e2fc340Patrick Mooney * VNODE PATH THEORY:
229e2fc340Patrick Mooney * In each vnode, the v_path field holds a cached version of the canonical
230e2fc340Patrick Mooney * filesystem path which that node represents.  Because vnodes lack contextual
231e2fc340Patrick Mooney * information about their own name or position in the VFS hierarchy, this path
232e2fc340Patrick Mooney * must be calculated when the vnode is instantiated by operations such as
233e2fc340Patrick Mooney * fop_create, fop_lookup, or fop_mkdir.  During said operations, both the
234e2fc340Patrick Mooney * parent vnode (and its cached v_path) and future name are known, so the
235e2fc340Patrick Mooney * v_path of the resulting object can easily be set.
236e2fc340Patrick Mooney *
237e2fc340Patrick Mooney * The caching nature of v_path is complicated in the face of directory
238e2fc340Patrick Mooney * renames.  Filesystem drivers are responsible for calling vn_renamepath when
239e2fc340Patrick Mooney * a fop_rename operation succeeds.  While the v_path on the renamed vnode will
240e2fc340Patrick Mooney * be updated, existing children of the directory (direct, or at deeper levels)
241e2fc340Patrick Mooney * will now possess v_path caches which are stale.
242e2fc340Patrick Mooney *
243e2fc340Patrick Mooney * It is expensive (and for non-directories, impossible) to recalculate stale
244e2fc340Patrick Mooney * v_path entries during operations such as vnodetopath.  The best time during
245e2fc340Patrick Mooney * which to correct such wrongs is the same as when v_path is first
246e2fc340Patrick Mooney * initialized: during fop_create/fop_lookup/fop_mkdir/etc, where adequate
247e2fc340Patrick Mooney * context is available to generate the current path.
248e2fc340Patrick Mooney *
249e2fc340Patrick Mooney * In order to quickly detect stale v_path entries (without full lookup
250e2fc340Patrick Mooney * verification) to trigger a v_path update, the v_path_stamp field has been
251e2fc340Patrick Mooney * added to vnode_t.  As part of successful fop_create/fop_lookup/fop_mkdir
252e2fc340Patrick Mooney * operations, where the name and parent vnode are available, the following
253e2fc340Patrick Mooney * rules are used to determine updates to the child:
254e2fc340Patrick Mooney *
255e2fc340Patrick Mooney * 1. If the parent lacks a v_path, clear any existing v_path and v_path_stamp
256e2fc340Patrick Mooney *    on the child.  Until the parent v_path is refreshed to a valid state, the
257e2fc340Patrick Mooney *    child v_path must be considered invalid too.
258e2fc340Patrick Mooney *
259e2fc340Patrick Mooney * 2. If the child lacks a v_path (implying v_path_stamp == 0), it inherits the
260e2fc340Patrick Mooney *    v_path_stamp value from its parent and its v_path is updated.
261e2fc340Patrick Mooney *
262e2fc340Patrick Mooney * 3. If the child v_path_stamp is less than v_path_stamp in the parent, it is
263e2fc340Patrick Mooney *    an indication that the child v_path is stale.  The v_path is updated and
264e2fc340Patrick Mooney *    v_path_stamp in the child is set to the current hrtime().
265e2fc340Patrick Mooney *
 *    It does _not_ inherit the parent v_path_stamp in order to propagate
 *    the time of v_path invalidation through the directory structure.  This
268e2fc340Patrick Mooney *    prevents concurrent invalidations (operating with a now-incorrect v_path)
269e2fc340Patrick Mooney *    at deeper levels in the tree from persisting.
270e2fc340Patrick Mooney *
271e2fc340Patrick Mooney * 4. If the child v_path_stamp is greater or equal to the parent, no action
272e2fc340Patrick Mooney *    needs to be taken.
273e2fc340Patrick Mooney *
 * Note that fop_rename operations do not follow this ruleset.  They perform an
 * explicit update of v_path and v_path_stamp (setting it to the current time).
276e2fc340Patrick Mooney *
277e2fc340Patrick Mooney * With these constraints in place, v_path invalidations and updates should
278e2fc340Patrick Mooney * proceed in a timely manner as vnodes are accessed.  While there still are
279e2fc340Patrick Mooney * limited cases where vnodetopath operations will fail, the risk is minimized.
2807c478bdstevel@tonic-gate */
2817c478bdstevel@tonic-gate
2827c478bdstevel@tonic-gatestruct fem_head;	/* from fem.h */
2837c478bdstevel@tonic-gate
2847c478bdstevel@tonic-gatetypedef struct vnode {
2857c478bdstevel@tonic-gate	kmutex_t	v_lock;		/* protects vnode fields */
2867c478bdstevel@tonic-gate	uint_t		v_flag;		/* vnode flags (see below) */
2877c478bdstevel@tonic-gate	uint_t		v_count;	/* reference count */
2887c478bdstevel@tonic-gate	void		*v_data;	/* private data for fs */
2897c478bdstevel@tonic-gate	struct vfs	*v_vfsp;	/* ptr to containing VFS */
2907c478bdstevel@tonic-gate	struct stdata	*v_stream;	/* associated stream */
2917c478bdstevel@tonic-gate	enum vtype	v_type;		/* vnode type */
2927c478bdstevel@tonic-gate	dev_t		v_rdev;		/* device (VCHR, VBLK) */
2937c478bdstevel@tonic-gate
2947c478bdstevel@tonic-gate	/* PRIVATE FIELDS BELOW - DO NOT USE */
2957c478bdstevel@tonic-gate
2967c478bdstevel@tonic-gate	struct vfs	*v_vfsmountedhere; /* ptr to vfs mounted here */
2977c478bdstevel@tonic-gate	struct vnodeops	*v_op;		/* vnode operations */
2987c478bdstevel@tonic-gate	struct page	*v_pages;	/* vnode pages list */
2997c478bdstevel@tonic-gate	struct filock	*v_filocks;	/* ptr to filock list */
3007c478bdstevel@tonic-gate	struct shrlocklist *v_shrlocks;	/* ptr to shrlock list */
3017c478bdstevel@tonic-gate	krwlock_t	v_nbllock;	/* sync for NBMAND locks */
3027c478bdstevel@tonic-gate	kcondvar_t	v_cv;		/* synchronize locking */
3037c478bdstevel@tonic-gate	void		*v_locality;	/* hook for locality info */
3047c478bdstevel@tonic-gate	struct fem_head	*v_femhead;	/* fs monitoring */
3057c478bdstevel@tonic-gate	char		*v_path;	/* cached path */
306e2fc340Patrick Mooney	hrtime_t	v_path_stamp;	/* timestamp for cached path */
3077c478bdstevel@tonic-gate	uint_t		v_rdcnt;	/* open for read count  (VREG only) */
3087c478bdstevel@tonic-gate	uint_t		v_wrcnt;	/* open for write count (VREG only) */
3097c478bdstevel@tonic-gate	u_longlong_t	v_mmap_read;	/* mmap read count */
3107c478bdstevel@tonic-gate	u_longlong_t	v_mmap_write;	/* mmap write count */
3117c478bdstevel@tonic-gate	void		*v_mpssdata;	/* info for large page mappings */
312df2381bpraks	void		*v_fopdata;	/* list of file ops event watches */
313d216dffRobert Mastors	kmutex_t	v_vsd_lock;	/* protects v_vsd field */
3141b300dejwahlig	struct vsd_node *v_vsd;		/* vnode specific data */
315da6c28aamw	struct vnode	*v_xattrdir;	/* unnamed extended attr dir (GFS) */
316b5fca8ftomee	uint_t		v_count_dnlc;	/* dnlc reference count */
3177c478bdstevel@tonic-gate} vnode_t;
3187c478bdstevel@tonic-gate
/* True when the vnode's type is VCHR, VBLK or VFIFO. */
#define	IS_DEVVP(vp)	\
	((vp)->v_type == VCHR || (vp)->v_type == VBLK || (vp)->v_type == VFIFO)
3217c478bdstevel@tonic-gate
#define	VNODE_ALIGN	64
/* Count of low-order 0 bits in a vnode *, based on size and alignment. */
#if defined(_LP64)
#define	VNODE_ALIGN_LOG2	8
#else
#define	VNODE_ALIGN_LOG2	7
#endif
329cb15d5dPeter Rival
/*
 * vnode flags.
 */
#define	VROOT		0x01	/* root of its file system */
#define	VNOCACHE	0x02	/* don't keep cache pages on vnode */
#define	VNOMAP		0x04	/* file cannot be mapped/faulted */
#define	VDUP		0x08	/* file should be dup'ed rather than opened */
#define	VNOSWAP		0x10	/* file cannot be used as virtual swap device */
#define	VNOMOUNT	0x20	/* file cannot be covered by mount */
#define	VISSWAP		0x40	/* vnode is being used for swap */
#define	VSWAPLIKE	0x80	/* vnode acts like swap (but may not be) */

/* True when either VISSWAP or VSWAPLIKE is set in v_flag. */
#define	IS_SWAPVP(vp)	(((vp)->v_flag & (VISSWAP | VSWAPLIKE)) != 0)
3437c478bdstevel@tonic-gate
#ifdef _KERNEL
/*
 * Entry tying an rwstlock to a vnode or vfs pointer (ve_vpvfs).  The pad
 * member is sized so that the declared members add up to 64 bytes.
 * NOTE(review): presumably this keeps each entry on its own cache line --
 * confirm against the allocator in vnode.c.
 */
typedef struct vn_vfslocks_entry {
	rwstlock_t ve_lock;
	void *ve_vpvfs;
	struct vn_vfslocks_entry *ve_next;
	uint32_t ve_refcnt;
	char pad[64 - sizeof (rwstlock_t) - 2 * sizeof (void *) -
	    sizeof (uint32_t)];
} vn_vfslocks_entry_t;
#endif
3547c478bdstevel@tonic-gate
/*
 * The following two flags are used to lock the v_vfsmountedhere field
 */
#define	VVFSLOCK	0x100
#define	VVFSWAIT	0x200

/*
 * Used to serialize VM operations on a vnode
 */
#define	VVMLOCK		0x400

/*
 * Tell vn_open() not to fail a directory open for writing but
 * to go ahead and call VOP_OPEN() to let the filesystem check.
 */
#define	VDIROPEN	0x800

/*
 * Flag to let the VM system know that this file is most likely a binary
 * or shared library since it has been mmap()ed EXEC at some time.
 */
#define	VVMEXEC		0x1000
3777c478bdstevel@tonic-gate
#define	VPXFS		0x2000  /* clustering: global fs proxy vnode */

/* Non-zero (the VPXFS bit itself) when VPXFS is set in v_flag. */
#define	IS_PXFSVP(vp)	((vp)->v_flag & VPXFS)

#define	V_XATTRDIR	0x4000	/* attribute unnamed directory */

/* Non-zero (the V_XATTRDIR bit itself) when V_XATTRDIR is set in v_flag. */
#define	IS_XATTRDIR(vp)	((vp)->v_flag & V_XATTRDIR)

#define	V_LOCALITY	0x8000	/* whether locality aware */
3877c478bdstevel@tonic-gate
/*
 * Flag that indicates the VM should maintain the v_pages list with all modified
 * pages on one end and unmodified pages at the other. This makes finding dirty
 * pages to write back to disk much faster at the expense of taking a minor
 * fault on the first store instruction which touches a writable page.
 *
 * IS_VMODSORT() is true only when pvn_vmodsort_supported is non-zero and
 * the vnode has VMODSORT set.
 */
#define	VMODSORT	(0x10000)
#define	IS_VMODSORT(vp) \
	(pvn_vmodsort_supported != 0 && ((vp)->v_flag & VMODSORT) != 0)
3977c478bdstevel@tonic-gate
#define	VISSWAPFS	0x20000	/* vnode is being used for swapfs */

/*
 * The mdb memstat command assumes that IS_SWAPFSVP only uses the
 * vnode's v_flag field.  If this changes, cache the additional
 * fields in mdb; see vn_get in mdb/common/modules/genunix/memory.c
 */
#define	IS_SWAPFSVP(vp)	(((vp)->v_flag & VISSWAPFS) != 0)

#define	V_SYSATTR	0x40000	/* vnode is a GFS system attribute */
408da6c28aamw
/*
 * Indication that VOP_LOOKUP operations on this vnode may yield results from a
 * different VFS instance.  The main use of this is to suppress v_path
 * calculation logic when filesystems such as procfs emit results which defy
 * expectations about normal VFS behavior.
 */
#define	VTRAVERSE	0x80000
416e2fc340Patrick Mooney
417e2fc340Patrick Mooney/*
4187c478bdstevel@tonic-gate * Vnode attributes.  A bit-mask is supplied as part of the
4197c478bdstevel@tonic-gate * structure to indicate the attributes the caller wants to
4207c478bdstevel@tonic-gate * set (setattr) or extract (getattr).
4217c478bdstevel@tonic-gate */
4227c478bdstevel@tonic-gate
4237c478bdstevel@tonic-gate/*
4247c478bdstevel@tonic-gate * Note that va_nodeid and va_nblocks are 64bit data type.
4257c478bdstevel@tonic-gate * We support large files over NFSV3. With Solaris client and
4267c478bdstevel@tonic-gate * Server that generates 64bit ino's and sizes these fields
4277c478bdstevel@tonic-gate * will overflow if they are 32 bit sizes.
4287c478bdstevel@tonic-gate */
4297c478bdstevel@tonic-gate
4307c478bdstevel@tonic-gatetypedef struct vattr {
4317c478bdstevel@tonic-gate	uint_t		va_mask;	/* bit-mask of attributes */
4327c478bdstevel@tonic-gate	vtype_t		va_type;	/* vnode type (for create) */
4337c478bdstevel@tonic-gate	mode_t		va_mode;	/* file access mode */
4347c478bdstevel@tonic-gate	uid_t		va_uid;		/* owner user id */
4357c478bdstevel@tonic-gate	gid_t		va_gid;		/* owner group id */
4367c478bdstevel@tonic-gate	dev_t		va_fsid;	/* file system id (dev for now) */
4377c478bdstevel@tonic-gate	u_longlong_t	va_nodeid;	/* node id */
4387c478bdstevel@tonic-gate	nlink_t		va_nlink;	/* number of references to file */
4397c478bdstevel@tonic-gate	u_offset_t	va_size;	/* file size in bytes */
4407c478bdstevel@tonic-gate	timestruc_t	va_atime;	/* time of last access */
4417c478bdstevel@tonic-gate	timestruc_t	va_mtime;	/* time of last modification */
4427c478bdstevel@tonic-gate	timestruc_t	va_ctime;	/* time of last status change */
4437c478bdstevel@tonic-gate	dev_t		va_rdev;	/* device the file represents */
4447c478bdstevel@tonic-gate	uint_t		va_blksize;	/* fundamental block size */
4457c478bdstevel@tonic-gate	u_longlong_t	va_nblocks;	/* # of blocks allocated */
4467c478bdstevel@tonic-gate	uint_t		va_seq;		/* sequence number */
4477c478bdstevel@tonic-gate} vattr_t;
4487c478bdstevel@tonic-gate
449da6c28aamw#define	AV_SCANSTAMP_SZ	32		/* length of anti-virus scanstamp */
450da6c28aamw
451da6c28aamw/*
452da6c28aamw * Structure of all optional attributes.
453da6c28aamw */
454da6c28aamwtypedef struct xoptattr {
455da6c28aamw	timestruc_t	xoa_createtime;	/* Create time of file */
456da6c28aamw	uint8_t		xoa_archive;
457da6c28aamw	uint8_t		xoa_system;
458da6c28aamw	uint8_t		xoa_readonly;
459da6c28aamw	uint8_t		xoa_hidden;
460da6c28aamw	uint8_t		xoa_nounlink;
461da6c28aamw	uint8_t		xoa_immutable;
462da6c28aamw	uint8_t		xoa_appendonly;
463da6c28aamw	uint8_t		xoa_nodump;
464da6c28aamw	uint8_t		xoa_opaque;
465da6c28aamw	uint8_t		xoa_av_quarantined;
466da6c28aamw	uint8_t		xoa_av_modified;
467da6c28aamw	uint8_t		xoa_av_scanstamp[AV_SCANSTAMP_SZ];
4687a286c4Dai Ngo	uint8_t		xoa_reparse;
46999d5e17Tim Haley	uint64_t	xoa_generation;
470fd9ee8bjoyce mcintosh	uint8_t		xoa_offline;
471fd9ee8bjoyce mcintosh	uint8_t		xoa_sparse;
472f67950bNasf-Fan	uint8_t		xoa_projinherit;
473f67950bNasf-Fan	uint64_t	xoa_projid;
474da6c28aamw} xoptattr_t;
475da6c28aamw
476da6c28aamw/*
477da6c28aamw * The xvattr structure is really a variable length structure that
478da6c28aamw * is made up of:
479da6c28aamw * - The classic vattr_t (xva_vattr)
480da6c28aamw * - a 32 bit quantity (xva_mapsize) that specifies the size of the
481da6c28aamw *   attribute bitmaps in 32 bit words.
 * - A pointer to the returned attribute bitmap (needed because the
 *   previous element, the requested attribute bitmap, is variable length).
484da6c28aamw * - The requested attribute bitmap, which is an array of 32 bit words.
485da6c28aamw *   Callers use the XVA_SET_REQ() macro to set the bits corresponding to
486da6c28aamw *   the attributes that are being requested.
487da6c28aamw * - The returned attribute bitmap, which is an array of 32 bit words.
488da6c28aamw *   File systems that support optional attributes use the XVA_SET_RTN()
489da6c28aamw *   macro to set the bits corresponding to the attributes that are being
490da6c28aamw *   returned.
491da6c28aamw * - The xoptattr_t structure which contains the attribute values
492da6c28aamw *
493da6c28aamw * xva_mapsize determines how many words in the attribute bitmaps.
494da6c28aamw * Immediately following the attribute bitmaps is the xoptattr_t.
495da6c28aamw * xva_getxoptattr() is used to get the pointer to the xoptattr_t
496da6c28aamw * section.
497da6c28aamw */
498da6c28aamw
#define	XVA_MAPSIZE	3		/* Size of attr bitmaps */
#define	XVA_MAGIC	0x78766174	/* Magic # for verification */
501da6c28aamw
502da6c28aamw/*
503da6c28aamw * The xvattr structure is an extensible structure which permits optional
504da6c28aamw * attributes to be requested/returned.  File systems may or may not support
 * optional attributes.  They do so at their own discretion but if they do
 * support optional attributes, they must register the VFSFT_XVATTR feature
 * so that the optional attributes can be set/retrieved.
508da6c28aamw *
509da6c28aamw * The fields of the xvattr structure are:
510da6c28aamw *
511da6c28aamw * xva_vattr - The first element of an xvattr is a legacy vattr structure
512da6c28aamw * which includes the common attributes.  If AT_XVATTR is set in the va_mask
513da6c28aamw * then the entire structure is treated as an xvattr.  If AT_XVATTR is not
514da6c28aamw * set, then only the xva_vattr structure can be used.
515da6c28aamw *
516da6c28aamw * xva_magic - 0x78766174 (hex for "xvat"). Magic number for verification.
517da6c28aamw *
518da6c28aamw * xva_mapsize - Size of requested and returned attribute bitmaps.
519da6c28aamw *
520da6c28aamw * xva_rtnattrmapp - Pointer to xva_rtnattrmap[].  We need this since the
521da6c28aamw * size of the array before it, xva_reqattrmap[], could change which means
522da6c28aamw * the location of xva_rtnattrmap[] could change.  This will allow unbundled
523da6c28aamw * file systems to find the location of xva_rtnattrmap[] when the sizes change.
524da6c28aamw *
525da6c28aamw * xva_reqattrmap[] - Array of requested attributes.  Attributes are
526da6c28aamw * represented by a specific bit in a specific element of the attribute
527da6c28aamw * map array.  Callers set the bits corresponding to the attributes
528da6c28aamw * that the caller wants to get/set.
529da6c28aamw *
530da6c28aamw * xva_rtnattrmap[] - Array of attributes that the file system was able to
531da6c28aamw * process.  Not all file systems support all optional attributes.  This map
532da6c28aamw * informs the caller which attributes the underlying file system was able
 * to set/get.  (Same structure as the requested attributes array in terms
 * of each attribute corresponding to specific bits and array elements.)
535da6c28aamw *
536da6c28aamw * xva_xoptattrs - Structure containing values of optional attributes.
537da6c28aamw * These values are only valid if the corresponding bits in xva_reqattrmap
538da6c28aamw * are set and the underlying file system supports those attributes.
539da6c28aamw */
540da6c28aamwtypedef struct xvattr {
541da6c28aamw	vattr_t		xva_vattr;	/* Embedded vattr structure */
542da6c28aamw	uint32_t	xva_magic;	/* Magic Number */
543da6c28aamw	uint32_t	xva_mapsize;	/* Size of attr bitmap (32-bit words) */
544da6c28aamw	uint32_t	*xva_rtnattrmapp;	/* Ptr to xva_rtnattrmap[] */
545da6c28aamw	uint32_t	xva_reqattrmap[XVA_MAPSIZE];	/* Requested attrs */
546da6c28aamw	uint32_t	xva_rtnattrmap[XVA_MAPSIZE];	/* Returned attrs */
547da6c28aamw	xoptattr_t	xva_xoptattrs;	/* Optional attributes */
548da6c28aamw} xvattr_t;
549da6c28aamw
5507c478bdstevel@tonic-gate#ifdef _SYSCALL32
5517c478bdstevel@tonic-gate/*
5527c478bdstevel@tonic-gate * For bigtypes time_t changed to 64 bit on the 64-bit kernel.
5537c478bdstevel@tonic-gate * Define an old version for user/kernel interface
5547c478bdstevel@tonic-gate */
5557c478bdstevel@tonic-gate
5567c478bdstevel@tonic-gate#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
5577c478bdstevel@tonic-gate#pragma pack(4)
5587c478bdstevel@tonic-gate#endif
5597c478bdstevel@tonic-gate
5607c478bdstevel@tonic-gatetypedef struct vattr32 {
5617c478bdstevel@tonic-gate	uint32_t	va_mask;	/* bit-mask of attributes */
5627c478bdstevel@tonic-gate	vtype_t		va_type;	/* vnode type (for create) */
5637c478bdstevel@tonic-gate	mode32_t	va_mode;	/* file access mode */
5647c478bdstevel@tonic-gate	uid32_t		va_uid;		/* owner user id */
5657c478bdstevel@tonic-gate	gid32_t		va_gid;		/* owner group id */
5667c478bdstevel@tonic-gate	dev32_t		va_fsid;	/* file system id (dev for now) */
5677c478bdstevel@tonic-gate	u_longlong_t	va_nodeid;	/* node id */
5687c478bdstevel@tonic-gate	nlink_t		va_nlink;	/* number of references to file */
5697c478bdstevel@tonic-gate	u_offset_t	va_size;	/* file size in bytes */
5707c478bdstevel@tonic-gate	timestruc32_t	va_atime;	/* time of last access */
5717c478bdstevel@tonic-gate	timestruc32_t	va_mtime;	/* time of last modification */
5727c478bdstevel@tonic-gate	timestruc32_t	va_ctime;	/* time of last status change */
5737c478bdstevel@tonic-gate	dev32_t		va_rdev;	/* device the file represents */
5747c478bdstevel@tonic-gate	uint32_t	va_blksize;	/* fundamental block size */
5757c478bdstevel@tonic-gate	u_longlong_t	va_nblocks;	/* # of blocks allocated */
5767c478bdstevel@tonic-gate	uint32_t	va_seq;		/* sequence number */
5777c478bdstevel@tonic-gate} vattr32_t;
5787c478bdstevel@tonic-gate
5797c478bdstevel@tonic-gate#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
5807c478bdstevel@tonic-gate#pragma pack()
5817c478bdstevel@tonic-gate#endif
5827c478bdstevel@tonic-gate
5837c478bdstevel@tonic-gate#else  /* not _SYSCALL32 */
5847c478bdstevel@tonic-gate#define	vattr32		vattr
5857c478bdstevel@tonic-gatetypedef vattr_t		vattr32_t;
5867c478bdstevel@tonic-gate#endif /* _SYSCALL32 */
5877c478bdstevel@tonic-gate
/*
 * Attributes of interest to the caller of setattr or getattr.
 */
#define	AT_TYPE		0x00001
#define	AT_MODE		0x00002
#define	AT_UID		0x00004
#define	AT_GID		0x00008
#define	AT_FSID		0x00010
#define	AT_NODEID	0x00020
#define	AT_NLINK	0x00040
#define	AT_SIZE		0x00080
#define	AT_ATIME	0x00100
#define	AT_MTIME	0x00200
#define	AT_CTIME	0x00400
#define	AT_RDEV		0x00800
#define	AT_BLKSIZE	0x01000
#define	AT_NBLOCKS	0x02000
605da6c28aamw/*			0x04000 */	/* unused */
606