1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2018, Joyent, Inc.
25 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
26 * Copyright 2017 RackTop Systems.
27 */
28
29/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
30/*	  All Rights Reserved	*/
31
32/*
33 * University Copyright- Copyright (c) 1982, 1986, 1988
34 * The Regents of the University of California
35 * All Rights Reserved
36 *
37 * University Acknowledgment- Portions of this document are derived from
38 * software developed by the University of California, Berkeley, and its
39 * contributors.
40 */
41
42#ifndef _SYS_VNODE_H
43#define	_SYS_VNODE_H
44
45#include <sys/types.h>
46#include <sys/t_lock.h>
47#include <sys/rwstlock.h>
48#include <sys/time_impl.h>
49#include <sys/cred.h>
50#include <sys/uio.h>
51#include <sys/resource.h>
52#include <vm/seg_enum.h>
53#include <sys/kstat.h>
54#include <sys/kmem.h>
55#include <sys/list.h>
56#ifdef	_KERNEL
57#include <sys/buf.h>
58#include <sys/sdt.h>
59#endif	/* _KERNEL */
60
61#ifdef	__cplusplus
62extern "C" {
63#endif
64
65/*
66 * Statistics for all vnode operations.
67 * All operations record number of ops (since boot/mount/zero'ed).
68 * Certain I/O operations (read, write, readdir) also record number
69 * of bytes transferred.
70 * This appears in two places in the system: one is embedded in each
71 * vfs_t.  There is also an array of vopstats_t structures allocated
72 * on a per-fstype basis.
73 */
74
#define	VOPSTATS_STR	"vopstats_"	/* Initial string for vopstat kstats */

/*
 * One named kstat counter per vnode operation.  The read, write and readdir
 * operations are each followed by a companion *_bytes counter holding the
 * number of bytes transferred.
 */
typedef struct vopstats {
	kstat_named_t	nopen;		/* VOP_OPEN */
	kstat_named_t	nclose;		/* VOP_CLOSE */
	kstat_named_t	nread;		/* VOP_READ */
	kstat_named_t	read_bytes;	/* bytes transferred by VOP_READ */
	kstat_named_t	nwrite;		/* VOP_WRITE */
	kstat_named_t	write_bytes;	/* bytes transferred by VOP_WRITE */
	kstat_named_t	nioctl;		/* VOP_IOCTL */
	kstat_named_t	nsetfl;		/* VOP_SETFL */
	kstat_named_t	ngetattr;	/* VOP_GETATTR */
	kstat_named_t	nsetattr;	/* VOP_SETATTR */
	kstat_named_t	naccess;	/* VOP_ACCESS */
	kstat_named_t	nlookup;	/* VOP_LOOKUP */
	kstat_named_t	ncreate;	/* VOP_CREATE */
	kstat_named_t	nremove;	/* VOP_REMOVE */
	kstat_named_t	nlink;		/* VOP_LINK */
	kstat_named_t	nrename;	/* VOP_RENAME */
	kstat_named_t	nmkdir;		/* VOP_MKDIR */
	kstat_named_t	nrmdir;		/* VOP_RMDIR */
	kstat_named_t	nreaddir;	/* VOP_READDIR */
	kstat_named_t	readdir_bytes;	/* bytes transferred by VOP_READDIR */
	kstat_named_t	nsymlink;	/* VOP_SYMLINK */
	kstat_named_t	nreadlink;	/* VOP_READLINK */
	kstat_named_t	nfsync;		/* VOP_FSYNC */
	kstat_named_t	ninactive;	/* VOP_INACTIVE */
	kstat_named_t	nfid;		/* VOP_FID */
	kstat_named_t	nrwlock;	/* VOP_RWLOCK */
	kstat_named_t	nrwunlock;	/* VOP_RWUNLOCK */
	kstat_named_t	nseek;		/* VOP_SEEK */
	kstat_named_t	ncmp;		/* VOP_CMP */
	kstat_named_t	nfrlock;	/* VOP_FRLOCK */
	kstat_named_t	nspace;		/* VOP_SPACE */
	kstat_named_t	nrealvp;	/* VOP_REALVP */
	kstat_named_t	ngetpage;	/* VOP_GETPAGE */
	kstat_named_t	nputpage;	/* VOP_PUTPAGE */
	kstat_named_t	nmap;		/* VOP_MAP */
	kstat_named_t	naddmap;	/* VOP_ADDMAP */
	kstat_named_t	ndelmap;	/* VOP_DELMAP */
	kstat_named_t	npoll;		/* VOP_POLL */
	kstat_named_t	ndump;		/* VOP_DUMP */
	kstat_named_t	npathconf;	/* VOP_PATHCONF */
	kstat_named_t	npageio;	/* VOP_PAGEIO */
	kstat_named_t	ndumpctl;	/* VOP_DUMPCTL */
	kstat_named_t	ndispose;	/* VOP_DISPOSE */
	kstat_named_t	nsetsecattr;	/* VOP_SETSECATTR */
	kstat_named_t	ngetsecattr;	/* VOP_GETSECATTR */
	kstat_named_t	nshrlock;	/* VOP_SHRLOCK */
	kstat_named_t	nvnevent;	/* VOP_VNEVENT */
	kstat_named_t	nreqzcbuf;	/* VOP_REQZCBUF */
	kstat_named_t	nretzcbuf;	/* VOP_RETZCBUF */
} vopstats_t;
128
129/*
130 * The vnode is the focus of all file activity in UNIX.
131 * A vnode is allocated for each active file, each current
132 * directory, each mounted-on file, and the root.
133 *
134 * Each vnode is usually associated with a file-system-specific node (for
135 * UFS, this is the in-memory inode).  Generally, a vnode and an fs-node
136 * should be created and destroyed together as a pair.
137 *
138 * If a vnode is reused for a new file, it should be reinitialized by calling
139 * either vn_reinit() or vn_recycle().
140 *
141 * vn_reinit() resets the entire vnode as if it was returned by vn_alloc().
142 * The caller is responsible for setting up the entire vnode after calling
143 * vn_reinit().  This is important when using kmem caching where the vnode is
144 * allocated by a constructor, for instance.
145 *
146 * vn_recycle() is used when the file system keeps some state around in both
147 * the vnode and the associated FS-node.  In UFS, for example, the inode of
148 * a deleted file can be reused immediately.  The v_data, v_vfsp, v_op, etc.
149 * remains the same but certain fields related to the previous instance need
150 * to be reset.  In particular:
151 *	v_femhead
152 *	v_path
153 *	v_rdcnt, v_wrcnt
154 *	v_mmap_read, v_mmap_write
155 */
156
/*
 * vnode types.  VNON means no type.  These values are unrelated to
 * values in on-disk inodes.  Every enumerator is assigned its value
 * explicitly, running from VNON (0) through VBAD (11).
 */
typedef enum vtype {
	VNON	= 0,
	VREG	= 1,
	VDIR	= 2,
	VBLK	= 3,
	VCHR	= 4,
	VLNK	= 5,
	VFIFO	= 6,
	VDOOR	= 7,
	VPROC	= 8,
	VSOCK	= 9,
	VPORT	= 10,
	VBAD	= 11
} vtype_t;
175
/*
 * VSD - Vnode Specific Data
 * Used to associate additional private data with a vnode.
 *
 * A vnode refers to its vsd_node through the v_vsd member, which the vnode
 * structure documents as protected by v_vsd_lock.
 */
struct vsd_node {
	list_node_t vs_nodes;		/* list of all VSD nodes */
	uint_t vs_nkeys;		/* entries in value array */
	void **vs_value;		/* array of value/key */
};
185
186/*
187 * Many of the fields in the vnode are read-only once they are initialized
188 * at vnode creation time.  Other fields are protected by locks.
189 *
190 * IMPORTANT: vnodes should be created ONLY by calls to vn_alloc().  They
191 * may not be embedded into the file-system specific node (inode).  The
192 * size of vnodes may change.
193 *
194 * The v_lock protects:
195 *   v_flag
196 *   v_stream
197 *   v_count
198 *   v_shrlocks
199 *   v_path
200 *   v_vsd
201 *   v_xattrdir
202 *
203 * A special lock (implemented by vn_vfswlock in vnode.c) protects:
204 *   v_vfsmountedhere
205 *
206 * The global flock_lock mutex (in flock.c) protects:
207 *   v_filocks
208 *
209 * IMPORTANT NOTE:
210 *
211 *   The following vnode fields are considered public and may safely be
212 *   accessed by file systems or other consumers:
213 *
214 *     v_lock
215 *     v_flag
216 *     v_count
217 *     v_data
218 *     v_vfsp
219 *     v_stream
220 *     v_type
221 *     v_rdev
222 *
223 * ALL OTHER FIELDS SHOULD BE ACCESSED ONLY BY THE OWNER OF THAT FIELD.
224 * In particular, file systems should not access other fields; they may
225 * change or even be removed.  The functionality which was once provided
226 * by these fields is available through vn_* functions.
227 *
228 * VNODE PATH THEORY:
229 * In each vnode, the v_path field holds a cached version of the canonical
230 * filesystem path which that node represents.  Because vnodes lack contextual
231 * information about their own name or position in the VFS hierarchy, this path
232 * must be calculated when the vnode is instantiated by operations such as
233 * fop_create, fop_lookup, or fop_mkdir.  During said operations, both the
234 * parent vnode (and its cached v_path) and future name are known, so the
235 * v_path of the resulting object can easily be set.
236 *
237 * The caching nature of v_path is complicated in the face of directory
238 * renames.  Filesystem drivers are responsible for calling vn_renamepath when
239 * a fop_rename operation succeeds.  While the v_path on the renamed vnode will
240 * be updated, existing children of the directory (direct, or at deeper levels)
241 * will now possess v_path caches which are stale.
242 *
243 * It is expensive (and for non-directories, impossible) to recalculate stale
244 * v_path entries during operations such as vnodetopath.  The best time during
245 * which to correct such wrongs is the same as when v_path is first
246 * initialized: during fop_create/fop_lookup/fop_mkdir/etc, where adequate
247 * context is available to generate the current path.
248 *
249 * In order to quickly detect stale v_path entries (without full lookup
250 * verification) to trigger a v_path update, the v_path_stamp field has been
251 * added to vnode_t.  As part of successful fop_create/fop_lookup/fop_mkdir
252 * operations, where the name and parent vnode are available, the following
253 * rules are used to determine updates to the child:
254 *
255 * 1. If the parent lacks a v_path, clear any existing v_path and v_path_stamp
256 *    on the child.  Until the parent v_path is refreshed to a valid state, the
257 *    child v_path must be considered invalid too.
258 *
259 * 2. If the child lacks a v_path (implying v_path_stamp == 0), it inherits the
260 *    v_path_stamp value from its parent and its v_path is updated.
261 *
262 * 3. If the child v_path_stamp is less than v_path_stamp in the parent, it is
263 *    an indication that the child v_path is stale.  The v_path is updated and
264 *    v_path_stamp in the child is set to the current hrtime().
265 *
 *    It does _not_ inherit the parent v_path_stamp in order to propagate the
 *    time of v_path invalidation through the directory structure.  This
268 *    prevents concurrent invalidations (operating with a now-incorrect v_path)
269 *    at deeper levels in the tree from persisting.
270 *
271 * 4. If the child v_path_stamp is greater or equal to the parent, no action
272 *    needs to be taken.
273 *
 * Note that fop_rename operations do not follow this ruleset.  They perform an
 * explicit update of v_path and v_path_stamp (setting it to the current time).
276 *
277 * With these constraints in place, v_path invalidations and updates should
278 * proceed in a timely manner as vnodes are accessed.  While there still are
279 * limited cases where vnodetopath operations will fail, the risk is minimized.
280 */
281
struct fem_head;	/* from fem.h */

/*
 * The in-core vnode.  Members from v_lock through v_rdev are the public
 * fields; everything after the PRIVATE marker is reserved for the owner of
 * that field and may change or be removed.
 */
typedef struct vnode {
	kmutex_t	v_lock;		/* protects vnode fields */
	uint_t		v_flag;		/* vnode flags (see below) */
	uint_t		v_count;	/* reference count */
	void		*v_data;	/* private data for fs */
	struct vfs	*v_vfsp;	/* ptr to containing VFS */
	struct stdata	*v_stream;	/* associated stream */
	enum vtype	v_type;		/* vnode type */
	dev_t		v_rdev;		/* device (VCHR, VBLK) */

	/* PRIVATE FIELDS BELOW - DO NOT USE */

	struct vfs	*v_vfsmountedhere; /* ptr to vfs mounted here */
	struct vnodeops	*v_op;		/* vnode operations */
	struct page	*v_pages;	/* vnode pages list */
	struct filock	*v_filocks;	/* ptr to filock list */
	struct shrlocklist *v_shrlocks;	/* ptr to shrlock list */
	krwlock_t	v_nbllock;	/* sync for NBMAND locks */
	kcondvar_t	v_cv;		/* synchronize locking */
	void		*v_locality;	/* hook for locality info */
	struct fem_head	*v_femhead;	/* fs monitoring */
	char		*v_path;	/* cached path */
	hrtime_t	v_path_stamp;	/* timestamp for cached path */
	uint_t		v_rdcnt;	/* open for read count  (VREG only) */
	uint_t		v_wrcnt;	/* open for write count (VREG only) */
	u_longlong_t	v_mmap_read;	/* mmap read count */
	u_longlong_t	v_mmap_write;	/* mmap write count */
	void		*v_mpssdata;	/* info for large page mappings */
	void		*v_fopdata;	/* list of file ops event watches */
	kmutex_t	v_vsd_lock;	/* protects v_vsd field */
	struct vsd_node *v_vsd;		/* vnode specific data */
	struct vnode	*v_xattrdir;	/* unnamed extended attr dir (GFS) */
	uint_t		v_count_dnlc;	/* dnlc reference count */
} vnode_t;
318
/*
 * Non-zero (1) when the vnode's type is VCHR, VBLK or VFIFO, otherwise 0.
 * The argument may be expanded more than once.
 */
#define	IS_DEVVP(vp)					\
	(!((vp)->v_type != VCHR &&			\
	(vp)->v_type != VBLK &&				\
	(vp)->v_type != VFIFO))
321
#define	VNODE_ALIGN	64
/*
 * Number of low-order zero bits in a vnode pointer, derived from the vnode
 * size and alignment: 7 on ILP32 builds, 8 when _LP64 is defined.
 */
#if !defined(_LP64)
#define	VNODE_ALIGN_LOG2	7
#else
#define	VNODE_ALIGN_LOG2	8
#endif
329
/*
 * vnode flags.  These are bits stored in the v_flag member of the vnode.
 */
#define	VROOT		0x01	/* root of its file system */
#define	VNOCACHE	0x02	/* don't keep cache pages on vnode */
#define	VNOMAP		0x04	/* file cannot be mapped/faulted */
#define	VDUP		0x08	/* file should be dup'ed rather than opened */
#define	VNOSWAP		0x10	/* file cannot be used as virtual swap device */
#define	VNOMOUNT	0x20	/* file cannot be covered by mount */
#define	VISSWAP		0x40	/* vnode is being used for swap */
#define	VSWAPLIKE	0x80	/* vnode acts like swap (but may not be) */

/* Evaluates to 1 when either VISSWAP or VSWAPLIKE is set in v_flag, else 0. */
#define	IS_SWAPVP(vp)	(((vp)->v_flag & (VISSWAP | VSWAPLIKE)) != 0)
343
#ifdef _KERNEL
/*
 * Kernel-only entry holding an rwstlock_t, an opaque pointer (ve_vpvfs), a
 * link to the next entry and a 32-bit reference count.  The trailing pad is
 * sized as 64 bytes minus the sizes of the lock, two pointers and the count.
 * NOTE(review): presumably meant to round each entry up to 64 bytes; any
 * compiler-inserted alignment padding is not accounted for -- confirm.
 */
typedef struct vn_vfslocks_entry {
	rwstlock_t ve_lock;
	void *ve_vpvfs;
	struct vn_vfslocks_entry *ve_next;
	uint32_t ve_refcnt;
	char pad[64 - sizeof (rwstlock_t) - 2 * sizeof (void *) - \
	    sizeof (uint32_t)];
} vn_vfslocks_entry_t;
#endif
354
/*
 * The following two flags are used to lock the v_vfsmountedhere field
 */
#define	VVFSLOCK	0x100
#define	VVFSWAIT	0x200

/*
 * Used to serialize VM operations on a vnode
 */
#define	VVMLOCK		0x400

/*
 * Tell vn_open() not to fail a directory open for writing but
 * to go ahead and call VOP_OPEN() to let the filesystem check.
 */
#define	VDIROPEN	0x800

/*
 * Flag to let the VM system know that this file is most likely a binary
 * or shared library since it has been mmap()ed EXEC at some time.
 */
#define	VVMEXEC		0x1000

#define	VPXFS		0x2000  /* clustering: global fs proxy vnode */

/* Yields the raw VPXFS bit (non-zero when set), not a normalized 0/1. */
#define	IS_PXFSVP(vp)	((vp)->v_flag & VPXFS)

#define	V_XATTRDIR	0x4000	/* attribute unnamed directory */

/* Yields the raw V_XATTRDIR bit (non-zero when set), not a normalized 0/1. */
#define	IS_XATTRDIR(vp)	((vp)->v_flag & V_XATTRDIR)

#define	V_LOCALITY	0x8000	/* whether locality aware */

/*
 * Flag that indicates the VM should maintain the v_pages list with all modified
 * pages on one end and unmodified pages at the other. This makes finding dirty
 * pages to write back to disk much faster at the expense of taking a minor
 * fault on the first store instruction which touches a writable page.
 *
 * IS_VMODSORT() is true only when pvn_vmodsort_supported (declared elsewhere)
 * is non-zero and the vnode has VMODSORT set.
 */
#define	VMODSORT	(0x10000)
#define	IS_VMODSORT(vp) \
	(pvn_vmodsort_supported != 0 && ((vp)->v_flag  & VMODSORT) != 0)

#define	VISSWAPFS	0x20000	/* vnode is being used for swapfs */

/*
 * The mdb memstat command assumes that IS_SWAPFSVP only uses the
 * vnode's v_flag field.  If this changes, cache the additional
 * fields in mdb; see vn_get in mdb/common/modules/genunix/memory.c
 */
#define	IS_SWAPFSVP(vp)	(((vp)->v_flag & VISSWAPFS) != 0)

#define	V_SYSATTR	0x40000	/* vnode is a GFS system attribute */

/*
 * Indication that VOP_LOOKUP operations on this vnode may yield results from a
 * different VFS instance.  The main use of this is to suppress v_path
 * calculation logic when filesystems such as procfs emit results which defy
 * expectations about normal VFS behavior.
 */
#define	VTRAVERSE	0x80000
416
417/*
418 * Vnode attributes.  A bit-mask is supplied as part of the
419 * structure to indicate the attributes the caller wants to
420 * set (setattr) or extract (getattr).
421 */
422
423/*
424 * Note that va_nodeid and va_nblocks are 64bit data type.
425 * We support large files over NFSV3. With Solaris client and
426 * Server that generates 64bit ino's and sizes these fields
427 * will overflow if they are 32 bit sizes.
428 */
429
/*
 * Classic attribute structure.  va_mask selects which of the remaining
 * members the caller wants set or returned, using the AT_* bits defined
 * later in this file.
 */
typedef struct vattr {
	uint_t		va_mask;	/* bit-mask of attributes */
	vtype_t		va_type;	/* vnode type (for create) */
	mode_t		va_mode;	/* file access mode */
	uid_t		va_uid;		/* owner user id */
	gid_t		va_gid;		/* owner group id */
	dev_t		va_fsid;	/* file system id (dev for now) */
	u_longlong_t	va_nodeid;	/* node id */
	nlink_t		va_nlink;	/* number of references to file */
	u_offset_t	va_size;	/* file size in bytes */
	timestruc_t	va_atime;	/* time of last access */
	timestruc_t	va_mtime;	/* time of last modification */
	timestruc_t	va_ctime;	/* time of last status change */
	dev_t		va_rdev;	/* device the file represents */
	uint_t		va_blksize;	/* fundamental block size */
	u_longlong_t	va_nblocks;	/* # of blocks allocated */
	uint_t		va_seq;		/* sequence number */
} vattr_t;
448
#define	AV_SCANSTAMP_SZ	32		/* length of anti-virus scanstamp */

/*
 * Structure of all optional attributes.
 *
 * NOTE(review): the member names parallel the XAT0_* attribute bits defined
 * later in this file (e.g. xoa_archive / XAT0_ARCHIVE); presumably each
 * member holds the value for the like-named bit -- confirm against users.
 */
typedef struct xoptattr {
	timestruc_t	xoa_createtime;	/* Create time of file */
	uint8_t		xoa_archive;
	uint8_t		xoa_system;
	uint8_t		xoa_readonly;
	uint8_t		xoa_hidden;
	uint8_t		xoa_nounlink;
	uint8_t		xoa_immutable;
	uint8_t		xoa_appendonly;
	uint8_t		xoa_nodump;
	uint8_t		xoa_opaque;
	uint8_t		xoa_av_quarantined;
	uint8_t		xoa_av_modified;
	uint8_t		xoa_av_scanstamp[AV_SCANSTAMP_SZ]; /* 32-byte stamp */
	uint8_t		xoa_reparse;
	uint64_t	xoa_generation;
	uint8_t		xoa_offline;
	uint8_t		xoa_sparse;
	uint8_t		xoa_projinherit;
	uint64_t	xoa_projid;
} xoptattr_t;
475
476/*
477 * The xvattr structure is really a variable length structure that
478 * is made up of:
479 * - The classic vattr_t (xva_vattr)
480 * - a 32 bit quantity (xva_mapsize) that specifies the size of the
481 *   attribute bitmaps in 32 bit words.
 * - A pointer to the returned attribute bitmap (needed because the
 *   element preceding that bitmap, the requested attribute bitmap, is
 *   variable length).
484 * - The requested attribute bitmap, which is an array of 32 bit words.
485 *   Callers use the XVA_SET_REQ() macro to set the bits corresponding to
486 *   the attributes that are being requested.
487 * - The returned attribute bitmap, which is an array of 32 bit words.
488 *   File systems that support optional attributes use the XVA_SET_RTN()
489 *   macro to set the bits corresponding to the attributes that are being
490 *   returned.
491 * - The xoptattr_t structure which contains the attribute values
492 *
493 * xva_mapsize determines how many words in the attribute bitmaps.
494 * Immediately following the attribute bitmaps is the xoptattr_t.
495 * xva_getxoptattr() is used to get the pointer to the xoptattr_t
496 * section.
497 */
498
#define	XVA_MAPSIZE	3		/* Size of attr bitmaps (32-bit words) */
#define	XVA_MAGIC	0x78766174	/* Magic # for verification ("xvat") */
501
502/*
503 * The xvattr structure is an extensible structure which permits optional
504 * attributes to be requested/returned.  File systems may or may not support
505 * optional attributes.  They do so at their own discretion but if they do
506 * support optional attributes, they must register the VFSFT_XVATTR feature
 * so that the optional attributes can be set/retrieved.
508 *
509 * The fields of the xvattr structure are:
510 *
511 * xva_vattr - The first element of an xvattr is a legacy vattr structure
512 * which includes the common attributes.  If AT_XVATTR is set in the va_mask
513 * then the entire structure is treated as an xvattr.  If AT_XVATTR is not
514 * set, then only the xva_vattr structure can be used.
515 *
516 * xva_magic - 0x78766174 (hex for "xvat"). Magic number for verification.
517 *
518 * xva_mapsize - Size of requested and returned attribute bitmaps.
519 *
520 * xva_rtnattrmapp - Pointer to xva_rtnattrmap[].  We need this since the
521 * size of the array before it, xva_reqattrmap[], could change which means
522 * the location of xva_rtnattrmap[] could change.  This will allow unbundled
523 * file systems to find the location of xva_rtnattrmap[] when the sizes change.
524 *
525 * xva_reqattrmap[] - Array of requested attributes.  Attributes are
526 * represented by a specific bit in a specific element of the attribute
527 * map array.  Callers set the bits corresponding to the attributes
528 * that the caller wants to get/set.
529 *
530 * xva_rtnattrmap[] - Array of attributes that the file system was able to
531 * process.  Not all file systems support all optional attributes.  This map
532 * informs the caller which attributes the underlying file system was able
533 * to set/get.  (Same structure as the requested attributes array in terms
534 * of each attribute  corresponding to specific bits and array elements.)
535 *
536 * xva_xoptattrs - Structure containing values of optional attributes.
537 * These values are only valid if the corresponding bits in xva_reqattrmap
538 * are set and the underlying file system supports those attributes.
539 */
/*
 * Extensible attribute structure: a legacy vattr_t followed by the magic
 * number, the bitmap size, a pointer to the returned-attribute bitmap, the
 * requested and returned bitmaps (XVA_MAPSIZE words each) and the optional
 * attribute values.
 */
typedef struct xvattr {
	vattr_t		xva_vattr;	/* Embedded vattr structure */
	uint32_t	xva_magic;	/* Magic Number */
	uint32_t	xva_mapsize;	/* Size of attr bitmap (32-bit words) */
	uint32_t	*xva_rtnattrmapp;	/* Ptr to xva_rtnattrmap[] */
	uint32_t	xva_reqattrmap[XVA_MAPSIZE];	/* Requested attrs */
	uint32_t	xva_rtnattrmap[XVA_MAPSIZE];	/* Returned attrs */
	xoptattr_t	xva_xoptattrs;	/* Optional attributes */
} xvattr_t;
549
#ifdef _SYSCALL32
/*
 * For bigtypes time_t changed to 64 bit on the 64-bit kernel.
 * Define an old version for user/kernel interface
 */

/*
 * Pack to 4 bytes when the native long long alignment is 8 but the 32-bit
 * alignment is 4.
 * NOTE(review): presumably so the u_longlong_t members land at the offsets
 * a 32-bit caller uses -- confirm.
 */
#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
#pragma pack(4)
#endif

typedef struct vattr32 {
	uint32_t	va_mask;	/* bit-mask of attributes */
	vtype_t		va_type;	/* vnode type (for create) */
	mode32_t	va_mode;	/* file access mode */
	uid32_t		va_uid;		/* owner user id */
	gid32_t		va_gid;		/* owner group id */
	dev32_t		va_fsid;	/* file system id (dev for now) */
	u_longlong_t	va_nodeid;	/* node id */
	nlink_t		va_nlink;	/* number of references to file */
	u_offset_t	va_size;	/* file size in bytes */
	timestruc32_t	va_atime;	/* time of last access */
	timestruc32_t	va_mtime;	/* time of last modification */
	timestruc32_t	va_ctime;	/* time of last status change */
	dev32_t		va_rdev;	/* device the file represents */
	uint32_t	va_blksize;	/* fundamental block size */
	u_longlong_t	va_nblocks;	/* # of blocks allocated */
	uint32_t	va_seq;		/* sequence number */
} vattr32_t;

/* Restore the default packing changed above. */
#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
#pragma pack()
#endif

#else  /* not _SYSCALL32 */
/* Without _SYSCALL32 the 32-bit names are plain aliases of vattr. */
#define	vattr32		vattr
typedef vattr_t		vattr32_t;
#endif /* _SYSCALL32 */
587
588/*
589 * Attributes of interest to the caller of setattr or getattr.
590 */
/* Individual attribute selectors, one bit each. */
#define	AT_TYPE		0x00001
#define	AT_MODE		0x00002
#define	AT_UID		0x00004
#define	AT_GID		0x00008
#define	AT_FSID		0x00010
#define	AT_NODEID	0x00020
#define	AT_NLINK	0x00040
#define	AT_SIZE		0x00080
#define	AT_ATIME	0x00100
#define	AT_MTIME	0x00200
#define	AT_CTIME	0x00400
#define	AT_RDEV		0x00800
#define	AT_BLKSIZE	0x01000
#define	AT_NBLOCKS	0x02000
/*			0x04000 */	/* unused */
#define	AT_SEQ		0x08000
/*
 * AT_XVATTR signals that the xvattr_t attribute bitmap carries additional
 * bits to process; when it is clear the bitmap MUST be ignored.  It has to
 * be set and cleared explicitly: it is deliberately not part of AT_ALL.
 */
#define	AT_XVATTR	0x10000

/* The three timestamps. */
#define	AT_TIMES	(AT_ATIME|AT_MTIME|AT_CTIME)

/* The twelve low-order selectors, AT_TYPE through AT_RDEV. */
#define	AT_STAT		(AT_TYPE|AT_MODE|AT_UID|AT_GID|AT_FSID|AT_NODEID|\
			AT_NLINK|AT_SIZE|AT_TIMES|AT_RDEV)

/* Every selector defined above except AT_XVATTR. */
#define	AT_ALL		(AT_STAT|AT_BLKSIZE|AT_NBLOCKS|AT_SEQ)

#define	AT_NOSET	(AT_TYPE|AT_FSID|AT_NODEID|AT_NLINK|AT_RDEV|\
			AT_BLKSIZE|AT_NBLOCKS|AT_SEQ)
626
627/*
628 * Attribute bits used in the extensible attribute's (xva's) attribute
629 * bitmaps.  Note that the bitmaps are made up of a variable length number
630 * of 32-bit words.  The convention is to use XAT{n}_{attrname} where "n"
631 * is the element in the bitmap (starting at 1).  This convention is for
632 * the convenience of the maintainer to keep track of which element each
633 * attribute belongs to.
634 *
635 * NOTE THAT CONSUMERS MUST *NOT* USE THE XATn_* DEFINES DIRECTLY.  CONSUMERS
636 * MUST USE THE XAT_* DEFINES.
637 */
#define	XAT0_INDEX	0LL		/* Index into bitmap for XAT0 attrs */
#define	XAT0_CREATETIME	0x00000001	/* Create time of file */
#define	XAT0_ARCHIVE	0x00000002	/* Archive */
#define	XAT0_SYSTEM	0x00000004	/* System */
#define	XAT0_READONLY	0x00000008	/* Readonly */
#define	XAT0_HIDDEN	0x00000010	/* Hidden */
#define	XAT0_NOUNLINK	0x00000020	/* Nounlink */
#define	XAT0_IMMUTABLE	0x00000040	/* immutable */
#define	XAT0_APPENDONLY	0x00000080	/* appendonly */
#define	XAT0_NODUMP	0x00000100	/* nodump */
#define	XAT0_OPAQUE	0x00000200	/* opaque */
#define	XAT0_AV_QUARANTINED	0x00000400	/* anti-virus quarantine */
#define	XAT0_AV_MODIFIED	0x00000800	/* anti-virus modified */
#define	XAT0_AV_SCANSTAMP	0x00001000	/* anti-virus scanstamp */
#define	XAT0_REPARSE	0x00002000	/* FS reparse point */
#define	XAT0_GEN	0x00004000	/* object generation number */
#define	XAT0_OFFLINE	0x00008000	/* offline */
#define	XAT0_SPARSE	0x00010000	/* sparse */
#define	XAT0_PROJINHERIT	0x00020000	/* Create with parent projid */
#define	XAT0_PROJID	0x00040000	/* Project ID */

/*
 * Union of every XAT0_* attribute bit defined above (0x0007ffff).  The
 * generation bit is spelled XAT0_GEN, with a zero; an earlier spelling with
 * the letter 'O' named an undefined macro and broke any use of this mask.
 */
#define	XAT0_ALL_ATTRS	(XAT0_CREATETIME|XAT0_ARCHIVE|XAT0_SYSTEM| \
    XAT0_READONLY|XAT0_HIDDEN|XAT0_NOUNLINK|XAT0_IMMUTABLE|XAT0_APPENDONLY| \
    XAT0_NODUMP|XAT0_OPAQUE|XAT0_AV_QUARANTINED|XAT0_AV_MODIFIED| \
    XAT0_AV_SCANSTAMP|XAT0_REPARSE|XAT0_GEN|XAT0_OFFLINE|XAT0_SPARSE| \
    XAT0_PROJINHERIT|XAT0_PROJID)
664
/*
 * Support for XAT_* optional attributes.  A flat XAT_* value keeps the
 * bitmap word index above bit XVA_SHFT and the attribute bit in the low
 * 32 bits.
 */
#define	XVA_SHFT		32		/* Used to shift index */
#define	XVA_MASK		0xffffffff	/* Used to mask off 32 bits */

/*
 * Decoders for the XAT_* values: each masks its operand down to 32 bits
 * and then casts the result to uint32_t.
 */
#define	XVA_ATTRBIT(attr)	((uint32_t)(XVA_MASK & (attr)))
#define	XVA_INDEX(attr)		((uint32_t)(XVA_MASK & ((attr) >> XVA_SHFT)))
676
/*
 * The following defines present a "flat namespace" so that consumers don't
 * need to keep track of which element belongs to which bitmap entry.
 * Each value is the bitmap index shifted left by XVA_SHFT, OR-ed with the
 * attribute's bit within that bitmap word.
 *
 * NOTE THAT THESE MUST NEVER BE OR-ed TOGETHER
 * (NOTE(review): presumably because OR-ing values with different indexes
 * would merge the index fields and no longer decode to a single word.)
 */
#define	XAT_CREATETIME		((XAT0_INDEX << XVA_SHFT) | XAT0_CREATETIME)
#define	XAT_ARCHIVE		((XAT0_INDEX << XVA_SHFT) | XAT0_ARCHIVE)
#define	XAT_SYSTEM		((XAT0_INDEX << XVA_SHFT) | XAT0_SYSTEM)
#define	XAT_READONLY		((XAT0_INDEX << XVA_SHFT) | XAT0_READONLY)
#define	XAT_HIDDEN		((XAT0_INDEX << XVA_SHFT) | XAT0_HIDDEN)
#define	XAT_NOUNLINK		((XAT0_INDEX << XVA_SHFT) | XAT0_NOUNLINK)
#define	XAT_IMMUTABLE		((XAT0_INDEX << XVA_SHFT) | XAT0_IMMUTABLE)
#define	XAT_APPENDONLY		((XAT0_INDEX << XVA_SHFT) | XAT0_APPENDONLY)
#define	XAT_NODUMP		((XAT0_INDEX << XVA_SHFT) | XAT0_NODUMP)
#define	XAT_OPAQUE		((XAT0_INDEX << XVA_SHFT) | XAT0_OPAQUE)
#define	XAT_AV_QUARANTINED	((XAT0_INDEX << XVA_SHFT) | XAT0_AV_QUARANTINED)
#define	XAT_AV_MODIFIED		((XAT0_INDEX << XVA_SHFT) | XAT0_AV_MODIFIED)
#define	XAT_AV_SCANSTAMP	((XAT0_INDEX << XVA_SHFT) | XAT0_AV_SCANSTAMP)
#define	XAT_REPARSE		((XAT0_INDEX << XVA_SHFT) | XAT0_REPARSE)
#define	XAT_GEN			((XAT0_INDEX << XVA_SHFT) | XAT0_GEN)
#define	XAT_OFFLINE		((XAT0_INDEX << XVA_SHFT) | XAT0_OFFLINE)
#define	XAT_SPARSE		((XAT0_INDEX << XVA_SHFT) | XAT0_SPARSE)
#define	XAT_PROJINHERIT		((XAT0_INDEX << XVA_SHFT) | XAT0_PROJINHERIT)
#define	XAT_PROJID		((XAT0_INDEX << XVA_SHFT) | XAT0_PROJID)
702
/*
 * The returned attribute map array (xva_rtnattrmap[]) is located past the
 * requested attribute map array (xva_reqattrmap[]).  Its location changes
 * when the array sizes change.  We use a separate pointer in a known location
 * (xva_rtnattrmapp) to hold the location of xva_rtnattrmap[].  This is
 * set in xva_init().
 *
 * XVA_RTNATTRMAP() simply reads that pointer; it does not validate it.
 */
#define	XVA_RTNATTRMAP(xvap)	((xvap)->xva_rtnattrmapp)
711
/*
 * XVA_SET_REQ() sets an attribute bit in the proper element in the bitmap
 * of requested attributes (xva_reqattrmap[]).
 *
 * The expansion is wrapped in do { } while (0) so that it forms a single
 * statement.  Without the wrapper, an unbraced "if (c) XVA_SET_REQ(...);"
 * guarded only the first ASSERT and always performed the bitmap update.
 *
 * NOTE(review): the first ASSERT ORs va_mask with AT_XVATTR and is thus
 * always true; '&' was probably intended.  It is left as-is because
 * tightening it could trip callers that are not visible from this header.
 */
#define	XVA_SET_REQ(xvap, attr)						\
	do {								\
		ASSERT((xvap)->xva_vattr.va_mask | AT_XVATTR);		\
		ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
		(xvap)->xva_reqattrmap[XVA_INDEX(attr)] |=		\
		    XVA_ATTRBIT(attr);					\
	} while (0)
/*
 * XVA_CLR_REQ() clears an attribute bit in the proper element in the bitmap
 * of requested attributes (xva_reqattrmap[]).
 *
 * The expansion is wrapped in do { } while (0) so that it forms a single
 * statement.  Without the wrapper, an unbraced "if (c) XVA_CLR_REQ(...);"
 * guarded only the first ASSERT and always cleared the bit.
 *
 * NOTE(review): the first ASSERT ORs va_mask with AT_XVATTR and is thus
 * always true; '&' was probably intended.  It is left as-is because
 * tightening it could trip callers that are not visible from this header.
 */
#define	XVA_CLR_REQ(xvap, attr)						\
	do {								\
		ASSERT((xvap)->xva_vattr.va_mask | AT_XVATTR);		\
		ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
		(xvap)->xva_reqattrmap[XVA_INDEX(attr)] &=		\
		    ~XVA_ATTRBIT(attr);					\
	} while (0)
728
/*
 * XVA_SET_RTN() sets an attribute bit in the proper element in the bitmap
 * of returned attributes (xva_rtnattrmap[]), reached through
 * XVA_RTNATTRMAP().
 *
 * The expansion is wrapped in do { } while (0) so that it forms a single
 * statement.  Without the wrapper, an unbraced "if (c) XVA_SET_RTN(...);"
 * guarded only the first ASSERT and always performed the bitmap update.
 *
 * NOTE(review): the first ASSERT ORs va_mask with AT_XVATTR and is thus
 * always true; '&' was probably intended.  It is left as-is because
 * tightening it could trip callers that are not visible from this header.
 */
#define	XVA_SET_RTN(xvap, attr)						\
	do {								\
		ASSERT((xvap)->xva_vattr.va_mask | AT_XVATTR);		\
		ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
		(XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] |=		\
		    XVA_ATTRBIT(attr);					\
	} while (0)
737
/*
 * XVA_ISSET_REQ() checks the requested attribute bitmap (xva_reqattrmap[])
 * to see if the corresponding attribute bit is set.  If so, returns the
 * (non-zero) bit value; otherwise 0.  The bitmap is consulted only when the
 * magic number matches XVA_MAGIC and the attribute's index is below
 * xva_mapsize.
 *
 * NOTE(review): the first term ORs va_mask with AT_XVATTR, so it is always
 * true and does not actually test whether AT_XVATTR is set; '&' was
 * probably intended.  Changing it alters results for callers that leave
 * AT_XVATTR clear, so confirm against all users before tightening.
 */
#define	XVA_ISSET_REQ(xvap, attr)					\
	((((xvap)->xva_vattr.va_mask | AT_XVATTR) &&			\
		((xvap)->xva_magic == XVA_MAGIC) &&			\
		((xvap)->xva_mapsize > XVA_INDEX(attr))) ?		\
	((xvap)->xva_reqattrmap[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) :	0)
747
/*
 * XVA_ISSET_RTN() checks the returned attribute bitmap (xva_rtnattrmap[])
 * to see if the corresponding attribute bit is set.  If so, returns the
 * (non-zero) bit value; otherwise 0.  The bitmap is consulted only when the
 * magic number matches XVA_MAGIC and the attribute's index is below
 * xva_mapsize.
 *
 * NOTE(review): the first term ORs va_mask with AT_XVATTR, so it is always
 * true and does not actually test whether AT_XVATTR is set; '&' was
 * probably intended.  Changing it alters results for callers that leave
 * AT_XVATTR clear, so confirm against all users before tightening.
 */
#define	XVA_ISSET_RTN(xvap, attr)					\
	((((xvap)->xva_vattr.va_mask | AT_XVATTR) &&			\
		((xvap)->xva_magic == XVA_MAGIC) &&			\
		((xvap)->xva_mapsize > XVA_INDEX(attr))) ?		\
	((XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) : 0)
757
/*
 *  Modes.  Some values same as S_xxx entries from stat.h for convenience.
 *  All values in this group are octal.
 */
#define	VSUID		04000		/* set user id on execution */
#define	VSGID		02000		/* set group id on execution */
#define	VSVTX		01000		/* save swapped text even after use */

/*
 * Permissions.  Each is a single bit in the 0700 octal digit; shifting one
 * right by 3 yields the corresponding bit in the 0070 digit.
 */
#define	VREAD		00400
#define	VWRITE		00200
#define	VEXEC		00100

/* MODEMASK equals VSUID|VSGID|VSVTX|PERMMASK. */
#define	MODEMASK	07777		/* mode bits plus permission bits */
#define	PERMMASK	00777		/* permission bits */

/*
 * VOP_ACCESS flags
 */
#define	V_ACE_MASK	0x1	/* mask represents  NFSv4 ACE permissions */
#define	V_APPEND	0x2	/* want to do append only check */
780
/*
 * Check whether mandatory file locking is enabled.
 *
 * MANDMODE() is true when, of the two bits VSGID (02000) and VEXEC>>3
 * (00010), exactly VSGID is set in the mode.  MANDLOCK() additionally
 * requires the vnode to be of type VREG.
 */

#define	MANDMODE(mode)		(((mode) & (VSGID|(VEXEC>>3))) == VSGID)
#define	MANDLOCK(vp, mode)	((vp)->v_type == VREG && MANDMODE(mode))
787
788/*
789 * Flags for vnode operations.
790 */
/* rm or rmdir (remove) */
typedef enum rm {
	RMFILE,
	RMDIRECTORY
} rm_t;

/* follow symlinks (or not) */
typedef enum symfollow {
	NO_FOLLOW,
	FOLLOW
} symfollow_t;

/* (non)excl create */
typedef enum vcexcl {
	NONEXCL,
	EXCL
} vcexcl_t;

/* reason for create */
typedef enum create {
	CRCREAT,
	CRMKNOD,
	CRMKDIR
} create_t;
800
/*
 * Vnode Events - Used by VOP_VNEVENT
 *
 * The VE_PRE_RENAME_* events fire before the rename operation takes place.
 * They are primarily used for specialized applications, such as NFSv4
 * delegation, which need to know about a rename before it occurs.
 */
typedef enum vnevent {
	/* Query */
	VE_SUPPORT		= 0,
	/* Rename, with vnode as source */
	VE_RENAME_SRC		= 1,
	/* Rename, with vnode as target/destination */
	VE_RENAME_DEST		= 2,
	/* Remove of vnode's name */
	VE_REMOVE		= 3,
	/* Remove of directory vnode's name */
	VE_RMDIR		= 4,
	/* Create with vnode's name which exists */
	VE_CREATE		= 5,
	/* Link with vnode's name as source */
	VE_LINK			= 6,
	/* Rename with vnode as target dir */
	VE_RENAME_DEST_DIR	= 7,
	/* File or Filesystem got mounted over vnode */
	VE_MOUNTEDOVER		= 8,
	/* Truncate */
	VE_TRUNCATE		= 9,
	/* Pre-rename, with vnode as source */
	VE_PRE_RENAME_SRC	= 10,
	/* Pre-rename, with vnode as target/dest. */
	VE_PRE_RENAME_DEST	= 11,
	/* Pre-rename with vnode as target dir */
	VE_PRE_RENAME_DEST_DIR	= 12
} vnevent_t;
822
/*
 * Values for checking vnode open and map counts
 */
enum v_mode {
	V_READ,
	V_WRITE,
	V_RDORWR,
	V_RDANDWR
};

typedef enum v_mode v_mode_t;

#define	V_FALSE	0
#define	V_TRUE	1
832
/*
 * Structure used on VOP_GETSECATTR and VOP_SETSECATTR operations.
 *
 * vsa_mask holds VSA_* bits (defined below).  The structure carries two
 * count/pointer pairs (vsa_aclcnt/vsa_aclentp and
 * vsa_dfaclcnt/vsa_dfaclentp) plus the ACE-specific fields vsa_aclentsz and
 * vsa_aclflags.
 */

typedef struct vsecattr {
	uint_t		vsa_mask;	/* See below */
	int		vsa_aclcnt;	/* ACL entry count */
	void		*vsa_aclentp;	/* pointer to ACL entries */
	int		vsa_dfaclcnt;	/* default ACL entry count */
	void		*vsa_dfaclentp;	/* pointer to default ACL entries */
	size_t		vsa_aclentsz;	/* ACE size in bytes of vsa_aclentp */
	uint_t		vsa_aclflags;	/* ACE ACL flags */
} vsecattr_t;
846
/* Bit values for the vsa_mask field of vsecattr_t */
#define	VSA_ACL			(1 << 0)
#define	VSA_ACLCNT		(1 << 1)
#define	VSA_DFACL		(1 << 2)
#define	VSA_DFACLCNT		(1 << 3)
#define	VSA_ACE			(1 << 4)
#define	VSA_ACECNT		(1 << 5)
#define	VSA_ACE_ALLTYPES	(1 << 6)
#define	VSA_ACE_ACLFLAGS	(1 << 7)	/* get/set ACE ACL flags */
856
/*
 * Structure used by various vnode operations to determine
 * the context (pid, host, identity) of a caller.
 *
 * The cc_caller_id is used to identify one or more callers who invoke
 * operations, possibly on behalf of others.  For example, the NFS
 * server could have its own cc_caller_id which can be detected by
 * vnode/vfs operations or (FEM) monitors on those operations.  New
 * caller IDs are generated by fs_new_caller_id().
 */
typedef struct caller_context {
	pid_t		cc_pid;		/* Process ID of the caller */
	int		cc_sysid;	/* System ID, used for remote calls */
	u_longlong_t	cc_caller_id;	/* Identifier for (set of) caller(s) */
	ulong_t		cc_flags;	/* CC_* flags, defined below */
} caller_context_t;
873
/*
 * Flags for caller context.
 *
 * CC_DONTBLOCK is set by the caller when it does not want to block inside
 * of a FEM monitor.  In that case the monitor will set CC_WOULDBLOCK and
 * return EAGAIN if the operation would have blocked.
 */
#define	CC_WOULDBLOCK	(1 << 0)
#define	CC_DONTBLOCK	(1 << 1)
881
/*
 * Forward declarations of structure tags that appear in the function
 * prototypes in this header; the structures themselves are defined elsewhere.
 */
struct as;
struct fid;
struct flk_callback;
struct flock64;
struct page;
struct pathname;
struct pollhead;
struct seg;
struct shrlock;
struct taskq;
895
896#ifdef	_KERNEL
897
/*
 * VNODE_OPS defines all the vnode operations.  It is used to define
 * the vnodeops structure (below) and the fs_func_p union (vfs_opreg.h).
 *
 * The macro expands to a sequence of function-pointer member declarations,
 * one per operation.  The last member deliberately has no trailing ";" so
 * that the user of the macro supplies it (see "VNODE_OPS;" in struct
 * vnodeops).  Every operation takes a caller_context_t pointer, and all of
 * them return int except vop_inactive, vop_rwunlock and vop_dispose, which
 * return void.
 */
#define	VNODE_OPS							\
	int	(*vop_open)(vnode_t **, int, cred_t *,			\
				caller_context_t *);			\
	int	(*vop_close)(vnode_t *, int, int, offset_t, cred_t *,	\
				caller_context_t *);			\
	int	(*vop_read)(vnode_t *, uio_t *, int, cred_t *,		\
				caller_context_t *);			\
	int	(*vop_write)(vnode_t *, uio_t *, int, cred_t *,		\
				caller_context_t *);			\
	int	(*vop_ioctl)(vnode_t *, int, intptr_t, int, cred_t *,	\
				int *, caller_context_t *);		\
	int	(*vop_setfl)(vnode_t *, int, int, cred_t *,		\
				caller_context_t *);			\
	int	(*vop_getattr)(vnode_t *, vattr_t *, int, cred_t *,	\
				caller_context_t *);			\
	int	(*vop_setattr)(vnode_t *, vattr_t *, int, cred_t *,	\
				caller_context_t *);			\
	int	(*vop_access)(vnode_t *, int, int, cred_t *,		\
				caller_context_t *);			\
	int	(*vop_lookup)(vnode_t *, char *, vnode_t **,		\
				struct pathname *,			\
				int, vnode_t *, cred_t *,		\
				caller_context_t *, int *,		\
				struct pathname *);			\
	int	(*vop_create)(vnode_t *, char *, vattr_t *, vcexcl_t,	\
				int, vnode_t **, cred_t *, int,		\
				caller_context_t *, vsecattr_t *);	\
	int	(*vop_remove)(vnode_t *, char *, cred_t *,		\
				caller_context_t *, int);		\
	int	(*vop_link)(vnode_t *, vnode_t *, char *, cred_t *,	\
				caller_context_t *, int);		\
	int	(*vop_rename)(vnode_t *, char *, vnode_t *, char *,	\
				cred_t *, caller_context_t *, int);	\
	int	(*vop_mkdir)(vnode_t *, char *, vattr_t *, vnode_t **,	\
				cred_t *, caller_context_t *, int,	\
				vsecattr_t *);				\
	int	(*vop_rmdir)(vnode_t *, char *, vnode_t *, cred_t *,	\
				caller_context_t *, int);		\
	int	(*vop_readdir)(vnode_t *, uio_t *, cred_t *, int *,	\
				caller_context_t *, int);		\
	int	(*vop_symlink)(vnode_t *, char *, vattr_t *, char *,	\
				cred_t *, caller_context_t *, int);	\
	int	(*vop_readlink)(vnode_t *, uio_t *, cred_t *,		\
				caller_context_t *);			\
	int	(*vop_fsync)(vnode_t *, int, cred_t *,			\
				caller_context_t *);			\
	void	(*vop_inactive)(vnode_t *, cred_t *,			\
				caller_context_t *);			\
	int	(*vop_fid)(vnode_t *, struct fid *,			\
				caller_context_t *);			\
	int	(*vop_rwlock)(vnode_t *, int, caller_context_t *);	\
	void	(*vop_rwunlock)(vnode_t *, int, caller_context_t *);	\
	int	(*vop_seek)(vnode_t *, offset_t, offset_t *,		\
				caller_context_t *);			\
	int	(*vop_cmp)(vnode_t *, vnode_t *, caller_context_t *);	\
	int	(*vop_frlock)(vnode_t *, int, struct flock64 *,		\
				int, offset_t,				\
				struct flk_callback *, cred_t *,	\
				caller_context_t *);			\
	int	(*vop_space)(vnode_t *, int, struct flock64 *,		\
				int, offset_t,				\
				cred_t *, caller_context_t *);		\
	int	(*vop_realvp)(vnode_t *, vnode_t **,			\
				caller_context_t *);			\
	int	(*vop_getpage)(vnode_t *, offset_t, size_t, uint_t *,	\
				struct page **, size_t, struct seg *,	\
				caddr_t, enum seg_rw, cred_t *,		\
				caller_context_t *);			\
	int	(*vop_putpage)(vnode_t *, offset_t, size_t,		\
				int, cred_t *, caller_context_t *);	\
	int	(*vop_map)(vnode_t *, offset_t, struct as *,		\
				caddr_t *, size_t,			\
				uchar_t, uchar_t, uint_t, cred_t *,	\
				caller_context_t *);			\
	int	(*vop_addmap)(vnode_t *, offset_t, struct as *,		\
				caddr_t, size_t,			\
				uchar_t, uchar_t, uint_t, cred_t *,	\
				caller_context_t *);			\
	int	(*vop_delmap)(vnode_t *, offset_t, struct as *,		\
				caddr_t, size_t,			\
				uint_t, uint_t, uint_t, cred_t *,	\
				caller_context_t *);			\
	int	(*vop_poll)(vnode_t *, short, int, short *,		\
				struct pollhead **,			\
				caller_context_t *);			\
	int	(*vop_dump)(vnode_t *, caddr_t, offset_t, offset_t,	\
				caller_context_t *);			\
	int	(*vop_pathconf)(vnode_t *, int, ulong_t *, cred_t *,	\
				caller_context_t *);			\
	int	(*vop_pageio)(vnode_t *, struct page *,			\
				u_offset_t, size_t, int, cred_t *,	\
				caller_context_t *);			\
	int	(*vop_dumpctl)(vnode_t *, int, offset_t *,		\
				caller_context_t *);			\
	void	(*vop_dispose)(vnode_t *, struct page *,		\
				int, int, cred_t *,			\
				caller_context_t *);			\
	int	(*vop_setsecattr)(vnode_t *, vsecattr_t *,		\
				int, cred_t *, caller_context_t *);	\
	int	(*vop_getsecattr)(vnode_t *, vsecattr_t *,		\
				int, cred_t *, caller_context_t *);	\
	int	(*vop_shrlock)(vnode_t *, int, struct shrlock *,	\
				int, cred_t *, caller_context_t *);	\
	int	(*vop_vnevent)(vnode_t *, vnevent_t, vnode_t *,		\
				char *, caller_context_t *);		\
	int	(*vop_reqzcbuf)(vnode_t *, enum uio_rw, xuio_t *,	\
				cred_t *, caller_context_t *);		\
	int	(*vop_retzcbuf)(vnode_t *, xuio_t *, cred_t *,		\
				caller_context_t *)
	/* NB: No ";" */
1012
/*
 * Operations on vnodes.  Note: File systems must never operate directly
 * on a 'vnodeops' structure -- it WILL change in future releases!  They
 * must use vn_make_ops() to create the structure.
 *
 * The structure consists of a name string followed by one function pointer
 * per operation, as listed in VNODE_OPS.
 */
typedef struct vnodeops {
	const char *vnop_name;
	VNODE_OPS;	/* Signatures of all vnode operations (vops) */
} vnodeops_t;
1022
/*
 * The empty parameter list leaves the arguments unspecified, so this one
 * pointer type can stand in for operations with differing signatures.
 */
typedef int (*fs_generic_func_p) ();	/* Generic vop/vfsop/femop/fsemop ptr */
1024
1025extern int	fop_open(vnode_t **, int, cred_t *, caller_context_t *);
1026extern int	fop_close(vnode_t *, int, int, offset_t, cred_t *,
1027				caller_context_t *);
1028extern int	fop_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
1029extern int	fop_write(vnode_t *, uio_t *, int, cred_t *,
1030				caller_context_t *);
1031extern int	fop_ioctl(vnode_t *, int, intptr_t, int, cred_t *, int *,
1032				caller_context_t *);
1033extern int	fop_setfl(vnode_t *, int, int, cred_t *, caller_context_t *);
1034extern int	fop_getattr(vnode_t *, vattr_t *, int, cred_t *,
1035				caller_context_t *);
1036extern int	fop_setattr(vnode_t *, vattr_t *, int, cred_t *,
1037				caller_context_t *);
1038extern int	fop_access(vnode_t *, int, int, cred_t *, caller_context_t *);
1039extern int	fop_lookup(vnode_t *, char *, vnode_t **, struct pathname *,
1040				int, vnode_t *, cred_t *, caller_context_t *,
1041				int *, struct pathname *);
1042extern int	fop_create(vnode_t *, char *, vattr_t *, vcexcl_t, int,
1043				vnode_t **, cred_t *, int, caller_context_t *,
1044				vsecattr_t *);
1045extern int	fop_remove(vnode_t *vp, char *, cred_t *, caller_context_t *,
1046				int);
1047extern int	fop_link(vnode_t *, vnode_t *, char *, cred_t *,
1048				caller_context_t *, int);
1049extern int	fop_rename(vnode_t *, char *, vnode_t *, char *, cred_t *,
1050				caller_context_t *, int);
1051extern int	fop_mkdir(vnode_t *, char *, vattr_t *, vnode_t **, cred_t *,
1052				caller_context_t *, int, vsecattr_t *);
1053extern int	fop_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
1054				caller_context_t *, int);
1055extern int	fop_readdir(vnode_t *, uio_t *, cred_t *, int *,
1056				caller_context_t *, int);
1057extern int	fop_symlink(vnode_t *, char *, vattr_t *, char *, cred_t *,
1058				caller_context_t *, int);
1059extern int	fop_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
1060extern int	fop_fsync(vnode_t *, int, cred_t *, caller_context_t *);
1061extern void	fop_inactive(vnode_t *, cred_t *, caller_context_t *);
1062extern int	fop_fid(vnode_t *, struct fid *, caller_context_t *);
1063extern int	fop_rwlock(vnode_t *, int, caller_context_t *);
1064extern void	fop_rwunlock(vnode_t *, int, caller_context_t *);
1065extern int	fop_seek(vnode_t *, offset_t, offset_t *, caller_context_t *);
1066extern int	fop_cmp(vnode_t *, vnode_t *, caller_context_t *);
1067extern int	fop_frlock(vnode_t *, int, struct flock64 *, int, offset_t,
1068				struct flk_callback *, cred_t *,
1069				caller_context_t *);
1070extern int	fop_space(vnode_t *, int, struct flock64 *, int, offset_t,
1071				cred_t *, caller_context_t *);
1072extern int	fop_realvp(vnode_t *, vnode_t **, caller_context_t *);
1073extern int	fop_getpage(vnode_t *, offset_t, size_t, uint_t *,
1074				struct page **, size_t, struct seg *,
1075				caddr_t, enum seg_rw, cred_t *,
1076				caller_context_t *);
1077extern int	fop_putpage(vnode_t *, offset_t, size_t, int, cred_t *,
1078				caller_context_t *);
1079extern int	fop_map(vnode_t *, offset_t, struct as *, caddr_t *, size_t,
1080				uchar_t, uchar_t, uint_t, cred_t *cr,
1081				caller_context_t *);
1082extern int	fop_addmap(vnode_t *, offset_t, struct as *, caddr_t, size_t,
1083				uchar_t, uchar_t, uint_t, cred_t *,
1084				caller_context_t *);
1085extern int	fop_delmap(vnode_t *, offset_t, struct as *, caddr_t, size_t,
1086				uint_t, uint_t, uint_t, cred_t *,
1087				caller_context_t *);
1088extern int	fop_poll(vnode_t *, short, int, short *, struct pollhead **,
1089				caller_context_t *);
1090extern int	fop_dump(vnode_t *, caddr_t, offset_t, offset_t,
1091    caller_context_t *);
1092extern int	fop_pathconf(vnode_t *, int, ulong_t *, cred_t *,
1093				caller_context_t *);
1094extern int	fop_pageio(vnode_t *, struct page *, u_offset_t, size_t, int,
1095				cred_t *, caller_context_t *);
1096extern int	fop_dumpctl(vnode_t *, int, offset_t *, caller_context_t *);
1097extern void	fop_dispose(vnode_t *, struct page *, int, int, cred_t *,
1098				caller_context_t *);
1099extern int	fop_setsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
1100				caller_context_t *);
1101extern int	fop_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
1102				caller_context_t *);
1103extern int	fop_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *,
1104				caller_context_t *);
1105extern int	fop_vnevent(vnode_t *, vnevent_t, vnode_t *, char *,
1106				caller_context_t *);
1107extern int	fop_reqzcbuf(vnode_t *, enum uio_rw, xuio_t *, cred_t *,
1108				caller_context_t *);
1109extern int	fop_retzcbuf(vnode_t *, xuio_t *, cred_t *, caller_context_t *);
1110
1111#endif	/* _KERNEL */
1112
/*
 * VOP_*() wrappers.  Each macro forwards its arguments, unchanged and in the
 * same order, to the fop_*() function of the same name.
 */
#define	VOP_OPEN(vpp, mode, cr, ct) \
	fop_open((vpp), (mode), (cr), (ct))
#define	VOP_CLOSE(vp, f, c, o, cr, ct) \
	fop_close((vp), (f), (c), (o), (cr), (ct))
#define	VOP_READ(vp, uiop, iof, cr, ct) \
	fop_read((vp), (uiop), (iof), (cr), (ct))
#define	VOP_WRITE(vp, uiop, iof, cr, ct) \
	fop_write((vp), (uiop), (iof), (cr), (ct))
#define	VOP_IOCTL(vp, cmd, a, f, cr, rvp, ct) \
	fop_ioctl((vp), (cmd), (a), (f), (cr), (rvp), (ct))
#define	VOP_SETFL(vp, f, a, cr, ct) \
	fop_setfl((vp), (f), (a), (cr), (ct))
#define	VOP_GETATTR(vp, vap, f, cr, ct) \
	fop_getattr((vp), (vap), (f), (cr), (ct))
#define	VOP_SETATTR(vp, vap, f, cr, ct) \
	fop_setattr((vp), (vap), (f), (cr), (ct))
#define	VOP_ACCESS(vp, mode, f, cr, ct) \
	fop_access((vp), (mode), (f), (cr), (ct))
#define	VOP_LOOKUP(vp, cp, vpp, pnp, f, rdir, cr, ct, defp, rpnp) \
	fop_lookup((vp), (cp), (vpp), (pnp), (f), (rdir), (cr), (ct), \
	    (defp), (rpnp))
#define	VOP_CREATE(dvp, p, vap, ex, mode, vpp, cr, flag, ct, vsap) \
	fop_create((dvp), (p), (vap), (ex), (mode), (vpp), (cr), (flag), \
	    (ct), (vsap))
#define	VOP_REMOVE(dvp, p, cr, ct, f) \
	fop_remove((dvp), (p), (cr), (ct), (f))
#define	VOP_LINK(tdvp, fvp, p, cr, ct, f) \
	fop_link((tdvp), (fvp), (p), (cr), (ct), (f))
#define	VOP_RENAME(fvp, fnm, tdvp, tnm, cr, ct, f) \
	fop_rename((fvp), (fnm), (tdvp), (tnm), (cr), (ct), (f))
#define	VOP_MKDIR(dp, p, vap, vpp, cr, ct, f, vsap) \
	fop_mkdir((dp), (p), (vap), (vpp), (cr), (ct), (f), (vsap))
#define	VOP_RMDIR(dp, p, cdir, cr, ct, f) \
	fop_rmdir((dp), (p), (cdir), (cr), (ct), (f))
#define	VOP_READDIR(vp, uiop, cr, eofp, ct, f) \
	fop_readdir((vp), (uiop), (cr), (eofp), (ct), (f))
#define	VOP_SYMLINK(dvp, lnm, vap, tnm, cr, ct, f) \
	fop_symlink((dvp), (lnm), (vap), (tnm), (cr), (ct), (f))
#define	VOP_READLINK(vp, uiop, cr, ct) \
	fop_readlink((vp), (uiop), (cr), (ct))
#define	VOP_FSYNC(vp, syncflag, cr, ct) \
	fop_fsync((vp), (syncflag), (cr), (ct))
#define	VOP_INACTIVE(vp, cr, ct) \
	fop_inactive((vp), (cr), (ct))
#define	VOP_FID(vp, fidp, ct) \
	fop_fid((vp), (fidp), (ct))
#define	VOP_RWLOCK(vp, w, ct) \
	fop_rwlock((vp), (w), (ct))
#define	VOP_RWUNLOCK(vp, w, ct) \
	fop_rwunlock((vp), (w), (ct))
#define	VOP_SEEK(vp, ooff, noffp, ct) \
	fop_seek((vp), (ooff), (noffp), (ct))
#define	VOP_CMP(vp1, vp2, ct) \
	fop_cmp((vp1), (vp2), (ct))
#define	VOP_FRLOCK(vp, cmd, a, f, o, cb, cr, ct) \
	fop_frlock((vp), (cmd), (a), (f), (o), (cb), (cr), (ct))
#define	VOP_SPACE(vp, cmd, a, f, o, cr, ct) \
	fop_space((vp), (cmd), (a), (f), (o), (cr), (ct))
#define	VOP_REALVP(vp1, vp2, ct) \
	fop_realvp((vp1), (vp2), (ct))
#define	VOP_GETPAGE(vp, of, sz, pr, pl, ps, sg, a, rw, cr, ct) \
	fop_getpage((vp), (of), (sz), (pr), (pl), (ps), (sg), (a), (rw), \
	    (cr), (ct))
#define	VOP_PUTPAGE(vp, of, sz, fl, cr, ct) \
	fop_putpage((vp), (of), (sz), (fl), (cr), (ct))
#define	VOP_MAP(vp, of, as, a, sz, p, mp, fl, cr, ct) \
	fop_map((vp), (of), (as), (a), (sz), (p), (mp), (fl), (cr), (ct))
#define	VOP_ADDMAP(vp, of, as, a, sz, p, mp, fl, cr, ct) \
	fop_addmap((vp), (of), (as), (a), (sz), (p), (mp), (fl), (cr), (ct))
#define	VOP_DELMAP(vp, of, as, a, sz, p, mp, fl, cr, ct) \
	fop_delmap((vp), (of), (as), (a), (sz), (p), (mp), (fl), (cr), (ct))
#define	VOP_POLL(vp, events, anyyet, reventsp, phpp, ct) \
	fop_poll((vp), (events), (anyyet), (reventsp), (phpp), (ct))
#define	VOP_DUMP(vp, addr, bn, count, ct) \
	fop_dump((vp), (addr), (bn), (count), (ct))
#define	VOP_PATHCONF(vp, cmd, valp, cr, ct) \
	fop_pathconf((vp), (cmd), (valp), (cr), (ct))
#define	VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr, ct) \
	fop_pageio((vp), (pp), (io_off), (io_len), (flags), (cr), (ct))
#define	VOP_DUMPCTL(vp, action, blkp, ct) \
	fop_dumpctl((vp), (action), (blkp), (ct))
#define	VOP_DISPOSE(vp, pp, flag, dn, cr, ct) \
	fop_dispose((vp), (pp), (flag), (dn), (cr), (ct))
#define	VOP_GETSECATTR(vp, vsap, f, cr, ct) \
	fop_getsecattr((vp), (vsap), (f), (cr), (ct))
#define	VOP_SETSECATTR(vp, vsap, f, cr, ct) \
	fop_setsecattr((vp), (vsap), (f), (cr), (ct))
#define	VOP_SHRLOCK(vp, cmd, shr, f, cr, ct) \
	fop_shrlock((vp), (cmd), (shr), (f), (cr), (ct))
#define	VOP_VNEVENT(vp, vnevent, dvp, fnm, ct) \
	fop_vnevent((vp), (vnevent), (dvp), (fnm), (ct))
#define	VOP_REQZCBUF(vp, rwflag, xuiop, cr, ct) \
	fop_reqzcbuf((vp), (rwflag), (xuiop), (cr), (ct))
#define	VOP_RETZCBUF(vp, xuiop, cr, ct) \
	fop_retzcbuf((vp), (xuiop), (cr), (ct))
1205
/*
 * String names of the vnode operations; there is one VOPNAME_* definition
 * for each VOP_*() macro above.
 */
#define	VOPNAME_OPEN		"open"
#define	VOPNAME_CLOSE		"close"
#define	VOPNAME_READ		"read"
#define	VOPNAME_WRITE		"write"
#define	VOPNAME_IOCTL		"ioctl"
#define	VOPNAME_SETFL		"setfl"
#define	VOPNAME_GETATTR		"getattr"
#define	VOPNAME_SETATTR		"setattr"
#define	VOPNAME_ACCESS		"access"
#define	VOPNAME_LOOKUP		"lookup"
#define	VOPNAME_CREATE		"create"
#define	VOPNAME_REMOVE		"remove"
#define	VOPNAME_LINK		"link"
#define	VOPNAME_RENAME		"rename"
#define	VOPNAME_MKDIR		"mkdir"
#define	VOPNAME_RMDIR		"rmdir"
#define	VOPNAME_READDIR		"readdir"
#define	VOPNAME_SYMLINK		"symlink"
#define	VOPNAME_READLINK	"readlink"
#define	VOPNAME_FSYNC		"fsync"
#define	VOPNAME_INACTIVE	"inactive"
#define	VOPNAME_FID		"fid"
#define	VOPNAME_RWLOCK		"rwlock"
#define	VOPNAME_RWUNLOCK	"rwunlock"
#define	VOPNAME_SEEK		"seek"
#define	VOPNAME_CMP		"cmp"
#define	VOPNAME_FRLOCK		"frlock"
#define	VOPNAME_SPACE		"space"
#define	VOPNAME_REALVP		"realvp"
#define	VOPNAME_GETPAGE		"getpage"
#define	VOPNAME_PUTPAGE		"putpage"
#define	VOPNAME_MAP		"map"
#define	VOPNAME_ADDMAP		"addmap"
#define	VOPNAME_DELMAP		"delmap"
#define	VOPNAME_POLL		"poll"
#define	VOPNAME_DUMP		"dump"
#define	VOPNAME_PATHCONF	"pathconf"
#define	VOPNAME_PAGEIO		"pageio"
#define	VOPNAME_DUMPCTL		"dumpctl"
#define	VOPNAME_DISPOSE		"dispose"
#define	VOPNAME_GETSECATTR	"getsecattr"
#define	VOPNAME_SETSECATTR	"setsecattr"
#define	VOPNAME_SHRLOCK		"shrlock"
#define	VOPNAME_VNEVENT		"vnevent"
#define	VOPNAME_REQZCBUF	"reqzcbuf"
#define	VOPNAME_RETZCBUF	"retzcbuf"
1252
/*
 * Flags for VOP_LOOKUP
 *
 * FIGNORECASE and FSEARCH, which are defined in file.h, are also possible.
 */
#define	LOOKUP_DIR		(1 << 0) /* want parent dir vp */
#define	LOOKUP_XATTR		(1 << 1) /* look up extended attr dir */
#define	CREATE_XATTR_DIR	(1 << 2) /* Create extended attr dir */
#define	LOOKUP_HAVE_SYSATTR_DIR	(1 << 3) /* Already created virtual GFS dir */

/*
 * Flags for VOP_READDIR
 */
#define	V_RDDIR_ENTFLAGS	(1 << 0) /* request dirent flags */
#define	V_RDDIR_ACCFILTER	(1 << 1) /* filter out inaccessible dirents */
1269
/*
 * Flags for VOP_RWLOCK/VOP_RWUNLOCK
 * VOP_RWLOCK will return the flag that was actually set, or -1 if none.
 */
#define	V_WRITELOCK_FALSE	0	/* Request read-lock on the vnode */
#define	V_WRITELOCK_TRUE	1	/* Request write-lock on the vnode */

/*
 * Flags for VOP_DUMPCTL
 */
#define	DUMP_ALLOC	(0)
#define	DUMP_FREE	(1)
#define	DUMP_SCAN	(2)
1283
1284/*
1285 * Public vnode manipulation functions.
1286 */
1287#ifdef	_KERNEL
1288
1289vnode_t *vn_alloc(int);
1290void	vn_reinit(vnode_t *);
1291void	vn_recycle(vnode_t *);
1292void	vn_free(vnode_t *);
1293
1294int	vn_is_readonly(vnode_t *);
1295int   	vn_is_opened(vnode_t *, v_mode_t);
1296int   	vn_is_mapped(vnode_t *, v_mode_t);
1297int   	vn_has_other_opens(vnode_t *, v_mode_t);
1298void	vn_open_upgrade(vnode_t *, int);
1299void	vn_open_downgrade(vnode_t *, int);
1300
1301int	vn_can_change_zones(vnode_t *vp);
1302
1303int	vn_has_flocks(vnode_t *);
1304int	vn_has_mandatory_locks(vnode_t *, int);
1305int	vn_has_cached_data(vnode_t *);
1306
1307void	vn_setops(vnode_t *, vnodeops_t *);
1308vnodeops_t *vn_getops(vnode_t *);
1309int	vn_matchops(vnode_t *, vnodeops_t *);
1310int	vn_matchopval(vnode_t *, char *, fs_generic_func_p);
1311int	vn_ismntpt(vnode_t *);
1312
1313struct vfs *vn_mountedvfs(vnode_t *);
1314
1315int	vn_in_dnlc(vnode_t *);
1316
1317void	vn_create_cache(void);
1318void	vn_destroy_cache(void);
1319
1320void	vn_freevnodeops(vnodeops_t *);
1321
1322int	vn_open(char *pnamep, enum uio_seg seg, int filemode, int createmode,
1323		struct vnode **vpp, enum create crwhy, mode_t umask);
1324int	vn_openat(char *pnamep, enum uio_seg seg, int filemode, int createmode,
1325		struct vnode **vpp, enum create crwhy,
1326		mode_t umask, struct vnode *startvp, int fd);
1327int	vn_create(char *pnamep, enum uio_seg seg, struct vattr *vap,
1328		enum vcexcl excl, int mode, struct vnode **vpp,
1329		enum create why, int flag, mode_t umask);
1330int	vn_createat(char *pnamep, enum uio_seg seg, struct vattr *vap,
1331		enum vcexcl excl, int mode, struct vnode **vpp,
1332		enum create why, int flag, mode_t umask, struct vnode *startvp);
1333int	vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, ssize_t len,
1334		offset_t offset, enum uio_seg seg, int ioflag, rlim64_t ulimit,
1335		cred_t *cr, ssize_t *residp);
1336void	vn_rele(struct vnode *vp);
1337void	vn_rele_async(struct vnode *vp, struct taskq *taskq);
1338void	vn_rele_dnlc(struct vnode *vp);
1339void	vn_rele_stream(struct vnode *vp);
1340int	vn_link(char *from, char *to, enum uio_seg seg);
1341int	vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
1342		vnode_t *tstartvp, char *to, enum uio_seg seg);
1343int	vn_rename(char *from, char *to, enum uio_seg seg);
1344int	vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp, char *tname,
1345		enum uio_seg seg);
1346int	vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag);
1347int	vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg,
1348		enum rm dirflag);
1349int	vn_compare(vnode_t *vp1, vnode_t *vp2);
1350int	vn_vfswlock(struct vnode *vp);
1351int	vn_vfswlock_wait(struct vnode *vp);
1352int	vn_vfsrlock(struct vnode *vp);
1353int	vn_vfsrlock_wait(struct vnode *vp);
1354void	vn_vfsunlock(struct vnode *vp);
1355int	vn_vfswlock_held(struct vnode *vp);
1356vnode_t *specvp(struct vnode *vp, dev_t dev, vtype_t type, struct cred *cr);
1357vnode_t *makespecvp(dev_t dev, vtype_t type);
1358vn_vfslocks_entry_t *vn_vfslocks_getlock(void *);
1359void	vn_vfslocks_rele(vn_vfslocks_entry_t *);
1360boolean_t vn_is_reparse(vnode_t *, cred_t *, caller_context_t *);
1361
1362void vn_copypath(struct vnode *src, struct vnode *dst);
1363void vn_setpath_str(struct vnode *vp, const char *str, size_t len);
1364void vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
1365    const char *path, size_t plen);
1366void vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len);
1367
1368/* Private vnode manipulation functions */
1369void vn_clearpath(vnode_t *, hrtime_t);
1370void vn_updatepath(vnode_t *, vnode_t *, const char *);
1371
1372
1373/* Vnode event notification */
1374void	vnevent_rename_src(vnode_t *, vnode_t *, char *, caller_context_t *);
1375void	vnevent_rename_dest(vnode_t *, vnode_t *, char *, caller_context_t *);
1376void	vnevent_remove(vnode_t *, vnode_t *, char *, caller_context_t *);
1377void	vnevent_rmdir(vnode_t *, vnode_t *, char *, caller_context_t *);
1378void	vnevent_create(vnode_t *, caller_context_t *);
1379void	vnevent_link(vnode_t *, caller_context_t *);
1380void	vnevent_rename_dest_dir(vnode_t *, caller_context_t *ct);
1381void	vnevent_mountedover(vnode_t *, caller_context_t *);
1382void	vnevent_truncate(vnode_t *, caller_context_t *);
1383int	vnevent_support(vnode_t *, caller_context_t *);
1384void	vnevent_pre_rename_src(vnode_t *, vnode_t *, char *,
1385	    caller_context_t *);
1386void	vnevent_pre_rename_dest(vnode_t *, vnode_t *, char *,
1387	    caller_context_t *);
1388void	vnevent_pre_rename_dest_dir(vnode_t *, vnode_t *, char *,
1389	    caller_context_t *);
1390
1391/* Vnode specific data */
1392void vsd_create(uint_t *, void (*)(void *));
1393void vsd_destroy(uint_t *);
1394void *vsd_get(vnode_t *, uint_t);
1395int vsd_set(vnode_t *, uint_t, void *);
1396void vsd_free(vnode_t *);
1397
1398/*
1399 * Extensible vnode attribute (xva) routines:
1400 * xva_init() initializes an xvattr_t (zero struct, init mapsize, set AT_XATTR)
1401 * xva_getxoptattr() returns a ponter to the xoptattr_t section of xvattr_t
1402 */
1403void		xva_init(xvattr_t *);
1404xoptattr_t	*xva_getxoptattr(xvattr_t *);	/* Get ptr to xoptattr_t */
1405
1406void xattr_init(void);		/* Initialize vnodeops for xattrs */
1407
1408/* GFS tunnel for xattrs */
1409int xattr_dir_lookup(vnode_t *, vnode_t **, int, cred_t *);
1410
1411/* Reparse Point */
1412void reparse_point_init(void);
1413
1414/* Context identification */
1415u_longlong_t	fs_new_caller_id();
1416
int	vn_vmpss_usepageio(vnode_t *);

/* Empty v_path placeholder (defined elsewhere) */
extern char *vn_vpath_empty;

/*
 * Needed for use of IS_VMODSORT() in kernel.
 */
extern uint_t pvn_vmodsort_supported;
1426
/*
 * All changes to v_count should be done through VN_HOLD() or VN_RELE(), or
 * one of their variants. This makes it possible to ensure proper locking,
 * and to guarantee that all modifications are accompanied by a firing of
 * the vn-hold or vn-rele SDT DTrace probe.
 *
 * Example DTrace command for tracing vnode references using these probes:
 *
 * dtrace -q -n 'sdt:::vn-hold,sdt:::vn-rele
 * {
 *	this->vp = (vnode_t *)arg0;
 *	printf("%s %s(%p[%s]) %d\n", execname, probename, this->vp,
 *	    this->vp->v_path == NULL ? "NULL" : stringof(this->vp->v_path),
 *	    this->vp->v_count)
 * }'
 *
 * Note that each of these macros expands to a brace-enclosed block rather
 * than a single statement, so take care when using one as the unbraced body
 * of an if that has an else clause.
 */

/*
 * Take a hold on vp when the caller already owns vp->v_lock: asserts that
 * the lock is owned, increments v_count and fires the vn-hold probe.
 */
#define	VN_HOLD_LOCKED(vp) {			\
	ASSERT(mutex_owned(&(vp)->v_lock));	\
	(vp)->v_count++;			\
	DTRACE_PROBE1(vn__hold, vnode_t *, vp);	\
}

/*
 * Take a hold on vp, entering and exiting vp->v_lock around
 * VN_HOLD_LOCKED().
 */
#define	VN_HOLD(vp)	{		\
	mutex_enter(&(vp)->v_lock);	\
	VN_HOLD_LOCKED(vp);		\
	mutex_exit(&(vp)->v_lock);	\
}

/* Release a hold on vp by calling vn_rele(). */
#define	VN_RELE(vp)	{ \
	vn_rele(vp); \
}

/* Release a hold on vp by calling vn_rele_async() with the given taskq. */
#define	VN_RELE_ASYNC(vp, taskq)	{ \
	vn_rele_async(vp, taskq); \
}

/*
 * Drop a hold on vp when the caller already owns vp->v_lock: asserts that
 * the lock is owned and that v_count is at least 1, decrements v_count and
 * fires the vn-rele probe.  Unlike VN_RELE(), nothing else is done here.
 */
#define	VN_RELE_LOCKED(vp) {			\
	ASSERT(mutex_owned(&(vp)->v_lock));	\
	ASSERT((vp)->v_count >= 1);		\
	(vp)->v_count--;			\
	DTRACE_PROBE1(vn__rele, vnode_t *, vp);	\
}
1469
/*
 * Set the v_vfsp, v_type and v_rdev fields of vp in one step.  Expands to a
 * brace-enclosed block; vp is evaluated three times.
 */
#define	VN_SET_VFS_TYPE_DEV(vp, vfsp, type, dev)	{ \
	(vp)->v_vfsp = (vfsp); \
	(vp)->v_type = (type); \
	(vp)->v_rdev = (dev); \
}
1475
/*
 * Compare two vnodes for equality.  In general this macro should be used
 * in preference to calling VOP_CMP directly.
 *
 * Identical pointers compare equal (1) without calling VOP_CMP.  Otherwise
 * VOP_CMP is consulted only when both pointers are non-NULL and both vnodes
 * have the same ops vector; in every other case the result is 0.  Both
 * arguments are evaluated more than once.
 */
#define	VN_CMP(VP1, VP2)						\
	(((VP1) == (VP2)) ? 1 :						\
	(((VP1) != NULL && (VP2) != NULL &&				\
	vn_getops(VP1) == vn_getops(VP2)) ?				\
	VOP_CMP(VP1, VP2, NULL) : 0))
1483
1484/*
1485 * Some well-known global vnodes used by the VM system to name pages.
1486 */
1487extern struct vnode kvps[];
1488
1489typedef enum {
1490	KV_KVP,		/* vnode for all segkmem pages */
1491	KV_ZVP,		/* vnode for all ZFS pages */
1492	KV_VVP,		/* vnode for all VMM pages */
1493#if defined(__sparc)
1494	KV_MPVP,	/* vnode for all page_t meta-pages */
1495	KV_PROMVP,	/* vnode for all PROM pages */
1496#endif	/* __sparc */
1497	KV_MAX		/* total number of vnodes in kvps[] */
1498} kvps_index_t;
1499
/* True when vp points at one of the entries of kvps[]. */
#define	VN_ISKAS(vp)	(&kvps[0] <= (vp) && &kvps[KV_MAX] > (vp))
1501
1502#endif	/* _KERNEL */
1503
/*
 * Flags to VOP_SETATTR/VOP_GETATTR.
 */
#define	ATTR_UTIME	(1 << 0) /* non-default utime(2) request */
#define	ATTR_EXEC	(1 << 1) /* invocation from exec(2) */
#define	ATTR_COMM	(1 << 2) /* yield common vp attributes */
#define	ATTR_HINT	(1 << 3) /* information returned will be `hint' */
#define	ATTR_REAL	(1 << 4) /* yield attributes of the real vp */
#define	ATTR_NOACLCHECK	(1 << 5) /* Don't check ACL when checking permissions */
#define	ATTR_TRIGGER	(1 << 6) /* Mount first if vnode is a trigger mount */

/*
 * Generally useful macros.
 *
 * VBSIZE() and VTOZONE() read the vfs_bsize and vfs_zone fields of the vfs
 * that vp->v_vfsp points to; they dereference v_vfsp without checking it.
 * NULLVP and NULLVPP are typed null pointers.
 */
#define	VBSIZE(vp)	((vp)->v_vfsp->vfs_bsize)

#define	VTOZONE(vp)	((vp)->v_vfsp->vfs_zone)

#define	NULLVP		((struct vnode *)0)
#define	NULLVPP		((struct vnode **)0)
1523
1524#ifdef	_KERNEL
1525
/*
 * Structure used while handling asynchronous VOP_PUTPAGE operations.
 * Requests are chained through a_next.
 */
struct async_reqs {
	struct async_reqs *a_next;	/* pointer to next arg struct */
	struct vnode *a_vp;		/* vnode pointer */
	u_offset_t a_off;			/* offset in file */
	uint_t a_len;			/* size of i/o request */
	int a_flags;			/* flags to indicate operation type */
	struct cred *a_cred;		/* cred pointer	*/
	ushort_t a_prealloced;		/* set if struct is pre-allocated */
};
1538
/*
 * VN_DISPOSE() -- given a page pointer, safely invoke VOP_DISPOSE().
 *
 * When the page has no vnode, or its vnode is one of the kvps[] vnodes
 * (VN_ISKAS), VOP_DISPOSE() is bypassed: the page is handed to page_free()
 * if flag is B_FREE and to page_destroy() otherwise.
 *
 * Note that there is no guarantee that the page passed in will be
 * freed.  If that is required, then a check after calling VN_DISPOSE would
 * be necessary to ensure the page was freed.
 */
#define	VN_DISPOSE(pp, flag, dn, cr)	{ \
	if ((pp)->p_vnode == NULL || VN_ISKAS((pp)->p_vnode)) { \
		if ((flag) == B_FREE) \
			page_free((pp), (dn)); \
		else \
			page_destroy((pp), (dn)); \
	} else { \
		VOP_DISPOSE((pp)->p_vnode, (pp), (flag), (dn), (cr), NULL); \
	} \
	}
1553
1554#endif	/* _KERNEL */
1555
1556#ifdef	__cplusplus
1557}
1558#endif
1559
1560#endif	/* _SYS_VNODE_H */
1561