17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 55a59a8b3Srsb * Common Development and Distribution License (the "License"). 65a59a8b3Srsb * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 21794f0adbSRoger A. Faulkner 227c478bd9Sstevel@tonic-gate /* 23cb15d5d9SPeter Rival * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 24*04909c8cSJohn Levon * Copyright (c) 2018, Joyent, Inc. 25ade42b55SSebastien Roy * Copyright (c) 2011, 2017 by Delphix. All rights reserved. 26f06dce2cSAndrew Stormont * Copyright 2017 RackTop Systems. 
277c478bd9Sstevel@tonic-gate */ 287c478bd9Sstevel@tonic-gate 297c478bd9Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 30b4203d75SMarcel Telka /* All Rights Reserved */ 317c478bd9Sstevel@tonic-gate 327c478bd9Sstevel@tonic-gate /* 337c478bd9Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988 347c478bd9Sstevel@tonic-gate * The Regents of the University of California 357c478bd9Sstevel@tonic-gate * All Rights Reserved 367c478bd9Sstevel@tonic-gate * 377c478bd9Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from 387c478bd9Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its 397c478bd9Sstevel@tonic-gate * contributors. 407c478bd9Sstevel@tonic-gate */ 417c478bd9Sstevel@tonic-gate 427c478bd9Sstevel@tonic-gate #ifndef _SYS_VNODE_H 437c478bd9Sstevel@tonic-gate #define _SYS_VNODE_H 447c478bd9Sstevel@tonic-gate 457c478bd9Sstevel@tonic-gate #include <sys/types.h> 467c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 477c478bd9Sstevel@tonic-gate #include <sys/rwstlock.h> 487c478bd9Sstevel@tonic-gate #include <sys/time_impl.h> 497c478bd9Sstevel@tonic-gate #include <sys/cred.h> 507c478bd9Sstevel@tonic-gate #include <sys/uio.h> 517c478bd9Sstevel@tonic-gate #include <sys/resource.h> 527c478bd9Sstevel@tonic-gate #include <vm/seg_enum.h> 535a59a8b3Srsb #include <sys/kstat.h> 545a59a8b3Srsb #include <sys/kmem.h> 551b300de9Sjwahlig #include <sys/list.h> 567c478bd9Sstevel@tonic-gate #ifdef _KERNEL 577c478bd9Sstevel@tonic-gate #include <sys/buf.h> 58ade42b55SSebastien Roy #include <sys/sdt.h> 597c478bd9Sstevel@tonic-gate #endif /* _KERNEL */ 607c478bd9Sstevel@tonic-gate 617c478bd9Sstevel@tonic-gate #ifdef __cplusplus 627c478bd9Sstevel@tonic-gate extern "C" { 637c478bd9Sstevel@tonic-gate #endif 647c478bd9Sstevel@tonic-gate 655a59a8b3Srsb /* 665a59a8b3Srsb * Statistics for all vnode operations. 
675a59a8b3Srsb * All operations record number of ops (since boot/mount/zero'ed). 685a59a8b3Srsb * Certain I/O operations (read, write, readdir) also record number 695a59a8b3Srsb * of bytes transferred. 705a59a8b3Srsb * This appears in two places in the system: one is embedded in each 715a59a8b3Srsb * vfs_t. There is also an array of vopstats_t structures allocated 725a59a8b3Srsb * on a per-fstype basis. 735a59a8b3Srsb */ 745a59a8b3Srsb 755a59a8b3Srsb #define VOPSTATS_STR "vopstats_" /* Initial string for vopstat kstats */ 765a59a8b3Srsb 775a59a8b3Srsb typedef struct vopstats { 785a59a8b3Srsb kstat_named_t nopen; /* VOP_OPEN */ 795a59a8b3Srsb kstat_named_t nclose; /* VOP_CLOSE */ 805a59a8b3Srsb kstat_named_t nread; /* VOP_READ */ 815a59a8b3Srsb kstat_named_t read_bytes; 825a59a8b3Srsb kstat_named_t nwrite; /* VOP_WRITE */ 835a59a8b3Srsb kstat_named_t write_bytes; 845a59a8b3Srsb kstat_named_t nioctl; /* VOP_IOCTL */ 855a59a8b3Srsb kstat_named_t nsetfl; /* VOP_SETFL */ 865a59a8b3Srsb kstat_named_t ngetattr; /* VOP_GETATTR */ 875a59a8b3Srsb kstat_named_t nsetattr; /* VOP_SETATTR */ 885a59a8b3Srsb kstat_named_t naccess; /* VOP_ACCESS */ 895a59a8b3Srsb kstat_named_t nlookup; /* VOP_LOOKUP */ 905a59a8b3Srsb kstat_named_t ncreate; /* VOP_CREATE */ 915a59a8b3Srsb kstat_named_t nremove; /* VOP_REMOVE */ 925a59a8b3Srsb kstat_named_t nlink; /* VOP_LINK */ 935a59a8b3Srsb kstat_named_t nrename; /* VOP_RENAME */ 945a59a8b3Srsb kstat_named_t nmkdir; /* VOP_MKDIR */ 955a59a8b3Srsb kstat_named_t nrmdir; /* VOP_RMDIR */ 965a59a8b3Srsb kstat_named_t nreaddir; /* VOP_READDIR */ 975a59a8b3Srsb kstat_named_t readdir_bytes; 985a59a8b3Srsb kstat_named_t nsymlink; /* VOP_SYMLINK */ 995a59a8b3Srsb kstat_named_t nreadlink; /* VOP_READLINK */ 1005a59a8b3Srsb kstat_named_t nfsync; /* VOP_FSYNC */ 1015a59a8b3Srsb kstat_named_t ninactive; /* VOP_INACTIVE */ 1025a59a8b3Srsb kstat_named_t nfid; /* VOP_FID */ 1035a59a8b3Srsb kstat_named_t nrwlock; /* VOP_RWLOCK */ 1045a59a8b3Srsb kstat_named_t 
nrwunlock; /* VOP_RWUNLOCK */ 1055a59a8b3Srsb kstat_named_t nseek; /* VOP_SEEK */ 1065a59a8b3Srsb kstat_named_t ncmp; /* VOP_CMP */ 1075a59a8b3Srsb kstat_named_t nfrlock; /* VOP_FRLOCK */ 1085a59a8b3Srsb kstat_named_t nspace; /* VOP_SPACE */ 1095a59a8b3Srsb kstat_named_t nrealvp; /* VOP_REALVP */ 1105a59a8b3Srsb kstat_named_t ngetpage; /* VOP_GETPAGE */ 1115a59a8b3Srsb kstat_named_t nputpage; /* VOP_PUTPAGE */ 1125a59a8b3Srsb kstat_named_t nmap; /* VOP_MAP */ 1135a59a8b3Srsb kstat_named_t naddmap; /* VOP_ADDMAP */ 1145a59a8b3Srsb kstat_named_t ndelmap; /* VOP_DELMAP */ 1155a59a8b3Srsb kstat_named_t npoll; /* VOP_POLL */ 1165a59a8b3Srsb kstat_named_t ndump; /* VOP_DUMP */ 1175a59a8b3Srsb kstat_named_t npathconf; /* VOP_PATHCONF */ 1185a59a8b3Srsb kstat_named_t npageio; /* VOP_PAGEIO */ 1195a59a8b3Srsb kstat_named_t ndumpctl; /* VOP_DUMPCTL */ 1205a59a8b3Srsb kstat_named_t ndispose; /* VOP_DISPOSE */ 1215a59a8b3Srsb kstat_named_t nsetsecattr; /* VOP_SETSECATTR */ 1225a59a8b3Srsb kstat_named_t ngetsecattr; /* VOP_GETSECATTR */ 1235a59a8b3Srsb kstat_named_t nshrlock; /* VOP_SHRLOCK */ 1245a59a8b3Srsb kstat_named_t nvnevent; /* VOP_VNEVENT */ 125c242f9a0Schunli zhang - Sun Microsystems - Irvine United States kstat_named_t nreqzcbuf; /* VOP_REQZCBUF */ 126c242f9a0Schunli zhang - Sun Microsystems - Irvine United States kstat_named_t nretzcbuf; /* VOP_RETZCBUF */ 1275a59a8b3Srsb } vopstats_t; 1285a59a8b3Srsb 1297c478bd9Sstevel@tonic-gate /* 1307c478bd9Sstevel@tonic-gate * The vnode is the focus of all file activity in UNIX. 1317c478bd9Sstevel@tonic-gate * A vnode is allocated for each active file, each current 1327c478bd9Sstevel@tonic-gate * directory, each mounted-on file, and the root. 1337c478bd9Sstevel@tonic-gate * 1347c478bd9Sstevel@tonic-gate * Each vnode is usually associated with a file-system-specific node (for 1357c478bd9Sstevel@tonic-gate * UFS, this is the in-memory inode). 
 * Generally, a vnode and an fs-node
 * should be created and destroyed together as a pair.
 *
 * If a vnode is reused for a new file, it should be reinitialized by calling
 * either vn_reinit() or vn_recycle().
 *
 * vn_reinit() resets the entire vnode as if it was returned by vn_alloc().
 * The caller is responsible for setting up the entire vnode after calling
 * vn_reinit().  This is important when using kmem caching where the vnode is
 * allocated by a constructor, for instance.
 *
 * vn_recycle() is used when the file system keeps some state around in both
 * the vnode and the associated FS-node.  In UFS, for example, the inode of
 * a deleted file can be reused immediately.  The v_data, v_vfsp, v_op, etc.
 * remain the same but certain fields related to the previous instance need
 * to be reset.  In particular:
 *	v_femhead
 *	v_path
 *	v_rdcnt, v_wrcnt
 *	v_mmap_read, v_mmap_write
 */

/*
 * vnode types.  VNON means no type.  These values are unrelated to
 * values in on-disk inodes.
1607c478bd9Sstevel@tonic-gate */ 1617c478bd9Sstevel@tonic-gate typedef enum vtype { 1627c478bd9Sstevel@tonic-gate VNON = 0, 1637c478bd9Sstevel@tonic-gate VREG = 1, 1647c478bd9Sstevel@tonic-gate VDIR = 2, 1657c478bd9Sstevel@tonic-gate VBLK = 3, 1667c478bd9Sstevel@tonic-gate VCHR = 4, 1677c478bd9Sstevel@tonic-gate VLNK = 5, 1687c478bd9Sstevel@tonic-gate VFIFO = 6, 1697c478bd9Sstevel@tonic-gate VDOOR = 7, 1707c478bd9Sstevel@tonic-gate VPROC = 8, 1717c478bd9Sstevel@tonic-gate VSOCK = 9, 1727c478bd9Sstevel@tonic-gate VPORT = 10, 1737c478bd9Sstevel@tonic-gate VBAD = 11 1747c478bd9Sstevel@tonic-gate } vtype_t; 1757c478bd9Sstevel@tonic-gate 1761b300de9Sjwahlig /* 1771b300de9Sjwahlig * VSD - Vnode Specific Data 1781b300de9Sjwahlig * Used to associate additional private data with a vnode. 1791b300de9Sjwahlig */ 1801b300de9Sjwahlig struct vsd_node { 1811b300de9Sjwahlig list_node_t vs_nodes; /* list of all VSD nodes */ 1821b300de9Sjwahlig uint_t vs_nkeys; /* entries in value array */ 1831b300de9Sjwahlig void **vs_value; /* array of value/key */ 1841b300de9Sjwahlig }; 1851b300de9Sjwahlig 1867c478bd9Sstevel@tonic-gate /* 1877c478bd9Sstevel@tonic-gate * Many of the fields in the vnode are read-only once they are initialized 1887c478bd9Sstevel@tonic-gate * at vnode creation time. Other fields are protected by locks. 1897c478bd9Sstevel@tonic-gate * 1907c478bd9Sstevel@tonic-gate * IMPORTANT: vnodes should be created ONLY by calls to vn_alloc(). They 1917c478bd9Sstevel@tonic-gate * may not be embedded into the file-system specific node (inode). The 1927c478bd9Sstevel@tonic-gate * size of vnodes may change. 
 *
 * The v_lock protects:
 *   v_flag
 *   v_stream
 *   v_count
 *   v_shrlocks
 *   v_path
 *   v_xattrdir
 *
 * The v_vsd_lock protects:
 *   v_vsd
 *
 * A special lock (implemented by vn_vfswlock in vnode.c) protects:
 *   v_vfsmountedhere
 *
 * The global flock_lock mutex (in flock.c) protects:
 *   v_filocks
 *
 * IMPORTANT NOTE:
 *
 *   The following vnode fields are considered public and may safely be
 *   accessed by file systems or other consumers:
 *
 *     v_lock
 *     v_flag
 *     v_count
 *     v_data
 *     v_vfsp
 *     v_stream
 *     v_type
 *     v_rdev
 *
 * ALL OTHER FIELDS SHOULD BE ACCESSED ONLY BY THE OWNER OF THAT FIELD.
 * In particular, file systems should not access other fields; they may
 * change or even be removed.  The functionality which was once provided
 * by these fields is available through vn_* functions.
 *
 * VNODE PATH THEORY:
 * In each vnode, the v_path field holds a cached version of the canonical
 * filesystem path which that node represents.  Because vnodes lack contextual
 * information about their own name or position in the VFS hierarchy, this path
 * must be calculated when the vnode is instantiated by operations such as
 * fop_create, fop_lookup, or fop_mkdir.  During said operations, both the
 * parent vnode (and its cached v_path) and future name are known, so the
 * v_path of the resulting object can easily be set.
 *
 * The caching nature of v_path is complicated in the face of directory
 * renames.  Filesystem drivers are responsible for calling vn_renamepath when
 * a fop_rename operation succeeds.  While the v_path on the renamed vnode will
 * be updated, existing children of the directory (direct, or at deeper levels)
 * will now possess v_path caches which are stale.
 *
 * It is expensive (and for non-directories, impossible) to recalculate stale
 * v_path entries during operations such as vnodetopath.  The best time during
 * which to correct such wrongs is the same as when v_path is first
 * initialized: during fop_create/fop_lookup/fop_mkdir/etc, where adequate
 * context is available to generate the current path.
 *
 * In order to quickly detect stale v_path entries (without full lookup
 * verification) to trigger a v_path update, the v_path_stamp field has been
 * added to vnode_t.  As part of successful fop_create/fop_lookup/fop_mkdir
 * operations, where the name and parent vnode are available, the following
 * rules are used to determine updates to the child:
 *
 * 1. If the parent lacks a v_path, clear any existing v_path and v_path_stamp
 *    on the child.  Until the parent v_path is refreshed to a valid state, the
 *    child v_path must be considered invalid too.
 *
 * 2. If the child lacks a v_path (implying v_path_stamp == 0), it inherits the
 *    v_path_stamp value from its parent and its v_path is updated.
 *
 * 3. If the child v_path_stamp is less than v_path_stamp in the parent, it is
 *    an indication that the child v_path is stale.  The v_path is updated and
 *    v_path_stamp in the child is set to the current hrtime().
 *
 *    It does _not_ inherit the parent v_path_stamp in order to propagate the
 *    time of v_path invalidation through the directory structure.  This
 *    prevents concurrent invalidations (operating with a now-incorrect v_path)
 *    at deeper levels in the tree from persisting.
 *
 * 4. If the child v_path_stamp is greater or equal to the parent, no action
 *    needs to be taken.
 *
 * Note that fop_rename operations do not follow this ruleset.
They perform an 275e2fc3408SPatrick Mooney * explicit update of v_path and v_path_stamp (setting it to the current time) 276e2fc3408SPatrick Mooney * 277e2fc3408SPatrick Mooney * With these constraints in place, v_path invalidations and updates should 278e2fc3408SPatrick Mooney * proceed in a timely manner as vnodes are accessed. While there still are 279e2fc3408SPatrick Mooney * limited cases where vnodetopath operations will fail, the risk is minimized. 2807c478bd9Sstevel@tonic-gate */ 2817c478bd9Sstevel@tonic-gate 2827c478bd9Sstevel@tonic-gate struct fem_head; /* from fem.h */ 2837c478bd9Sstevel@tonic-gate 2847c478bd9Sstevel@tonic-gate typedef struct vnode { 2857c478bd9Sstevel@tonic-gate kmutex_t v_lock; /* protects vnode fields */ 2867c478bd9Sstevel@tonic-gate uint_t v_flag; /* vnode flags (see below) */ 2877c478bd9Sstevel@tonic-gate uint_t v_count; /* reference count */ 2887c478bd9Sstevel@tonic-gate void *v_data; /* private data for fs */ 2897c478bd9Sstevel@tonic-gate struct vfs *v_vfsp; /* ptr to containing VFS */ 2907c478bd9Sstevel@tonic-gate struct stdata *v_stream; /* associated stream */ 2917c478bd9Sstevel@tonic-gate enum vtype v_type; /* vnode type */ 2927c478bd9Sstevel@tonic-gate dev_t v_rdev; /* device (VCHR, VBLK) */ 2937c478bd9Sstevel@tonic-gate 2947c478bd9Sstevel@tonic-gate /* PRIVATE FIELDS BELOW - DO NOT USE */ 2957c478bd9Sstevel@tonic-gate 2967c478bd9Sstevel@tonic-gate struct vfs *v_vfsmountedhere; /* ptr to vfs mounted here */ 2977c478bd9Sstevel@tonic-gate struct vnodeops *v_op; /* vnode operations */ 2987c478bd9Sstevel@tonic-gate struct page *v_pages; /* vnode pages list */ 2997c478bd9Sstevel@tonic-gate struct filock *v_filocks; /* ptr to filock list */ 3007c478bd9Sstevel@tonic-gate struct shrlocklist *v_shrlocks; /* ptr to shrlock list */ 3017c478bd9Sstevel@tonic-gate krwlock_t v_nbllock; /* sync for NBMAND locks */ 3027c478bd9Sstevel@tonic-gate kcondvar_t v_cv; /* synchronize locking */ 3037c478bd9Sstevel@tonic-gate void *v_locality; /* hook 
for locality info */ 3047c478bd9Sstevel@tonic-gate struct fem_head *v_femhead; /* fs monitoring */ 3057c478bd9Sstevel@tonic-gate char *v_path; /* cached path */ 306e2fc3408SPatrick Mooney hrtime_t v_path_stamp; /* timestamp for cached path */ 3077c478bd9Sstevel@tonic-gate uint_t v_rdcnt; /* open for read count (VREG only) */ 3087c478bd9Sstevel@tonic-gate uint_t v_wrcnt; /* open for write count (VREG only) */ 3097c478bd9Sstevel@tonic-gate u_longlong_t v_mmap_read; /* mmap read count */ 3107c478bd9Sstevel@tonic-gate u_longlong_t v_mmap_write; /* mmap write count */ 3117c478bd9Sstevel@tonic-gate void *v_mpssdata; /* info for large page mappings */ 312df2381bfSpraks void *v_fopdata; /* list of file ops event watches */ 313d216dff5SRobert Mastors kmutex_t v_vsd_lock; /* protects v_vsd field */ 3141b300de9Sjwahlig struct vsd_node *v_vsd; /* vnode specific data */ 315da6c28aaSamw struct vnode *v_xattrdir; /* unnamed extended attr dir (GFS) */ 316b5fca8f8Stomee uint_t v_count_dnlc; /* dnlc reference count */ 3177c478bd9Sstevel@tonic-gate } vnode_t; 3187c478bd9Sstevel@tonic-gate 3197c478bd9Sstevel@tonic-gate #define IS_DEVVP(vp) \ 3207c478bd9Sstevel@tonic-gate ((vp)->v_type == VCHR || (vp)->v_type == VBLK || (vp)->v_type == VFIFO) 3217c478bd9Sstevel@tonic-gate 322cb15d5d9SPeter Rival #define VNODE_ALIGN 64 323cb15d5d9SPeter Rival /* Count of low-order 0 bits in a vnode *, based on size and alignment. */ 3241ab248cfSPeter Rival #if defined(_LP64) 325cb15d5d9SPeter Rival #define VNODE_ALIGN_LOG2 8 3261ab248cfSPeter Rival #else 3271ab248cfSPeter Rival #define VNODE_ALIGN_LOG2 7 3281ab248cfSPeter Rival #endif 329cb15d5d9SPeter Rival 3307c478bd9Sstevel@tonic-gate /* 3317c478bd9Sstevel@tonic-gate * vnode flags. 
3327c478bd9Sstevel@tonic-gate */ 3337c478bd9Sstevel@tonic-gate #define VROOT 0x01 /* root of its file system */ 3347c478bd9Sstevel@tonic-gate #define VNOCACHE 0x02 /* don't keep cache pages on vnode */ 3357c478bd9Sstevel@tonic-gate #define VNOMAP 0x04 /* file cannot be mapped/faulted */ 3367c478bd9Sstevel@tonic-gate #define VDUP 0x08 /* file should be dup'ed rather then opened */ 3377c478bd9Sstevel@tonic-gate #define VNOSWAP 0x10 /* file cannot be used as virtual swap device */ 3387c478bd9Sstevel@tonic-gate #define VNOMOUNT 0x20 /* file cannot be covered by mount */ 3397c478bd9Sstevel@tonic-gate #define VISSWAP 0x40 /* vnode is being used for swap */ 3407c478bd9Sstevel@tonic-gate #define VSWAPLIKE 0x80 /* vnode acts like swap (but may not be) */ 3417c478bd9Sstevel@tonic-gate 3427c478bd9Sstevel@tonic-gate #define IS_SWAPVP(vp) (((vp)->v_flag & (VISSWAP | VSWAPLIKE)) != 0) 3437c478bd9Sstevel@tonic-gate 344f06dce2cSAndrew Stormont #ifdef _KERNEL 3457c478bd9Sstevel@tonic-gate typedef struct vn_vfslocks_entry { 3467c478bd9Sstevel@tonic-gate rwstlock_t ve_lock; 3477c478bd9Sstevel@tonic-gate void *ve_vpvfs; 3487c478bd9Sstevel@tonic-gate struct vn_vfslocks_entry *ve_next; 3497c478bd9Sstevel@tonic-gate uint32_t ve_refcnt; 3507c478bd9Sstevel@tonic-gate char pad[64 - sizeof (rwstlock_t) - 2 * sizeof (void *) - \ 3517c478bd9Sstevel@tonic-gate sizeof (uint32_t)]; 3527c478bd9Sstevel@tonic-gate } vn_vfslocks_entry_t; 353f06dce2cSAndrew Stormont #endif 3547c478bd9Sstevel@tonic-gate 3557c478bd9Sstevel@tonic-gate /* 3567c478bd9Sstevel@tonic-gate * The following two flags are used to lock the v_vfsmountedhere field 3577c478bd9Sstevel@tonic-gate */ 3587c478bd9Sstevel@tonic-gate #define VVFSLOCK 0x100 3597c478bd9Sstevel@tonic-gate #define VVFSWAIT 0x200 3607c478bd9Sstevel@tonic-gate 3617c478bd9Sstevel@tonic-gate /* 3627c478bd9Sstevel@tonic-gate * Used to serialize VM operations on a vnode 3637c478bd9Sstevel@tonic-gate */ 3647c478bd9Sstevel@tonic-gate #define VVMLOCK 0x400 
3657c478bd9Sstevel@tonic-gate 3667c478bd9Sstevel@tonic-gate /* 3677c478bd9Sstevel@tonic-gate * Tell vn_open() not to fail a directory open for writing but 3687c478bd9Sstevel@tonic-gate * to go ahead and call VOP_OPEN() to let the filesystem check. 3697c478bd9Sstevel@tonic-gate */ 3707c478bd9Sstevel@tonic-gate #define VDIROPEN 0x800 3717c478bd9Sstevel@tonic-gate 3727c478bd9Sstevel@tonic-gate /* 3737c478bd9Sstevel@tonic-gate * Flag to let the VM system know that this file is most likely a binary 3747c478bd9Sstevel@tonic-gate * or shared library since it has been mmap()ed EXEC at some time. 3757c478bd9Sstevel@tonic-gate */ 3767c478bd9Sstevel@tonic-gate #define VVMEXEC 0x1000 3777c478bd9Sstevel@tonic-gate 3787c478bd9Sstevel@tonic-gate #define VPXFS 0x2000 /* clustering: global fs proxy vnode */ 3797c478bd9Sstevel@tonic-gate 3807c478bd9Sstevel@tonic-gate #define IS_PXFSVP(vp) ((vp)->v_flag & VPXFS) 3817c478bd9Sstevel@tonic-gate 3827c478bd9Sstevel@tonic-gate #define V_XATTRDIR 0x4000 /* attribute unnamed directory */ 3837c478bd9Sstevel@tonic-gate 384ab04eb8eStimh #define IS_XATTRDIR(vp) ((vp)->v_flag & V_XATTRDIR) 385ab04eb8eStimh 3867c478bd9Sstevel@tonic-gate #define V_LOCALITY 0x8000 /* whether locality aware */ 3877c478bd9Sstevel@tonic-gate 3887c478bd9Sstevel@tonic-gate /* 3897c478bd9Sstevel@tonic-gate * Flag that indicates the VM should maintain the v_pages list with all modified 3907c478bd9Sstevel@tonic-gate * pages on one end and unmodified pages at the other. This makes finding dirty 3917c478bd9Sstevel@tonic-gate * pages to write back to disk much faster at the expense of taking a minor 3927c478bd9Sstevel@tonic-gate * fault on the first store instruction which touches a writable page. 
3937c478bd9Sstevel@tonic-gate */ 3947c478bd9Sstevel@tonic-gate #define VMODSORT (0x10000) 3957c478bd9Sstevel@tonic-gate #define IS_VMODSORT(vp) \ 3967c478bd9Sstevel@tonic-gate (pvn_vmodsort_supported != 0 && ((vp)->v_flag & VMODSORT) != 0) 3977c478bd9Sstevel@tonic-gate 3987c478bd9Sstevel@tonic-gate #define VISSWAPFS 0x20000 /* vnode is being used for swapfs */ 399d20abfaaSPavel Tatashin 400d20abfaaSPavel Tatashin /* 401d20abfaaSPavel Tatashin * The mdb memstat command assumes that IS_SWAPFSVP only uses the 402d20abfaaSPavel Tatashin * vnode's v_flag field. If this changes, cache the additional 403d20abfaaSPavel Tatashin * fields in mdb; see vn_get in mdb/common/modules/genunix/memory.c 404d20abfaaSPavel Tatashin */ 4057c478bd9Sstevel@tonic-gate #define IS_SWAPFSVP(vp) (((vp)->v_flag & VISSWAPFS) != 0) 4067c478bd9Sstevel@tonic-gate 407da6c28aaSamw #define V_SYSATTR 0x40000 /* vnode is a GFS system attribute */ 408da6c28aaSamw 409e2fc3408SPatrick Mooney /* 410e2fc3408SPatrick Mooney * Indication that VOP_LOOKUP operations on this vnode may yield results from a 411e2fc3408SPatrick Mooney * different VFS instance. The main use of this is to suppress v_path 412e2fc3408SPatrick Mooney * calculation logic when filesystems such as procfs emit results which defy 413e2fc3408SPatrick Mooney * expectations about normal VFS behavior. 414e2fc3408SPatrick Mooney */ 415e2fc3408SPatrick Mooney #define VTRAVERSE 0x80000 416e2fc3408SPatrick Mooney 4177c478bd9Sstevel@tonic-gate /* 4187c478bd9Sstevel@tonic-gate * Vnode attributes. A bit-mask is supplied as part of the 4197c478bd9Sstevel@tonic-gate * structure to indicate the attributes the caller wants to 4207c478bd9Sstevel@tonic-gate * set (setattr) or extract (getattr). 4217c478bd9Sstevel@tonic-gate */ 4227c478bd9Sstevel@tonic-gate 4237c478bd9Sstevel@tonic-gate /* 4247c478bd9Sstevel@tonic-gate * Note that va_nodeid and va_nblocks are 64bit data type. 4257c478bd9Sstevel@tonic-gate * We support large files over NFSV3. 
With Solaris client and 4267c478bd9Sstevel@tonic-gate * Server that generates 64bit ino's and sizes these fields 4277c478bd9Sstevel@tonic-gate * will overflow if they are 32 bit sizes. 4287c478bd9Sstevel@tonic-gate */ 4297c478bd9Sstevel@tonic-gate 4307c478bd9Sstevel@tonic-gate typedef struct vattr { 4317c478bd9Sstevel@tonic-gate uint_t va_mask; /* bit-mask of attributes */ 4327c478bd9Sstevel@tonic-gate vtype_t va_type; /* vnode type (for create) */ 4337c478bd9Sstevel@tonic-gate mode_t va_mode; /* file access mode */ 4347c478bd9Sstevel@tonic-gate uid_t va_uid; /* owner user id */ 4357c478bd9Sstevel@tonic-gate gid_t va_gid; /* owner group id */ 4367c478bd9Sstevel@tonic-gate dev_t va_fsid; /* file system id (dev for now) */ 4377c478bd9Sstevel@tonic-gate u_longlong_t va_nodeid; /* node id */ 4387c478bd9Sstevel@tonic-gate nlink_t va_nlink; /* number of references to file */ 4397c478bd9Sstevel@tonic-gate u_offset_t va_size; /* file size in bytes */ 4407c478bd9Sstevel@tonic-gate timestruc_t va_atime; /* time of last access */ 4417c478bd9Sstevel@tonic-gate timestruc_t va_mtime; /* time of last modification */ 4427c478bd9Sstevel@tonic-gate timestruc_t va_ctime; /* time of last status change */ 4437c478bd9Sstevel@tonic-gate dev_t va_rdev; /* device the file represents */ 4447c478bd9Sstevel@tonic-gate uint_t va_blksize; /* fundamental block size */ 4457c478bd9Sstevel@tonic-gate u_longlong_t va_nblocks; /* # of blocks allocated */ 4467c478bd9Sstevel@tonic-gate uint_t va_seq; /* sequence number */ 4477c478bd9Sstevel@tonic-gate } vattr_t; 4487c478bd9Sstevel@tonic-gate 449da6c28aaSamw #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ 450da6c28aaSamw 451da6c28aaSamw /* 452da6c28aaSamw * Structure of all optional attributes. 
453da6c28aaSamw */ 454da6c28aaSamw typedef struct xoptattr { 455da6c28aaSamw timestruc_t xoa_createtime; /* Create time of file */ 456da6c28aaSamw uint8_t xoa_archive; 457da6c28aaSamw uint8_t xoa_system; 458da6c28aaSamw uint8_t xoa_readonly; 459da6c28aaSamw uint8_t xoa_hidden; 460da6c28aaSamw uint8_t xoa_nounlink; 461da6c28aaSamw uint8_t xoa_immutable; 462da6c28aaSamw uint8_t xoa_appendonly; 463da6c28aaSamw uint8_t xoa_nodump; 464da6c28aaSamw uint8_t xoa_opaque; 465da6c28aaSamw uint8_t xoa_av_quarantined; 466da6c28aaSamw uint8_t xoa_av_modified; 467da6c28aaSamw uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ]; 4687a286c47SDai Ngo uint8_t xoa_reparse; 46999d5e173STim Haley uint64_t xoa_generation; 470fd9ee8b5Sjoyce mcintosh uint8_t xoa_offline; 471fd9ee8b5Sjoyce mcintosh uint8_t xoa_sparse; 472f67950b2SNasf-Fan uint8_t xoa_projinherit; 473f67950b2SNasf-Fan uint64_t xoa_projid; 474da6c28aaSamw } xoptattr_t; 475da6c28aaSamw 476da6c28aaSamw /* 477da6c28aaSamw * The xvattr structure is really a variable length structure that 478da6c28aaSamw * is made up of: 479da6c28aaSamw * - The classic vattr_t (xva_vattr) 480da6c28aaSamw * - a 32 bit quantity (xva_mapsize) that specifies the size of the 481da6c28aaSamw * attribute bitmaps in 32 bit words. 482da6c28aaSamw * - A pointer to the returned attribute bitmap (needed because the 483da6c28aaSamw * previous element, the requested attribute bitmap) is variable lenth. 484da6c28aaSamw * - The requested attribute bitmap, which is an array of 32 bit words. 485da6c28aaSamw * Callers use the XVA_SET_REQ() macro to set the bits corresponding to 486da6c28aaSamw * the attributes that are being requested. 487da6c28aaSamw * - The returned attribute bitmap, which is an array of 32 bit words. 488da6c28aaSamw * File systems that support optional attributes use the XVA_SET_RTN() 489da6c28aaSamw * macro to set the bits corresponding to the attributes that are being 490da6c28aaSamw * returned. 
491da6c28aaSamw * - The xoptattr_t structure which contains the attribute values 492da6c28aaSamw * 493da6c28aaSamw * xva_mapsize determines how many words in the attribute bitmaps. 494da6c28aaSamw * Immediately following the attribute bitmaps is the xoptattr_t. 495da6c28aaSamw * xva_getxoptattr() is used to get the pointer to the xoptattr_t 496da6c28aaSamw * section. 497da6c28aaSamw */ 498da6c28aaSamw 499da6c28aaSamw #define XVA_MAPSIZE 3 /* Size of attr bitmaps */ 500da6c28aaSamw #define XVA_MAGIC 0x78766174 /* Magic # for verification */ 501da6c28aaSamw 502da6c28aaSamw /* 503da6c28aaSamw * The xvattr structure is an extensible structure which permits optional 504da6c28aaSamw * attributes to be requested/returned. File systems may or may not support 505da6c28aaSamw * optional attributes. They do so at their own discretion but if they do 506da6c28aaSamw * support optional attributes, they must register the VFSFT_XVATTR feature 507da6c28aaSamw * so that the optional attributes can be set/retrived. 508da6c28aaSamw * 509da6c28aaSamw * The fields of the xvattr structure are: 510da6c28aaSamw * 511da6c28aaSamw * xva_vattr - The first element of an xvattr is a legacy vattr structure 512da6c28aaSamw * which includes the common attributes. If AT_XVATTR is set in the va_mask 513da6c28aaSamw * then the entire structure is treated as an xvattr. If AT_XVATTR is not 514da6c28aaSamw * set, then only the xva_vattr structure can be used. 515da6c28aaSamw * 516da6c28aaSamw * xva_magic - 0x78766174 (hex for "xvat"). Magic number for verification. 517da6c28aaSamw * 518da6c28aaSamw * xva_mapsize - Size of requested and returned attribute bitmaps. 519da6c28aaSamw * 520da6c28aaSamw * xva_rtnattrmapp - Pointer to xva_rtnattrmap[]. We need this since the 521da6c28aaSamw * size of the array before it, xva_reqattrmap[], could change which means 522da6c28aaSamw * the location of xva_rtnattrmap[] could change. 
This will allow unbundled 523da6c28aaSamw * file systems to find the location of xva_rtnattrmap[] when the sizes change. 524da6c28aaSamw * 525da6c28aaSamw * xva_reqattrmap[] - Array of requested attributes. Attributes are 526da6c28aaSamw * represented by a specific bit in a specific element of the attribute 527da6c28aaSamw * map array. Callers set the bits corresponding to the attributes 528da6c28aaSamw * that the caller wants to get/set. 529da6c28aaSamw * 530da6c28aaSamw * xva_rtnattrmap[] - Array of attributes that the file system was able to 531da6c28aaSamw * process. Not all file systems support all optional attributes. This map 532da6c28aaSamw * informs the caller which attributes the underlying file system was able 533da6c28aaSamw * to set/get. (Same structure as the requested attributes array in terms 534da6c28aaSamw * of each attribute corresponding to specific bits and array elements.) 535da6c28aaSamw * 536da6c28aaSamw * xva_xoptattrs - Structure containing values of optional attributes. 537da6c28aaSamw * These values are only valid if the corresponding bits in xva_reqattrmap 538da6c28aaSamw * are set and the underlying file system supports those attributes. 539da6c28aaSamw */ 540da6c28aaSamw typedef struct xvattr { 541