/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2015 Joyent, Inc.
 * Copyright 2017 Nexenta Systems, Inc.
 */

/* Portions Copyright 2007 Jeremy Teo */
/* Portions Copyright 2010 Robert Milkowski */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/kmem.h>
#include <sys/taskq.h>
#include <sys/uio.h>
#include <sys/vmsystm.h>
#include <sys/atomic.h>
#include <sys/vm.h>
#include <vm/seg_vn.h>
#include <vm/pvn.h>
#include <vm/as.h>
#include <vm/kpm.h>
#include <vm/seg_kpm.h>
#include <sys/mman.h>
#include <sys/pathname.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/unistd.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_acl.h>
#include <sys/zfs_ioctl.h>
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
#include <sys/zap.h>
#include <sys/sa.h>
#include <sys/dirent.h>
#include <sys/policy.h>
#include <sys/sunddi.h>
#include <sys/filio.h>
#include <sys/sid.h>
#include "fs/fs_subr.h"
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/zfs_sa.h>
#include <sys/dnlc.h>
#include <sys/zfs_rlock.h>
#include <sys/extdirent.h>
#include <sys/kidmap.h>
#include <sys/cred.h>
#include <sys/attr.h>
#include <sys/zil.h>

/*
 * Programming rules.
 *
 * Each vnode op performs some logical unit of work.  To do this, the ZPL must
 * properly lock its in-core state, create a DMU transaction, do the work,
 * record this work in the intent log (ZIL), commit the DMU transaction,
 * and wait for the intent log to commit if it is a synchronous operation.
 * Moreover, the vnode ops must work in both normal and log replay context.
 * The ordering of events is important to avoid deadlocks and references
 * to freed memory.  The example below illustrates the following Big Rules:
 *
 *  (1)	A check must be made in each zfs thread for a mounted file system.
 *	This is done avoiding races using ZFS_ENTER(zfsvfs).
 *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
 *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
 *	can return EIO from the calling function.
 *
 *  (2)	VN_RELE() should always be the last thing except for zil_commit()
 *	(if necessary) and ZFS_EXIT(). This is for 3 reasons:
 *	First, if it's the last reference, the vnode/znode
 *	can be freed, so the zp may point to freed memory.
 *	Second, the last
 *	reference will call zfs_zinactive(), which may induce a lot of work --
 *	pushing cached pages (which acquires range locks) and syncing out
 *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
 *	which could deadlock the system if you were already holding one.
 *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 *
 *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
 *	as they can span dmu_tx_assign() calls.
 *
 *  (4)	If ZPL locks are held, pass TXG_NOWAIT as the second argument to
 *	dmu_tx_assign().  This is critical because we don't want to block
 *	while holding locks.
 *
 *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
 *	reduces lock contention and CPU usage when we must wait (note that if
 *	throughput is constrained by the storage, nearly every transaction
 *	must wait).
 *
 *	Note, in particular, that if a lock is sometimes acquired before
 *	the tx assigns, and sometimes after (e.g. z_lock), then failing
 *	to use a non-blocking assign can deadlock the system.  The scenario:
 *
 *	Thread A has grabbed a lock before calling dmu_tx_assign().
 *	Thread B is in an already-assigned tx, and blocks for this lock.
 *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
 *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
 *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
 *	to indicate that this operation has already called dmu_tx_wait().
 *	This will ensure that we don't retry forever, waiting a short bit
 *	each time.
 *
 *  (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks.  This ensures that the ordering of events
 *	in the intent log matches the order in which they actually occurred.
 *	During ZIL replay the zfs_log_* functions will update the sequence
 *	number to indicate the zil transaction has replayed.
 *
 *  (6)	At the end of each vnode op, the DMU tx must always commit,
 *	regardless of whether there were any errors.
 *
 *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
 *	to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	ZFS_ENTER(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lock(&dl, ...)	// lock directory entry (may VN_HOLD())
 *	rw_enter(...);			// grab any other locks you need
 *	tx = dmu_tx_create(...);	// get DMU tx
 *	dmu_tx_hold_*();		// hold each object you might modify
 *	error = dmu_tx_assign(tx, (waited ?
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); 164fa9e4066Sahrens * if (error) { 165fa9e4066Sahrens * rw_exit(...); // drop locks 166fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 167fa9e4066Sahrens * VN_RELE(...); // release held vnodes 1681209a471SNeil Perrin * if (error == ERESTART) { 16969962b56SMatthew Ahrens * waited = B_TRUE; 1708a2f1b91Sahrens * dmu_tx_wait(tx); 1718a2f1b91Sahrens * dmu_tx_abort(tx); 172fa9e4066Sahrens * goto top; 173fa9e4066Sahrens * } 1748a2f1b91Sahrens * dmu_tx_abort(tx); // abort DMU tx 175fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 176fa9e4066Sahrens * return (error); // really out of space 177fa9e4066Sahrens * } 178fa9e4066Sahrens * error = do_real_work(); // do whatever this VOP does 179fa9e4066Sahrens * if (error == 0) 180b19a79ecSperrin * zfs_log_*(...); // on success, make ZIL entry 181fa9e4066Sahrens * dmu_tx_commit(tx); // commit DMU tx -- error or not 182fa9e4066Sahrens * rw_exit(...); // drop locks 183fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 184fa9e4066Sahrens * VN_RELE(...); // release held vnodes 1855002558fSNeil Perrin * zil_commit(zilog, foid); // synchronous when necessary 186fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 187fa9e4066Sahrens * return (error); // done, report error 188fa9e4066Sahrens */ 1893cb34c60Sahrens 190fa9e4066Sahrens /* ARGSUSED */ 191fa9e4066Sahrens static int 192da6c28aaSamw zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 193fa9e4066Sahrens { 19467bd71c6Sperrin znode_t *zp = VTOZ(*vpp); 195b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 196b614fdaaSMark Shellenbaum 197b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 198b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 19967bd71c6Sperrin 2000a586ceaSMark Shellenbaum if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 201da6c28aaSamw ((flag & FAPPEND) == 0)) { 202b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 203be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 
204da6c28aaSamw } 205da6c28aaSamw 206da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 207da6c28aaSamw ZTOV(zp)->v_type == VREG && 2080a586ceaSMark Shellenbaum !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 209b614fdaaSMark Shellenbaum if (fs_vscan(*vpp, cr, 0) != 0) { 210b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 211be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 212b614fdaaSMark Shellenbaum } 213b614fdaaSMark Shellenbaum } 214da6c28aaSamw 21567bd71c6Sperrin /* Keep a count of the synchronous opens in the znode */ 21667bd71c6Sperrin if (flag & (FSYNC | FDSYNC)) 21767bd71c6Sperrin atomic_inc_32(&zp->z_sync_cnt); 218da6c28aaSamw 219b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 220fa9e4066Sahrens return (0); 221fa9e4066Sahrens } 222fa9e4066Sahrens 223fa9e4066Sahrens /* ARGSUSED */ 224fa9e4066Sahrens static int 225da6c28aaSamw zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 226da6c28aaSamw caller_context_t *ct) 227fa9e4066Sahrens { 22867bd71c6Sperrin znode_t *zp = VTOZ(vp); 229b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 230b614fdaaSMark Shellenbaum 231ee8143cbSChris Kirby /* 232ee8143cbSChris Kirby * Clean up any locks held by this process on the vp. 
233ee8143cbSChris Kirby */ 234ee8143cbSChris Kirby cleanlocks(vp, ddi_get_pid(), 0); 235ee8143cbSChris Kirby cleanshares(vp, ddi_get_pid()); 236ee8143cbSChris Kirby 237b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 238b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 23967bd71c6Sperrin 24067bd71c6Sperrin /* Decrement the synchronous opens in the znode */ 241ecb72030Sperrin if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 24267bd71c6Sperrin atomic_dec_32(&zp->z_sync_cnt); 24367bd71c6Sperrin 244da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 245da6c28aaSamw ZTOV(zp)->v_type == VREG && 2460a586ceaSMark Shellenbaum !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 247da6c28aaSamw VERIFY(fs_vscan(vp, cr, 1) == 0); 248da6c28aaSamw 249b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 250fa9e4066Sahrens return (0); 251fa9e4066Sahrens } 252fa9e4066Sahrens 253fa9e4066Sahrens /* 254fa9e4066Sahrens * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 255fa9e4066Sahrens * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
256fa9e4066Sahrens */ 257fa9e4066Sahrens static int 258fa9e4066Sahrens zfs_holey(vnode_t *vp, int cmd, offset_t *off) 259fa9e4066Sahrens { 260fa9e4066Sahrens znode_t *zp = VTOZ(vp); 261fa9e4066Sahrens uint64_t noff = (uint64_t)*off; /* new offset */ 262fa9e4066Sahrens uint64_t file_sz; 263fa9e4066Sahrens int error; 264fa9e4066Sahrens boolean_t hole; 265fa9e4066Sahrens 2660a586ceaSMark Shellenbaum file_sz = zp->z_size; 267fa9e4066Sahrens if (noff >= file_sz) { 268be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 269fa9e4066Sahrens } 270fa9e4066Sahrens 271fa9e4066Sahrens if (cmd == _FIO_SEEK_HOLE) 272fa9e4066Sahrens hole = B_TRUE; 273fa9e4066Sahrens else 274fa9e4066Sahrens hole = B_FALSE; 275fa9e4066Sahrens 276fa9e4066Sahrens error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 277fa9e4066Sahrens 2780fbc0cd0SMatthew Ahrens if (error == ESRCH) 279be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 2800fbc0cd0SMatthew Ahrens 2810fbc0cd0SMatthew Ahrens /* 2820fbc0cd0SMatthew Ahrens * We could find a hole that begins after the logical end-of-file, 2830fbc0cd0SMatthew Ahrens * because dmu_offset_next() only works on whole blocks. If the 2840fbc0cd0SMatthew Ahrens * EOF falls mid-block, then indicate that the "virtual hole" 2850fbc0cd0SMatthew Ahrens * at the end of the file begins at the logical EOF, rather than 2860fbc0cd0SMatthew Ahrens * at the end of the last block. 
2870fbc0cd0SMatthew Ahrens */ 2880fbc0cd0SMatthew Ahrens if (noff > file_sz) { 2890fbc0cd0SMatthew Ahrens ASSERT(hole); 2900fbc0cd0SMatthew Ahrens noff = file_sz; 291fa9e4066Sahrens } 292fa9e4066Sahrens 293fa9e4066Sahrens if (noff < *off) 294fa9e4066Sahrens return (error); 295fa9e4066Sahrens *off = noff; 296fa9e4066Sahrens return (error); 297fa9e4066Sahrens } 298fa9e4066Sahrens 299fa9e4066Sahrens /* ARGSUSED */ 300fa9e4066Sahrens static int 301fa9e4066Sahrens zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, 302da6c28aaSamw int *rvalp, caller_context_t *ct) 303fa9e4066Sahrens { 304fa9e4066Sahrens offset_t off; 3052bcf0248SMax Grossman offset_t ndata; 3062bcf0248SMax Grossman dmu_object_info_t doi; 307fa9e4066Sahrens int error; 308fa9e4066Sahrens zfsvfs_t *zfsvfs; 309f18faf3fSek znode_t *zp; 310fa9e4066Sahrens 311fa9e4066Sahrens switch (com) { 312ecb72030Sperrin case _FIOFFS: 3132bcf0248SMax Grossman { 314fa9e4066Sahrens return (zfs_sync(vp->v_vfsp, 0, cred)); 315fa9e4066Sahrens 316ea8dc4b6Seschrock /* 317ea8dc4b6Seschrock * The following two ioctls are used by bfu. Faking out, 318ea8dc4b6Seschrock * necessary to avoid bfu errors. 
319ea8dc4b6Seschrock */ 3202bcf0248SMax Grossman } 321ecb72030Sperrin case _FIOGDIO: 322ecb72030Sperrin case _FIOSDIO: 3232bcf0248SMax Grossman { 324ea8dc4b6Seschrock return (0); 3252bcf0248SMax Grossman } 326ea8dc4b6Seschrock 327ecb72030Sperrin case _FIO_SEEK_DATA: 328ecb72030Sperrin case _FIO_SEEK_HOLE: 3292bcf0248SMax Grossman { 330fa9e4066Sahrens if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 331be6fd75aSMatthew Ahrens return (SET_ERROR(EFAULT)); 332fa9e4066Sahrens 333f18faf3fSek zp = VTOZ(vp); 334f18faf3fSek zfsvfs = zp->z_zfsvfs; 3353cb34c60Sahrens ZFS_ENTER(zfsvfs); 3363cb34c60Sahrens ZFS_VERIFY_ZP(zp); 337fa9e4066Sahrens 338fa9e4066Sahrens /* offset parameter is in/out */ 339fa9e4066Sahrens error = zfs_holey(vp, com, &off); 340fa9e4066Sahrens ZFS_EXIT(zfsvfs); 341fa9e4066Sahrens if (error) 342fa9e4066Sahrens return (error); 343fa9e4066Sahrens if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 344be6fd75aSMatthew Ahrens return (SET_ERROR(EFAULT)); 345fa9e4066Sahrens return (0); 346fa9e4066Sahrens } 3472bcf0248SMax Grossman case _FIO_COUNT_FILLED: 3482bcf0248SMax Grossman { 3492bcf0248SMax Grossman /* 3502bcf0248SMax Grossman * _FIO_COUNT_FILLED adds a new ioctl command which 3512bcf0248SMax Grossman * exposes the number of filled blocks in a 3522bcf0248SMax Grossman * ZFS object. 3532bcf0248SMax Grossman */ 3542bcf0248SMax Grossman zp = VTOZ(vp); 3552bcf0248SMax Grossman zfsvfs = zp->z_zfsvfs; 3562bcf0248SMax Grossman ZFS_ENTER(zfsvfs); 3572bcf0248SMax Grossman ZFS_VERIFY_ZP(zp); 3582bcf0248SMax Grossman 3592bcf0248SMax Grossman /* 3602bcf0248SMax Grossman * Wait for all dirty blocks for this object 3612bcf0248SMax Grossman * to get synced out to disk, and the DMU info 3622bcf0248SMax Grossman * updated. 
3632bcf0248SMax Grossman */ 3642bcf0248SMax Grossman error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id); 3652bcf0248SMax Grossman if (error) { 3662bcf0248SMax Grossman ZFS_EXIT(zfsvfs); 3672bcf0248SMax Grossman return (error); 3682bcf0248SMax Grossman } 3692bcf0248SMax Grossman 3702bcf0248SMax Grossman /* 3712bcf0248SMax Grossman * Retrieve fill count from DMU object. 3722bcf0248SMax Grossman */ 3732bcf0248SMax Grossman error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi); 3742bcf0248SMax Grossman if (error) { 3752bcf0248SMax Grossman ZFS_EXIT(zfsvfs); 3762bcf0248SMax Grossman return (error); 3772bcf0248SMax Grossman } 3782bcf0248SMax Grossman 3792bcf0248SMax Grossman ndata = doi.doi_fill_count; 3802bcf0248SMax Grossman 3812bcf0248SMax Grossman ZFS_EXIT(zfsvfs); 3822bcf0248SMax Grossman if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag)) 3832bcf0248SMax Grossman return (SET_ERROR(EFAULT)); 3842bcf0248SMax Grossman return (0); 3852bcf0248SMax Grossman } 3862bcf0248SMax Grossman } 387be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTTY)); 388fa9e4066Sahrens } 389fa9e4066Sahrens 3900fab61baSJonathan W Adams /* 3910fab61baSJonathan W Adams * Utility functions to map and unmap a single physical page. These 3920fab61baSJonathan W Adams * are used to manage the mappable copies of ZFS file data, and therefore 3930fab61baSJonathan W Adams * do not update ref/mod bits. 3940fab61baSJonathan W Adams */ 3950fab61baSJonathan W Adams caddr_t 3960fab61baSJonathan W Adams zfs_map_page(page_t *pp, enum seg_rw rw) 3970fab61baSJonathan W Adams { 3980fab61baSJonathan W Adams if (kpm_enable) 3990fab61baSJonathan W Adams return (hat_kpm_mapin(pp, 0)); 4000fab61baSJonathan W Adams ASSERT(rw == S_READ || rw == S_WRITE); 4010fab61baSJonathan W Adams return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? 
PROT_WRITE : 0), 4020fab61baSJonathan W Adams (caddr_t)-1)); 4030fab61baSJonathan W Adams } 4040fab61baSJonathan W Adams 4050fab61baSJonathan W Adams void 4060fab61baSJonathan W Adams zfs_unmap_page(page_t *pp, caddr_t addr) 4070fab61baSJonathan W Adams { 4080fab61baSJonathan W Adams if (kpm_enable) { 4090fab61baSJonathan W Adams hat_kpm_mapout(pp, 0, addr); 4100fab61baSJonathan W Adams } else { 4110fab61baSJonathan W Adams ppmapout(addr); 4120fab61baSJonathan W Adams } 4130fab61baSJonathan W Adams } 4140fab61baSJonathan W Adams 415fa9e4066Sahrens /* 416fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data synchronized 417fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 418fa9e4066Sahrens * 419fa9e4066Sahrens * On Write: If we find a memory mapped page, we write to *both* 420fa9e4066Sahrens * the page and the dmu buffer. 421fa9e4066Sahrens */ 422ac05c741SMark Maybee static void 423ac05c741SMark Maybee update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) 424fa9e4066Sahrens { 425ac05c741SMark Maybee int64_t off; 426fa9e4066Sahrens 427fa9e4066Sahrens off = start & PAGEOFFSET; 428fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 429fa9e4066Sahrens page_t *pp; 430ac05c741SMark Maybee uint64_t nbytes = MIN(PAGESIZE - off, len); 431fa9e4066Sahrens 432fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 433fa9e4066Sahrens caddr_t va; 434fa9e4066Sahrens 4350fab61baSJonathan W Adams va = zfs_map_page(pp, S_WRITE); 4367bfdf011SNeil Perrin (void) dmu_read(os, oid, start+off, nbytes, va+off, 4377bfdf011SNeil Perrin DMU_READ_PREFETCH); 4380fab61baSJonathan W Adams zfs_unmap_page(pp, va); 439fa9e4066Sahrens page_unlock(pp); 440fa9e4066Sahrens } 441ac05c741SMark Maybee len -= nbytes; 442fa9e4066Sahrens off = 0; 443fa9e4066Sahrens } 444fa9e4066Sahrens } 445fa9e4066Sahrens 446fa9e4066Sahrens /* 447fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data 
synchronized 448fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 449fa9e4066Sahrens * 450fa9e4066Sahrens * On Read: We "read" preferentially from memory mapped pages, 451fa9e4066Sahrens * else we default from the dmu buffer. 452fa9e4066Sahrens * 453fa9e4066Sahrens * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 454f7170741SWill Andrews * the file is memory mapped. 455fa9e4066Sahrens */ 456fa9e4066Sahrens static int 457feb08c6bSbillm mappedread(vnode_t *vp, int nbytes, uio_t *uio) 458fa9e4066Sahrens { 459feb08c6bSbillm znode_t *zp = VTOZ(vp); 460feb08c6bSbillm int64_t start, off; 461fa9e4066Sahrens int len = nbytes; 462fa9e4066Sahrens int error = 0; 463fa9e4066Sahrens 464fa9e4066Sahrens start = uio->uio_loffset; 465fa9e4066Sahrens off = start & PAGEOFFSET; 466fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 467fa9e4066Sahrens page_t *pp; 468feb08c6bSbillm uint64_t bytes = MIN(PAGESIZE - off, len); 469fa9e4066Sahrens 470fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 471fa9e4066Sahrens caddr_t va; 472fa9e4066Sahrens 4730fab61baSJonathan W Adams va = zfs_map_page(pp, S_READ); 474fa9e4066Sahrens error = uiomove(va + off, bytes, UIO_READ, uio); 4750fab61baSJonathan W Adams zfs_unmap_page(pp, va); 476fa9e4066Sahrens page_unlock(pp); 477fa9e4066Sahrens } else { 478f8554bb9SMatthew Ahrens error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 479f8554bb9SMatthew Ahrens uio, bytes); 480fa9e4066Sahrens } 481fa9e4066Sahrens len -= bytes; 482fa9e4066Sahrens off = 0; 483fa9e4066Sahrens if (error) 484fa9e4066Sahrens break; 485fa9e4066Sahrens } 486fa9e4066Sahrens return (error); 487fa9e4066Sahrens } 488fa9e4066Sahrens 489feb08c6bSbillm offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 490fa9e4066Sahrens 491fa9e4066Sahrens /* 492fa9e4066Sahrens * Read bytes from specified file into supplied buffer. 493fa9e4066Sahrens * 494fa9e4066Sahrens * IN: vp - vnode of file to be read from. 
495fa9e4066Sahrens * uio - structure supplying read location, range info, 496fa9e4066Sahrens * and return buffer. 497fa9e4066Sahrens * ioflag - SYNC flags; used to provide FRSYNC semantics. 498fa9e4066Sahrens * cr - credentials of caller. 499da6c28aaSamw * ct - caller context 500fa9e4066Sahrens * 501fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 502fa9e4066Sahrens * 503f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 504fa9e4066Sahrens * 505fa9e4066Sahrens * Side Effects: 506fa9e4066Sahrens * vp - atime updated if byte count > 0 507fa9e4066Sahrens */ 508fa9e4066Sahrens /* ARGSUSED */ 509fa9e4066Sahrens static int 510fa9e4066Sahrens zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 511fa9e4066Sahrens { 512fa9e4066Sahrens znode_t *zp = VTOZ(vp); 513fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 514feb08c6bSbillm ssize_t n, nbytes; 515d5285caeSGeorge Wilson int error = 0; 516104e2ed7Sperrin rl_t *rl; 517c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_t *xuio = NULL; 518fa9e4066Sahrens 5193cb34c60Sahrens ZFS_ENTER(zfsvfs); 5203cb34c60Sahrens ZFS_VERIFY_ZP(zp); 521fa9e4066Sahrens 5220a586ceaSMark Shellenbaum if (zp->z_pflags & ZFS_AV_QUARANTINED) { 5230616c50eSmarks ZFS_EXIT(zfsvfs); 524be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 5250616c50eSmarks } 5260616c50eSmarks 527fa9e4066Sahrens /* 528fa9e4066Sahrens * Validate file offset 529fa9e4066Sahrens */ 530fa9e4066Sahrens if (uio->uio_loffset < (offset_t)0) { 531fa9e4066Sahrens ZFS_EXIT(zfsvfs); 532be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 533fa9e4066Sahrens } 534fa9e4066Sahrens 535fa9e4066Sahrens /* 536fa9e4066Sahrens * Fasttrack empty reads 537fa9e4066Sahrens */ 538fa9e4066Sahrens if (uio->uio_resid == 0) { 539fa9e4066Sahrens ZFS_EXIT(zfsvfs); 540fa9e4066Sahrens return (0); 541fa9e4066Sahrens } 542fa9e4066Sahrens 543fa9e4066Sahrens /* 544104e2ed7Sperrin * Check for mandatory locks 545fa9e4066Sahrens */ 
5460a586ceaSMark Shellenbaum if (MANDMODE(zp->z_mode)) { 547fa9e4066Sahrens if (error = chklock(vp, FREAD, 548fa9e4066Sahrens uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 549fa9e4066Sahrens ZFS_EXIT(zfsvfs); 550fa9e4066Sahrens return (error); 551fa9e4066Sahrens } 552fa9e4066Sahrens } 553fa9e4066Sahrens 554fa9e4066Sahrens /* 555fa9e4066Sahrens * If we're in FRSYNC mode, sync out this znode before reading it. 556fa9e4066Sahrens */ 55755da60b9SMark J Musante if (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5585002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 559fa9e4066Sahrens 560fa9e4066Sahrens /* 561104e2ed7Sperrin * Lock the range against changes. 562fa9e4066Sahrens */ 563104e2ed7Sperrin rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 564104e2ed7Sperrin 565fa9e4066Sahrens /* 566fa9e4066Sahrens * If we are reading past end-of-file we can skip 567fa9e4066Sahrens * to the end; but we might still need to set atime. 568fa9e4066Sahrens */ 5690a586ceaSMark Shellenbaum if (uio->uio_loffset >= zp->z_size) { 570fa9e4066Sahrens error = 0; 571fa9e4066Sahrens goto out; 572fa9e4066Sahrens } 573fa9e4066Sahrens 5740a586ceaSMark Shellenbaum ASSERT(uio->uio_loffset < zp->z_size); 5750a586ceaSMark Shellenbaum n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 576feb08c6bSbillm 577c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((uio->uio_extflg == UIO_XUIO) && 578c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 579c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int nblk; 580c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz = zp->z_blksz; 581c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uint64_t offset = uio->uio_loffset; 582c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 583c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio = (xuio_t *)uio; 
584c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((ISP2(blksz))) { 585c242f9a0Schunli zhang - Sun Microsystems - Irvine United States nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 586c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz)) / blksz; 587c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 588c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(offset + n <= blksz); 589c242f9a0Schunli zhang - Sun Microsystems - Irvine United States nblk = 1; 590c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 591570de38fSSurya Prakki (void) dmu_xuio_init(xuio, nblk); 592c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 593c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (vn_has_cached_data(vp)) { 594c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 595c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * For simplicity, we always allocate a full buffer 596c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * even if we only expect to read a portion of a block. 
597c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 598c242f9a0Schunli zhang - Sun Microsystems - Irvine United States while (--nblk >= 0) { 599570de38fSSurya Prakki (void) dmu_xuio_add(xuio, 6000a586ceaSMark Shellenbaum dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 6010a586ceaSMark Shellenbaum blksz), 0, blksz); 602c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 603c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 604c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 605c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 606feb08c6bSbillm while (n > 0) { 607feb08c6bSbillm nbytes = MIN(n, zfs_read_chunk_size - 608feb08c6bSbillm P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 609fa9e4066Sahrens 610f8554bb9SMatthew Ahrens if (vn_has_cached_data(vp)) { 611feb08c6bSbillm error = mappedread(vp, nbytes, uio); 612f8554bb9SMatthew Ahrens } else { 613f8554bb9SMatthew Ahrens error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 614f8554bb9SMatthew Ahrens uio, nbytes); 615f8554bb9SMatthew Ahrens } 616b87f3af3Sperrin if (error) { 617b87f3af3Sperrin /* convert checksum errors into IO errors */ 618b87f3af3Sperrin if (error == ECKSUM) 619be6fd75aSMatthew Ahrens error = SET_ERROR(EIO); 620feb08c6bSbillm break; 621b87f3af3Sperrin } 622fa9e4066Sahrens 623feb08c6bSbillm n -= nbytes; 624fa9e4066Sahrens } 625fa9e4066Sahrens out: 626c5c6ffa0Smaybee zfs_range_unlock(rl); 627fa9e4066Sahrens 628fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 629fa9e4066Sahrens ZFS_EXIT(zfsvfs); 630fa9e4066Sahrens return (error); 631fa9e4066Sahrens } 632fa9e4066Sahrens 633fa9e4066Sahrens /* 634fa9e4066Sahrens * Write the bytes to a file. 635fa9e4066Sahrens * 636fa9e4066Sahrens * IN: vp - vnode of file to be written to. 637fa9e4066Sahrens * uio - structure supplying write location, range info, 638fa9e4066Sahrens * and data buffer. 639f7170741SWill Andrews * ioflag - FAPPEND, FSYNC, and/or FDSYNC. 
FAPPEND is 640f7170741SWill Andrews * set if in append mode. 641fa9e4066Sahrens * cr - credentials of caller. 642da6c28aaSamw * ct - caller context (NFS/CIFS fem monitor only) 643fa9e4066Sahrens * 644fa9e4066Sahrens * OUT: uio - updated offset and range. 645fa9e4066Sahrens * 646f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 647fa9e4066Sahrens * 648fa9e4066Sahrens * Timestamps: 649fa9e4066Sahrens * vp - ctime|mtime updated if byte count > 0 650fa9e4066Sahrens */ 6510a586ceaSMark Shellenbaum 652fa9e4066Sahrens /* ARGSUSED */ 653fa9e4066Sahrens static int 654fa9e4066Sahrens zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 655fa9e4066Sahrens { 656fa9e4066Sahrens znode_t *zp = VTOZ(vp); 657fa9e4066Sahrens rlim64_t limit = uio->uio_llimit; 658fa9e4066Sahrens ssize_t start_resid = uio->uio_resid; 659fa9e4066Sahrens ssize_t tx_bytes; 660fa9e4066Sahrens uint64_t end_size; 661fa9e4066Sahrens dmu_tx_t *tx; 662fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 663f18faf3fSek zilog_t *zilog; 664fa9e4066Sahrens offset_t woff; 665fa9e4066Sahrens ssize_t n, nbytes; 666104e2ed7Sperrin rl_t *rl; 667fa9e4066Sahrens int max_blksz = zfsvfs->z_max_blksz; 668d5285caeSGeorge Wilson int error = 0; 6692fdbea25SAleksandr Guzovskiy arc_buf_t *abuf; 670d5285caeSGeorge Wilson iovec_t *aiov = NULL; 671c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_t *xuio = NULL; 672c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int i_iov = 0; 673c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int iovcnt = uio->uio_iovcnt; 674c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *iovp = uio->uio_iov; 675c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int write_eof; 6760a586ceaSMark Shellenbaum int count = 0; 6770a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[4]; 6780a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 679fa9e4066Sahrens 680fa9e4066Sahrens /* 
681fa9e4066Sahrens * Fasttrack empty write 682fa9e4066Sahrens */ 683104e2ed7Sperrin n = start_resid; 684fa9e4066Sahrens if (n == 0) 685fa9e4066Sahrens return (0); 686fa9e4066Sahrens 687104e2ed7Sperrin if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 688104e2ed7Sperrin limit = MAXOFFSET_T; 689104e2ed7Sperrin 6903cb34c60Sahrens ZFS_ENTER(zfsvfs); 6913cb34c60Sahrens ZFS_VERIFY_ZP(zp); 692c09193bfSmarks 6930a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 6940a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 6950a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 6960a586ceaSMark Shellenbaum &zp->z_size, 8); 6970a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 6980a586ceaSMark Shellenbaum &zp->z_pflags, 8); 6990a586ceaSMark Shellenbaum 7002144b121SMarcel Telka /* 7012144b121SMarcel Telka * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 7022144b121SMarcel Telka * callers might not be able to detect properly that we are read-only, 7032144b121SMarcel Telka * so check it explicitly here. 7042144b121SMarcel Telka */ 7052144b121SMarcel Telka if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 7062144b121SMarcel Telka ZFS_EXIT(zfsvfs); 7072144b121SMarcel Telka return (SET_ERROR(EROFS)); 7082144b121SMarcel Telka } 7092144b121SMarcel Telka 710c09193bfSmarks /* 7112889ec41SGordon Ross * If immutable or not appending then return EPERM. 7122889ec41SGordon Ross * Intentionally allow ZFS_READONLY through here. 
7132889ec41SGordon Ross * See zfs_zaccess_common() 714c09193bfSmarks */ 7152889ec41SGordon Ross if ((zp->z_pflags & ZFS_IMMUTABLE) || 7160a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 7170a586ceaSMark Shellenbaum (uio->uio_loffset < zp->z_size))) { 718c09193bfSmarks ZFS_EXIT(zfsvfs); 719be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 720c09193bfSmarks } 721c09193bfSmarks 722f18faf3fSek zilog = zfsvfs->z_log; 723fa9e4066Sahrens 72441865f27SWilliam Gorrell /* 72541865f27SWilliam Gorrell * Validate file offset 72641865f27SWilliam Gorrell */ 7270a586ceaSMark Shellenbaum woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 72841865f27SWilliam Gorrell if (woff < 0) { 72941865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 730be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 73141865f27SWilliam Gorrell } 73241865f27SWilliam Gorrell 73341865f27SWilliam Gorrell /* 73441865f27SWilliam Gorrell * Check for mandatory locks before calling zfs_range_lock() 73541865f27SWilliam Gorrell * in order to prevent a deadlock with locks set via fcntl(). 73641865f27SWilliam Gorrell */ 7370a586ceaSMark Shellenbaum if (MANDMODE((mode_t)zp->z_mode) && 73841865f27SWilliam Gorrell (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 73941865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 74041865f27SWilliam Gorrell return (error); 74141865f27SWilliam Gorrell } 74241865f27SWilliam Gorrell 743fa9e4066Sahrens /* 744c5c6ffa0Smaybee * Pre-fault the pages to ensure slow (eg NFS) pages 745104e2ed7Sperrin * don't hold up txg. 746c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Skip this if uio contains loaned arc_buf. 
747fa9e4066Sahrens */ 748c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((uio->uio_extflg == UIO_XUIO) && 749c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 750c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio = (xuio_t *)uio; 751c242f9a0Schunli zhang - Sun Microsystems - Irvine United States else 752ff866947SSanjeev Bagewadi uio_prefaultpages(MIN(n, max_blksz), uio); 753fa9e4066Sahrens 754fa9e4066Sahrens /* 755fa9e4066Sahrens * If in append mode, set the io offset pointer to eof. 756fa9e4066Sahrens */ 757104e2ed7Sperrin if (ioflag & FAPPEND) { 758104e2ed7Sperrin /* 75941865f27SWilliam Gorrell * Obtain an appending range lock to guarantee file append 76041865f27SWilliam Gorrell * semantics. We reset the write offset once we have the lock. 761104e2ed7Sperrin */ 762104e2ed7Sperrin rl = zfs_range_lock(zp, 0, n, RL_APPEND); 76341865f27SWilliam Gorrell woff = rl->r_off; 764104e2ed7Sperrin if (rl->r_len == UINT64_MAX) { 76541865f27SWilliam Gorrell /* 76641865f27SWilliam Gorrell * We overlocked the file because this write will cause 76741865f27SWilliam Gorrell * the file block size to increase. 76841865f27SWilliam Gorrell * Note that zp_size cannot change with this lock held. 76941865f27SWilliam Gorrell */ 7700a586ceaSMark Shellenbaum woff = zp->z_size; 771104e2ed7Sperrin } 77241865f27SWilliam Gorrell uio->uio_loffset = woff; 773fa9e4066Sahrens } else { 774fa9e4066Sahrens /* 77541865f27SWilliam Gorrell * Note that if the file block size will change as a result of 77641865f27SWilliam Gorrell * this write, then this range lock will lock the entire file 77741865f27SWilliam Gorrell * so that we can re-write the block safely. 
778fa9e4066Sahrens */ 779104e2ed7Sperrin rl = zfs_range_lock(zp, woff, n, RL_WRITER); 780fa9e4066Sahrens } 781fa9e4066Sahrens 782fa9e4066Sahrens if (woff >= limit) { 783feb08c6bSbillm zfs_range_unlock(rl); 784feb08c6bSbillm ZFS_EXIT(zfsvfs); 785be6fd75aSMatthew Ahrens return (SET_ERROR(EFBIG)); 786fa9e4066Sahrens } 787fa9e4066Sahrens 788fa9e4066Sahrens if ((woff + n) > limit || woff > (limit - n)) 789fa9e4066Sahrens n = limit - woff; 790fa9e4066Sahrens 791c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* Will this write extend the file length? */ 7920a586ceaSMark Shellenbaum write_eof = (woff + n > zp->z_size); 793c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 7940a586ceaSMark Shellenbaum end_size = MAX(zp->z_size, woff + n); 795fa9e4066Sahrens 796104e2ed7Sperrin /* 797feb08c6bSbillm * Write the file in reasonable size chunks. Each chunk is written 798feb08c6bSbillm * in a separate transaction; this keeps the intent log records small 799feb08c6bSbillm * and allows us to do more fine-grained space accounting. 
800104e2ed7Sperrin */ 801feb08c6bSbillm while (n > 0) { 8022fdbea25SAleksandr Guzovskiy abuf = NULL; 8032fdbea25SAleksandr Guzovskiy woff = uio->uio_loffset; 8040a586ceaSMark Shellenbaum if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 8050a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 8062fdbea25SAleksandr Guzovskiy if (abuf != NULL) 8072fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 808be6fd75aSMatthew Ahrens error = SET_ERROR(EDQUOT); 80914843421SMatthew Ahrens break; 81014843421SMatthew Ahrens } 8112fdbea25SAleksandr Guzovskiy 812c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (xuio && abuf == NULL) { 813c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(i_iov < iovcnt); 814c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov = &iovp[i_iov]; 815c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = dmu_xuio_arcbuf(xuio, i_iov); 816c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_xuio_clear(xuio, i_iov); 817c242f9a0Schunli zhang - Sun Microsystems - Irvine United States DTRACE_PROBE3(zfs_cp_write, int, i_iov, 818c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *, aiov, arc_buf_t *, abuf); 819c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT((aiov->iov_base == abuf->b_data) || 820c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ((char *)aiov->iov_base - (char *)abuf->b_data + 821c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_len == arc_buf_size(abuf))); 822c242f9a0Schunli zhang - Sun Microsystems - Irvine United States i_iov++; 823c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else if (abuf == NULL && n >= max_blksz && 8240a586ceaSMark Shellenbaum woff >= zp->z_size && 8252fdbea25SAleksandr Guzovskiy P2PHASE(woff, max_blksz) == 0 && 8262fdbea25SAleksandr Guzovskiy zp->z_blksz == max_blksz) { 827c242f9a0Schunli zhang - Sun Microsystems - Irvine 
United States /* 828c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * This write covers a full block. "Borrow" a buffer 829c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * from the dmu so that we can fill it before we enter 830c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * a transaction. This avoids the possibility of 831c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * holding up the transaction if the data copy hangs 832c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * up on a pagefault (e.g., from an NFS server mapping). 833c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 8342fdbea25SAleksandr Guzovskiy size_t cbytes; 8352fdbea25SAleksandr Guzovskiy 8360a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 8370a586ceaSMark Shellenbaum max_blksz); 8382fdbea25SAleksandr Guzovskiy ASSERT(abuf != NULL); 8392fdbea25SAleksandr Guzovskiy ASSERT(arc_buf_size(abuf) == max_blksz); 8402fdbea25SAleksandr Guzovskiy if (error = uiocopy(abuf->b_data, max_blksz, 8412fdbea25SAleksandr Guzovskiy UIO_WRITE, uio, &cbytes)) { 8422fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 8432fdbea25SAleksandr Guzovskiy break; 8442fdbea25SAleksandr Guzovskiy } 8452fdbea25SAleksandr Guzovskiy ASSERT(cbytes == max_blksz); 8462fdbea25SAleksandr Guzovskiy } 8472fdbea25SAleksandr Guzovskiy 8482fdbea25SAleksandr Guzovskiy /* 8492fdbea25SAleksandr Guzovskiy * Start a transaction. 
8502fdbea25SAleksandr Guzovskiy */ 851feb08c6bSbillm tx = dmu_tx_create(zfsvfs->z_os); 8520a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 853feb08c6bSbillm dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 8540a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 855e722410cSMatthew Ahrens error = dmu_tx_assign(tx, TXG_WAIT); 856feb08c6bSbillm if (error) { 857feb08c6bSbillm dmu_tx_abort(tx); 8582fdbea25SAleksandr Guzovskiy if (abuf != NULL) 8592fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 860feb08c6bSbillm break; 861feb08c6bSbillm } 862104e2ed7Sperrin 863feb08c6bSbillm /* 864feb08c6bSbillm * If zfs_range_lock() over-locked we grow the blocksize 865feb08c6bSbillm * and then reduce the lock range. This will only happen 866feb08c6bSbillm * on the first iteration since zfs_range_reduce() will 867feb08c6bSbillm * shrink down r_len to the appropriate size. 868feb08c6bSbillm */ 869feb08c6bSbillm if (rl->r_len == UINT64_MAX) { 870feb08c6bSbillm uint64_t new_blksz; 871feb08c6bSbillm 872feb08c6bSbillm if (zp->z_blksz > max_blksz) { 873b5152584SMatthew Ahrens /* 874b5152584SMatthew Ahrens * File's blocksize is already larger than the 875b5152584SMatthew Ahrens * "recordsize" property. Only let it grow to 876b5152584SMatthew Ahrens * the next power of 2. 877b5152584SMatthew Ahrens */ 878feb08c6bSbillm ASSERT(!ISP2(zp->z_blksz)); 879b5152584SMatthew Ahrens new_blksz = MIN(end_size, 880b5152584SMatthew Ahrens 1 << highbit64(zp->z_blksz)); 881feb08c6bSbillm } else { 882feb08c6bSbillm new_blksz = MIN(end_size, max_blksz); 883feb08c6bSbillm } 884feb08c6bSbillm zfs_grow_blocksize(zp, new_blksz, tx); 885feb08c6bSbillm zfs_range_reduce(rl, woff, n); 886fa9e4066Sahrens } 887fa9e4066Sahrens 888fa9e4066Sahrens /* 889fa9e4066Sahrens * XXX - should we really limit each write to z_max_blksz? 890fa9e4066Sahrens * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
891fa9e4066Sahrens */ 892fa9e4066Sahrens nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 893fa9e4066Sahrens 8942fdbea25SAleksandr Guzovskiy if (abuf == NULL) { 8952fdbea25SAleksandr Guzovskiy tx_bytes = uio->uio_resid; 89694d1a210STim Haley error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 89794d1a210STim Haley uio, nbytes, tx); 8982fdbea25SAleksandr Guzovskiy tx_bytes -= uio->uio_resid; 8992fdbea25SAleksandr Guzovskiy } else { 9002fdbea25SAleksandr Guzovskiy tx_bytes = nbytes; 901c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 902c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 903c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * If this is not a full block write, but we are 904c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * extending the file past EOF and this data starts 905c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * block-aligned, use assign_arcbuf(). Otherwise, 906c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * write via dmu_write(). 
907c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 908c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (tx_bytes < max_blksz && (!write_eof || 909c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_base != abuf->b_data)) { 910c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio); 911c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_write(zfsvfs->z_os, zp->z_id, woff, 912c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_len, aiov->iov_base, tx); 913c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_return_arcbuf(abuf); 914c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_stat_wbuf_copied(); 915c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 916c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio || tx_bytes == max_blksz); 9170a586ceaSMark Shellenbaum dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 9180a586ceaSMark Shellenbaum woff, abuf, tx); 919c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 9202fdbea25SAleksandr Guzovskiy ASSERT(tx_bytes <= uio->uio_resid); 9212fdbea25SAleksandr Guzovskiy uioskip(uio, tx_bytes); 9222fdbea25SAleksandr Guzovskiy } 9232fdbea25SAleksandr Guzovskiy if (tx_bytes && vn_has_cached_data(vp)) { 924ac05c741SMark Maybee update_pages(vp, woff, 925ac05c741SMark Maybee tx_bytes, zfsvfs->z_os, zp->z_id); 9262fdbea25SAleksandr Guzovskiy } 927fa9e4066Sahrens 928feb08c6bSbillm /* 929feb08c6bSbillm * If we made no progress, we're done. If we made even 930feb08c6bSbillm * partial progress, update the znode and ZIL accordingly. 
931feb08c6bSbillm */ 932feb08c6bSbillm if (tx_bytes == 0) { 9330a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 9340a586ceaSMark Shellenbaum (void *)&zp->z_size, sizeof (uint64_t), tx); 935af2c4821Smaybee dmu_tx_commit(tx); 936feb08c6bSbillm ASSERT(error != 0); 937fa9e4066Sahrens break; 938fa9e4066Sahrens } 939fa9e4066Sahrens 940169cdae2Smarks /* 941169cdae2Smarks * Clear Set-UID/Set-GID bits on successful write if not 942169cdae2Smarks * privileged and at least one of the excute bits is set. 943169cdae2Smarks * 944169cdae2Smarks * It would be nice to to this after all writes have 945169cdae2Smarks * been done, but that would still expose the ISUID/ISGID 946169cdae2Smarks * to another app after the partial write is committed. 947da6c28aaSamw * 948f1696b23SMark Shellenbaum * Note: we don't call zfs_fuid_map_id() here because 949f1696b23SMark Shellenbaum * user 0 is not an ephemeral uid. 950169cdae2Smarks */ 951169cdae2Smarks mutex_enter(&zp->z_acl_lock); 9520a586ceaSMark Shellenbaum if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 953169cdae2Smarks (S_IXUSR >> 6))) != 0 && 9540a586ceaSMark Shellenbaum (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 955169cdae2Smarks secpolicy_vnode_setid_retain(cr, 9560a586ceaSMark Shellenbaum (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 9570a586ceaSMark Shellenbaum uint64_t newmode; 9580a586ceaSMark Shellenbaum zp->z_mode &= ~(S_ISUID | S_ISGID); 9590a586ceaSMark Shellenbaum newmode = zp->z_mode; 9600a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 9610a586ceaSMark Shellenbaum (void *)&newmode, sizeof (uint64_t), tx); 962169cdae2Smarks } 963169cdae2Smarks mutex_exit(&zp->z_acl_lock); 964169cdae2Smarks 9650a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 9660a586ceaSMark Shellenbaum B_TRUE); 967fa9e4066Sahrens 968fa9e4066Sahrens /* 969feb08c6bSbillm * Update the file size (zp_size) if it has changed; 970feb08c6bSbillm * account for 
possible concurrent updates. 971fa9e4066Sahrens */ 9720a586ceaSMark Shellenbaum while ((end_size = zp->z_size) < uio->uio_loffset) { 9730a586ceaSMark Shellenbaum (void) atomic_cas_64(&zp->z_size, end_size, 974fa9e4066Sahrens uio->uio_loffset); 9750a586ceaSMark Shellenbaum ASSERT(error == 0); 9760a586ceaSMark Shellenbaum } 977c0e50c98SNeil Perrin /* 978c0e50c98SNeil Perrin * If we are replaying and eof is non zero then force 979c0e50c98SNeil Perrin * the file size to the specified eof. Note, there's no 980c0e50c98SNeil Perrin * concurrency during replay. 981c0e50c98SNeil Perrin */ 982c0e50c98SNeil Perrin if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 983c0e50c98SNeil Perrin zp->z_size = zfsvfs->z_replay_eof; 984c0e50c98SNeil Perrin 9850a586ceaSMark Shellenbaum error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 9860a586ceaSMark Shellenbaum 987feb08c6bSbillm zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 988feb08c6bSbillm dmu_tx_commit(tx); 989fa9e4066Sahrens 990feb08c6bSbillm if (error != 0) 991feb08c6bSbillm break; 992feb08c6bSbillm ASSERT(tx_bytes == nbytes); 993feb08c6bSbillm n -= nbytes; 994ff866947SSanjeev Bagewadi 995ff866947SSanjeev Bagewadi if (!xuio && n > 0) 996ff866947SSanjeev Bagewadi uio_prefaultpages(MIN(n, max_blksz), uio); 997feb08c6bSbillm } 998fa9e4066Sahrens 999c5c6ffa0Smaybee zfs_range_unlock(rl); 1000fa9e4066Sahrens 1001fa9e4066Sahrens /* 1002fa9e4066Sahrens * If we're in replay mode, or we made no progress, return error. 1003fa9e4066Sahrens * Otherwise, it's at least a partial write, so it's successful. 
1004fa9e4066Sahrens */ 10051209a471SNeil Perrin if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1006fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1007fa9e4066Sahrens return (error); 1008fa9e4066Sahrens } 1009fa9e4066Sahrens 101055da60b9SMark J Musante if (ioflag & (FSYNC | FDSYNC) || 101155da60b9SMark J Musante zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 10125002558fSNeil Perrin zil_commit(zilog, zp->z_id); 1013fa9e4066Sahrens 1014fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1015fa9e4066Sahrens return (0); 1016fa9e4066Sahrens } 1017fa9e4066Sahrens 1018c5c6ffa0Smaybee void 1019b24ab676SJeff Bonwick zfs_get_done(zgd_t *zgd, int error) 1020c5c6ffa0Smaybee { 1021b24ab676SJeff Bonwick znode_t *zp = zgd->zgd_private; 1022b24ab676SJeff Bonwick objset_t *os = zp->z_zfsvfs->z_os; 1023b24ab676SJeff Bonwick 1024b24ab676SJeff Bonwick if (zgd->zgd_db) 1025b24ab676SJeff Bonwick dmu_buf_rele(zgd->zgd_db, zgd); 1026b24ab676SJeff Bonwick 1027b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 1028c5c6ffa0Smaybee 10299d3574bfSNeil Perrin /* 10309d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 10319d3574bfSNeil Perrin * txg stopped from syncing. 10329d3574bfSNeil Perrin */ 1033b24ab676SJeff Bonwick VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1034b24ab676SJeff Bonwick 1035b24ab676SJeff Bonwick if (error == 0 && zgd->zgd_bp) 10361271e4b1SPrakash Surya zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp); 1037b24ab676SJeff Bonwick 103867bd71c6Sperrin kmem_free(zgd, sizeof (zgd_t)); 1039c5c6ffa0Smaybee } 1040c5c6ffa0Smaybee 1041c87b8fc5SMark J Musante #ifdef DEBUG 1042c87b8fc5SMark J Musante static int zil_fault_io = 0; 1043c87b8fc5SMark J Musante #endif 1044c87b8fc5SMark J Musante 1045fa9e4066Sahrens /* 1046fa9e4066Sahrens * Get data to generate a TX_WRITE intent log record. 
1047fa9e4066Sahrens */ 1048fa9e4066Sahrens int 10491271e4b1SPrakash Surya zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) 1050fa9e4066Sahrens { 1051fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 1052fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 1053fa9e4066Sahrens znode_t *zp; 1054b24ab676SJeff Bonwick uint64_t object = lr->lr_foid; 1055b24ab676SJeff Bonwick uint64_t offset = lr->lr_offset; 1056b24ab676SJeff Bonwick uint64_t size = lr->lr_length; 1057c5c6ffa0Smaybee dmu_buf_t *db; 105867bd71c6Sperrin zgd_t *zgd; 1059fa9e4066Sahrens int error = 0; 1060fa9e4066Sahrens 10611271e4b1SPrakash Surya ASSERT3P(lwb, !=, NULL); 10621271e4b1SPrakash Surya ASSERT3P(zio, !=, NULL); 10631271e4b1SPrakash Surya ASSERT3U(size, !=, 0); 1064fa9e4066Sahrens 1065fa9e4066Sahrens /* 1066104e2ed7Sperrin * Nothing to do if the file has been removed 1067fa9e4066Sahrens */ 1068b24ab676SJeff Bonwick if (zfs_zget(zfsvfs, object, &zp) != 0) 1069be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1070893a6d32Sahrens if (zp->z_unlinked) { 10719d3574bfSNeil Perrin /* 10729d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 10739d3574bfSNeil Perrin * txg stopped from syncing. 10749d3574bfSNeil Perrin */ 10759d3574bfSNeil Perrin VN_RELE_ASYNC(ZTOV(zp), 10769d3574bfSNeil Perrin dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1077be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1078fa9e4066Sahrens } 1079fa9e4066Sahrens 1080b24ab676SJeff Bonwick zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 10811271e4b1SPrakash Surya zgd->zgd_lwb = lwb; 1082b24ab676SJeff Bonwick zgd->zgd_private = zp; 1083b24ab676SJeff Bonwick 1084fa9e4066Sahrens /* 1085fa9e4066Sahrens * Write records come in two flavors: immediate and indirect. 
1086fa9e4066Sahrens * For small writes it's cheaper to store the data with the 1087fa9e4066Sahrens * log record (immediate); for large writes it's cheaper to 1088fa9e4066Sahrens * sync the data and get a pointer to it (indirect) so that 1089fa9e4066Sahrens * we don't have to write the data twice. 1090fa9e4066Sahrens */ 1091104e2ed7Sperrin if (buf != NULL) { /* immediate write */ 1092b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1093104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 10940a586ceaSMark Shellenbaum if (offset >= zp->z_size) { 1095be6fd75aSMatthew Ahrens error = SET_ERROR(ENOENT); 1096b24ab676SJeff Bonwick } else { 1097b24ab676SJeff Bonwick error = dmu_read(os, object, offset, size, buf, 1098b24ab676SJeff Bonwick DMU_READ_NO_PREFETCH); 1099104e2ed7Sperrin } 1100b24ab676SJeff Bonwick ASSERT(error == 0 || error == ENOENT); 1101104e2ed7Sperrin } else { /* indirect write */ 1102fa9e4066Sahrens /* 1103104e2ed7Sperrin * Have to lock the whole block to ensure when it's 110442b14111SLOLi * written out and its checksum is being calculated 1105104e2ed7Sperrin * that no one can change the data. We need to re-check 1106104e2ed7Sperrin * blocksize after we get the lock in case it's changed! 1107fa9e4066Sahrens */ 1108104e2ed7Sperrin for (;;) { 1109b24ab676SJeff Bonwick uint64_t blkoff; 1110b24ab676SJeff Bonwick size = zp->z_blksz; 1111dfe73b3dSJeff Bonwick blkoff = ISP2(size) ? 
P2PHASE(offset, size) : offset; 1112b24ab676SJeff Bonwick offset -= blkoff; 1113b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1114b24ab676SJeff Bonwick RL_READER); 1115b24ab676SJeff Bonwick if (zp->z_blksz == size) 1116104e2ed7Sperrin break; 1117b24ab676SJeff Bonwick offset += blkoff; 1118b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 1119104e2ed7Sperrin } 1120104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 11210a586ceaSMark Shellenbaum if (lr->lr_offset >= zp->z_size) 1122be6fd75aSMatthew Ahrens error = SET_ERROR(ENOENT); 1123c87b8fc5SMark J Musante #ifdef DEBUG 1124c87b8fc5SMark J Musante if (zil_fault_io) { 1125be6fd75aSMatthew Ahrens error = SET_ERROR(EIO); 1126c87b8fc5SMark J Musante zil_fault_io = 0; 1127c87b8fc5SMark J Musante } 1128c87b8fc5SMark J Musante #endif 1129b24ab676SJeff Bonwick if (error == 0) 113047cb52daSJeff Bonwick error = dmu_buf_hold(os, object, offset, zgd, &db, 113147cb52daSJeff Bonwick DMU_READ_NO_PREFETCH); 1132c87b8fc5SMark J Musante 1133975c32a0SNeil Perrin if (error == 0) { 1134b7edcb94SMatthew Ahrens blkptr_t *bp = &lr->lr_blkptr; 113580901aeaSGeorge Wilson 1136b24ab676SJeff Bonwick zgd->zgd_db = db; 1137b24ab676SJeff Bonwick zgd->zgd_bp = bp; 1138b24ab676SJeff Bonwick 1139b24ab676SJeff Bonwick ASSERT(db->db_offset == offset); 1140b24ab676SJeff Bonwick ASSERT(db->db_size == size); 1141b24ab676SJeff Bonwick 1142b24ab676SJeff Bonwick error = dmu_sync(zio, lr->lr_common.lrc_txg, 1143b24ab676SJeff Bonwick zfs_get_done, zgd); 11440c94e1afSAndriy Gapon ASSERT(error || lr->lr_length <= size); 1145b24ab676SJeff Bonwick 1146975c32a0SNeil Perrin /* 1147b24ab676SJeff Bonwick * On success, we need to wait for the write I/O 1148b24ab676SJeff Bonwick * initiated by dmu_sync() to complete before we can 1149b24ab676SJeff Bonwick * release this dbuf. We will finish everything up 1150b24ab676SJeff Bonwick * in the zfs_get_done() callback. 
1151975c32a0SNeil Perrin */ 1152b24ab676SJeff Bonwick if (error == 0) 1153b24ab676SJeff Bonwick return (0); 1154975c32a0SNeil Perrin 1155b24ab676SJeff Bonwick if (error == EALREADY) { 1156b24ab676SJeff Bonwick lr->lr_common.lrc_txtype = TX_WRITE2; 11575cabbc6bSPrashanth Sreenivasa /* 11585cabbc6bSPrashanth Sreenivasa * TX_WRITE2 relies on the data previously 11595cabbc6bSPrashanth Sreenivasa * written by the TX_WRITE that caused 11605cabbc6bSPrashanth Sreenivasa * EALREADY. We zero out the BP because 11615cabbc6bSPrashanth Sreenivasa * it is the old, currently-on-disk BP, 11625cabbc6bSPrashanth Sreenivasa * so there's no need to zio_flush() its 11635cabbc6bSPrashanth Sreenivasa * vdevs (flushing would needlesly hurt 11645cabbc6bSPrashanth Sreenivasa * performance, and doesn't work on 11655cabbc6bSPrashanth Sreenivasa * indirect vdevs). 11665cabbc6bSPrashanth Sreenivasa */ 11675cabbc6bSPrashanth Sreenivasa zgd->zgd_bp = NULL; 11685cabbc6bSPrashanth Sreenivasa BP_ZERO(bp); 1169b24ab676SJeff Bonwick error = 0; 1170b24ab676SJeff Bonwick } 1171975c32a0SNeil Perrin } 1172fa9e4066Sahrens } 1173b24ab676SJeff Bonwick 1174b24ab676SJeff Bonwick zfs_get_done(zgd, error); 1175b24ab676SJeff Bonwick 1176fa9e4066Sahrens return (error); 1177fa9e4066Sahrens } 1178fa9e4066Sahrens 1179fa9e4066Sahrens /*ARGSUSED*/ 1180fa9e4066Sahrens static int 1181da6c28aaSamw zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1182da6c28aaSamw caller_context_t *ct) 1183fa9e4066Sahrens { 1184fa9e4066Sahrens znode_t *zp = VTOZ(vp); 1185fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1186fa9e4066Sahrens int error; 1187fa9e4066Sahrens 11883cb34c60Sahrens ZFS_ENTER(zfsvfs); 11893cb34c60Sahrens ZFS_VERIFY_ZP(zp); 1190da6c28aaSamw 1191da6c28aaSamw if (flag & V_ACE_MASK) 1192da6c28aaSamw error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1193da6c28aaSamw else 1194da6c28aaSamw error = zfs_zaccess_rwx(zp, mode, flag, cr); 1195da6c28aaSamw 1196fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1197fa9e4066Sahrens return 
(error); 1198fa9e4066Sahrens } 1199fa9e4066Sahrens 1200d47621a4STim Haley /* 1201d47621a4STim Haley * If vnode is for a device return a specfs vnode instead. 1202d47621a4STim Haley */ 1203d47621a4STim Haley static int 1204d47621a4STim Haley specvp_check(vnode_t **vpp, cred_t *cr) 1205d47621a4STim Haley { 1206d47621a4STim Haley int error = 0; 1207d47621a4STim Haley 1208d47621a4STim Haley if (IS_DEVVP(*vpp)) { 1209d47621a4STim Haley struct vnode *svp; 1210d47621a4STim Haley 1211d47621a4STim Haley svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1212d47621a4STim Haley VN_RELE(*vpp); 1213d47621a4STim Haley if (svp == NULL) 1214be6fd75aSMatthew Ahrens error = SET_ERROR(ENOSYS); 1215d47621a4STim Haley *vpp = svp; 1216d47621a4STim Haley } 1217d47621a4STim Haley return (error); 1218d47621a4STim Haley } 1219d47621a4STim Haley 1220d47621a4STim Haley 1221fa9e4066Sahrens /* 1222fa9e4066Sahrens * Lookup an entry in a directory, or an extended attribute directory. 1223fa9e4066Sahrens * If it exists, return a held vnode reference for it. 1224fa9e4066Sahrens * 1225fa9e4066Sahrens * IN: dvp - vnode of directory to search. 1226fa9e4066Sahrens * nm - name of entry to lookup. 1227fa9e4066Sahrens * pnp - full pathname to lookup [UNUSED]. 1228fa9e4066Sahrens * flags - LOOKUP_XATTR set if looking for an attribute. 1229fa9e4066Sahrens * rdir - root directory vnode [UNUSED]. 1230fa9e4066Sahrens * cr - credentials of caller. 1231da6c28aaSamw * ct - caller context 1232da6c28aaSamw * direntflags - directory lookup flags 1233da6c28aaSamw * realpnp - returned pathname. 1234fa9e4066Sahrens * 1235fa9e4066Sahrens * OUT: vpp - vnode of located entry, NULL if not found. 1236fa9e4066Sahrens * 1237f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
1238fa9e4066Sahrens * 1239fa9e4066Sahrens * Timestamps: 1240fa9e4066Sahrens * NA 1241fa9e4066Sahrens */ 1242fa9e4066Sahrens /* ARGSUSED */ 1243fa9e4066Sahrens static int 1244fa9e4066Sahrens zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 1245da6c28aaSamw int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 1246da6c28aaSamw int *direntflags, pathname_t *realpnp) 1247fa9e4066Sahrens { 1248fa9e4066Sahrens znode_t *zdp = VTOZ(dvp); 1249fa9e4066Sahrens zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1250d47621a4STim Haley int error = 0; 1251d47621a4STim Haley 12521c17160aSKevin Crowe /* 12531c17160aSKevin Crowe * Fast path lookup, however we must skip DNLC lookup 12541c17160aSKevin Crowe * for case folding or normalizing lookups because the 12551c17160aSKevin Crowe * DNLC code only stores the passed in name. This means 12561c17160aSKevin Crowe * creating 'a' and removing 'A' on a case insensitive 12571c17160aSKevin Crowe * file system would work, but DNLC still thinks 'a' 12581c17160aSKevin Crowe * exists and won't let you create it again on the next 12591c17160aSKevin Crowe * pass through fast path. 12601c17160aSKevin Crowe */ 1261d47621a4STim Haley if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1262d47621a4STim Haley 1263d47621a4STim Haley if (dvp->v_type != VDIR) { 1264be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTDIR)); 12650a586ceaSMark Shellenbaum } else if (zdp->z_sa_hdl == NULL) { 1266be6fd75aSMatthew Ahrens return (SET_ERROR(EIO)); 1267d47621a4STim Haley } 1268d47621a4STim Haley 1269d47621a4STim Haley if (nm[0] == 0 || (nm[0] == '.' 
&& nm[1] == '\0')) { 1270d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1271d47621a4STim Haley if (!error) { 1272d47621a4STim Haley *vpp = dvp; 1273d47621a4STim Haley VN_HOLD(*vpp); 1274d47621a4STim Haley return (0); 1275d47621a4STim Haley } 1276d47621a4STim Haley return (error); 12771c17160aSKevin Crowe } else if (!zdp->z_zfsvfs->z_norm && 12781c17160aSKevin Crowe (zdp->z_zfsvfs->z_case == ZFS_CASE_SENSITIVE)) { 12791c17160aSKevin Crowe 1280d47621a4STim Haley vnode_t *tvp = dnlc_lookup(dvp, nm); 1281d47621a4STim Haley 1282d47621a4STim Haley if (tvp) { 1283d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1284d47621a4STim Haley if (error) { 1285d47621a4STim Haley VN_RELE(tvp); 1286d47621a4STim Haley return (error); 1287d47621a4STim Haley } 1288d47621a4STim Haley if (tvp == DNLC_NO_VNODE) { 1289d47621a4STim Haley VN_RELE(tvp); 1290be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1291d47621a4STim Haley } else { 1292d47621a4STim Haley *vpp = tvp; 1293d47621a4STim Haley return (specvp_check(vpp, cr)); 1294d47621a4STim Haley } 1295d47621a4STim Haley } 1296d47621a4STim Haley } 1297d47621a4STim Haley } 1298d47621a4STim Haley 1299d47621a4STim Haley DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1300fa9e4066Sahrens 13013cb34c60Sahrens ZFS_ENTER(zfsvfs); 13023cb34c60Sahrens ZFS_VERIFY_ZP(zdp); 1303fa9e4066Sahrens 1304fa9e4066Sahrens *vpp = NULL; 1305fa9e4066Sahrens 1306fa9e4066Sahrens if (flags & LOOKUP_XATTR) { 13077b55fa8eSck /* 13087b55fa8eSck * If the xattr property is off, refuse the lookup request. 13097b55fa8eSck */ 13107b55fa8eSck if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 13117b55fa8eSck ZFS_EXIT(zfsvfs); 1312be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 13137b55fa8eSck } 13147b55fa8eSck 1315fa9e4066Sahrens /* 1316fa9e4066Sahrens * We don't allow recursive attributes.. 1317fa9e4066Sahrens * Maybe someday we will. 
1318fa9e4066Sahrens */ 13190a586ceaSMark Shellenbaum if (zdp->z_pflags & ZFS_XATTR) { 1320fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1321be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1322fa9e4066Sahrens } 1323fa9e4066Sahrens 13243f063a9dSck if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1325fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1326fa9e4066Sahrens return (error); 1327fa9e4066Sahrens } 1328fa9e4066Sahrens 1329fa9e4066Sahrens /* 1330fa9e4066Sahrens * Do we have permission to get into attribute directory? 1331fa9e4066Sahrens */ 1332fa9e4066Sahrens 1333da6c28aaSamw if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1334da6c28aaSamw B_FALSE, cr)) { 1335fa9e4066Sahrens VN_RELE(*vpp); 1336da6c28aaSamw *vpp = NULL; 1337fa9e4066Sahrens } 1338fa9e4066Sahrens 1339fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1340fa9e4066Sahrens return (error); 1341fa9e4066Sahrens } 1342fa9e4066Sahrens 13430f2dc02eSek if (dvp->v_type != VDIR) { 13440f2dc02eSek ZFS_EXIT(zfsvfs); 1345be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTDIR)); 13460f2dc02eSek } 1347736b9155Smarks 1348fa9e4066Sahrens /* 1349fa9e4066Sahrens * Check accessibility of directory. 
1350fa9e4066Sahrens */ 1351fa9e4066Sahrens 1352da6c28aaSamw if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1353fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1354fa9e4066Sahrens return (error); 1355fa9e4066Sahrens } 1356fa9e4066Sahrens 1357de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1358da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1359da6c28aaSamw ZFS_EXIT(zfsvfs); 1360be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1361da6c28aaSamw } 1362fa9e4066Sahrens 1363da6c28aaSamw error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1364d47621a4STim Haley if (error == 0) 1365d47621a4STim Haley error = specvp_check(vpp, cr); 1366fa9e4066Sahrens 1367fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1368fa9e4066Sahrens return (error); 1369fa9e4066Sahrens } 1370fa9e4066Sahrens 1371fa9e4066Sahrens /* 1372fa9e4066Sahrens * Attempt to create a new entry in a directory. If the entry 1373fa9e4066Sahrens * already exists, truncate the file if permissible, else return 1374fa9e4066Sahrens * an error. Return the vp of the created or trunc'd file. 1375fa9e4066Sahrens * 1376fa9e4066Sahrens * IN: dvp - vnode of directory to put new file entry in. 1377fa9e4066Sahrens * name - name of new file entry. 1378fa9e4066Sahrens * vap - attributes of new file. 1379fa9e4066Sahrens * excl - flag indicating exclusive or non-exclusive mode. 1380fa9e4066Sahrens * mode - mode to open file with. 1381fa9e4066Sahrens * cr - credentials of caller. 1382fa9e4066Sahrens * flag - large file flag [UNUSED]. 1383da6c28aaSamw * ct - caller context 13844bb73804SMatthew Ahrens * vsecp - ACL to be set 1385fa9e4066Sahrens * 1386fa9e4066Sahrens * OUT: vpp - vnode of created or trunc'd entry. 1387fa9e4066Sahrens * 1388f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
 *
 * Timestamps:
 *	dvp - ctime|mtime updated if new entry created
 *	 vp - ctime|mtime always, atime if new
 */

/* ARGSUSED */
static int
zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
    int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
    vsecattr_t *vsecp)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	objset_t	*os;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	int		error;
	ksid_t		*ksid;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	/* B_TRUE once zfs_acl_ids_create() has filled in acl_ids */
	boolean_t	have_acl = B_FALSE;
	/* B_TRUE once we have been through dmu_tx_wait() at least once */
	boolean_t	waited = B_FALSE;

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	/* Prefer the owner SID's id when the cred carries one. */
	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	/* On utf8only file systems the name must be valid UTF-8. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr((xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
	/*
	 * Retry point: we come back here after dmu_tx_wait() when
	 * dmu_tx_assign() returned ERESTART.  The directory entry lock has
	 * been dropped by then, so the lookup below is redone; acl_ids from
	 * the earlier pass is kept (have_acl) and either reused or freed.
	 */
top:
	*vpp = NULL;

	/* Silently drop the sticky bit if the caller may not set it. */
	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~VSVTX;

	if (*name == '\0') {
		/*
		 * Null component name refers to the directory itself.
		 */
		VN_HOLD(dvp);
		zp = dzp;
		dl = NULL;
		error = 0;
	} else {
		/* possible VN_HOLD(zp) */
		int zflg = 0;

		/* flag is consulted here (FIGNORECASE) and below (FAPPEND) */
		if (flag & FIGNORECASE)
			zflg |= ZCILOOK;

		error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
		    NULL, NULL);
		if (error) {
			if (have_acl)
				zfs_acl_ids_free(&acl_ids);
			/* a failed lock on ".." is reported as EISDIR */
			if (strcmp(name, "..") == 0)
				error = SET_ERROR(EISDIR);
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	if (zp == NULL) {
		uint64_t txtype;

		/*
		 * Create a new file object and update the directory
		 * to reference it.
		 */
		if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
			if (have_acl)
				zfs_acl_ids_free(&acl_ids);
			goto out;
		}

		/*
		 * We only support the creation of regular files in
		 * extended attribute directories.
		 */

		if ((dzp->z_pflags & ZFS_XATTR) &&
		    (vap->va_type != VREG)) {
			if (have_acl)
				zfs_acl_ids_free(&acl_ids);
			error = SET_ERROR(EINVAL);
			goto out;
		}

		/* Build acl_ids only once; a retry pass reuses it. */
		if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
		    cr, vsecp, &acl_ids)) != 0)
			goto out;
		have_acl = B_TRUE;

		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
			zfs_acl_ids_free(&acl_ids);
			error = SET_ERROR(EDQUOT);
			goto out;
		}

		/* Declare everything the transaction may touch. */
		tx = dmu_tx_create(os);

		dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
		    ZFS_SA_BASE_ATTR_SIZE);

		fuid_dirtied = zfsvfs->z_fuid_dirty;
		if (fuid_dirtied)
			zfs_fuid_txhold(zfsvfs, tx);
		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
		if (!zfsvfs->z_use_sa &&
		    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, acl_ids.z_aclp->z_acl_bytes);
		}
		/* After one wait, TXG_NOTHROTTLE is added to the flags. */
		error = dmu_tx_assign(tx,
		    (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
		if (error) {
			zfs_dirent_unlock(dl);
			if (error == ERESTART) {
				waited = B_TRUE;
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				goto top;
			}
			zfs_acl_ids_free(&acl_ids);
			dmu_tx_abort(tx);
			ZFS_EXIT(zfsvfs);
			return (error);
		}
		zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

		if (fuid_dirtied)
			zfs_fuid_sync(zfsvfs, tx);

		/* Link the new znode into the directory and log it. */
		(void) zfs_link_create(dl, zp, tx, ZNEW);
		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
		if (flag & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_create(zilog, tx, txtype, dzp, zp, name,
		    vsecp, acl_ids.z_fuidp, vap);
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_commit(tx);
	} else {
		int aflags = (flag & FAPPEND) ? V_APPEND : 0;

		/*
		 * The entry exists, so ACL ids left over from an earlier
		 * pass through the create path are no longer needed.
		 */
		if (have_acl)
			zfs_acl_ids_free(&acl_ids);
		have_acl = B_FALSE;

		/*
		 * A directory entry already exists for this name.
		 */
		/*
		 * Can't truncate an existing file if in exclusive mode.
		 */
		if (excl == EXCL) {
			error = SET_ERROR(EEXIST);
			goto out;
		}
		/*
		 * Can't open a directory for writing.
		 */
		if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) {
			error = SET_ERROR(EISDIR);
			goto out;
		}
		/*
		 * Verify requested access to file.
		 */
		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
			goto out;
		}

		mutex_enter(&dzp->z_lock);
		dzp->z_seq++;
		mutex_exit(&dzp->z_lock);

		/*
		 * Truncate regular files if requested.
		 */
		if ((ZTOV(zp)->v_type == VREG) &&
		    (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) {
			/* we can't hold any locks when calling zfs_freesp() */
			zfs_dirent_unlock(dl);
			dl = NULL;
			error = zfs_freesp(zp, 0, 0, mode, TRUE);
			if (error == 0) {
				vnevent_create(ZTOV(zp), ct);
			}
		}
	}
	/*
	 * Common exit.  dl is NULL when name was "" or when it was already
	 * dropped for the truncate; zp is NULL when the create path bailed
	 * out before zfs_mknode().
	 */
out:

	if (dl)
		zfs_dirent_unlock(dl);

	if (error) {
		if (zp)
			VN_RELE(ZTOV(zp));
	} else {
		*vpp = ZTOV(zp);
		error = specvp_check(vpp, cr);
	}

	/* With sync=always, push the intent log out before returning. */
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Remove an entry from a directory.
 *
 *	IN:	dvp	- vnode of directory to remove entry from.
 *		name	- name of entry to remove.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime
 *	 vp - ctime (if nlink > 0)
 */

/* Zero value written over SA_ZPL_XATTR on non-SA znodes (see zfs_remove). */
uint64_t null_xattr = 0;

/*ARGSUSED*/
static int
zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
    int flags)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	znode_t		*xzp;		/* xattr directory znode, if any */
	vnode_t		*vp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	acl_obj, xattr_obj;
	uint64_t	xattr_obj_unlinked = 0;
	uint64_t	obj = 0;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	/*
	 * may_delete_now is the pre-transaction estimate; delete_now is the
	 * decision re-checked under z_lock/v_lock once the link is gone.
	 */
	boolean_t	may_delete_now, delete_now = FALSE;
	boolean_t	unlinked, toobig = FALSE;
	uint64_t	txtype;
	pathname_t	*realnmp = NULL;
	pathname_t	realnm;
	int		error;
	int		zflg = ZEXISTS;
	/* B_TRUE once we have been through dmu_tx_wait() at least once */
	boolean_t	waited = B_FALSE;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/*
	 * For case-insensitive requests, have zfs_dirent_lock() report the
	 * name it matched (presumably the entry's real, case-preserved
	 * name) so that name can be purged from the DNLC below.
	 */
	if (flags & FIGNORECASE) {
		zflg |= ZCILOOK;
		pn_alloc(&realnm);
		realnmp = &realnm;
	}

	/*
	 * Retry point after dmu_tx_wait(); all holds taken below have been
	 * released by the time we jump back here.
	 */
top:
	xattr_obj = 0;
	xzp = NULL;
	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 */
	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, realnmp)) {
		if (realnmp)
			pn_free(realnmp);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	vp = ZTOV(zp);

	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
		goto out;
	}

	/*
	 * Need to use rmdir for removing directories.
	 */
	if (vp->v_type == VDIR) {
		error = SET_ERROR(EPERM);
		goto out;
	}

	vnevent_remove(vp, dvp, name, ct);

	if (realnmp)
		dnlc_remove(dvp, realnmp->pn_buf);
	else
		dnlc_remove(dvp, name);

	/* Ours is the only hold and there are no cached pages. */
	mutex_enter(&vp->v_lock);
	may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp);
	mutex_exit(&vp->v_lock);

	/*
	 * We may delete the znode now, or we may put it in the unlinked set;
	 * it depends on whether we're the last link, and on whether there are
	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
	 * allow for either case.
	 */
	/* Saved for zfs_log_remove(), which runs after zp may be deleted. */
	obj = zp->z_id;
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	zfs_sa_upgrade_txholds(tx, dzp);
	if (may_delete_now) {
		toobig =
		    zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT;
		/* if the file is too big, only hold_free a token amount */
		dmu_tx_hold_free(tx, zp->z_id, 0,
		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
	}

	/* are there any extended attributes? */
	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
	    &xattr_obj, sizeof (xattr_obj));
	if (error == 0 && xattr_obj) {
		/*
		 * NOTE(review): the zfs_zget() result is only checked by
		 * ASSERT0() and xzp is dereferenced right after -- confirm
		 * that a failure here cannot happen in non-DEBUG builds.
		 */
		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
		ASSERT0(error);
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
	}

	/* Remember the external ACL object; it is re-checked later. */
	mutex_enter(&zp->z_lock);
	if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
	mutex_exit(&zp->z_lock);

	/* charge as an update -- would be nice not to charge at all */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	/*
	 * Mark this transaction as typically resulting in a net free of space
	 */
	dmu_tx_mark_netfree(tx);

	/* After one wait, TXG_NOTHROTTLE is added to the flags. */
	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
	if (error) {
		/* Drop everything taken since top: before retrying/failing. */
		zfs_dirent_unlock(dl);
		VN_RELE(vp);
		if (xzp)
			VN_RELE(ZTOV(xzp));
		if (error == ERESTART) {
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		if (realnmp)
			pn_free(realnmp);
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Remove the directory entry.
	 */
	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);

	if (error) {
		dmu_tx_commit(tx);
		goto out;
	}

	if (unlinked) {
		/*
		 * Hold z_lock so that we can make sure that the ACL obj
		 * hasn't changed.  Could have been deleted due to
		 * zfs_sa_upgrade().
		 */
		/*
		 * z_lock is deliberately still held when this block ends;
		 * it is released in whichever branch below runs.
		 */
		mutex_enter(&zp->z_lock);
		mutex_enter(&vp->v_lock);
		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
		/*
		 * Delete immediately only if nothing changed since the holds
		 * were declared: same hold count, no cached data, same xattr
		 * object and same external ACL object.
		 */
		delete_now = may_delete_now && !toobig &&
		    vp->v_count == 1 && !vn_has_cached_data(vp) &&
		    xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) ==
		    acl_obj;
		mutex_exit(&vp->v_lock);
	}

	if (delete_now) {
		if (xattr_obj_unlinked) {
			/*
			 * Unlink the xattr directory: zero its link count,
			 * queue it on the unlinked set, and clear the
			 * file's reference to it.
			 */
			ASSERT3U(xzp->z_links, ==, 2);
			mutex_enter(&xzp->z_lock);
			xzp->z_unlinked = 1;
			xzp->z_links = 0;
			error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
			    &xzp->z_links, sizeof (xzp->z_links), tx);
			ASSERT3U(error, ==, 0);
			mutex_exit(&xzp->z_lock);
			zfs_unlinked_add(xzp, tx);

			if (zp->z_is_sa)
				error = sa_remove(zp->z_sa_hdl,
				    SA_ZPL_XATTR(zfsvfs), tx);
			else
				error = sa_update(zp->z_sa_hdl,
				    SA_ZPL_XATTR(zfsvfs), &null_xattr,
				    sizeof (uint64_t), tx);
			ASSERT0(error);
		}
		/* Drop our hold by hand; the count must reach zero. */
		mutex_enter(&vp->v_lock);
		VN_RELE_LOCKED(vp);
		ASSERT0(vp->v_count);
		mutex_exit(&vp->v_lock);
		mutex_exit(&zp->z_lock);
		zfs_znode_delete(zp, tx);
	} else if (unlinked) {
		/* Last link gone but not deletable now: defer the delete. */
		mutex_exit(&zp->z_lock);
		zfs_unlinked_add(zp, tx);
	}

	txtype = TX_REMOVE;
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_remove(zilog, tx, txtype, dzp, name, obj);

	dmu_tx_commit(tx);
out:
	if (realnmp)
		pn_free(realnmp);

	zfs_dirent_unlock(dl);

	/* On the delete_now path our hold on vp was already dropped above. */
	if (!delete_now)
		VN_RELE(vp);
	if (xzp)
		VN_RELE(ZTOV(xzp));

	/* With sync=always, push the intent log out before returning. */
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Create a new directory and insert it into dvp using the name
 * provided.  Return a pointer to the inserted directory.
 *
 *	IN:	dvp	- vnode of directory to add subdir to.
 *		dirname	- name of new directory.
 *		vap	- attributes of new directory.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *		vsecp	- ACL to be set
 *
 *	OUT:	vpp	- vnode of created directory.
 *
 *	RETURN:	0 on success, error code on failure.
1883fa9e4066Sahrens * 1884fa9e4066Sahrens * Timestamps: 1885fa9e4066Sahrens * dvp - ctime|mtime updated 1886fa9e4066Sahrens * vp - ctime|mtime|atime updated 1887fa9e4066Sahrens */ 1888da6c28aaSamw /*ARGSUSED*/ 1889fa9e4066Sahrens static int 1890da6c28aaSamw zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 1891da6c28aaSamw caller_context_t *ct, int flags, vsecattr_t *vsecp) 1892fa9e4066Sahrens { 1893fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1894fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1895f18faf3fSek zilog_t *zilog; 1896fa9e4066Sahrens zfs_dirlock_t *dl; 1897da6c28aaSamw uint64_t txtype; 1898fa9e4066Sahrens dmu_tx_t *tx; 1899fa9e4066Sahrens int error; 1900da6c28aaSamw int zf = ZNEW; 1901c1ce5987SMark Shellenbaum ksid_t *ksid; 1902c1ce5987SMark Shellenbaum uid_t uid; 1903c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 19040a586ceaSMark Shellenbaum zfs_acl_ids_t acl_ids; 190589459e17SMark Shellenbaum boolean_t fuid_dirtied; 190669962b56SMatthew Ahrens boolean_t waited = B_FALSE; 1907fa9e4066Sahrens 1908fa9e4066Sahrens ASSERT(vap->va_type == VDIR); 1909fa9e4066Sahrens 1910da6c28aaSamw /* 1911da6c28aaSamw * If we have an ephemeral id, ACL, or XVATTR then 1912da6c28aaSamw * make sure file system is at proper version 1913da6c28aaSamw */ 1914da6c28aaSamw 1915c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1916c1ce5987SMark Shellenbaum if (ksid) 1917c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1918c1ce5987SMark Shellenbaum else 1919c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1920da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1921c1ce5987SMark Shellenbaum (vsecp || (vap->va_mask & AT_XVATTR) || 1922756962ecSMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1923be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1924da6c28aaSamw 19253cb34c60Sahrens ZFS_ENTER(zfsvfs); 19263cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1927f18faf3fSek zilog = zfsvfs->z_log; 1928fa9e4066Sahrens 19290a586ceaSMark Shellenbaum if 
(dzp->z_pflags & ZFS_XATTR) { 1930fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1931be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1932fa9e4066Sahrens } 1933da6c28aaSamw 1934de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(dirname, 1935da6c28aaSamw strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1936da6c28aaSamw ZFS_EXIT(zfsvfs); 1937be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1938da6c28aaSamw } 1939da6c28aaSamw if (flags & FIGNORECASE) 1940da6c28aaSamw zf |= ZCILOOK; 1941da6c28aaSamw 1942c8c24165SMark Shellenbaum if (vap->va_mask & AT_XVATTR) { 1943da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1944da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1945da6c28aaSamw ZFS_EXIT(zfsvfs); 1946da6c28aaSamw return (error); 1947da6c28aaSamw } 1948c8c24165SMark Shellenbaum } 1949fa9e4066Sahrens 1950c8c24165SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 1951c8c24165SMark Shellenbaum vsecp, &acl_ids)) != 0) { 1952c8c24165SMark Shellenbaum ZFS_EXIT(zfsvfs); 1953c8c24165SMark Shellenbaum return (error); 1954c8c24165SMark Shellenbaum } 1955fa9e4066Sahrens /* 1956fa9e4066Sahrens * First make sure the new directory doesn't exist. 1957c8c24165SMark Shellenbaum * 1958c8c24165SMark Shellenbaum * Existence is checked first to make sure we don't return 1959c8c24165SMark Shellenbaum * EACCES instead of EEXIST which can cause some applications 1960c8c24165SMark Shellenbaum * to fail. 
1961fa9e4066Sahrens */ 1962da6c28aaSamw top: 1963da6c28aaSamw *vpp = NULL; 1964da6c28aaSamw 1965da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 1966da6c28aaSamw NULL, NULL)) { 1967c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1968fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1969fa9e4066Sahrens return (error); 1970fa9e4066Sahrens } 1971fa9e4066Sahrens 1972da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 1973c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1974d2443e76Smarks zfs_dirent_unlock(dl); 1975d2443e76Smarks ZFS_EXIT(zfsvfs); 1976d2443e76Smarks return (error); 1977d2443e76Smarks } 1978d2443e76Smarks 197914843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 19804929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 198114843421SMatthew Ahrens zfs_dirent_unlock(dl); 198214843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 1983be6fd75aSMatthew Ahrens return (SET_ERROR(EDQUOT)); 198414843421SMatthew Ahrens } 198589459e17SMark Shellenbaum 1986fa9e4066Sahrens /* 1987fa9e4066Sahrens * Add a new entry to the directory. 
1988fa9e4066Sahrens */ 1989fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1990ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 1991ea8dc4b6Seschrock dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 199289459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 199314843421SMatthew Ahrens if (fuid_dirtied) 199414843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 19950a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 19960a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 19970a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes); 19980a586ceaSMark Shellenbaum } 19990a586ceaSMark Shellenbaum 20000a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 20010a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE); 20020a586ceaSMark Shellenbaum 2003*f864f99eSPrakash Surya error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); 2004fa9e4066Sahrens if (error) { 2005fa9e4066Sahrens zfs_dirent_unlock(dl); 20061209a471SNeil Perrin if (error == ERESTART) { 200769962b56SMatthew Ahrens waited = B_TRUE; 20088a2f1b91Sahrens dmu_tx_wait(tx); 20098a2f1b91Sahrens dmu_tx_abort(tx); 2010fa9e4066Sahrens goto top; 2011fa9e4066Sahrens } 2012c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 20138a2f1b91Sahrens dmu_tx_abort(tx); 2014fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2015fa9e4066Sahrens return (error); 2016fa9e4066Sahrens } 2017fa9e4066Sahrens 2018fa9e4066Sahrens /* 2019fa9e4066Sahrens * Create new node. 2020fa9e4066Sahrens */ 20210a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2022fa9e4066Sahrens 202389459e17SMark Shellenbaum if (fuid_dirtied) 202489459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 20250a586ceaSMark Shellenbaum 2026fa9e4066Sahrens /* 2027fa9e4066Sahrens * Now put new name in parent dir. 
2028fa9e4066Sahrens */ 2029fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 2030fa9e4066Sahrens 2031fa9e4066Sahrens *vpp = ZTOV(zp); 2032fa9e4066Sahrens 2033da6c28aaSamw txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 2034da6c28aaSamw if (flags & FIGNORECASE) 2035da6c28aaSamw txtype |= TX_CI; 203689459e17SMark Shellenbaum zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 203789459e17SMark Shellenbaum acl_ids.z_fuidp, vap); 2038da6c28aaSamw 203989459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 20400a586ceaSMark Shellenbaum 2041fa9e4066Sahrens dmu_tx_commit(tx); 2042fa9e4066Sahrens 2043fa9e4066Sahrens zfs_dirent_unlock(dl); 2044fa9e4066Sahrens 204555da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 20465002558fSNeil Perrin zil_commit(zilog, 0); 204755da60b9SMark J Musante 2048fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2049fa9e4066Sahrens return (0); 2050fa9e4066Sahrens } 2051fa9e4066Sahrens 2052fa9e4066Sahrens /* 2053fa9e4066Sahrens * Remove a directory subdir entry. If the current working 2054fa9e4066Sahrens * directory is the same as the subdir to be removed, the 2055fa9e4066Sahrens * remove will fail. 2056fa9e4066Sahrens * 2057fa9e4066Sahrens * IN: dvp - vnode of directory to remove from. 2058fa9e4066Sahrens * name - name of directory to be removed. 2059fa9e4066Sahrens * cwd - vnode of current working directory. 2060fa9e4066Sahrens * cr - credentials of caller. 2061da6c28aaSamw * ct - caller context 2062da6c28aaSamw * flags - case flags 2063fa9e4066Sahrens * 2064f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
2065fa9e4066Sahrens * 2066fa9e4066Sahrens * Timestamps: 2067fa9e4066Sahrens * dvp - ctime|mtime updated 2068fa9e4066Sahrens */ 2069da6c28aaSamw /*ARGSUSED*/ 2070fa9e4066Sahrens static int 2071da6c28aaSamw zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 2072da6c28aaSamw caller_context_t *ct, int flags) 2073fa9e4066Sahrens { 2074fa9e4066Sahrens znode_t *dzp = VTOZ(dvp); 2075fa9e4066Sahrens znode_t *zp; 2076fa9e4066Sahrens vnode_t *vp; 2077fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2078f18faf3fSek zilog_t *zilog; 2079fa9e4066Sahrens zfs_dirlock_t *dl; 2080fa9e4066Sahrens dmu_tx_t *tx; 2081fa9e4066Sahrens int error; 2082da6c28aaSamw int zflg = ZEXISTS; 208369962b56SMatthew Ahrens boolean_t waited = B_FALSE; 2084fa9e4066Sahrens 20853cb34c60Sahrens ZFS_ENTER(zfsvfs); 20863cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 2087f18faf3fSek zilog = zfsvfs->z_log; 2088fa9e4066Sahrens 2089da6c28aaSamw if (flags & FIGNORECASE) 2090da6c28aaSamw zflg |= ZCILOOK; 2091fa9e4066Sahrens top: 2092fa9e4066Sahrens zp = NULL; 2093fa9e4066Sahrens 2094fa9e4066Sahrens /* 2095fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 
2096fa9e4066Sahrens */ 2097da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 2098da6c28aaSamw NULL, NULL)) { 2099fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2100fa9e4066Sahrens return (error); 2101fa9e4066Sahrens } 2102fa9e4066Sahrens 2103fa9e4066Sahrens vp = ZTOV(zp); 2104fa9e4066Sahrens 2105fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2106fa9e4066Sahrens goto out; 2107fa9e4066Sahrens } 2108fa9e4066Sahrens 2109fa9e4066Sahrens if (vp->v_type != VDIR) { 2110be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTDIR); 2111fa9e4066Sahrens goto out; 2112fa9e4066Sahrens } 2113fa9e4066Sahrens 2114fa9e4066Sahrens if (vp == cwd) { 2115be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 2116fa9e4066Sahrens goto out; 2117fa9e4066Sahrens } 2118fa9e4066Sahrens 2119da6c28aaSamw vnevent_rmdir(vp, dvp, name, ct); 2120fa9e4066Sahrens 2121fa9e4066Sahrens /* 2122af2c4821Smaybee * Grab a lock on the directory to make sure that noone is 2123af2c4821Smaybee * trying to add (or lookup) entries while we are removing it. 2124af2c4821Smaybee */ 2125af2c4821Smaybee rw_enter(&zp->z_name_lock, RW_WRITER); 2126af2c4821Smaybee 2127af2c4821Smaybee /* 2128af2c4821Smaybee * Grab a lock on the parent pointer to make sure we play well 2129fa9e4066Sahrens * with the treewalk and directory rename code. 2130fa9e4066Sahrens */ 2131fa9e4066Sahrens rw_enter(&zp->z_parent_lock, RW_WRITER); 2132fa9e4066Sahrens 2133fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 2134ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 21350a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2136893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 21370a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 21380a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 213999189164SSimon Klinkert dmu_tx_mark_netfree(tx); 2140*f864f99eSPrakash Surya error = dmu_tx_assign(tx, (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); 2141fa9e4066Sahrens if (error) { 2142fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 2143af2c4821Smaybee rw_exit(&zp->z_name_lock); 2144fa9e4066Sahrens zfs_dirent_unlock(dl); 2145fa9e4066Sahrens VN_RELE(vp); 21461209a471SNeil Perrin if (error == ERESTART) { 214769962b56SMatthew Ahrens waited = B_TRUE; 21488a2f1b91Sahrens dmu_tx_wait(tx); 21498a2f1b91Sahrens dmu_tx_abort(tx); 2150fa9e4066Sahrens goto top; 2151fa9e4066Sahrens } 21528a2f1b91Sahrens dmu_tx_abort(tx); 2153fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2154fa9e4066Sahrens return (error); 2155fa9e4066Sahrens } 2156fa9e4066Sahrens 2157da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2158fa9e4066Sahrens 2159da6c28aaSamw if (error == 0) { 2160da6c28aaSamw uint64_t txtype = TX_RMDIR; 2161da6c28aaSamw if (flags & FIGNORECASE) 2162da6c28aaSamw txtype |= TX_CI; 21635002558fSNeil Perrin zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2164da6c28aaSamw } 2165fa9e4066Sahrens 2166fa9e4066Sahrens dmu_tx_commit(tx); 2167fa9e4066Sahrens 2168fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 2169af2c4821Smaybee rw_exit(&zp->z_name_lock); 2170fa9e4066Sahrens out: 2171fa9e4066Sahrens zfs_dirent_unlock(dl); 2172fa9e4066Sahrens 2173fa9e4066Sahrens VN_RELE(vp); 2174fa9e4066Sahrens 217555da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 21765002558fSNeil Perrin zil_commit(zilog, 0); 217755da60b9SMark J Musante 2178fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2179fa9e4066Sahrens return (error); 2180fa9e4066Sahrens } 2181fa9e4066Sahrens 2182fa9e4066Sahrens /* 2183fa9e4066Sahrens * Read as many directory entries as will fit into the provided 2184fa9e4066Sahrens * buffer from the given directory cursor position (specified in 2185f7170741SWill Andrews * the uio structure). 2186fa9e4066Sahrens * 2187fa9e4066Sahrens * IN: vp - vnode of directory to read. 2188fa9e4066Sahrens * uio - structure supplying read location, range info, 2189fa9e4066Sahrens * and return buffer. 
2190fa9e4066Sahrens * cr - credentials of caller. 2191da6c28aaSamw * ct - caller context 2192da6c28aaSamw * flags - case flags 2193fa9e4066Sahrens * 2194fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 2195fa9e4066Sahrens * eofp - set to true if end-of-file detected. 2196fa9e4066Sahrens * 2197f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 2198fa9e4066Sahrens * 2199fa9e4066Sahrens * Timestamps: 2200fa9e4066Sahrens * vp - atime updated 2201fa9e4066Sahrens * 2202fa9e4066Sahrens * Note that the low 4 bits of the cookie returned by zap is always zero. 2203fa9e4066Sahrens * This allows us to use the low range for "special" directory entries: 2204fa9e4066Sahrens * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2205fa9e4066Sahrens * we use the offset 2 for the '.zfs' directory. 2206fa9e4066Sahrens */ 2207fa9e4066Sahrens /* ARGSUSED */ 2208fa9e4066Sahrens static int 2209da6c28aaSamw zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, 2210da6c28aaSamw caller_context_t *ct, int flags) 2211fa9e4066Sahrens { 2212fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2213fa9e4066Sahrens iovec_t *iovp; 2214da6c28aaSamw edirent_t *eodp; 2215fa9e4066Sahrens dirent64_t *odp; 2216fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 22177f6e3e7dSperrin objset_t *os; 2218fa9e4066Sahrens caddr_t outbuf; 2219fa9e4066Sahrens size_t bufsize; 2220fa9e4066Sahrens zap_cursor_t zc; 2221fa9e4066Sahrens zap_attribute_t zap; 2222fa9e4066Sahrens uint_t bytes_wanted; 2223fa9e4066Sahrens uint64_t offset; /* must be unsigned; checks for < 1 */ 22240a586ceaSMark Shellenbaum uint64_t parent; 2225fa9e4066Sahrens int local_eof; 22267f6e3e7dSperrin int outcount; 22277f6e3e7dSperrin int error; 22287f6e3e7dSperrin uint8_t prefetch; 2229b38f0970Sck boolean_t check_sysattrs; 2230fa9e4066Sahrens 22313cb34c60Sahrens ZFS_ENTER(zfsvfs); 22323cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2233fa9e4066Sahrens 22340a586ceaSMark Shellenbaum if ((error = 
sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 22350a586ceaSMark Shellenbaum &parent, sizeof (parent))) != 0) { 22360a586ceaSMark Shellenbaum ZFS_EXIT(zfsvfs); 22370a586ceaSMark Shellenbaum return (error); 22380a586ceaSMark Shellenbaum } 22390a586ceaSMark Shellenbaum 2240fa9e4066Sahrens /* 2241fa9e4066Sahrens * If we are not given an eof variable, 2242fa9e4066Sahrens * use a local one. 2243fa9e4066Sahrens */ 2244fa9e4066Sahrens if (eofp == NULL) 2245fa9e4066Sahrens eofp = &local_eof; 2246fa9e4066Sahrens 2247fa9e4066Sahrens /* 2248fa9e4066Sahrens * Check for valid iov_len. 2249fa9e4066Sahrens */ 2250fa9e4066Sahrens if (uio->uio_iov->iov_len <= 0) { 2251fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2252be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 2253fa9e4066Sahrens } 2254fa9e4066Sahrens 2255fa9e4066Sahrens /* 2256fa9e4066Sahrens * Quit if directory has been removed (posix) 2257fa9e4066Sahrens */ 2258893a6d32Sahrens if ((*eofp = zp->z_unlinked) != 0) { 2259fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2260fa9e4066Sahrens return (0); 2261fa9e4066Sahrens } 2262fa9e4066Sahrens 22637f6e3e7dSperrin error = 0; 22647f6e3e7dSperrin os = zfsvfs->z_os; 22657f6e3e7dSperrin offset = uio->uio_loffset; 22667f6e3e7dSperrin prefetch = zp->z_zn_prefetch; 22677f6e3e7dSperrin 2268fa9e4066Sahrens /* 2269fa9e4066Sahrens * Initialize the iterator cursor. 2270fa9e4066Sahrens */ 2271fa9e4066Sahrens if (offset <= 3) { 2272fa9e4066Sahrens /* 2273fa9e4066Sahrens * Start iteration from the beginning of the directory. 2274fa9e4066Sahrens */ 22757f6e3e7dSperrin zap_cursor_init(&zc, os, zp->z_id); 2276fa9e4066Sahrens } else { 2277fa9e4066Sahrens /* 2278fa9e4066Sahrens * The offset is a serialized cursor. 2279fa9e4066Sahrens */ 22807f6e3e7dSperrin zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2281fa9e4066Sahrens } 2282fa9e4066Sahrens 2283fa9e4066Sahrens /* 2284fa9e4066Sahrens * Get space to change directory entries into fs independent format. 
2285fa9e4066Sahrens */ 2286fa9e4066Sahrens iovp = uio->uio_iov; 2287fa9e4066Sahrens bytes_wanted = iovp->iov_len; 2288fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2289fa9e4066Sahrens bufsize = bytes_wanted; 2290fa9e4066Sahrens outbuf = kmem_alloc(bufsize, KM_SLEEP); 2291fa9e4066Sahrens odp = (struct dirent64 *)outbuf; 2292fa9e4066Sahrens } else { 2293fa9e4066Sahrens bufsize = bytes_wanted; 2294d5285caeSGeorge Wilson outbuf = NULL; 2295fa9e4066Sahrens odp = (struct dirent64 *)iovp->iov_base; 2296fa9e4066Sahrens } 2297da6c28aaSamw eodp = (struct edirent *)odp; 2298fa9e4066Sahrens 2299b38f0970Sck /* 23009660e5cbSJanice Chang * If this VFS supports the system attribute view interface; and 23019660e5cbSJanice Chang * we're looking at an extended attribute directory; and we care 23029660e5cbSJanice Chang * about normalization conflicts on this vfs; then we must check 23039660e5cbSJanice Chang * for normalization conflicts with the sysattr name space. 2304b38f0970Sck */ 23059660e5cbSJanice Chang check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2306b38f0970Sck (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2307b38f0970Sck (flags & V_RDDIR_ENTFLAGS); 2308b38f0970Sck 2309fa9e4066Sahrens /* 2310fa9e4066Sahrens * Transform to file-system independent format 2311fa9e4066Sahrens */ 2312fa9e4066Sahrens outcount = 0; 2313fa9e4066Sahrens while (outcount < bytes_wanted) { 2314b1b8ab34Slling ino64_t objnum; 2315b1b8ab34Slling ushort_t reclen; 231697f85387STim Haley off64_t *next = NULL; 2317b1b8ab34Slling 2318fa9e4066Sahrens /* 2319fa9e4066Sahrens * Special case `.', `..', and `.zfs'. 
2320fa9e4066Sahrens */ 2321fa9e4066Sahrens if (offset == 0) { 2322fa9e4066Sahrens (void) strcpy(zap.za_name, "."); 2323da6c28aaSamw zap.za_normalization_conflict = 0; 2324b1b8ab34Slling objnum = zp->z_id; 2325fa9e4066Sahrens } else if (offset == 1) { 2326fa9e4066Sahrens (void) strcpy(zap.za_name, ".."); 2327da6c28aaSamw zap.za_normalization_conflict = 0; 23280a586ceaSMark Shellenbaum objnum = parent; 2329fa9e4066Sahrens } else if (offset == 2 && zfs_show_ctldir(zp)) { 2330fa9e4066Sahrens (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2331da6c28aaSamw zap.za_normalization_conflict = 0; 2332b1b8ab34Slling objnum = ZFSCTL_INO_ROOT; 2333fa9e4066Sahrens } else { 2334fa9e4066Sahrens /* 2335fa9e4066Sahrens * Grab next entry. 2336fa9e4066Sahrens */ 2337fa9e4066Sahrens if (error = zap_cursor_retrieve(&zc, &zap)) { 2338fa9e4066Sahrens if ((*eofp = (error == ENOENT)) != 0) 2339fa9e4066Sahrens break; 2340fa9e4066Sahrens else 2341fa9e4066Sahrens goto update; 2342fa9e4066Sahrens } 2343fa9e4066Sahrens 2344fa9e4066Sahrens if (zap.za_integer_length != 8 || 2345fa9e4066Sahrens zap.za_num_integers != 1) { 2346fa9e4066Sahrens cmn_err(CE_WARN, "zap_readdir: bad directory " 2347fa9e4066Sahrens "entry, obj = %lld, offset = %lld\n", 2348fa9e4066Sahrens (u_longlong_t)zp->z_id, 2349fa9e4066Sahrens (u_longlong_t)offset); 2350be6fd75aSMatthew Ahrens error = SET_ERROR(ENXIO); 2351fa9e4066Sahrens goto update; 2352fa9e4066Sahrens } 2353b1b8ab34Slling 2354b1b8ab34Slling objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2355b1b8ab34Slling /* 2356b1b8ab34Slling * MacOS X can extract the object type here such as: 2357b1b8ab34Slling * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2358b1b8ab34Slling */ 2359b38f0970Sck 2360b38f0970Sck if (check_sysattrs && !zap.za_normalization_conflict) { 2361b38f0970Sck zap.za_normalization_conflict = 2362b38f0970Sck xattr_sysattr_casechk(zap.za_name); 2363b38f0970Sck } 2364fa9e4066Sahrens } 2365da6c28aaSamw 2366e802abbdSTim Haley if (flags & V_RDDIR_ACCFILTER) { 
2367e802abbdSTim Haley /* 2368e802abbdSTim Haley * If we have no access at all, don't include 2369e802abbdSTim Haley * this entry in the returned information 2370e802abbdSTim Haley */ 2371e802abbdSTim Haley znode_t *ezp; 2372e802abbdSTim Haley if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2373e802abbdSTim Haley goto skip_entry; 2374e802abbdSTim Haley if (!zfs_has_access(ezp, cr)) { 2375e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2376e802abbdSTim Haley goto skip_entry; 2377e802abbdSTim Haley } 2378e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2379e802abbdSTim Haley } 2380e802abbdSTim Haley 2381da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) 2382da6c28aaSamw reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2383da6c28aaSamw else 2384da6c28aaSamw reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2385fa9e4066Sahrens 2386fa9e4066Sahrens /* 2387fa9e4066Sahrens * Will this entry fit in the buffer? 2388fa9e4066Sahrens */ 2389b1b8ab34Slling if (outcount + reclen > bufsize) { 2390fa9e4066Sahrens /* 2391fa9e4066Sahrens * Did we manage to fit anything in the buffer? 2392fa9e4066Sahrens */ 2393fa9e4066Sahrens if (!outcount) { 2394be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 2395fa9e4066Sahrens goto update; 2396fa9e4066Sahrens } 2397fa9e4066Sahrens break; 2398fa9e4066Sahrens } 2399da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) { 2400da6c28aaSamw /* 2401da6c28aaSamw * Add extended flag entry: 2402da6c28aaSamw */ 2403da6c28aaSamw eodp->ed_ino = objnum; 2404da6c28aaSamw eodp->ed_reclen = reclen; 2405da6c28aaSamw /* NOTE: ed_off is the offset for the *next* entry */ 2406da6c28aaSamw next = &(eodp->ed_off); 2407da6c28aaSamw eodp->ed_eflags = zap.za_normalization_conflict ? 
2408da6c28aaSamw ED_CASE_CONFLICT : 0; 2409da6c28aaSamw (void) strncpy(eodp->ed_name, zap.za_name, 2410da6c28aaSamw EDIRENT_NAMELEN(reclen)); 2411da6c28aaSamw eodp = (edirent_t *)((intptr_t)eodp + reclen); 2412da6c28aaSamw } else { 2413da6c28aaSamw /* 2414da6c28aaSamw * Add normal entry: 2415da6c28aaSamw */ 2416da6c28aaSamw odp->d_ino = objnum; 2417da6c28aaSamw odp->d_reclen = reclen; 2418da6c28aaSamw /* NOTE: d_off is the offset for the *next* entry */ 2419da6c28aaSamw next = &(odp->d_off); 2420da6c28aaSamw (void) strncpy(odp->d_name, zap.za_name, 2421da6c28aaSamw DIRENT64_NAMELEN(reclen)); 2422da6c28aaSamw odp = (dirent64_t *)((intptr_t)odp + reclen); 2423da6c28aaSamw } 2424b1b8ab34Slling outcount += reclen; 2425fa9e4066Sahrens 2426fa9e4066Sahrens ASSERT(outcount <= bufsize); 2427fa9e4066Sahrens 2428fa9e4066Sahrens /* Prefetch znode */ 24297f6e3e7dSperrin if (prefetch) 2430a2cdcdd2SPaul Dagnelie dmu_prefetch(os, objnum, 0, 0, 0, 2431a2cdcdd2SPaul Dagnelie ZIO_PRIORITY_SYNC_READ); 2432fa9e4066Sahrens 2433e802abbdSTim Haley skip_entry: 2434fa9e4066Sahrens /* 2435fa9e4066Sahrens * Move to the next entry, fill in the previous offset. 
2436fa9e4066Sahrens */ 2437fa9e4066Sahrens if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2438fa9e4066Sahrens zap_cursor_advance(&zc); 2439fa9e4066Sahrens offset = zap_cursor_serialize(&zc); 2440fa9e4066Sahrens } else { 2441fa9e4066Sahrens offset += 1; 2442fa9e4066Sahrens } 244397f85387STim Haley if (next) 244497f85387STim Haley *next = offset; 2445fa9e4066Sahrens } 24467f6e3e7dSperrin zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2447fa9e4066Sahrens 2448fa9e4066Sahrens if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2449fa9e4066Sahrens iovp->iov_base += outcount; 2450fa9e4066Sahrens iovp->iov_len -= outcount; 2451fa9e4066Sahrens uio->uio_resid -= outcount; 2452fa9e4066Sahrens } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2453fa9e4066Sahrens /* 2454fa9e4066Sahrens * Reset the pointer. 2455fa9e4066Sahrens */ 2456fa9e4066Sahrens offset = uio->uio_loffset; 2457fa9e4066Sahrens } 2458fa9e4066Sahrens 2459fa9e4066Sahrens update: 246087e5029aSahrens zap_cursor_fini(&zc); 2461fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2462fa9e4066Sahrens kmem_free(outbuf, bufsize); 2463fa9e4066Sahrens 2464fa9e4066Sahrens if (error == ENOENT) 2465fa9e4066Sahrens error = 0; 2466fa9e4066Sahrens 2467fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2468fa9e4066Sahrens 2469fa9e4066Sahrens uio->uio_loffset = offset; 2470fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2471fa9e4066Sahrens return (error); 2472fa9e4066Sahrens } 2473fa9e4066Sahrens 2474ec533521Sfr ulong_t zfs_fsync_sync_cnt = 4; 2475ec533521Sfr 2476fa9e4066Sahrens static int 2477da6c28aaSamw zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2478fa9e4066Sahrens { 2479fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2480fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2481fa9e4066Sahrens 2482b468a217Seschrock /* 2483b468a217Seschrock * Regardless of whether this is required for standards conformance, 2484b468a217Seschrock * this 
is the logical behavior when fsync() is called on a file with 2485b468a217Seschrock * dirty pages. We use B_ASYNC since the ZIL transactions are already 2486b468a217Seschrock * going to be pushed out as part of the zil_commit(). 2487b468a217Seschrock */ 2488b468a217Seschrock if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) && 2489b468a217Seschrock (vp->v_type == VREG) && !(IS_SWAPVP(vp))) 2490da6c28aaSamw (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct); 2491b468a217Seschrock 2492ec533521Sfr (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2493ec533521Sfr 249455da60b9SMark J Musante if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 249555da60b9SMark J Musante ZFS_ENTER(zfsvfs); 249655da60b9SMark J Musante ZFS_VERIFY_ZP(zp); 24975002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 249855da60b9SMark J Musante ZFS_EXIT(zfsvfs); 249955da60b9SMark J Musante } 2500fa9e4066Sahrens return (0); 2501fa9e4066Sahrens } 2502fa9e4066Sahrens 2503da6c28aaSamw 2504fa9e4066Sahrens /* 2505fa9e4066Sahrens * Get the requested file attributes and place them in the provided 2506fa9e4066Sahrens * vattr structure. 2507fa9e4066Sahrens * 2508fa9e4066Sahrens * IN: vp - vnode of file. 2509fa9e4066Sahrens * vap - va_mask identifies requested attributes. 2510da6c28aaSamw * If AT_XVATTR set, then optional attrs are requested 2511da6c28aaSamw * flags - ATTR_NOACLCHECK (CIFS server context) 2512fa9e4066Sahrens * cr - credentials of caller. 2513da6c28aaSamw * ct - caller context 2514fa9e4066Sahrens * 2515fa9e4066Sahrens * OUT: vap - attribute values. 2516fa9e4066Sahrens * 2517f7170741SWill Andrews * RETURN: 0 (always succeeds). 
2518fa9e4066Sahrens */ 2519fa9e4066Sahrens /* ARGSUSED */ 2520fa9e4066Sahrens static int 2521da6c28aaSamw zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2522da6c28aaSamw caller_context_t *ct) 2523fa9e4066Sahrens { 2524fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2525fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2526da6c28aaSamw int error = 0; 2527ecd6cf80Smarks uint64_t links; 25280a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 2529da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2530da6c28aaSamw xoptattr_t *xoap = NULL; 2531da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 25320a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[2]; 25330a586ceaSMark Shellenbaum int count = 0; 2534fa9e4066Sahrens 25353cb34c60Sahrens ZFS_ENTER(zfsvfs); 25363cb34c60Sahrens ZFS_VERIFY_ZP(zp); 25370a586ceaSMark Shellenbaum 2538f1696b23SMark Shellenbaum zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2539f1696b23SMark Shellenbaum 25400a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 25410a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 25420a586ceaSMark Shellenbaum 25430a586ceaSMark Shellenbaum if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 25440a586ceaSMark Shellenbaum ZFS_EXIT(zfsvfs); 25450a586ceaSMark Shellenbaum return (error); 25460a586ceaSMark Shellenbaum } 2547fa9e4066Sahrens 2548da6c28aaSamw /* 2549da6c28aaSamw * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2550da6c28aaSamw * Also, if we are the owner don't bother, since owner should 2551da6c28aaSamw * always be allowed to read basic attributes of file. 
2552da6c28aaSamw */ 2553f1696b23SMark Shellenbaum if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2554f1696b23SMark Shellenbaum (vap->va_uid != crgetuid(cr))) { 2555da6c28aaSamw if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2556da6c28aaSamw skipaclchk, cr)) { 2557da6c28aaSamw ZFS_EXIT(zfsvfs); 2558da6c28aaSamw return (error); 2559da6c28aaSamw } 2560da6c28aaSamw } 2561da6c28aaSamw 2562fa9e4066Sahrens /* 2563fa9e4066Sahrens * Return all attributes. It's cheaper to provide the answer 2564fa9e4066Sahrens * than to determine whether we were asked the question. 2565fa9e4066Sahrens */ 2566fa9e4066Sahrens 256734f345efSRay Hassan mutex_enter(&zp->z_lock); 2568fa9e4066Sahrens vap->va_type = vp->v_type; 25690a586ceaSMark Shellenbaum vap->va_mode = zp->z_mode & MODEMASK; 2570fa9e4066Sahrens vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2571fa9e4066Sahrens vap->va_nodeid = zp->z_id; 2572ecd6cf80Smarks if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 25730a586ceaSMark Shellenbaum links = zp->z_links + 1; 2574ecd6cf80Smarks else 25750a586ceaSMark Shellenbaum links = zp->z_links; 2576ecd6cf80Smarks vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 25770a586ceaSMark Shellenbaum vap->va_size = zp->z_size; 257872fc53bcSmarks vap->va_rdev = vp->v_rdev; 2579fa9e4066Sahrens vap->va_seq = zp->z_seq; 2580fa9e4066Sahrens 2581fa9e4066Sahrens /* 2582da6c28aaSamw * Add in any requested optional attributes and the create time. 2583da6c28aaSamw * Also set the corresponding bits in the returned attribute bitmap. 
2584fa9e4066Sahrens */ 2585da6c28aaSamw if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2586da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2587da6c28aaSamw xoap->xoa_archive = 25880a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2589da6c28aaSamw XVA_SET_RTN(xvap, XAT_ARCHIVE); 2590da6c28aaSamw } 2591da6c28aaSamw 2592da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2593da6c28aaSamw xoap->xoa_readonly = 25940a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_READONLY) != 0); 2595da6c28aaSamw XVA_SET_RTN(xvap, XAT_READONLY); 2596da6c28aaSamw } 2597da6c28aaSamw 2598da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2599da6c28aaSamw xoap->xoa_system = 26000a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_SYSTEM) != 0); 2601da6c28aaSamw XVA_SET_RTN(xvap, XAT_SYSTEM); 2602da6c28aaSamw } 2603da6c28aaSamw 2604da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2605da6c28aaSamw xoap->xoa_hidden = 26060a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_HIDDEN) != 0); 2607da6c28aaSamw XVA_SET_RTN(xvap, XAT_HIDDEN); 2608da6c28aaSamw } 2609da6c28aaSamw 2610da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2611da6c28aaSamw xoap->xoa_nounlink = 26120a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2613da6c28aaSamw XVA_SET_RTN(xvap, XAT_NOUNLINK); 2614da6c28aaSamw } 2615da6c28aaSamw 2616da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2617da6c28aaSamw xoap->xoa_immutable = 26180a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2619da6c28aaSamw XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2620da6c28aaSamw } 2621da6c28aaSamw 2622da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2623da6c28aaSamw xoap->xoa_appendonly = 26240a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2625da6c28aaSamw XVA_SET_RTN(xvap, XAT_APPENDONLY); 2626da6c28aaSamw } 2627da6c28aaSamw 2628da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2629da6c28aaSamw xoap->xoa_nodump = 26300a586ceaSMark Shellenbaum ((zp->z_pflags & 
ZFS_NODUMP) != 0); 2631da6c28aaSamw XVA_SET_RTN(xvap, XAT_NODUMP); 2632da6c28aaSamw } 2633da6c28aaSamw 2634da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2635da6c28aaSamw xoap->xoa_opaque = 26360a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_OPAQUE) != 0); 2637da6c28aaSamw XVA_SET_RTN(xvap, XAT_OPAQUE); 2638da6c28aaSamw } 2639da6c28aaSamw 2640da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2641da6c28aaSamw xoap->xoa_av_quarantined = 26420a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2643da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2644da6c28aaSamw } 2645da6c28aaSamw 2646da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2647da6c28aaSamw xoap->xoa_av_modified = 26480a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2649da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2650da6c28aaSamw } 2651da6c28aaSamw 2652da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 26530a586ceaSMark Shellenbaum vp->v_type == VREG) { 26540a586ceaSMark Shellenbaum zfs_sa_get_scanstamp(zp, xvap); 2655da6c28aaSamw } 2656da6c28aaSamw 2657da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 26580a586ceaSMark Shellenbaum uint64_t times[2]; 26590a586ceaSMark Shellenbaum 26600a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 26610a586ceaSMark Shellenbaum times, sizeof (times)); 26620a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2663da6c28aaSamw XVA_SET_RTN(xvap, XAT_CREATETIME); 2664fa9e4066Sahrens } 26657a286c47SDai Ngo 26667a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 26670a586ceaSMark Shellenbaum xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 26687a286c47SDai Ngo XVA_SET_RTN(xvap, XAT_REPARSE); 26697a286c47SDai Ngo } 267099d5e173STim Haley if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 267199d5e173STim Haley xoap->xoa_generation = zp->z_gen; 267299d5e173STim Haley XVA_SET_RTN(xvap, XAT_GEN); 267399d5e173STim Haley } 2674fd9ee8b5Sjoyce mcintosh 
2675fd9ee8b5Sjoyce mcintosh if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2676fd9ee8b5Sjoyce mcintosh xoap->xoa_offline = 2677fd9ee8b5Sjoyce mcintosh ((zp->z_pflags & ZFS_OFFLINE) != 0); 2678fd9ee8b5Sjoyce mcintosh XVA_SET_RTN(xvap, XAT_OFFLINE); 2679fd9ee8b5Sjoyce mcintosh } 2680fd9ee8b5Sjoyce mcintosh 2681fd9ee8b5Sjoyce mcintosh if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2682fd9ee8b5Sjoyce mcintosh xoap->xoa_sparse = 2683fd9ee8b5Sjoyce mcintosh ((zp->z_pflags & ZFS_SPARSE) != 0); 2684fd9ee8b5Sjoyce mcintosh XVA_SET_RTN(xvap, XAT_SPARSE); 2685fd9ee8b5Sjoyce mcintosh } 2686fa9e4066Sahrens } 2687fa9e4066Sahrens 26880a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 26890a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_mtime, mtime); 26900a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2691da6c28aaSamw 2692fa9e4066Sahrens mutex_exit(&zp->z_lock); 2693fa9e4066Sahrens 26940a586ceaSMark Shellenbaum sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks); 2695fa9e4066Sahrens 2696fa9e4066Sahrens if (zp->z_blksz == 0) { 2697fa9e4066Sahrens /* 2698fa9e4066Sahrens * Block size hasn't been set; suggest maximal I/O transfers. 2699fa9e4066Sahrens */ 2700fa9e4066Sahrens vap->va_blksize = zfsvfs->z_max_blksz; 2701fa9e4066Sahrens } 2702fa9e4066Sahrens 2703fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2704fa9e4066Sahrens return (0); 2705fa9e4066Sahrens } 2706fa9e4066Sahrens 2707fa9e4066Sahrens /* 2708fa9e4066Sahrens * Set the file attributes to the values contained in the 2709fa9e4066Sahrens * vattr structure. 2710fa9e4066Sahrens * 2711fa9e4066Sahrens * IN: vp - vnode of file to be modified. 2712fa9e4066Sahrens * vap - new attribute values. 2713da6c28aaSamw * If AT_XVATTR set, then optional attrs are being set 2714fa9e4066Sahrens * flags - ATTR_UTIME set if non-default time values provided. 2715da6c28aaSamw * - ATTR_NOACLCHECK (CIFS context only). 2716fa9e4066Sahrens * cr - credentials of caller. 
2717da6c28aaSamw * ct - caller context 2718fa9e4066Sahrens * 2719f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 2720fa9e4066Sahrens * 2721fa9e4066Sahrens * Timestamps: 2722fa9e4066Sahrens * vp - ctime updated, mtime updated if size changed. 2723fa9e4066Sahrens */ 2724fa9e4066Sahrens /* ARGSUSED */ 2725fa9e4066Sahrens static int 2726fa9e4066Sahrens zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2727f7170741SWill Andrews caller_context_t *ct) 2728fa9e4066Sahrens { 2729f18faf3fSek znode_t *zp = VTOZ(vp); 2730fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2731f18faf3fSek zilog_t *zilog; 2732fa9e4066Sahrens dmu_tx_t *tx; 2733fa9e4066Sahrens vattr_t oldva; 2734ae4caef8SMark Shellenbaum xvattr_t tmpxvattr; 27355730cc9aSmaybee uint_t mask = vap->va_mask; 2736d5285caeSGeorge Wilson uint_t saved_mask = 0; 2737f92daba9Smarks int trim_mask = 0; 2738fa9e4066Sahrens uint64_t new_mode; 273989459e17SMark Shellenbaum uint64_t new_uid, new_gid; 27400b2a8171SMark Shellenbaum uint64_t xattr_obj; 27410a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 2742d2443e76Smarks znode_t *attrzp; 2743fa9e4066Sahrens int need_policy = FALSE; 27440a586ceaSMark Shellenbaum int err, err2; 2745da6c28aaSamw zfs_fuid_info_t *fuidp = NULL; 2746da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2747da6c28aaSamw xoptattr_t *xoap; 27480b2a8171SMark Shellenbaum zfs_acl_t *aclp; 2749da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 27500a586ceaSMark Shellenbaum boolean_t fuid_dirtied = B_FALSE; 27510a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[7], xattr_bulk[7]; 27520a586ceaSMark Shellenbaum int count = 0, xattr_count = 0; 2753fa9e4066Sahrens 2754fa9e4066Sahrens if (mask == 0) 2755fa9e4066Sahrens return (0); 2756fa9e4066Sahrens 2757fa9e4066Sahrens if (mask & AT_NOSET) 2758be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 2759fa9e4066Sahrens 27603cb34c60Sahrens ZFS_ENTER(zfsvfs); 27613cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2762da6c28aaSamw 2763da6c28aaSamw zilog = zfsvfs->z_log; 2764da6c28aaSamw 2765da6c28aaSamw /* 2766da6c28aaSamw * Make sure that if we have ephemeral uid/gid or xvattr specified 2767da6c28aaSamw * that file system is at proper version level 2768da6c28aaSamw */ 2769da6c28aaSamw 2770da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 2771da6c28aaSamw (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2772da6c28aaSamw ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 277302dcba3bStimh (mask & AT_XVATTR))) { 277402dcba3bStimh ZFS_EXIT(zfsvfs); 2775be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 277602dcba3bStimh } 2777da6c28aaSamw 277802dcba3bStimh if (mask & AT_SIZE && vp->v_type == VDIR) { 277902dcba3bStimh ZFS_EXIT(zfsvfs); 2780be6fd75aSMatthew Ahrens return (SET_ERROR(EISDIR)); 278102dcba3bStimh } 2782fa9e4066Sahrens 278302dcba3bStimh if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 278402dcba3bStimh ZFS_EXIT(zfsvfs); 2785be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 278602dcba3bStimh } 278784c5a155Smarks 2788da6c28aaSamw /* 2789da6c28aaSamw * If this is an xvattr_t, then get a pointer to the structure of 2790da6c28aaSamw * optional attributes. If this is NULL, then we have a vattr_t. 
2791da6c28aaSamw */ 2792da6c28aaSamw xoap = xva_getxoptattr(xvap); 2793da6c28aaSamw 2794ae4caef8SMark Shellenbaum xva_init(&tmpxvattr); 2795ae4caef8SMark Shellenbaum 2796da6c28aaSamw /* 2797da6c28aaSamw * Immutable files can only alter immutable bit and atime 2798da6c28aaSamw */ 27990a586ceaSMark Shellenbaum if ((zp->z_pflags & ZFS_IMMUTABLE) && 2800da6c28aaSamw ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 280102dcba3bStimh ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 280202dcba3bStimh ZFS_EXIT(zfsvfs); 2803be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 280402dcba3bStimh } 2805da6c28aaSamw 28062889ec41SGordon Ross /* 28072889ec41SGordon Ross * Note: ZFS_READONLY is handled in zfs_zaccess_common. 28082889ec41SGordon Ross */ 2809fa9e4066Sahrens 281093129341Smarks /* 281193129341Smarks * Verify timestamps doesn't overflow 32 bits. 281293129341Smarks * ZFS can handle large timestamps, but 32bit syscalls can't 281393129341Smarks * handle times greater than 2039. This check should be removed 281493129341Smarks * once large timestamps are fully supported. 281593129341Smarks */ 281693129341Smarks if (mask & (AT_ATIME | AT_MTIME)) { 281793129341Smarks if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 281893129341Smarks ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 281993129341Smarks ZFS_EXIT(zfsvfs); 2820be6fd75aSMatthew Ahrens return (SET_ERROR(EOVERFLOW)); 282193129341Smarks } 282293129341Smarks } 282393129341Smarks 2824fa9e4066Sahrens top: 2825d2443e76Smarks attrzp = NULL; 28260b2a8171SMark Shellenbaum aclp = NULL; 2827fa9e4066Sahrens 2828d47621a4STim Haley /* Can this be moved to before the top label? 
*/ 2829fa9e4066Sahrens if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2830fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2831be6fd75aSMatthew Ahrens return (SET_ERROR(EROFS)); 2832fa9e4066Sahrens } 2833fa9e4066Sahrens 2834fa9e4066Sahrens /* 2835fa9e4066Sahrens * First validate permissions 2836fa9e4066Sahrens */ 2837fa9e4066Sahrens 2838fa9e4066Sahrens if (mask & AT_SIZE) { 2839da6c28aaSamw err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); 2840fa9e4066Sahrens if (err) { 2841fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2842fa9e4066Sahrens return (err); 2843fa9e4066Sahrens } 28445730cc9aSmaybee /* 28455730cc9aSmaybee * XXX - Note, we are not providing any open 28465730cc9aSmaybee * mode flags here (like FNDELAY), so we may 28475730cc9aSmaybee * block if there are locks present... this 28485730cc9aSmaybee * should be addressed in openat(). 28495730cc9aSmaybee */ 2850cdb0ab79Smaybee /* XXX - would it be OK to generate a log record here? */ 2851cdb0ab79Smaybee err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 28525730cc9aSmaybee if (err) { 28535730cc9aSmaybee ZFS_EXIT(zfsvfs); 28545730cc9aSmaybee return (err); 28555730cc9aSmaybee } 285672102e74SBryan Cantrill 285772102e74SBryan Cantrill if (vap->va_size == 0) 285872102e74SBryan Cantrill vnevent_truncate(ZTOV(zp), ct); 2859fa9e4066Sahrens } 2860fa9e4066Sahrens 2861da6c28aaSamw if (mask & (AT_ATIME|AT_MTIME) || 2862da6c28aaSamw ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2863da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_READONLY) || 2864da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2865fd9ee8b5Sjoyce mcintosh XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 2866fd9ee8b5Sjoyce mcintosh XVA_ISSET_REQ(xvap, XAT_SPARSE) || 2867da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 28680a586ceaSMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2869da6c28aaSamw need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2870da6c28aaSamw skipaclchk, cr); 28710a586ceaSMark Shellenbaum } 2872fa9e4066Sahrens 2873fa9e4066Sahrens if (mask & (AT_UID|AT_GID)) { 
2874fa9e4066Sahrens int idmask = (mask & (AT_UID|AT_GID)); 2875fa9e4066Sahrens int take_owner; 2876fa9e4066Sahrens int take_group; 2877fa9e4066Sahrens 2878a933bc41Smarks /* 2879a933bc41Smarks * NOTE: even if a new mode is being set, 2880a933bc41Smarks * we may clear S_ISUID/S_ISGID bits. 2881a933bc41Smarks */ 2882a933bc41Smarks 2883a933bc41Smarks if (!(mask & AT_MODE)) 28840a586ceaSMark Shellenbaum vap->va_mode = zp->z_mode; 2885a933bc41Smarks 2886fa9e4066Sahrens /* 2887fa9e4066Sahrens * Take ownership or chgrp to group we are a member of 2888fa9e4066Sahrens */ 2889fa9e4066Sahrens 2890fa9e4066Sahrens take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2891da6c28aaSamw take_group = (mask & AT_GID) && 2892da6c28aaSamw zfs_groupmember(zfsvfs, vap->va_gid, cr); 2893fa9e4066Sahrens 2894fa9e4066Sahrens /* 2895fa9e4066Sahrens * If both AT_UID and AT_GID are set then take_owner and 2896fa9e4066Sahrens * take_group must both be set in order to allow taking 2897fa9e4066Sahrens * ownership. 
2898fa9e4066Sahrens * 2899fa9e4066Sahrens * Otherwise, send the check through secpolicy_vnode_setattr() 2900fa9e4066Sahrens * 2901fa9e4066Sahrens */ 2902fa9e4066Sahrens 2903fa9e4066Sahrens if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2904fa9e4066Sahrens ((idmask == AT_UID) && take_owner) || 2905fa9e4066Sahrens ((idmask == AT_GID) && take_group)) { 2906da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2907da6c28aaSamw skipaclchk, cr) == 0) { 2908fa9e4066Sahrens /* 2909fa9e4066Sahrens * Remove setuid/setgid for non-privileged users 2910fa9e4066Sahrens */ 291113f9f30eSmarks secpolicy_setid_clear(vap, cr); 2912f92daba9Smarks trim_mask = (mask & (AT_UID|AT_GID)); 2913fa9e4066Sahrens } else { 2914fa9e4066Sahrens need_policy = TRUE; 2915fa9e4066Sahrens } 2916fa9e4066Sahrens } else { 2917fa9e4066Sahrens need_policy = TRUE; 2918fa9e4066Sahrens } 2919fa9e4066Sahrens } 2920fa9e4066Sahrens 2921f92daba9Smarks mutex_enter(&zp->z_lock); 29220a586ceaSMark Shellenbaum oldva.va_mode = zp->z_mode; 2923f1696b23SMark Shellenbaum zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2924da6c28aaSamw if (mask & AT_XVATTR) { 2925ae4caef8SMark Shellenbaum /* 2926ae4caef8SMark Shellenbaum * Update xvattr mask to include only those attributes 2927ae4caef8SMark Shellenbaum * that are actually changing. 2928ae4caef8SMark Shellenbaum * 2929ae4caef8SMark Shellenbaum * the bits will be restored prior to actually setting 2930ae4caef8SMark Shellenbaum * the attributes so the caller thinks they were set. 
2931ae4caef8SMark Shellenbaum */ 2932ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2933ae4caef8SMark Shellenbaum if (xoap->xoa_appendonly != 29340a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 2935ae4caef8SMark Shellenbaum need_policy = TRUE; 2936ae4caef8SMark Shellenbaum } else { 2937ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2938ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2939ae4caef8SMark Shellenbaum } 2940ae4caef8SMark Shellenbaum } 2941ae4caef8SMark Shellenbaum 2942ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2943ae4caef8SMark Shellenbaum if (xoap->xoa_nounlink != 29440a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 2945ae4caef8SMark Shellenbaum need_policy = TRUE; 2946ae4caef8SMark Shellenbaum } else { 2947ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2948ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2949ae4caef8SMark Shellenbaum } 2950ae4caef8SMark Shellenbaum } 2951ae4caef8SMark Shellenbaum 2952ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2953ae4caef8SMark Shellenbaum if (xoap->xoa_immutable != 29540a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 2955ae4caef8SMark Shellenbaum need_policy = TRUE; 2956ae4caef8SMark Shellenbaum } else { 2957ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2958ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 2959ae4caef8SMark Shellenbaum } 2960ae4caef8SMark Shellenbaum } 2961ae4caef8SMark Shellenbaum 2962ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2963ae4caef8SMark Shellenbaum if (xoap->xoa_nodump != 29640a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NODUMP) != 0)) { 2965ae4caef8SMark Shellenbaum need_policy = TRUE; 2966ae4caef8SMark Shellenbaum } else { 2967ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NODUMP); 2968ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 
2969ae4caef8SMark Shellenbaum } 2970ae4caef8SMark Shellenbaum } 2971ae4caef8SMark Shellenbaum 2972ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2973ae4caef8SMark Shellenbaum if (xoap->xoa_av_modified != 29740a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 2975ae4caef8SMark Shellenbaum need_policy = TRUE; 2976ae4caef8SMark Shellenbaum } else { 2977ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2978ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2979ae4caef8SMark Shellenbaum } 2980ae4caef8SMark Shellenbaum } 2981ae4caef8SMark Shellenbaum 2982ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2983ae4caef8SMark Shellenbaum if ((vp->v_type != VREG && 2984ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined) || 2985ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined != 29860a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 2987ae4caef8SMark Shellenbaum need_policy = TRUE; 2988ae4caef8SMark Shellenbaum } else { 2989ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2990ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2991ae4caef8SMark Shellenbaum } 2992ae4caef8SMark Shellenbaum } 2993ae4caef8SMark Shellenbaum 29947a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 29957a286c47SDai Ngo mutex_exit(&zp->z_lock); 29967a286c47SDai Ngo ZFS_EXIT(zfsvfs); 2997be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 29987a286c47SDai Ngo } 29997a286c47SDai Ngo 3000ae4caef8SMark Shellenbaum if (need_policy == FALSE && 3001ae4caef8SMark Shellenbaum (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3002ae4caef8SMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3003da6c28aaSamw need_policy = TRUE; 3004da6c28aaSamw } 3005da6c28aaSamw } 3006da6c28aaSamw 3007f92daba9Smarks mutex_exit(&zp->z_lock); 3008fa9e4066Sahrens 3009f92daba9Smarks if (mask & AT_MODE) { 3010da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 
3011f92daba9Smarks err = secpolicy_setid_setsticky_clear(vp, vap, 3012f92daba9Smarks &oldva, cr); 3013f92daba9Smarks if (err) { 3014f92daba9Smarks ZFS_EXIT(zfsvfs); 3015f92daba9Smarks return (err); 3016f92daba9Smarks } 3017f92daba9Smarks trim_mask |= AT_MODE; 3018f92daba9Smarks } else { 3019f92daba9Smarks need_policy = TRUE; 3020f92daba9Smarks } 3021f92daba9Smarks } 302213f9f30eSmarks 3023f92daba9Smarks if (need_policy) { 302413f9f30eSmarks /* 302513f9f30eSmarks * If trim_mask is set then take ownership 3026f92daba9Smarks * has been granted or write_acl is present and user 3027f92daba9Smarks * has the ability to modify mode. In that case remove 3028f92daba9Smarks * UID|GID and or MODE from mask so that 302913f9f30eSmarks * secpolicy_vnode_setattr() doesn't revoke it. 303013f9f30eSmarks */ 303113f9f30eSmarks 3032f92daba9Smarks if (trim_mask) { 3033f92daba9Smarks saved_mask = vap->va_mask; 3034f92daba9Smarks vap->va_mask &= ~trim_mask; 3035f92daba9Smarks } 3036fa9e4066Sahrens err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3037da6c28aaSamw (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3038fa9e4066Sahrens if (err) { 3039fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3040fa9e4066Sahrens return (err); 3041fa9e4066Sahrens } 304213f9f30eSmarks 304313f9f30eSmarks if (trim_mask) 3044f92daba9Smarks vap->va_mask |= saved_mask; 3045fa9e4066Sahrens } 3046fa9e4066Sahrens 3047fa9e4066Sahrens /* 3048fa9e4066Sahrens * secpolicy_vnode_setattr, or take ownership may have 3049fa9e4066Sahrens * changed va_mask 3050fa9e4066Sahrens */ 3051fa9e4066Sahrens mask = vap->va_mask; 3052fa9e4066Sahrens 30530a586ceaSMark Shellenbaum if ((mask & (AT_UID | AT_GID))) { 30540b2a8171SMark Shellenbaum err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 30550b2a8171SMark Shellenbaum &xattr_obj, sizeof (xattr_obj)); 30560a586ceaSMark Shellenbaum 30570b2a8171SMark Shellenbaum if (err == 0 && xattr_obj) { 30580a586ceaSMark Shellenbaum err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 
30590a586ceaSMark Shellenbaum if (err) 30600a586ceaSMark Shellenbaum goto out2; 30610a586ceaSMark Shellenbaum } 30620a586ceaSMark Shellenbaum if (mask & AT_UID) { 30630a586ceaSMark Shellenbaum new_uid = zfs_fuid_create(zfsvfs, 30640a586ceaSMark Shellenbaum (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3065f1696b23SMark Shellenbaum if (new_uid != zp->z_uid && 30660a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 30670b2a8171SMark Shellenbaum if (attrzp) 30680b2a8171SMark Shellenbaum VN_RELE(ZTOV(attrzp)); 3069be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 30700a586ceaSMark Shellenbaum goto out2; 30710a586ceaSMark Shellenbaum } 30720a586ceaSMark Shellenbaum } 30730a586ceaSMark Shellenbaum 30740a586ceaSMark Shellenbaum if (mask & AT_GID) { 30750a586ceaSMark Shellenbaum new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 30760a586ceaSMark Shellenbaum cr, ZFS_GROUP, &fuidp); 30770a586ceaSMark Shellenbaum if (new_gid != zp->z_gid && 30780a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 30790b2a8171SMark Shellenbaum if (attrzp) 30800b2a8171SMark Shellenbaum VN_RELE(ZTOV(attrzp)); 3081be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 30820a586ceaSMark Shellenbaum goto out2; 30830a586ceaSMark Shellenbaum } 30840a586ceaSMark Shellenbaum } 30850a586ceaSMark Shellenbaum } 3086fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 3087fa9e4066Sahrens 3088fa9e4066Sahrens if (mask & AT_MODE) { 30890a586ceaSMark Shellenbaum uint64_t pmode = zp->z_mode; 30901412a1a2SMark Shellenbaum uint64_t acl_obj; 3091169cdae2Smarks new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3092fa9e4066Sahrens 309371dbfc28SPaul B. Henson if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 309471dbfc28SPaul B. Henson !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3095be6fd75aSMatthew Ahrens err = SET_ERROR(EPERM); 309671dbfc28SPaul B. Henson goto out; 309771dbfc28SPaul B. Henson } 309871dbfc28SPaul B. 
Henson 3099a3c49ce1SAlbert Lee if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3100a3c49ce1SAlbert Lee goto out; 31010a586ceaSMark Shellenbaum 31021412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 31031412a1a2SMark Shellenbaum if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 31040a586ceaSMark Shellenbaum /* 31050a586ceaSMark Shellenbaum * Are we upgrading ACL from old V0 format 31060a586ceaSMark Shellenbaum * to V1 format? 31070a586ceaSMark Shellenbaum */ 31082bd6c4deSMark Shellenbaum if (zfsvfs->z_version >= ZPL_VERSION_FUID && 31091412a1a2SMark Shellenbaum zfs_znode_acl_version(zp) == 3110da6c28aaSamw ZFS_ACL_VERSION_INITIAL) { 31111412a1a2SMark Shellenbaum dmu_tx_hold_free(tx, acl_obj, 0, 3112da6c28aaSamw DMU_OBJECT_END); 3113da6c28aaSamw dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 31144c841f60Smarks 0, aclp->z_acl_bytes); 3115da6c28aaSamw } else { 31161412a1a2SMark Shellenbaum dmu_tx_hold_write(tx, acl_obj, 0, 31174c841f60Smarks aclp->z_acl_bytes); 31184c841f60Smarks } 31190a586ceaSMark Shellenbaum } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 31206d38e247Smarks dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 31216d38e247Smarks 0, aclp->z_acl_bytes); 3122da6c28aaSamw } 31231412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 31240a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 31250a586ceaSMark Shellenbaum } else { 31260a586ceaSMark Shellenbaum if ((mask & AT_XVATTR) && 31270a586ceaSMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 31280a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 31290a586ceaSMark Shellenbaum else 31300a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3131fa9e4066Sahrens } 3132fa9e4066Sahrens 31330a586ceaSMark Shellenbaum if (attrzp) { 31340a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3135d2443e76Smarks } 3136d2443e76Smarks 31370a586ceaSMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 31380a586ceaSMark Shellenbaum if 
(fuid_dirtied) 31390a586ceaSMark Shellenbaum zfs_fuid_txhold(zfsvfs, tx); 31400a586ceaSMark Shellenbaum 31410a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 31420a586ceaSMark Shellenbaum 3143e722410cSMatthew Ahrens err = dmu_tx_assign(tx, TXG_WAIT); 3144e722410cSMatthew Ahrens if (err) 314514843421SMatthew Ahrens goto out; 3146fa9e4066Sahrens 31470a586ceaSMark Shellenbaum count = 0; 3148fa9e4066Sahrens /* 3149fa9e4066Sahrens * Set each attribute requested. 3150fa9e4066Sahrens * We group settings according to the locks they need to acquire. 3151fa9e4066Sahrens * 3152fa9e4066Sahrens * Note: you cannot set ctime directly, although it will be 3153fa9e4066Sahrens * updated as a side-effect of calling this function. 3154fa9e4066Sahrens */ 3155fa9e4066Sahrens 31561412a1a2SMark Shellenbaum 31571412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 31581412a1a2SMark Shellenbaum mutex_enter(&zp->z_acl_lock); 3159fa9e4066Sahrens mutex_enter(&zp->z_lock); 3160fa9e4066Sahrens 3161db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3162db9986c7SMark Shellenbaum &zp->z_pflags, sizeof (zp->z_pflags)); 3163db9986c7SMark Shellenbaum 3164db9986c7SMark Shellenbaum if (attrzp) { 31651412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 31661412a1a2SMark Shellenbaum mutex_enter(&attrzp->z_acl_lock); 31670a586ceaSMark Shellenbaum mutex_enter(&attrzp->z_lock); 3168db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3169db9986c7SMark Shellenbaum SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3170db9986c7SMark Shellenbaum sizeof (attrzp->z_pflags)); 3171db9986c7SMark Shellenbaum } 31720a586ceaSMark Shellenbaum 317327dd1e87SMark Shellenbaum if (mask & (AT_UID|AT_GID)) { 317427dd1e87SMark Shellenbaum 317527dd1e87SMark Shellenbaum if (mask & AT_UID) { 317627dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 317727dd1e87SMark Shellenbaum &new_uid, sizeof (new_uid)); 3178f1696b23SMark Shellenbaum 
zp->z_uid = new_uid; 317927dd1e87SMark Shellenbaum if (attrzp) { 318027dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 318127dd1e87SMark Shellenbaum SA_ZPL_UID(zfsvfs), NULL, &new_uid, 318227dd1e87SMark Shellenbaum sizeof (new_uid)); 3183f1696b23SMark Shellenbaum attrzp->z_uid = new_uid; 318427dd1e87SMark Shellenbaum } 31850a586ceaSMark Shellenbaum } 31860a586ceaSMark Shellenbaum 318727dd1e87SMark Shellenbaum if (mask & AT_GID) { 318827dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 318927dd1e87SMark Shellenbaum NULL, &new_gid, sizeof (new_gid)); 3190f1696b23SMark Shellenbaum zp->z_gid = new_gid; 319127dd1e87SMark Shellenbaum if (attrzp) { 319227dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 319327dd1e87SMark Shellenbaum SA_ZPL_GID(zfsvfs), NULL, &new_gid, 319427dd1e87SMark Shellenbaum sizeof (new_gid)); 3195f1696b23SMark Shellenbaum attrzp->z_gid = new_gid; 319627dd1e87SMark Shellenbaum } 319727dd1e87SMark Shellenbaum } 319827dd1e87SMark Shellenbaum if (!(mask & AT_MODE)) { 319927dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 320027dd1e87SMark Shellenbaum NULL, &new_mode, sizeof (new_mode)); 320127dd1e87SMark Shellenbaum new_mode = zp->z_mode; 320227dd1e87SMark Shellenbaum } 320327dd1e87SMark Shellenbaum err = zfs_acl_chown_setattr(zp); 320427dd1e87SMark Shellenbaum ASSERT(err == 0); 32050a586ceaSMark Shellenbaum if (attrzp) { 320627dd1e87SMark Shellenbaum err = zfs_acl_chown_setattr(attrzp); 320727dd1e87SMark Shellenbaum ASSERT(err == 0); 32080a586ceaSMark Shellenbaum } 32090a586ceaSMark Shellenbaum } 32100a586ceaSMark Shellenbaum 3211fa9e4066Sahrens if (mask & AT_MODE) { 32120a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 32130a586ceaSMark Shellenbaum &new_mode, sizeof (new_mode)); 32140a586ceaSMark Shellenbaum zp->z_mode = new_mode; 321527dd1e87SMark Shellenbaum ASSERT3U((uintptr_t)aclp, !=, NULL); 321689459e17SMark Shellenbaum err = 
zfs_aclset_common(zp, aclp, cr, tx); 3217fb09f5aaSMadhav Suresh ASSERT0(err); 32180b2a8171SMark Shellenbaum if (zp->z_acl_cached) 32190b2a8171SMark Shellenbaum zfs_acl_free(zp->z_acl_cached); 32204929fd5eSTim Haley zp->z_acl_cached = aclp; 32214929fd5eSTim Haley aclp = NULL; 3222fa9e4066Sahrens } 3223fa9e4066Sahrens 3224d2443e76Smarks 32250a586ceaSMark Shellenbaum if (mask & AT_ATIME) { 32260a586ceaSMark Shellenbaum ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 32270a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 32280a586ceaSMark Shellenbaum &zp->z_atime, sizeof (zp->z_atime)); 3229d2443e76Smarks } 3230fa9e4066Sahrens 32310a586ceaSMark Shellenbaum if (mask & AT_MTIME) { 32320a586ceaSMark Shellenbaum ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 32330a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 32340a586ceaSMark Shellenbaum mtime, sizeof (mtime)); 3235d2443e76Smarks } 3236d2443e76Smarks 3237cdb0ab79Smaybee /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 32380a586ceaSMark Shellenbaum if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3239db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3240db9986c7SMark Shellenbaum NULL, mtime, sizeof (mtime)); 32410a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 32420a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 32430a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 32440a586ceaSMark Shellenbaum B_TRUE); 32450a586ceaSMark Shellenbaum } else if (mask != 0) { 32460a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 32470a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 32480a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 32490a586ceaSMark Shellenbaum B_TRUE); 32500a586ceaSMark Shellenbaum if (attrzp) { 32510a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 32520a586ceaSMark Shellenbaum SA_ZPL_CTIME(zfsvfs), NULL, 32530a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 32540a586ceaSMark Shellenbaum zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 32550a586ceaSMark Shellenbaum mtime, ctime, B_TRUE); 32560a586ceaSMark Shellenbaum } 32570a586ceaSMark Shellenbaum } 3258da6c28aaSamw /* 3259da6c28aaSamw * Do this after setting timestamps to prevent timestamp 3260da6c28aaSamw * update from toggling bit 3261da6c28aaSamw */ 3262da6c28aaSamw 3263da6c28aaSamw if (xoap && (mask & AT_XVATTR)) { 3264ae4caef8SMark Shellenbaum 3265ae4caef8SMark Shellenbaum /* 3266ae4caef8SMark Shellenbaum * restore trimmed off masks 3267ae4caef8SMark Shellenbaum * so that return masks can be set for caller. 
3268ae4caef8SMark Shellenbaum */ 3269ae4caef8SMark Shellenbaum 3270ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3271ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_APPENDONLY); 3272ae4caef8SMark Shellenbaum } 3273ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3274ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NOUNLINK); 3275ae4caef8SMark Shellenbaum } 3276ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3277ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3278ae4caef8SMark Shellenbaum } 3279ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3280ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NODUMP); 3281ae4caef8SMark Shellenbaum } 3282ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3283ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3284ae4caef8SMark Shellenbaum } 3285ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3286ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3287ae4caef8SMark Shellenbaum } 3288ae4caef8SMark Shellenbaum 32890a586ceaSMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3290da6c28aaSamw ASSERT(vp->v_type == VREG); 3291da6c28aaSamw 32920a586ceaSMark Shellenbaum zfs_xvattr_set(zp, xvap, tx); 3293da6c28aaSamw } 3294fa9e4066Sahrens 329589459e17SMark Shellenbaum if (fuid_dirtied) 329689459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 329789459e17SMark Shellenbaum 32985730cc9aSmaybee if (mask != 0) 3299da6c28aaSamw zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3300fa9e4066Sahrens 3301fa9e4066Sahrens mutex_exit(&zp->z_lock); 33021412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 33031412a1a2SMark Shellenbaum mutex_exit(&zp->z_acl_lock); 3304fa9e4066Sahrens 33051412a1a2SMark Shellenbaum if (attrzp) { 33061412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 33071412a1a2SMark Shellenbaum 
mutex_exit(&attrzp->z_acl_lock); 33081412a1a2SMark Shellenbaum mutex_exit(&attrzp->z_lock); 33091412a1a2SMark Shellenbaum } 331014843421SMatthew Ahrens out: 33110a586ceaSMark Shellenbaum if (err == 0 && attrzp) { 33120a586ceaSMark Shellenbaum err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 33130a586ceaSMark Shellenbaum xattr_count, tx); 33140a586ceaSMark Shellenbaum ASSERT(err2 == 0); 33150a586ceaSMark Shellenbaum } 33160a586ceaSMark Shellenbaum 3317d2443e76Smarks if (attrzp) 3318d2443e76Smarks VN_RELE(ZTOV(attrzp)); 3319f7170741SWill Andrews 33204929fd5eSTim Haley if (aclp) 33214929fd5eSTim Haley zfs_acl_free(aclp); 33224929fd5eSTim Haley 332314843421SMatthew Ahrens if (fuidp) { 332414843421SMatthew Ahrens zfs_fuid_info_free(fuidp); 332514843421SMatthew Ahrens fuidp = NULL; 332614843421SMatthew Ahrens } 332714843421SMatthew Ahrens 33280a586ceaSMark Shellenbaum if (err) { 332914843421SMatthew Ahrens dmu_tx_abort(tx); 33300a586ceaSMark Shellenbaum if (err == ERESTART) 33310a586ceaSMark Shellenbaum goto top; 33320a586ceaSMark Shellenbaum } else { 33330a586ceaSMark Shellenbaum err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 333414843421SMatthew Ahrens dmu_tx_commit(tx); 33350a586ceaSMark Shellenbaum } 333614843421SMatthew Ahrens 33370a586ceaSMark Shellenbaum out2: 333855da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 33395002558fSNeil Perrin zil_commit(zilog, 0); 334055da60b9SMark J Musante 3341fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3342fa9e4066Sahrens return (err); 3343fa9e4066Sahrens } 3344fa9e4066Sahrens 3345fa9e4066Sahrens typedef struct zfs_zlock { 3346fa9e4066Sahrens krwlock_t *zl_rwlock; /* lock we acquired */ 3347fa9e4066Sahrens znode_t *zl_znode; /* znode we held */ 3348fa9e4066Sahrens struct zfs_zlock *zl_next; /* next in list */ 3349fa9e4066Sahrens } zfs_zlock_t; 3350fa9e4066Sahrens 3351ff008e00Smaybee /* 3352ff008e00Smaybee * Drop locks and release vnodes that were held by zfs_rename_lock(). 
3353ff008e00Smaybee */ 3354ff008e00Smaybee static void 3355ff008e00Smaybee zfs_rename_unlock(zfs_zlock_t **zlpp) 3356ff008e00Smaybee { 3357ff008e00Smaybee zfs_zlock_t *zl; 3358ff008e00Smaybee 3359ff008e00Smaybee while ((zl = *zlpp) != NULL) { 3360ff008e00Smaybee if (zl->zl_znode != NULL) 3361ff008e00Smaybee VN_RELE(ZTOV(zl->zl_znode)); 3362ff008e00Smaybee rw_exit(zl->zl_rwlock); 3363ff008e00Smaybee *zlpp = zl->zl_next; 3364ff008e00Smaybee kmem_free(zl, sizeof (*zl)); 3365ff008e00Smaybee } 3366ff008e00Smaybee } 3367ff008e00Smaybee 3368ff008e00Smaybee /* 3369ff008e00Smaybee * Search back through the directory tree, using the ".." entries. 3370ff008e00Smaybee * Lock each directory in the chain to prevent concurrent renames. 3371ff008e00Smaybee * Fail any attempt to move a directory into one of its own descendants. 3372ff008e00Smaybee * XXX - z_parent_lock can overlap with map or grow locks 3373ff008e00Smaybee */ 3374fa9e4066Sahrens static int 3375fa9e4066Sahrens zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3376fa9e4066Sahrens { 3377fa9e4066Sahrens zfs_zlock_t *zl; 3378feb08c6bSbillm znode_t *zp = tdzp; 3379fa9e4066Sahrens uint64_t rootid = zp->z_zfsvfs->z_root; 33800a586ceaSMark Shellenbaum uint64_t oidp = zp->z_id; 3381fa9e4066Sahrens krwlock_t *rwlp = &szp->z_parent_lock; 3382fa9e4066Sahrens krw_t rw = RW_WRITER; 3383fa9e4066Sahrens 3384fa9e4066Sahrens /* 3385fa9e4066Sahrens * First pass write-locks szp and compares to zp->z_id. 3386fa9e4066Sahrens * Later passes read-lock zp and compare to zp->z_parent. 3387fa9e4066Sahrens */ 3388fa9e4066Sahrens do { 3389ff008e00Smaybee if (!rw_tryenter(rwlp, rw)) { 3390ff008e00Smaybee /* 3391ff008e00Smaybee * Another thread is renaming in this path. 3392ff008e00Smaybee * Note that if we are a WRITER, we don't have any 3393ff008e00Smaybee * parent_locks held yet. 
3394ff008e00Smaybee */ 3395ff008e00Smaybee if (rw == RW_READER && zp->z_id > szp->z_id) { 3396ff008e00Smaybee /* 3397ff008e00Smaybee * Drop our locks and restart 3398ff008e00Smaybee */ 3399ff008e00Smaybee zfs_rename_unlock(&zl); 3400ff008e00Smaybee *zlpp = NULL; 3401ff008e00Smaybee zp = tdzp; 34020a586ceaSMark Shellenbaum oidp = zp->z_id; 3403ff008e00Smaybee rwlp = &szp->z_parent_lock; 3404ff008e00Smaybee rw = RW_WRITER; 3405ff008e00Smaybee continue; 3406ff008e00Smaybee } else { 3407ff008e00Smaybee /* 3408ff008e00Smaybee * Wait for other thread to drop its locks 3409ff008e00Smaybee */ 3410ff008e00Smaybee rw_enter(rwlp, rw); 3411ff008e00Smaybee } 3412ff008e00Smaybee } 3413ff008e00Smaybee 3414fa9e4066Sahrens zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3415fa9e4066Sahrens zl->zl_rwlock = rwlp; 3416fa9e4066Sahrens zl->zl_znode = NULL; 3417fa9e4066Sahrens zl->zl_next = *zlpp; 3418fa9e4066Sahrens *zlpp = zl; 3419fa9e4066Sahrens 34200a586ceaSMark Shellenbaum if (oidp == szp->z_id) /* We're a descendant of szp */ 3421be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 3422fa9e4066Sahrens 34230a586ceaSMark Shellenbaum if (oidp == rootid) /* We've hit the top */ 3424fa9e4066Sahrens return (0); 3425fa9e4066Sahrens 3426fa9e4066Sahrens if (rw == RW_READER) { /* i.e. 
not the first pass */ 34270a586ceaSMark Shellenbaum int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); 3428fa9e4066Sahrens if (error) 3429fa9e4066Sahrens return (error); 3430fa9e4066Sahrens zl->zl_znode = zp; 3431fa9e4066Sahrens } 34320a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), 34330a586ceaSMark Shellenbaum &oidp, sizeof (oidp)); 3434fa9e4066Sahrens rwlp = &zp->z_parent_lock; 3435fa9e4066Sahrens rw = RW_READER; 3436fa9e4066Sahrens 3437fa9e4066Sahrens } while (zp->z_id != sdzp->z_id); 3438fa9e4066Sahrens 3439fa9e4066Sahrens return (0); 3440fa9e4066Sahrens } 3441fa9e4066Sahrens 3442fa9e4066Sahrens /* 3443fa9e4066Sahrens * Move an entry from the provided source directory to the target 3444fa9e4066Sahrens * directory. Change the entry name as indicated. 3445fa9e4066Sahrens * 3446fa9e4066Sahrens * IN: sdvp - Source directory containing the "old entry". 3447fa9e4066Sahrens * snm - Old entry name. 3448fa9e4066Sahrens * tdvp - Target directory to contain the "new entry". 3449fa9e4066Sahrens * tnm - New entry name. 3450fa9e4066Sahrens * cr - credentials of caller. 3451da6c28aaSamw * ct - caller context 3452da6c28aaSamw * flags - case flags 3453fa9e4066Sahrens * 3454f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
3455fa9e4066Sahrens * 3456fa9e4066Sahrens * Timestamps: 3457fa9e4066Sahrens * sdvp,tdvp - ctime|mtime updated 3458fa9e4066Sahrens */ 3459da6c28aaSamw /*ARGSUSED*/ 3460fa9e4066Sahrens static int 3461da6c28aaSamw zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3462da6c28aaSamw caller_context_t *ct, int flags) 3463fa9e4066Sahrens { 3464fa9e4066Sahrens znode_t *tdzp, *szp, *tzp; 3465fa9e4066Sahrens znode_t *sdzp = VTOZ(sdvp); 3466fa9e4066Sahrens zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 3467f18faf3fSek zilog_t *zilog; 3468fa9e4066Sahrens vnode_t *realvp; 3469fa9e4066Sahrens zfs_dirlock_t *sdl, *tdl; 3470fa9e4066Sahrens dmu_tx_t *tx; 3471fa9e4066Sahrens zfs_zlock_t *zl; 3472da6c28aaSamw int cmp, serr, terr; 347354207fd2SJerry Jelinek int error = 0, rm_err = 0; 3474da6c28aaSamw int zflg = 0; 347569962b56SMatthew Ahrens boolean_t waited = B_FALSE; 3476fa9e4066Sahrens 34773cb34c60Sahrens ZFS_ENTER(zfsvfs); 34783cb34c60Sahrens ZFS_VERIFY_ZP(sdzp); 3479f18faf3fSek zilog = zfsvfs->z_log; 3480fa9e4066Sahrens 3481fa9e4066Sahrens /* 3482fa9e4066Sahrens * Make sure we have the real vp for the target directory. 3483fa9e4066Sahrens */ 3484da6c28aaSamw if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3485fa9e4066Sahrens tdvp = realvp; 3486fa9e4066Sahrens 348718e64978SMarcel Telka tdzp = VTOZ(tdvp); 348818e64978SMarcel Telka ZFS_VERIFY_ZP(tdzp); 348918e64978SMarcel Telka 349018e64978SMarcel Telka /* 349118e64978SMarcel Telka * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 349218e64978SMarcel Telka * ctldir appear to have the same v_vfsp. 
349318e64978SMarcel Telka */ 349418e64978SMarcel Telka if (tdzp->z_zfsvfs != zfsvfs || zfsctl_is_node(tdvp)) { 3495fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3496be6fd75aSMatthew Ahrens return (SET_ERROR(EXDEV)); 3497fa9e4066Sahrens } 3498fa9e4066Sahrens 3499de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(tnm, 3500da6c28aaSamw strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3501da6c28aaSamw ZFS_EXIT(zfsvfs); 3502be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 3503da6c28aaSamw } 3504da6c28aaSamw 3505da6c28aaSamw if (flags & FIGNORECASE) 3506da6c28aaSamw zflg |= ZCILOOK; 3507da6c28aaSamw 3508fa9e4066Sahrens top: 3509fa9e4066Sahrens szp = NULL; 3510fa9e4066Sahrens tzp = NULL; 3511fa9e4066Sahrens zl = NULL; 3512fa9e4066Sahrens 3513fa9e4066Sahrens /* 3514fa9e4066Sahrens * This is to prevent the creation of links into attribute space 3515fa9e4066Sahrens * by renaming a linked file into/outof an attribute directory. 3516fa9e4066Sahrens * See the comment in zfs_link() for why this is considered bad. 3517fa9e4066Sahrens */ 35180a586ceaSMark Shellenbaum if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3519fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3520be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 3521fa9e4066Sahrens } 3522fa9e4066Sahrens 3523fa9e4066Sahrens /* 3524fa9e4066Sahrens * Lock source and target directory entries. To prevent deadlock, 3525fa9e4066Sahrens * a lock ordering must be defined. We lock the directory with 3526fa9e4066Sahrens * the smallest object id first, or if it's a tie, the one with 3527fa9e4066Sahrens * the lexically first name. 3528fa9e4066Sahrens */ 3529fa9e4066Sahrens if (sdzp->z_id < tdzp->z_id) { 3530fa9e4066Sahrens cmp = -1; 3531fa9e4066Sahrens } else if (sdzp->z_id > tdzp->z_id) { 3532fa9e4066Sahrens cmp = 1; 3533fa9e4066Sahrens } else { 3534da6c28aaSamw /* 3535da6c28aaSamw * First compare the two name arguments without 3536da6c28aaSamw * considering any case folding. 
3537da6c28aaSamw */ 3538da6c28aaSamw int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3539da6c28aaSamw 3540da6c28aaSamw cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3541de8267e0Stimh ASSERT(error == 0 || !zfsvfs->z_utf8); 3542fa9e4066Sahrens if (cmp == 0) { 3543fa9e4066Sahrens /* 3544fa9e4066Sahrens * POSIX: "If the old argument and the new argument 3545fa9e4066Sahrens * both refer to links to the same existing file, 3546fa9e4066Sahrens * the rename() function shall return successfully 3547fa9e4066Sahrens * and perform no other action." 3548fa9e4066Sahrens */ 3549fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3550fa9e4066Sahrens return (0); 3551fa9e4066Sahrens } 3552da6c28aaSamw /* 3553da6c28aaSamw * If the file system is case-folding, then we may 3554da6c28aaSamw * have some more checking to do. A case-folding file 3555da6c28aaSamw * system is either supporting mixed case sensitivity 3556da6c28aaSamw * access or is completely case-insensitive. Note 3557da6c28aaSamw * that the file system is always case preserving. 3558da6c28aaSamw * 3559da6c28aaSamw * In mixed sensitivity mode case sensitive behavior 3560da6c28aaSamw * is the default. FIGNORECASE must be used to 3561da6c28aaSamw * explicitly request case insensitive behavior. 3562da6c28aaSamw * 3563da6c28aaSamw * If the source and target names provided differ only 3564da6c28aaSamw * by case (e.g., a request to rename 'tim' to 'Tim'), 3565da6c28aaSamw * we will treat this as a special case in the 3566da6c28aaSamw * case-insensitive mode: as long as the source name 3567da6c28aaSamw * is an exact match, we will allow this to proceed as 3568da6c28aaSamw * a name-change request. 
3569da6c28aaSamw */ 3570de8267e0Stimh if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3571de8267e0Stimh (zfsvfs->z_case == ZFS_CASE_MIXED && 3572de8267e0Stimh flags & FIGNORECASE)) && 3573da6c28aaSamw u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3574da6c28aaSamw &error) == 0) { 3575da6c28aaSamw /* 3576da6c28aaSamw * case preserving rename request, require exact 3577da6c28aaSamw * name matches 3578da6c28aaSamw */ 3579da6c28aaSamw zflg |= ZCIEXACT; 3580da6c28aaSamw zflg &= ~ZCILOOK; 3581da6c28aaSamw } 3582fa9e4066Sahrens } 3583da6c28aaSamw 3584afefc7e4SSanjeev Bagewadi /* 3585afefc7e4SSanjeev Bagewadi * If the source and destination directories are the same, we should 3586afefc7e4SSanjeev Bagewadi * grab the z_name_lock of that directory only once. 3587afefc7e4SSanjeev Bagewadi */ 3588afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) { 3589afefc7e4SSanjeev Bagewadi zflg |= ZHAVELOCK; 3590afefc7e4SSanjeev Bagewadi rw_enter(&sdzp->z_name_lock, RW_READER); 3591afefc7e4SSanjeev Bagewadi } 3592afefc7e4SSanjeev Bagewadi 3593fa9e4066Sahrens if (cmp < 0) { 3594da6c28aaSamw serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3595da6c28aaSamw ZEXISTS | zflg, NULL, NULL); 3596da6c28aaSamw terr = zfs_dirent_lock(&tdl, 3597da6c28aaSamw tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3598fa9e4066Sahrens } else { 3599da6c28aaSamw terr = zfs_dirent_lock(&tdl, 3600da6c28aaSamw tdzp, tnm, &tzp, zflg, NULL, NULL); 3601da6c28aaSamw serr = zfs_dirent_lock(&sdl, 3602da6c28aaSamw sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3603da6c28aaSamw NULL, NULL); 3604fa9e4066Sahrens } 3605fa9e4066Sahrens 3606fa9e4066Sahrens if (serr) { 3607fa9e4066Sahrens /* 3608fa9e4066Sahrens * Source entry invalid or not there. 
3609fa9e4066Sahrens */ 3610fa9e4066Sahrens if (!terr) { 3611fa9e4066Sahrens zfs_dirent_unlock(tdl); 3612fa9e4066Sahrens if (tzp) 3613fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 3614fa9e4066Sahrens } 3615afefc7e4SSanjeev Bagewadi 3616afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3617afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3618afefc7e4SSanjeev Bagewadi 3619fa9e4066Sahrens if (strcmp(snm, "..") == 0) 3620be6fd75aSMatthew Ahrens serr = SET_ERROR(EINVAL); 3621fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3622fa9e4066Sahrens return (serr); 3623fa9e4066Sahrens } 3624fa9e4066Sahrens if (terr) { 3625fa9e4066Sahrens zfs_dirent_unlock(sdl); 3626fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3627afefc7e4SSanjeev Bagewadi 3628afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3629afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3630afefc7e4SSanjeev Bagewadi 3631fa9e4066Sahrens if (strcmp(tnm, "..") == 0) 3632be6fd75aSMatthew Ahrens terr = SET_ERROR(EINVAL); 3633fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3634fa9e4066Sahrens return (terr); 3635fa9e4066Sahrens } 3636fa9e4066Sahrens 3637fa9e4066Sahrens /* 3638fa9e4066Sahrens * Must have write access at the source to remove the old entry 3639fa9e4066Sahrens * and write access at the target to create the new entry. 3640fa9e4066Sahrens * Note that if target and source are the same, this can be 3641fa9e4066Sahrens * done in a single check. 3642fa9e4066Sahrens */ 3643fa9e4066Sahrens 3644fa9e4066Sahrens if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3645fa9e4066Sahrens goto out; 3646fa9e4066Sahrens 3647fa9e4066Sahrens if (ZTOV(szp)->v_type == VDIR) { 3648fa9e4066Sahrens /* 3649fa9e4066Sahrens * Check to make sure rename is valid. 3650fa9e4066Sahrens * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3651fa9e4066Sahrens */ 3652fa9e4066Sahrens if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3653fa9e4066Sahrens goto out; 3654fa9e4066Sahrens } 3655fa9e4066Sahrens 3656fa9e4066Sahrens /* 3657fa9e4066Sahrens * Does target exist? 
3658fa9e4066Sahrens */ 3659fa9e4066Sahrens if (tzp) { 3660fa9e4066Sahrens /* 3661fa9e4066Sahrens * Source and target must be the same type. 3662fa9e4066Sahrens */ 3663fa9e4066Sahrens if (ZTOV(szp)->v_type == VDIR) { 3664fa9e4066Sahrens if (ZTOV(tzp)->v_type != VDIR) { 3665be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTDIR); 3666fa9e4066Sahrens goto out; 3667fa9e4066Sahrens } 3668fa9e4066Sahrens } else { 3669fa9e4066Sahrens if (ZTOV(tzp)->v_type == VDIR) { 3670be6fd75aSMatthew Ahrens error = SET_ERROR(EISDIR); 3671fa9e4066Sahrens goto out; 3672fa9e4066Sahrens } 3673fa9e4066Sahrens } 3674fa9e4066Sahrens /* 3675fa9e4066Sahrens * POSIX dictates that when the source and target 3676fa9e4066Sahrens * entries refer to the same file object, rename 3677fa9e4066Sahrens * must do nothing and exit without error. 3678fa9e4066Sahrens */ 3679fa9e4066Sahrens if (szp->z_id == tzp->z_id) { 3680fa9e4066Sahrens error = 0; 3681fa9e4066Sahrens goto out; 3682fa9e4066Sahrens } 3683fa9e4066Sahrens } 3684fa9e4066Sahrens 368554207fd2SJerry Jelinek vnevent_pre_rename_src(ZTOV(szp), sdvp, snm, ct); 3686fa9e4066Sahrens if (tzp) 368754207fd2SJerry Jelinek vnevent_pre_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3688df2381bfSpraks 3689df2381bfSpraks /* 3690df2381bfSpraks * notify the target directory if it is not the same 3691df2381bfSpraks * as source directory. 
3692df2381bfSpraks */ 3693df2381bfSpraks if (tdvp != sdvp) { 369454207fd2SJerry Jelinek vnevent_pre_rename_dest_dir(tdvp, ZTOV(szp), tnm, ct); 3695df2381bfSpraks } 3696fa9e4066Sahrens 3697fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 36980a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 36990a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3700ea8dc4b6Seschrock dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3701ea8dc4b6Seschrock dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 37020a586ceaSMark Shellenbaum if (sdzp != tdzp) { 37030a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 37040a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, tdzp); 37050a586ceaSMark Shellenbaum } 37060a586ceaSMark Shellenbaum if (tzp) { 37070a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 37080a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, tzp); 37090a586ceaSMark Shellenbaum } 37100a586ceaSMark Shellenbaum 37110a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, szp); 3712893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 3713*f864f99eSPrakash Surya error = dmu_tx_assign(tx, (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); 3714fa9e4066Sahrens if (error) { 3715fa9e4066Sahrens if (zl != NULL) 3716fa9e4066Sahrens zfs_rename_unlock(&zl); 3717fa9e4066Sahrens zfs_dirent_unlock(sdl); 3718fa9e4066Sahrens zfs_dirent_unlock(tdl); 3719afefc7e4SSanjeev Bagewadi 3720afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3721afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3722afefc7e4SSanjeev Bagewadi 3723fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3724fa9e4066Sahrens if (tzp) 3725fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 37261209a471SNeil Perrin if (error == ERESTART) { 372769962b56SMatthew Ahrens waited = B_TRUE; 37288a2f1b91Sahrens dmu_tx_wait(tx); 37298a2f1b91Sahrens dmu_tx_abort(tx); 3730fa9e4066Sahrens goto top; 3731fa9e4066Sahrens } 37328a2f1b91Sahrens dmu_tx_abort(tx); 3733fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3734fa9e4066Sahrens return (error); 3735fa9e4066Sahrens } 3736fa9e4066Sahrens 3737fa9e4066Sahrens if (tzp) /* Attempt to remove the existing target */ 373854207fd2SJerry Jelinek error = rm_err = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3739fa9e4066Sahrens 3740fa9e4066Sahrens if (error == 0) { 3741fa9e4066Sahrens error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3742fa9e4066Sahrens if (error == 0) { 37430a586ceaSMark Shellenbaum szp->z_pflags |= ZFS_AV_MODIFIED; 37440a586ceaSMark Shellenbaum 37450a586ceaSMark Shellenbaum error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 37460a586ceaSMark Shellenbaum (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3747fb09f5aaSMadhav Suresh ASSERT0(error); 3748da6c28aaSamw 3749fa9e4066Sahrens error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 37506ed5e6abSSam Falkner if (error == 0) { 37516ed5e6abSSam Falkner zfs_log_rename(zilog, tx, TX_RENAME | 375291de656bSNeil Perrin (flags & FIGNORECASE ? 
TX_CI : 0), sdzp, 375391de656bSNeil Perrin sdl->dl_name, tdzp, tdl->dl_name, szp); 375451ece835Seschrock 37556ed5e6abSSam Falkner /* 37566ed5e6abSSam Falkner * Update path information for the target vnode 37576ed5e6abSSam Falkner */ 37586ed5e6abSSam Falkner vn_renamepath(tdvp, ZTOV(szp), tnm, 37596ed5e6abSSam Falkner strlen(tnm)); 37606ed5e6abSSam Falkner } else { 37616ed5e6abSSam Falkner /* 37626ed5e6abSSam Falkner * At this point, we have successfully created 37636ed5e6abSSam Falkner * the target name, but have failed to remove 37646ed5e6abSSam Falkner * the source name. Since the create was done 37656ed5e6abSSam Falkner * with the ZRENAMING flag, there are 37666ed5e6abSSam Falkner * complications; for one, the link count is 37676ed5e6abSSam Falkner * wrong. The easiest way to deal with this 37686ed5e6abSSam Falkner * is to remove the newly created target, and 37696ed5e6abSSam Falkner * return the original error. This must 37706ed5e6abSSam Falkner * succeed; fortunately, it is very unlikely to 37716ed5e6abSSam Falkner * fail, since we just created it. 
37726ed5e6abSSam Falkner */ 37736ed5e6abSSam Falkner VERIFY3U(zfs_link_destroy(tdl, szp, tx, 37746ed5e6abSSam Falkner ZRENAMING, NULL), ==, 0); 37756ed5e6abSSam Falkner } 3776fa9e4066Sahrens } 3777fa9e4066Sahrens } 3778fa9e4066Sahrens 3779fa9e4066Sahrens dmu_tx_commit(tx); 378054207fd2SJerry Jelinek 378154207fd2SJerry Jelinek if (tzp && rm_err == 0) 378254207fd2SJerry Jelinek vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 378354207fd2SJerry Jelinek 378454207fd2SJerry Jelinek if (error == 0) { 378554207fd2SJerry Jelinek vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 378654207fd2SJerry Jelinek /* notify the target dir if it is not the same as source dir */ 378754207fd2SJerry Jelinek if (tdvp != sdvp) 378854207fd2SJerry Jelinek vnevent_rename_dest_dir(tdvp, ct); 378954207fd2SJerry Jelinek } 3790fa9e4066Sahrens out: 3791fa9e4066Sahrens if (zl != NULL) 3792fa9e4066Sahrens zfs_rename_unlock(&zl); 3793fa9e4066Sahrens 3794fa9e4066Sahrens zfs_dirent_unlock(sdl); 3795fa9e4066Sahrens zfs_dirent_unlock(tdl); 3796fa9e4066Sahrens 3797afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3798afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3799afefc7e4SSanjeev Bagewadi 3800afefc7e4SSanjeev Bagewadi 3801fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3802fa9e4066Sahrens if (tzp) 3803fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 3804fa9e4066Sahrens 380555da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 38065002558fSNeil Perrin zil_commit(zilog, 0); 380755da60b9SMark J Musante 3808fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3809fa9e4066Sahrens return (error); 3810fa9e4066Sahrens } 3811fa9e4066Sahrens 3812fa9e4066Sahrens /* 3813fa9e4066Sahrens * Insert the indicated symbolic reference entry into the directory. 3814fa9e4066Sahrens * 3815fa9e4066Sahrens * IN: dvp - Directory to contain new symbolic link. 3816fa9e4066Sahrens * link - Name for new symlink entry. 3817fa9e4066Sahrens * vap - Attributes of new entry. 3818fa9e4066Sahrens * cr - credentials of caller. 
3819da6c28aaSamw * ct - caller context 3820da6c28aaSamw * flags - case flags 3821fa9e4066Sahrens * 3822f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 3823fa9e4066Sahrens * 3824fa9e4066Sahrens * Timestamps: 3825fa9e4066Sahrens * dvp - ctime|mtime updated 3826fa9e4066Sahrens */ 3827da6c28aaSamw /*ARGSUSED*/ 3828fa9e4066Sahrens static int 3829da6c28aaSamw zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr, 3830da6c28aaSamw caller_context_t *ct, int flags) 3831fa9e4066Sahrens { 3832fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 3833fa9e4066Sahrens zfs_dirlock_t *dl; 3834fa9e4066Sahrens dmu_tx_t *tx; 3835fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3836f18faf3fSek zilog_t *zilog; 38370a586ceaSMark Shellenbaum uint64_t len = strlen(link); 3838fa9e4066Sahrens int error; 3839da6c28aaSamw int zflg = ZNEW; 384089459e17SMark Shellenbaum zfs_acl_ids_t acl_ids; 384189459e17SMark Shellenbaum boolean_t fuid_dirtied; 38420a586ceaSMark Shellenbaum uint64_t txtype = TX_SYMLINK; 384369962b56SMatthew Ahrens boolean_t waited = B_FALSE; 3844fa9e4066Sahrens 3845fa9e4066Sahrens ASSERT(vap->va_type == VLNK); 3846fa9e4066Sahrens 38473cb34c60Sahrens ZFS_ENTER(zfsvfs); 38483cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 3849f18faf3fSek zilog = zfsvfs->z_log; 3850da6c28aaSamw 3851de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 3852da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3853da6c28aaSamw ZFS_EXIT(zfsvfs); 3854be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 3855da6c28aaSamw } 3856da6c28aaSamw if (flags & FIGNORECASE) 3857da6c28aaSamw zflg |= ZCILOOK; 3858fa9e4066Sahrens 3859fa9e4066Sahrens if (len > MAXPATHLEN) { 3860fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3861be6fd75aSMatthew Ahrens return (SET_ERROR(ENAMETOOLONG)); 3862fa9e4066Sahrens } 3863fa9e4066Sahrens 3864c8c24165SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, 3865c8c24165SMark Shellenbaum vap, cr, NULL, &acl_ids)) != 0) { 3866c8c24165SMark Shellenbaum 
ZFS_EXIT(zfsvfs); 3867c8c24165SMark Shellenbaum return (error); 3868c8c24165SMark Shellenbaum } 3869c8c24165SMark Shellenbaum top: 3870fa9e4066Sahrens /* 3871fa9e4066Sahrens * Attempt to lock directory; fail if entry already exists. 3872fa9e4066Sahrens */ 3873da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 3874da6c28aaSamw if (error) { 3875c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 3876c8c24165SMark Shellenbaum ZFS_EXIT(zfsvfs); 3877c8c24165SMark Shellenbaum return (error); 3878c8c24165SMark Shellenbaum } 3879c8c24165SMark Shellenbaum 3880c8c24165SMark Shellenbaum if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3881c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 38828e303ae0SMark Shellenbaum zfs_dirent_unlock(dl); 3883fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3884fa9e4066Sahrens return (error); 3885fa9e4066Sahrens } 3886fa9e4066Sahrens 388714843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 388814843421SMatthew Ahrens zfs_acl_ids_free(&acl_ids); 388914843421SMatthew Ahrens zfs_dirent_unlock(dl); 389014843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 3891be6fd75aSMatthew Ahrens return (SET_ERROR(EDQUOT)); 389214843421SMatthew Ahrens } 3893fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 389489459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 3895fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 3896ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 38970a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 38980a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE + len); 38990a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 39000a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 39010a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 39020a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes); 39030a586ceaSMark Shellenbaum } 390414843421SMatthew Ahrens if 
(fuid_dirtied) 390514843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 3906*f864f99eSPrakash Surya error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); 3907fa9e4066Sahrens if (error) { 3908fa9e4066Sahrens zfs_dirent_unlock(dl); 39091209a471SNeil Perrin if (error == ERESTART) { 391069962b56SMatthew Ahrens waited = B_TRUE; 39118a2f1b91Sahrens dmu_tx_wait(tx); 39128a2f1b91Sahrens dmu_tx_abort(tx); 3913fa9e4066Sahrens goto top; 3914fa9e4066Sahrens } 3915c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 39168a2f1b91Sahrens dmu_tx_abort(tx); 3917fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3918fa9e4066Sahrens return (error); 3919fa9e4066Sahrens } 3920fa9e4066Sahrens 3921fa9e4066Sahrens /* 3922fa9e4066Sahrens * Create a new object for the symlink. 39230a586ceaSMark Shellenbaum * for version 4 ZPL datsets the symlink will be an SA attribute 3924fa9e4066Sahrens */ 39250a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 3926fa9e4066Sahrens 39270a586ceaSMark Shellenbaum if (fuid_dirtied) 39280a586ceaSMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 3929fa9e4066Sahrens 39301412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 39310a586ceaSMark Shellenbaum if (zp->z_is_sa) 39320a586ceaSMark Shellenbaum error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 39330a586ceaSMark Shellenbaum link, len, tx); 39340a586ceaSMark Shellenbaum else 39350a586ceaSMark Shellenbaum zfs_sa_symlink(zp, link, len, tx); 39361412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 3937fa9e4066Sahrens 39380a586ceaSMark Shellenbaum zp->z_size = len; 39390a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 39400a586ceaSMark Shellenbaum &zp->z_size, sizeof (zp->z_size), tx); 3941fa9e4066Sahrens /* 3942fa9e4066Sahrens * Insert the new object into the directory. 
3943fa9e4066Sahrens */ 3944fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 39450a586ceaSMark Shellenbaum 39460a586ceaSMark Shellenbaum if (flags & FIGNORECASE) 39470a586ceaSMark Shellenbaum txtype |= TX_CI; 39480a586ceaSMark Shellenbaum zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 394989459e17SMark Shellenbaum 395089459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 3951fa9e4066Sahrens 3952fa9e4066Sahrens dmu_tx_commit(tx); 3953fa9e4066Sahrens 3954fa9e4066Sahrens zfs_dirent_unlock(dl); 3955fa9e4066Sahrens 3956fa9e4066Sahrens VN_RELE(ZTOV(zp)); 3957fa9e4066Sahrens 395855da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 39595002558fSNeil Perrin zil_commit(zilog, 0); 396055da60b9SMark J Musante 3961fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3962fa9e4066Sahrens return (error); 3963fa9e4066Sahrens } 3964fa9e4066Sahrens 3965fa9e4066Sahrens /* 3966fa9e4066Sahrens * Return, in the buffer contained in the provided uio structure, 3967fa9e4066Sahrens * the symbolic path referred to by vp. 3968fa9e4066Sahrens * 3969fa9e4066Sahrens * IN: vp - vnode of symbolic link. 3970f7170741SWill Andrews * uio - structure to contain the link path. 3971fa9e4066Sahrens * cr - credentials of caller. 3972da6c28aaSamw * ct - caller context 3973fa9e4066Sahrens * 3974f7170741SWill Andrews * OUT: uio - structure containing the link path. 3975fa9e4066Sahrens * 3976f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
3977fa9e4066Sahrens * 3978fa9e4066Sahrens * Timestamps: 3979fa9e4066Sahrens * vp - atime updated 3980fa9e4066Sahrens */ 3981fa9e4066Sahrens /* ARGSUSED */ 3982fa9e4066Sahrens static int 3983da6c28aaSamw zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 3984fa9e4066Sahrens { 3985fa9e4066Sahrens znode_t *zp = VTOZ(vp); 3986fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3987fa9e4066Sahrens int error; 3988fa9e4066Sahrens 39893cb34c60Sahrens ZFS_ENTER(zfsvfs); 39903cb34c60Sahrens ZFS_VERIFY_ZP(zp); 3991fa9e4066Sahrens 39921412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 39930a586ceaSMark Shellenbaum if (zp->z_is_sa) 39940a586ceaSMark Shellenbaum error = sa_lookup_uio(zp->z_sa_hdl, 39950a586ceaSMark Shellenbaum SA_ZPL_SYMLINK(zfsvfs), uio); 39960a586ceaSMark Shellenbaum else 39970a586ceaSMark Shellenbaum error = zfs_sa_readlink(zp, uio); 39981412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 3999fa9e4066Sahrens 4000fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 40010a586ceaSMark Shellenbaum 4002fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4003fa9e4066Sahrens return (error); 4004fa9e4066Sahrens } 4005fa9e4066Sahrens 4006fa9e4066Sahrens /* 4007fa9e4066Sahrens * Insert a new entry into directory tdvp referencing svp. 4008fa9e4066Sahrens * 4009fa9e4066Sahrens * IN: tdvp - Directory to contain new entry. 4010fa9e4066Sahrens * svp - vnode of new entry. 4011fa9e4066Sahrens * name - name of new entry. 4012fa9e4066Sahrens * cr - credentials of caller. 4013da6c28aaSamw * ct - caller context 4014fa9e4066Sahrens * 4015f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
4016fa9e4066Sahrens * 4017fa9e4066Sahrens * Timestamps: 4018fa9e4066Sahrens * tdvp - ctime|mtime updated 4019fa9e4066Sahrens * svp - ctime updated 4020fa9e4066Sahrens */ 4021fa9e4066Sahrens /* ARGSUSED */ 4022fa9e4066Sahrens static int 4023da6c28aaSamw zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4024da6c28aaSamw caller_context_t *ct, int flags) 4025fa9e4066Sahrens { 4026fa9e4066Sahrens znode_t *dzp = VTOZ(tdvp); 4027fa9e4066Sahrens znode_t *tzp, *szp; 4028fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4029f18faf3fSek zilog_t *zilog; 4030fa9e4066Sahrens zfs_dirlock_t *dl; 4031fa9e4066Sahrens dmu_tx_t *tx; 4032fa9e4066Sahrens vnode_t *realvp; 4033fa9e4066Sahrens int error; 4034da6c28aaSamw int zf = ZNEW; 4035d39ee142SMark Shellenbaum uint64_t parent; 4036f1696b23SMark Shellenbaum uid_t owner; 403769962b56SMatthew Ahrens boolean_t waited = B_FALSE; 4038fa9e4066Sahrens 4039fa9e4066Sahrens ASSERT(tdvp->v_type == VDIR); 4040fa9e4066Sahrens 40413cb34c60Sahrens ZFS_ENTER(zfsvfs); 40423cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 4043f18faf3fSek zilog = zfsvfs->z_log; 4044fa9e4066Sahrens 4045da6c28aaSamw if (VOP_REALVP(svp, &realvp, ct) == 0) 4046fa9e4066Sahrens svp = realvp; 4047fa9e4066Sahrens 4048d39ee142SMark Shellenbaum /* 4049d39ee142SMark Shellenbaum * POSIX dictates that we return EPERM here. 4050d39ee142SMark Shellenbaum * Better choices include ENOTSUP or EISDIR. 4051d39ee142SMark Shellenbaum */ 4052d39ee142SMark Shellenbaum if (svp->v_type == VDIR) { 4053d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 4054be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 4055d39ee142SMark Shellenbaum } 4056d39ee142SMark Shellenbaum 405718e64978SMarcel Telka szp = VTOZ(svp); 405818e64978SMarcel Telka ZFS_VERIFY_ZP(szp); 405918e64978SMarcel Telka 406018e64978SMarcel Telka /* 406118e64978SMarcel Telka * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 406218e64978SMarcel Telka * ctldir appear to have the same v_vfsp. 
406318e64978SMarcel Telka */ 406418e64978SMarcel Telka if (szp->z_zfsvfs != zfsvfs || zfsctl_is_node(svp)) { 4065fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4066be6fd75aSMatthew Ahrens return (SET_ERROR(EXDEV)); 4067fa9e4066Sahrens } 4068d39ee142SMark Shellenbaum 4069d39ee142SMark Shellenbaum /* Prevent links to .zfs/shares files */ 4070d39ee142SMark Shellenbaum 4071d39ee142SMark Shellenbaum if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4072d39ee142SMark Shellenbaum &parent, sizeof (uint64_t))) != 0) { 4073d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 4074d39ee142SMark Shellenbaum return (error); 4075d39ee142SMark Shellenbaum } 4076d39ee142SMark Shellenbaum if (parent == zfsvfs->z_shares_dir) { 4077d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 4078be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 4079d39ee142SMark Shellenbaum } 4080d39ee142SMark Shellenbaum 4081de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, 4082da6c28aaSamw strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4083da6c28aaSamw ZFS_EXIT(zfsvfs); 4084be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 4085da6c28aaSamw } 4086da6c28aaSamw if (flags & FIGNORECASE) 4087da6c28aaSamw zf |= ZCILOOK; 4088da6c28aaSamw 4089fa9e4066Sahrens /* 4090fa9e4066Sahrens * We do not support links between attributes and non-attributes 4091fa9e4066Sahrens * because of the potential security risk of creating links 4092fa9e4066Sahrens * into "normal" file space in order to circumvent restrictions 4093fa9e4066Sahrens * imposed in attribute space. 
4094fa9e4066Sahrens */ 40950a586ceaSMark Shellenbaum if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4096fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4097be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4098fa9e4066Sahrens } 4099fa9e4066Sahrens 4100fa9e4066Sahrens 4101f1696b23SMark Shellenbaum owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4102f1696b23SMark Shellenbaum if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) { 4103fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4104be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 4105fa9e4066Sahrens } 4106fa9e4066Sahrens 4107da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4108fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4109fa9e4066Sahrens return (error); 4110fa9e4066Sahrens } 4111fa9e4066Sahrens 4112d39ee142SMark Shellenbaum top: 4113fa9e4066Sahrens /* 4114fa9e4066Sahrens * Attempt to lock directory; fail if entry already exists. 4115fa9e4066Sahrens */ 4116da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 4117da6c28aaSamw if (error) { 4118fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4119fa9e4066Sahrens return (error); 4120fa9e4066Sahrens } 4121fa9e4066Sahrens 4122fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 41230a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4124ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 41250a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, szp); 41260a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 4127*f864f99eSPrakash Surya error = dmu_tx_assign(tx, (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); 4128fa9e4066Sahrens if (error) { 4129fa9e4066Sahrens zfs_dirent_unlock(dl); 41301209a471SNeil Perrin if (error == ERESTART) { 413169962b56SMatthew Ahrens waited = B_TRUE; 41328a2f1b91Sahrens dmu_tx_wait(tx); 41338a2f1b91Sahrens dmu_tx_abort(tx); 4134fa9e4066Sahrens goto top; 4135fa9e4066Sahrens } 41368a2f1b91Sahrens dmu_tx_abort(tx); 4137fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4138fa9e4066Sahrens return (error); 4139fa9e4066Sahrens } 4140fa9e4066Sahrens 4141fa9e4066Sahrens error = zfs_link_create(dl, szp, tx, 0); 4142fa9e4066Sahrens 4143da6c28aaSamw if (error == 0) { 4144da6c28aaSamw uint64_t txtype = TX_LINK; 4145da6c28aaSamw if (flags & FIGNORECASE) 4146da6c28aaSamw txtype |= TX_CI; 4147da6c28aaSamw zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4148da6c28aaSamw } 4149fa9e4066Sahrens 4150fa9e4066Sahrens dmu_tx_commit(tx); 4151fa9e4066Sahrens 4152fa9e4066Sahrens zfs_dirent_unlock(dl); 4153fa9e4066Sahrens 4154df2381bfSpraks if (error == 0) { 4155da6c28aaSamw vnevent_link(svp, ct); 4156df2381bfSpraks } 4157df2381bfSpraks 415855da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 41595002558fSNeil Perrin zil_commit(zilog, 0); 416055da60b9SMark J Musante 4161fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4162fa9e4066Sahrens return (error); 4163fa9e4066Sahrens } 4164fa9e4066Sahrens 4165fa9e4066Sahrens /* 4166fa9e4066Sahrens * zfs_null_putapage() is used when the file system has been force 4167fa9e4066Sahrens * unmounted. It just drops the pages. 4168fa9e4066Sahrens */ 4169fa9e4066Sahrens /* ARGSUSED */ 4170fa9e4066Sahrens static int 4171fa9e4066Sahrens zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 41729a686fbcSPaul Dagnelie size_t *lenp, int flags, cred_t *cr) 4173fa9e4066Sahrens { 4174fa9e4066Sahrens pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 4175fa9e4066Sahrens return (0); 4176fa9e4066Sahrens } 4177fa9e4066Sahrens 417844eda4d7Smaybee /* 417944eda4d7Smaybee * Push a page out to disk, klustering if possible. 
418044eda4d7Smaybee * 418144eda4d7Smaybee * IN: vp - file to push page to. 418244eda4d7Smaybee * pp - page to push. 418344eda4d7Smaybee * flags - additional flags. 418444eda4d7Smaybee * cr - credentials of caller. 418544eda4d7Smaybee * 418644eda4d7Smaybee * OUT: offp - start of range pushed. 418744eda4d7Smaybee * lenp - len of range pushed. 418844eda4d7Smaybee * 4189f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 419044eda4d7Smaybee * 419144eda4d7Smaybee * NOTE: callers must have locked the page to be pushed. On 419244eda4d7Smaybee * exit, the page (and all other pages in the kluster) must be 419344eda4d7Smaybee * unlocked. 419444eda4d7Smaybee */ 4195fa9e4066Sahrens /* ARGSUSED */ 4196fa9e4066Sahrens static int 4197fa9e4066Sahrens zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 41989a686fbcSPaul Dagnelie size_t *lenp, int flags, cred_t *cr) 4199fa9e4066Sahrens { 4200fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4201fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4202fa9e4066Sahrens dmu_tx_t *tx; 420344eda4d7Smaybee u_offset_t off, koff; 420444eda4d7Smaybee size_t len, klen; 4205fa9e4066Sahrens int err; 4206fa9e4066Sahrens 4207fa9e4066Sahrens off = pp->p_offset; 420844eda4d7Smaybee len = PAGESIZE; 420944eda4d7Smaybee /* 421044eda4d7Smaybee * If our blocksize is bigger than the page size, try to kluster 42111209a471SNeil Perrin * multiple pages so that we write a full block (thus avoiding 421244eda4d7Smaybee * a read-modify-write). 421344eda4d7Smaybee */ 42140a586ceaSMark Shellenbaum if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 4215ac05c741SMark Maybee klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4216ac05c741SMark Maybee koff = ISP2(klen) ? 
P2ALIGN(off, (u_offset_t)klen) : 0; 42170a586ceaSMark Shellenbaum ASSERT(koff <= zp->z_size); 42180a586ceaSMark Shellenbaum if (koff + klen > zp->z_size) 42190a586ceaSMark Shellenbaum klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 422044eda4d7Smaybee pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 422144eda4d7Smaybee } 422244eda4d7Smaybee ASSERT3U(btop(len), ==, btopr(len)); 4223ac05c741SMark Maybee 4224dd6ef538Smaybee /* 4225dd6ef538Smaybee * Can't push pages past end-of-file. 4226dd6ef538Smaybee */ 42270a586ceaSMark Shellenbaum if (off >= zp->z_size) { 4228f4d2e9e6Smaybee /* ignore all pages */ 422944eda4d7Smaybee err = 0; 423044eda4d7Smaybee goto out; 42310a586ceaSMark Shellenbaum } else if (off + len > zp->z_size) { 42320a586ceaSMark Shellenbaum int npages = btopr(zp->z_size - off); 423344eda4d7Smaybee page_t *trunc; 423444eda4d7Smaybee 423544eda4d7Smaybee page_list_break(&pp, &trunc, npages); 4236f4d2e9e6Smaybee /* ignore pages past end of file */ 423744eda4d7Smaybee if (trunc) 4238f4d2e9e6Smaybee pvn_write_done(trunc, flags); 42390a586ceaSMark Shellenbaum len = zp->z_size - off; 4240dd6ef538Smaybee } 424114843421SMatthew Ahrens 42420a586ceaSMark Shellenbaum if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 42430a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4244be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 424514843421SMatthew Ahrens goto out; 424614843421SMatthew Ahrens } 4247fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 4248fa9e4066Sahrens dmu_tx_hold_write(tx, zp->z_id, off, len); 42490a586ceaSMark Shellenbaum 42500a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 42510a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 4252e722410cSMatthew Ahrens err = dmu_tx_assign(tx, TXG_WAIT); 4253fa9e4066Sahrens if (err != 0) { 42548a2f1b91Sahrens dmu_tx_abort(tx); 4255fa9e4066Sahrens goto out; 4256fa9e4066Sahrens } 4257fa9e4066Sahrens 425844eda4d7Smaybee if (zp->z_blksz <= PAGESIZE) { 
42590fab61baSJonathan W Adams caddr_t va = zfs_map_page(pp, S_READ); 426044eda4d7Smaybee ASSERT3U(len, <=, PAGESIZE); 426144eda4d7Smaybee dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 42620fab61baSJonathan W Adams zfs_unmap_page(pp, va); 426344eda4d7Smaybee } else { 426444eda4d7Smaybee err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 426544eda4d7Smaybee } 4266fa9e4066Sahrens 426744eda4d7Smaybee if (err == 0) { 42680a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 4269db9986c7SMark Shellenbaum sa_bulk_attr_t bulk[3]; 42700a586ceaSMark Shellenbaum int count = 0; 42710a586ceaSMark Shellenbaum 42720a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 42730a586ceaSMark Shellenbaum &mtime, 16); 42740a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 42750a586ceaSMark Shellenbaum &ctime, 16); 4276db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4277db9986c7SMark Shellenbaum &zp->z_pflags, 8); 42780a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 42790a586ceaSMark Shellenbaum B_TRUE); 428080e10fd0SAndriy Gapon err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 428180e10fd0SAndriy Gapon ASSERT0(err); 4282ac05c741SMark Maybee zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 428344eda4d7Smaybee } 428468857716SLin Ling dmu_tx_commit(tx); 4285fa9e4066Sahrens 428644eda4d7Smaybee out: 4287f4d2e9e6Smaybee pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4288fa9e4066Sahrens if (offp) 4289fa9e4066Sahrens *offp = off; 4290fa9e4066Sahrens if (lenp) 4291fa9e4066Sahrens *lenp = len; 4292fa9e4066Sahrens 4293fa9e4066Sahrens return (err); 4294fa9e4066Sahrens } 4295fa9e4066Sahrens 4296fa9e4066Sahrens /* 4297fa9e4066Sahrens * Copy the portion of the file indicated from pages into the file. 4298fa9e4066Sahrens * The pages are stored in a page list attached to the files vnode. 
4299fa9e4066Sahrens * 4300fa9e4066Sahrens * IN: vp - vnode of file to push page data to. 4301fa9e4066Sahrens * off - position in file to put data. 4302fa9e4066Sahrens * len - amount of data to write. 4303fa9e4066Sahrens * flags - flags to control the operation. 4304fa9e4066Sahrens * cr - credentials of caller. 4305da6c28aaSamw * ct - caller context. 4306fa9e4066Sahrens * 4307f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 4308fa9e4066Sahrens * 4309fa9e4066Sahrens * Timestamps: 4310fa9e4066Sahrens * vp - ctime|mtime updated 4311fa9e4066Sahrens */ 4312da6c28aaSamw /*ARGSUSED*/ 4313fa9e4066Sahrens static int 4314da6c28aaSamw zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4315da6c28aaSamw caller_context_t *ct) 4316fa9e4066Sahrens { 4317fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4318fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4319fa9e4066Sahrens page_t *pp; 4320fa9e4066Sahrens size_t io_len; 4321fa9e4066Sahrens u_offset_t io_off; 4322ac05c741SMark Maybee uint_t blksz; 4323ac05c741SMark Maybee rl_t *rl; 4324fa9e4066Sahrens int error = 0; 4325fa9e4066Sahrens 43263cb34c60Sahrens ZFS_ENTER(zfsvfs); 43273cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4328fa9e4066Sahrens 4329c4fc6b21SGarrett D'Amore /* 4330c4fc6b21SGarrett D'Amore * There's nothing to do if no data is cached. 4331c4fc6b21SGarrett D'Amore */ 4332c4fc6b21SGarrett D'Amore if (!vn_has_cached_data(vp)) { 4333c4fc6b21SGarrett D'Amore ZFS_EXIT(zfsvfs); 4334c4fc6b21SGarrett D'Amore return (0); 4335c4fc6b21SGarrett D'Amore } 4336c4fc6b21SGarrett D'Amore 4337ac05c741SMark Maybee /* 4338ac05c741SMark Maybee * Align this request to the file block size in case we kluster. 4339ac05c741SMark Maybee * XXX - this can result in pretty aggresive locking, which can 4340ac05c741SMark Maybee * impact simultanious read/write access. 
One option might be 4341ac05c741SMark Maybee * to break up long requests (len == 0) into block-by-block 4342ac05c741SMark Maybee * operations to get narrower locking. 4343ac05c741SMark Maybee */ 4344ac05c741SMark Maybee blksz = zp->z_blksz; 4345ac05c741SMark Maybee if (ISP2(blksz)) 4346ac05c741SMark Maybee io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4347ac05c741SMark Maybee else 4348ac05c741SMark Maybee io_off = 0; 4349ac05c741SMark Maybee if (len > 0 && ISP2(blksz)) 43505a6f5619SMark Maybee io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4351ac05c741SMark Maybee else 4352ac05c741SMark Maybee io_len = 0; 4353ac05c741SMark Maybee 4354ac05c741SMark Maybee if (io_len == 0) { 4355fa9e4066Sahrens /* 4356ac05c741SMark Maybee * Search the entire vp list for pages >= io_off. 4357fa9e4066Sahrens */ 4358ac05c741SMark Maybee rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4359ac05c741SMark Maybee error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4360fe9cf88cSperrin goto out; 4361fa9e4066Sahrens } 4362ac05c741SMark Maybee rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4363fa9e4066Sahrens 43640a586ceaSMark Shellenbaum if (off > zp->z_size) { 4365fa9e4066Sahrens /* past end of file */ 4366ac05c741SMark Maybee zfs_range_unlock(rl); 4367fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4368fa9e4066Sahrens return (0); 4369fa9e4066Sahrens } 4370fa9e4066Sahrens 43710a586ceaSMark Shellenbaum len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4372fa9e4066Sahrens 4373ac05c741SMark Maybee for (off = io_off; io_off < off + len; io_off += io_len) { 4374fa9e4066Sahrens if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4375104e2ed7Sperrin pp = page_lookup(vp, io_off, 4376ecb72030Sperrin (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4377fa9e4066Sahrens } else { 4378fa9e4066Sahrens pp = page_lookup_nowait(vp, io_off, 4379ecb72030Sperrin (flags & B_FREE) ? 
SE_EXCL : SE_SHARED); 4380fa9e4066Sahrens } 4381fa9e4066Sahrens 4382fa9e4066Sahrens if (pp != NULL && pvn_getdirty(pp, flags)) { 4383fa9e4066Sahrens int err; 4384fa9e4066Sahrens 4385fa9e4066Sahrens /* 4386fa9e4066Sahrens * Found a dirty page to push 4387fa9e4066Sahrens */ 4388104e2ed7Sperrin err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4389104e2ed7Sperrin if (err) 4390fa9e4066Sahrens error = err; 4391fa9e4066Sahrens } else { 4392fa9e4066Sahrens io_len = PAGESIZE; 4393fa9e4066Sahrens } 4394fa9e4066Sahrens } 4395fe9cf88cSperrin out: 4396ac05c741SMark Maybee zfs_range_unlock(rl); 439755da60b9SMark J Musante if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 43985002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 4399fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4400fa9e4066Sahrens return (error); 4401fa9e4066Sahrens } 4402fa9e4066Sahrens 4403da6c28aaSamw /*ARGSUSED*/ 4404fa9e4066Sahrens void 4405da6c28aaSamw zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4406fa9e4066Sahrens { 4407fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4408fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4409fa9e4066Sahrens int error; 4410fa9e4066Sahrens 4411f18faf3fSek rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 44120a586ceaSMark Shellenbaum if (zp->z_sa_hdl == NULL) { 44134ccbb6e7Sahrens /* 4414874395d5Smaybee * The fs has been unmounted, or we did a 4415874395d5Smaybee * suspend/resume and this file no longer exists. 
44164ccbb6e7Sahrens */ 4417fa9e4066Sahrens if (vn_has_cached_data(vp)) { 4418fa9e4066Sahrens (void) pvn_vplist_dirty(vp, 0, zfs_null_putapage, 4419fa9e4066Sahrens B_INVAL, cr); 4420fa9e4066Sahrens } 4421fa9e4066Sahrens 4422ea8dc4b6Seschrock mutex_enter(&zp->z_lock); 4423cd2adeceSChris Kirby mutex_enter(&vp->v_lock); 4424cd2adeceSChris Kirby ASSERT(vp->v_count == 1); 4425ade42b55SSebastien Roy VN_RELE_LOCKED(vp); 4426cd2adeceSChris Kirby mutex_exit(&vp->v_lock); 44274ccbb6e7Sahrens mutex_exit(&zp->z_lock); 4428f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 4429874395d5Smaybee zfs_znode_free(zp); 4430fa9e4066Sahrens return; 4431fa9e4066Sahrens } 4432fa9e4066Sahrens 4433fa9e4066Sahrens /* 4434fa9e4066Sahrens * Attempt to push any data in the page cache. If this fails 4435fa9e4066Sahrens * we will get kicked out later in zfs_zinactive(). 4436fa9e4066Sahrens */ 44378afd4dd6Sperrin if (vn_has_cached_data(vp)) { 44388afd4dd6Sperrin (void) pvn_vplist_dirty(vp, 0, zfs_putapage, B_INVAL|B_ASYNC, 44398afd4dd6Sperrin cr); 44408afd4dd6Sperrin } 4441fa9e4066Sahrens 4442893a6d32Sahrens if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4443fa9e4066Sahrens dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4444fa9e4066Sahrens 44450a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 44460a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 4447fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 4448fa9e4066Sahrens if (error) { 4449fa9e4066Sahrens dmu_tx_abort(tx); 4450fa9e4066Sahrens } else { 4451fa9e4066Sahrens mutex_enter(&zp->z_lock); 44520a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 44530a586ceaSMark Shellenbaum (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4454fa9e4066Sahrens zp->z_atime_dirty = 0; 4455fa9e4066Sahrens mutex_exit(&zp->z_lock); 4456fa9e4066Sahrens dmu_tx_commit(tx); 4457fa9e4066Sahrens } 4458fa9e4066Sahrens } 4459fa9e4066Sahrens 4460fa9e4066Sahrens zfs_zinactive(zp); 4461f18faf3fSek 
rw_exit(&zfsvfs->z_teardown_inactive_lock); 4462fa9e4066Sahrens } 4463fa9e4066Sahrens 4464fa9e4066Sahrens /* 4465fa9e4066Sahrens * Bounds-check the seek operation. 4466fa9e4066Sahrens * 4467fa9e4066Sahrens * IN: vp - vnode seeking within 4468fa9e4066Sahrens * ooff - old file offset 4469fa9e4066Sahrens * noffp - pointer to new file offset 4470da6c28aaSamw * ct - caller context 4471fa9e4066Sahrens * 4472f7170741SWill Andrews * RETURN: 0 on success, EINVAL if new offset invalid. 4473fa9e4066Sahrens */ 4474fa9e4066Sahrens /* ARGSUSED */ 4475fa9e4066Sahrens static int 4476da6c28aaSamw zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4477da6c28aaSamw caller_context_t *ct) 4478fa9e4066Sahrens { 4479fa9e4066Sahrens if (vp->v_type == VDIR) 4480fa9e4066Sahrens return (0); 4481fa9e4066Sahrens return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4482fa9e4066Sahrens } 4483fa9e4066Sahrens 4484fa9e4066Sahrens /* 4485fa9e4066Sahrens * Pre-filter the generic locking function to trap attempts to place 4486fa9e4066Sahrens * a mandatory lock on a memory mapped file. 4487fa9e4066Sahrens */ 4488fa9e4066Sahrens static int 4489fa9e4066Sahrens zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4490da6c28aaSamw flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4491fa9e4066Sahrens { 4492fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4493fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4494fa9e4066Sahrens 44953cb34c60Sahrens ZFS_ENTER(zfsvfs); 44963cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4497fa9e4066Sahrens 4498fa9e4066Sahrens /* 4499ea8dc4b6Seschrock * We are following the UFS semantics with respect to mapcnt 4500ea8dc4b6Seschrock * here: If we see that the file is mapped already, then we will 4501ea8dc4b6Seschrock * return an error, but we don't worry about races between this 4502ea8dc4b6Seschrock * function and zfs_map(). 
4503fa9e4066Sahrens */ 45040a586ceaSMark Shellenbaum if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4505fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4506be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 4507fa9e4066Sahrens } 4508fa9e4066Sahrens ZFS_EXIT(zfsvfs); 450904ce3d0bSMark Shellenbaum return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4510fa9e4066Sahrens } 4511fa9e4066Sahrens 4512fa9e4066Sahrens /* 4513fa9e4066Sahrens * If we can't find a page in the cache, we will create a new page 4514fa9e4066Sahrens * and fill it with file data. For efficiency, we may try to fill 4515ac05c741SMark Maybee * multiple pages at once (klustering) to fill up the supplied page 4516ed886187SMark Maybee * list. Note that the pages to be filled are held with an exclusive 4517ed886187SMark Maybee * lock to prevent access by other threads while they are being filled. 4518fa9e4066Sahrens */ 4519fa9e4066Sahrens static int 4520fa9e4066Sahrens zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4521fa9e4066Sahrens caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4522fa9e4066Sahrens { 4523fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4524fa9e4066Sahrens page_t *pp, *cur_pp; 4525fa9e4066Sahrens objset_t *os = zp->z_zfsvfs->z_os; 4526fa9e4066Sahrens u_offset_t io_off, total; 4527fa9e4066Sahrens size_t io_len; 4528fa9e4066Sahrens int err; 4529fa9e4066Sahrens 453044eda4d7Smaybee if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4531ac05c741SMark Maybee /* 4532ac05c741SMark Maybee * We only have a single page, don't bother klustering 4533ac05c741SMark Maybee */ 4534fa9e4066Sahrens io_off = off; 4535fa9e4066Sahrens io_len = PAGESIZE; 4536ed886187SMark Maybee pp = page_create_va(vp, io_off, io_len, 4537ed886187SMark Maybee PG_EXCL | PG_WAIT, seg, addr); 4538fa9e4066Sahrens } else { 4539fa9e4066Sahrens /* 4540ac05c741SMark Maybee * Try to find enough pages to fill the page list 4541fa9e4066Sahrens */ 4542fa9e4066Sahrens pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 
4543ac05c741SMark Maybee &io_len, off, plsz, 0); 4544fa9e4066Sahrens } 4545fa9e4066Sahrens if (pp == NULL) { 4546fa9e4066Sahrens /* 4547ac05c741SMark Maybee * The page already exists, nothing to do here. 4548fa9e4066Sahrens */ 4549fa9e4066Sahrens *pl = NULL; 4550fa9e4066Sahrens return (0); 4551fa9e4066Sahrens } 4552fa9e4066Sahrens 4553fa9e4066Sahrens /* 4554fa9e4066Sahrens * Fill the pages in the kluster. 4555fa9e4066Sahrens */ 4556fa9e4066Sahrens cur_pp = pp; 4557fa9e4066Sahrens for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4558ac05c741SMark Maybee caddr_t va; 4559ac05c741SMark Maybee 456044eda4d7Smaybee ASSERT3U(io_off, ==, cur_pp->p_offset); 45610fab61baSJonathan W Adams va = zfs_map_page(cur_pp, S_WRITE); 45627bfdf011SNeil Perrin err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 45637bfdf011SNeil Perrin DMU_READ_PREFETCH); 45640fab61baSJonathan W Adams zfs_unmap_page(cur_pp, va); 4565fa9e4066Sahrens if (err) { 4566fa9e4066Sahrens /* On error, toss the entire kluster */ 4567fa9e4066Sahrens pvn_read_done(pp, B_ERROR); 4568b87f3af3Sperrin /* convert checksum errors into IO errors */ 4569b87f3af3Sperrin if (err == ECKSUM) 4570be6fd75aSMatthew Ahrens err = SET_ERROR(EIO); 4571fa9e4066Sahrens return (err); 4572fa9e4066Sahrens } 4573fa9e4066Sahrens cur_pp = cur_pp->p_next; 4574fa9e4066Sahrens } 4575ac05c741SMark Maybee 4576fa9e4066Sahrens /* 4577ac05c741SMark Maybee * Fill in the page list array from the kluster starting 4578ac05c741SMark Maybee * from the desired offset `off'. 4579fa9e4066Sahrens * NOTE: the page list will always be null terminated. 
4580fa9e4066Sahrens */ 4581fa9e4066Sahrens pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4582ac05c741SMark Maybee ASSERT(pl == NULL || (*pl)->p_offset == off); 4583fa9e4066Sahrens 4584fa9e4066Sahrens return (0); 4585fa9e4066Sahrens } 4586fa9e4066Sahrens 4587fa9e4066Sahrens /* 4588fa9e4066Sahrens * Return pointers to the pages for the file region [off, off + len] 4589fa9e4066Sahrens * in the pl array. If plsz is greater than len, this function may 4590ac05c741SMark Maybee * also return page pointers from after the specified region 4591ac05c741SMark Maybee * (i.e. the region [off, off + plsz]). These additional pages are 4592ac05c741SMark Maybee * only returned if they are already in the cache, or were created as 4593ac05c741SMark Maybee * part of a klustered read. 4594fa9e4066Sahrens * 4595fa9e4066Sahrens * IN: vp - vnode of file to get data from. 4596fa9e4066Sahrens * off - position in file to get data from. 4597fa9e4066Sahrens * len - amount of data to retrieve. 4598fa9e4066Sahrens * plsz - length of provided page list. 4599fa9e4066Sahrens * seg - segment to obtain pages for. 4600fa9e4066Sahrens * addr - virtual address of fault. 4601fa9e4066Sahrens * rw - mode of created pages. 4602fa9e4066Sahrens * cr - credentials of caller. 4603da6c28aaSamw * ct - caller context. 4604fa9e4066Sahrens * 4605fa9e4066Sahrens * OUT: protp - protection mode of created pages. 4606fa9e4066Sahrens * pl - list of pages created. 4607fa9e4066Sahrens * 4608f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
4609fa9e4066Sahrens * 4610fa9e4066Sahrens * Timestamps: 4611fa9e4066Sahrens * vp - atime updated 4612fa9e4066Sahrens */ 4613fa9e4066Sahrens /* ARGSUSED */ 4614fa9e4066Sahrens static int 4615fa9e4066Sahrens zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4616f7170741SWill Andrews page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4617f7170741SWill Andrews enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4618fa9e4066Sahrens { 4619fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4620fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4621ac05c741SMark Maybee page_t **pl0 = pl; 4622ac05c741SMark Maybee int err = 0; 4623ac05c741SMark Maybee 4624ac05c741SMark Maybee /* we do our own caching, faultahead is unnecessary */ 4625ac05c741SMark Maybee if (pl == NULL) 4626ac05c741SMark Maybee return (0); 4627ac05c741SMark Maybee else if (len > plsz) 4628ac05c741SMark Maybee len = plsz; 462927bd165aSMark Maybee else 463027bd165aSMark Maybee len = P2ROUNDUP(len, PAGESIZE); 4631ac05c741SMark Maybee ASSERT(plsz >= len); 4632fa9e4066Sahrens 46333cb34c60Sahrens ZFS_ENTER(zfsvfs); 46343cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4635fa9e4066Sahrens 4636fa9e4066Sahrens if (protp) 4637fa9e4066Sahrens *protp = PROT_ALL; 4638fa9e4066Sahrens 4639fa9e4066Sahrens /* 4640ed886187SMark Maybee * Loop through the requested range [off, off + len) looking 4641fa9e4066Sahrens * for pages. If we don't find a page, we will need to create 4642fa9e4066Sahrens * a new page and fill it with data from the file. 
4643fa9e4066Sahrens */ 4644fa9e4066Sahrens while (len > 0) { 4645ac05c741SMark Maybee if (*pl = page_lookup(vp, off, SE_SHARED)) 4646ac05c741SMark Maybee *(pl+1) = NULL; 4647ac05c741SMark Maybee else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4648ac05c741SMark Maybee goto out; 4649ac05c741SMark Maybee while (*pl) { 4650ac05c741SMark Maybee ASSERT3U((*pl)->p_offset, ==, off); 4651fa9e4066Sahrens off += PAGESIZE; 4652fa9e4066Sahrens addr += PAGESIZE; 465327bd165aSMark Maybee if (len > 0) { 465427bd165aSMark Maybee ASSERT3U(len, >=, PAGESIZE); 4655ac05c741SMark Maybee len -= PAGESIZE; 465627bd165aSMark Maybee } 4657ac05c741SMark Maybee ASSERT3U(plsz, >=, PAGESIZE); 4658fa9e4066Sahrens plsz -= PAGESIZE; 4659ac05c741SMark Maybee pl++; 4660fa9e4066Sahrens } 4661fa9e4066Sahrens } 4662fa9e4066Sahrens 4663fa9e4066Sahrens /* 4664fa9e4066Sahrens * Fill out the page array with any pages already in the cache. 4665fa9e4066Sahrens */ 4666ac05c741SMark Maybee while (plsz > 0 && 4667ac05c741SMark Maybee (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4668ac05c741SMark Maybee off += PAGESIZE; 4669ac05c741SMark Maybee plsz -= PAGESIZE; 4670fa9e4066Sahrens } 4671fa9e4066Sahrens out: 4672fe2f476aSperrin if (err) { 4673fe2f476aSperrin /* 4674fe2f476aSperrin * Release any pages we have previously locked. 4675fe2f476aSperrin */ 4676fe2f476aSperrin while (pl > pl0) 4677fe2f476aSperrin page_unlock(*--pl); 4678ac05c741SMark Maybee } else { 4679ac05c741SMark Maybee ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4680fe2f476aSperrin } 4681fe2f476aSperrin 4682fa9e4066Sahrens *pl = NULL; 4683fa9e4066Sahrens 4684fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4685fa9e4066Sahrens return (err); 4686fa9e4066Sahrens } 4687fa9e4066Sahrens 4688ea8dc4b6Seschrock /* 4689ea8dc4b6Seschrock * Request a memory map for a section of a file. 
This code interacts 4690ea8dc4b6Seschrock * with common code and the VM system as follows: 4691ea8dc4b6Seschrock * 4692f7170741SWill Andrews * - common code calls mmap(), which ends up in smmap_common() 4693f7170741SWill Andrews * - this calls VOP_MAP(), which takes you into (say) zfs 4694f7170741SWill Andrews * - zfs_map() calls as_map(), passing segvn_create() as the callback 4695f7170741SWill Andrews * - segvn_create() creates the new segment and calls VOP_ADDMAP() 4696f7170741SWill Andrews * - zfs_addmap() updates z_mapcnt 4697ea8dc4b6Seschrock */ 4698da6c28aaSamw /*ARGSUSED*/ 4699fa9e4066Sahrens static int 4700fa9e4066Sahrens zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4701da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4702da6c28aaSamw caller_context_t *ct) 4703fa9e4066Sahrens { 4704fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4705fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4706fa9e4066Sahrens segvn_crargs_t vn_a; 4707fa9e4066Sahrens int error; 4708fa9e4066Sahrens 47090616c50eSmarks ZFS_ENTER(zfsvfs); 47100616c50eSmarks ZFS_VERIFY_ZP(zp); 47110616c50eSmarks 47122889ec41SGordon Ross /* 47132889ec41SGordon Ross * Note: ZFS_READONLY is handled in zfs_zaccess_common. 
47142889ec41SGordon Ross */ 47152889ec41SGordon Ross 47160a586ceaSMark Shellenbaum if ((prot & PROT_WRITE) && (zp->z_pflags & 47172889ec41SGordon Ross (ZFS_IMMUTABLE | ZFS_APPENDONLY))) { 47180616c50eSmarks ZFS_EXIT(zfsvfs); 4719be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 47200616c50eSmarks } 4721da6c28aaSamw 47220616c50eSmarks if ((prot & (PROT_READ | PROT_EXEC)) && 47230a586ceaSMark Shellenbaum (zp->z_pflags & ZFS_AV_QUARANTINED)) { 47240616c50eSmarks ZFS_EXIT(zfsvfs); 4725be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 47260616c50eSmarks } 4727fa9e4066Sahrens 4728fa9e4066Sahrens if (vp->v_flag & VNOMAP) { 4729fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4730be6fd75aSMatthew Ahrens return (SET_ERROR(ENOSYS)); 4731fa9e4066Sahrens } 4732fa9e4066Sahrens 4733fa9e4066Sahrens if (off < 0 || len > MAXOFFSET_T - off) { 4734fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4735be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 4736fa9e4066Sahrens } 4737fa9e4066Sahrens 4738fa9e4066Sahrens if (vp->v_type != VREG) { 4739fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4740be6fd75aSMatthew Ahrens return (SET_ERROR(ENODEV)); 4741fa9e4066Sahrens } 4742fa9e4066Sahrens 4743fa9e4066Sahrens /* 4744fa9e4066Sahrens * If file is locked, disallow mapping. 
4745fa9e4066Sahrens */ 47460a586ceaSMark Shellenbaum if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 4747ea8dc4b6Seschrock ZFS_EXIT(zfsvfs); 4748be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 4749fa9e4066Sahrens } 4750fa9e4066Sahrens 4751fa9e4066Sahrens as_rangelock(as); 475260946fe0Smec error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 475360946fe0Smec if (error != 0) { 475460946fe0Smec as_rangeunlock(as); 475560946fe0Smec ZFS_EXIT(zfsvfs); 475660946fe0Smec return (error); 4757fa9e4066Sahrens } 4758fa9e4066Sahrens 4759fa9e4066Sahrens vn_a.vp = vp; 4760fa9e4066Sahrens vn_a.offset = (u_offset_t)off; 4761fa9e4066Sahrens vn_a.type = flags & MAP_TYPE; 4762fa9e4066Sahrens vn_a.prot = prot; 4763fa9e4066Sahrens vn_a.maxprot = maxprot; 4764fa9e4066Sahrens vn_a.cred = cr; 4765fa9e4066Sahrens vn_a.amp = NULL; 4766fa9e4066Sahrens vn_a.flags = flags & ~MAP_TYPE; 47674944b02eSkchow vn_a.szc = 0; 47684944b02eSkchow vn_a.lgrp_mem_policy_flags = 0; 4769fa9e4066Sahrens 4770fa9e4066Sahrens error = as_map(as, *addrp, len, segvn_create, &vn_a); 4771fa9e4066Sahrens 4772fa9e4066Sahrens as_rangeunlock(as); 4773fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4774fa9e4066Sahrens return (error); 4775fa9e4066Sahrens } 4776fa9e4066Sahrens 4777fa9e4066Sahrens /* ARGSUSED */ 4778fa9e4066Sahrens static int 4779fa9e4066Sahrens zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4780da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4781da6c28aaSamw caller_context_t *ct) 4782fa9e4066Sahrens { 4783ea8dc4b6Seschrock uint64_t pages = btopr(len); 4784ea8dc4b6Seschrock 4785ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 4786fa9e4066Sahrens return (0); 4787fa9e4066Sahrens } 4788fa9e4066Sahrens 4789b468a217Seschrock /* 4790b468a217Seschrock * The reason we push dirty pages as part of zfs_delmap() is so that we get a 4791b468a217Seschrock * more accurate mtime for the associated file. 
Since we don't have a way of 4792b468a217Seschrock * detecting when the data was actually modified, we have to resort to 4793b468a217Seschrock * heuristics. If an explicit msync() is done, then we mark the mtime when the 4794b468a217Seschrock * last page is pushed. The problem occurs when the msync() call is omitted, 4795b468a217Seschrock * which by far the most common case: 4796b468a217Seschrock * 47974bb73804SMatthew Ahrens * open() 47984bb73804SMatthew Ahrens * mmap() 47994bb73804SMatthew Ahrens * <modify memory> 48004bb73804SMatthew Ahrens * munmap() 48014bb73804SMatthew Ahrens * close() 48024bb73804SMatthew Ahrens * <time lapse> 48034bb73804SMatthew Ahrens * putpage() via fsflush 4804b468a217Seschrock * 4805b468a217Seschrock * If we wait until fsflush to come along, we can have a modification time that 4806b468a217Seschrock * is some arbitrary point in the future. In order to prevent this in the 4807b468a217Seschrock * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 4808b468a217Seschrock * torn down. 4809b468a217Seschrock */ 4810fa9e4066Sahrens /* ARGSUSED */ 4811fa9e4066Sahrens static int 4812fa9e4066Sahrens zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4813da6c28aaSamw size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 4814da6c28aaSamw caller_context_t *ct) 4815fa9e4066Sahrens { 4816ea8dc4b6Seschrock uint64_t pages = btopr(len); 4817ea8dc4b6Seschrock 4818ea8dc4b6Seschrock ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 4819ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 4820b468a217Seschrock 4821b468a217Seschrock if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 4822b468a217Seschrock vn_has_cached_data(vp)) 4823da6c28aaSamw (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 4824b468a217Seschrock 4825fa9e4066Sahrens return (0); 4826fa9e4066Sahrens } 4827fa9e4066Sahrens 4828fa9e4066Sahrens /* 4829fa9e4066Sahrens * Free or allocate space in a file. 
Currently, this function only 4830fa9e4066Sahrens * supports the `F_FREESP' command. However, this command is somewhat 4831fa9e4066Sahrens * misnamed, as its functionality includes the ability to allocate as 4832fa9e4066Sahrens * well as free space. 4833fa9e4066Sahrens * 4834fa9e4066Sahrens * IN: vp - vnode of file to free data in. 4835fa9e4066Sahrens * cmd - action to take (only F_FREESP supported). 4836fa9e4066Sahrens * bfp - section of file to free/alloc. 4837fa9e4066Sahrens * flag - current file open mode flags. 4838fa9e4066Sahrens * offset - current file offset. 4839fa9e4066Sahrens * cr - credentials of caller [UNUSED]. 4840da6c28aaSamw * ct - caller context. 4841fa9e4066Sahrens * 4842f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 4843fa9e4066Sahrens * 4844fa9e4066Sahrens * Timestamps: 4845fa9e4066Sahrens * vp - ctime|mtime updated 4846fa9e4066Sahrens */ 4847fa9e4066Sahrens /* ARGSUSED */ 4848fa9e4066Sahrens static int 4849fa9e4066Sahrens zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 4850fa9e4066Sahrens offset_t offset, cred_t *cr, caller_context_t *ct) 4851fa9e4066Sahrens { 4852fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4853fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4854fa9e4066Sahrens uint64_t off, len; 4855fa9e4066Sahrens int error; 4856fa9e4066Sahrens 48573cb34c60Sahrens ZFS_ENTER(zfsvfs); 48583cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4859fa9e4066Sahrens 4860fa9e4066Sahrens if (cmd != F_FREESP) { 4861fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4862be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4863fa9e4066Sahrens } 4864fa9e4066Sahrens 48652144b121SMarcel Telka /* 48662144b121SMarcel Telka * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 48672144b121SMarcel Telka * callers might not be able to detect properly that we are read-only, 48682144b121SMarcel Telka * so check it explicitly here. 
48692144b121SMarcel Telka */ 48702144b121SMarcel Telka if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 48712144b121SMarcel Telka ZFS_EXIT(zfsvfs); 48722144b121SMarcel Telka return (SET_ERROR(EROFS)); 48732144b121SMarcel Telka } 48742144b121SMarcel Telka 4875fa9e4066Sahrens if (error = convoff(vp, bfp, 0, offset)) { 4876fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4877fa9e4066Sahrens return (error); 4878fa9e4066Sahrens } 4879fa9e4066Sahrens 4880fa9e4066Sahrens if (bfp->l_len < 0) { 4881fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4882be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4883fa9e4066Sahrens } 4884fa9e4066Sahrens 4885fa9e4066Sahrens off = bfp->l_start; 4886104e2ed7Sperrin len = bfp->l_len; /* 0 means from off to end of file */ 4887104e2ed7Sperrin 4888cdb0ab79Smaybee error = zfs_freesp(zp, off, len, flag, TRUE); 4889fa9e4066Sahrens 489072102e74SBryan Cantrill if (error == 0 && off == 0 && len == 0) 489172102e74SBryan Cantrill vnevent_truncate(ZTOV(zp), ct); 489272102e74SBryan Cantrill 4893fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4894fa9e4066Sahrens return (error); 4895fa9e4066Sahrens } 4896fa9e4066Sahrens 4897da6c28aaSamw /*ARGSUSED*/ 4898fa9e4066Sahrens static int 4899da6c28aaSamw zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4900fa9e4066Sahrens { 4901fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4902fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4903f18faf3fSek uint32_t gen; 49040a586ceaSMark Shellenbaum uint64_t gen64; 4905fa9e4066Sahrens uint64_t object = zp->z_id; 4906fa9e4066Sahrens zfid_short_t *zfid; 49070a586ceaSMark Shellenbaum int size, i, error; 4908fa9e4066Sahrens 49093cb34c60Sahrens ZFS_ENTER(zfsvfs); 49103cb34c60Sahrens ZFS_VERIFY_ZP(zp); 49110a586ceaSMark Shellenbaum 49120a586ceaSMark Shellenbaum if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4913f3e6fb2fSMark Shellenbaum &gen64, sizeof (uint64_t))) != 0) { 4914f3e6fb2fSMark Shellenbaum ZFS_EXIT(zfsvfs); 49150a586ceaSMark Shellenbaum return (error); 4916f3e6fb2fSMark Shellenbaum } 49170a586ceaSMark 
Shellenbaum 49180a586ceaSMark Shellenbaum gen = (uint32_t)gen64; 4919fa9e4066Sahrens 4920fa9e4066Sahrens size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4921fa9e4066Sahrens if (fidp->fid_len < size) { 4922fa9e4066Sahrens fidp->fid_len = size; 49230f2dc02eSek ZFS_EXIT(zfsvfs); 4924be6fd75aSMatthew Ahrens return (SET_ERROR(ENOSPC)); 4925fa9e4066Sahrens } 4926fa9e4066Sahrens 4927fa9e4066Sahrens zfid = (zfid_short_t *)fidp; 4928fa9e4066Sahrens 4929fa9e4066Sahrens zfid->zf_len = size; 4930fa9e4066Sahrens 4931fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 4932fa9e4066Sahrens zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4933fa9e4066Sahrens 4934fa9e4066Sahrens /* Must have a non-zero generation number to distinguish from .zfs */ 4935fa9e4066Sahrens if (gen == 0) 4936fa9e4066Sahrens gen = 1; 4937fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 4938fa9e4066Sahrens zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4939fa9e4066Sahrens 4940fa9e4066Sahrens if (size == LONG_FID_LEN) { 4941fa9e4066Sahrens uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4942fa9e4066Sahrens zfid_long_t *zlfid; 4943fa9e4066Sahrens 4944fa9e4066Sahrens zlfid = (zfid_long_t *)fidp; 4945fa9e4066Sahrens 4946fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4947fa9e4066Sahrens zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4948fa9e4066Sahrens 4949fa9e4066Sahrens /* XXX - this should be the generation number for the objset */ 4950fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4951fa9e4066Sahrens zlfid->zf_setgen[i] = 0; 4952fa9e4066Sahrens } 4953fa9e4066Sahrens 4954fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4955fa9e4066Sahrens return (0); 4956fa9e4066Sahrens } 4957fa9e4066Sahrens 4958fa9e4066Sahrens static int 4959da6c28aaSamw zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4960da6c28aaSamw caller_context_t *ct) 4961fa9e4066Sahrens { 4962fa9e4066Sahrens znode_t *zp, *xzp; 4963fa9e4066Sahrens zfsvfs_t *zfsvfs; 
4964fa9e4066Sahrens zfs_dirlock_t *dl; 4965fa9e4066Sahrens int error; 4966fa9e4066Sahrens 4967fa9e4066Sahrens switch (cmd) { 4968fa9e4066Sahrens case _PC_LINK_MAX: 4969fa9e4066Sahrens *valp = ULONG_MAX; 4970fa9e4066Sahrens return (0); 4971fa9e4066Sahrens 4972fa9e4066Sahrens case _PC_FILESIZEBITS: 4973fa9e4066Sahrens *valp = 64; 4974fa9e4066Sahrens return (0); 4975fa9e4066Sahrens 4976fa9e4066Sahrens case _PC_XATTR_EXISTS: 4977fa9e4066Sahrens zp = VTOZ(vp); 4978fa9e4066Sahrens zfsvfs = zp->z_zfsvfs; 49793cb34c60Sahrens ZFS_ENTER(zfsvfs); 49803cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4981fa9e4066Sahrens *valp = 0; 4982fa9e4066Sahrens error = zfs_dirent_lock(&dl, zp, "", &xzp, 4983da6c28aaSamw ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 4984fa9e4066Sahrens if (error == 0) { 4985fa9e4066Sahrens zfs_dirent_unlock(dl); 4986fa9e4066Sahrens if (!zfs_dirempty(xzp)) 4987fa9e4066Sahrens *valp = 1; 4988fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 4989fa9e4066Sahrens } else if (error == ENOENT) { 4990fa9e4066Sahrens /* 4991fa9e4066Sahrens * If there aren't extended attributes, it's the 4992fa9e4066Sahrens * same as having zero of them. 
4993fa9e4066Sahrens */ 4994fa9e4066Sahrens error = 0; 4995fa9e4066Sahrens } 4996fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4997fa9e4066Sahrens return (error); 4998fa9e4066Sahrens 4999da6c28aaSamw case _PC_SATTR_ENABLED: 5000da6c28aaSamw case _PC_SATTR_EXISTS: 50019660e5cbSJanice Chang *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 5002da6c28aaSamw (vp->v_type == VREG || vp->v_type == VDIR); 5003da6c28aaSamw return (0); 5004da6c28aaSamw 5005e802abbdSTim Haley case _PC_ACCESS_FILTERING: 5006e802abbdSTim Haley *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 5007e802abbdSTim Haley vp->v_type == VDIR; 5008e802abbdSTim Haley return (0); 5009e802abbdSTim Haley 5010fa9e4066Sahrens case _PC_ACL_ENABLED: 5011fa9e4066Sahrens *valp = _ACL_ACE_ENABLED; 5012fa9e4066Sahrens return (0); 5013fa9e4066Sahrens 5014fa9e4066Sahrens case _PC_MIN_HOLE_SIZE: 5015fa9e4066Sahrens *valp = (ulong_t)SPA_MINBLOCKSIZE; 5016fa9e4066Sahrens return (0); 5017fa9e4066Sahrens 50183b862e9aSRoger A. Faulkner case _PC_TIMESTAMP_RESOLUTION: 50193b862e9aSRoger A. Faulkner /* nanosecond timestamp resolution */ 50203b862e9aSRoger A. Faulkner *valp = 1L; 50213b862e9aSRoger A. Faulkner return (0); 50223b862e9aSRoger A. Faulkner 5023fa9e4066Sahrens default: 5024da6c28aaSamw return (fs_pathconf(vp, cmd, valp, cr, ct)); 5025fa9e4066Sahrens } 5026fa9e4066Sahrens } 5027fa9e4066Sahrens 5028fa9e4066Sahrens /*ARGSUSED*/ 5029fa9e4066Sahrens static int 5030da6c28aaSamw zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5031da6c28aaSamw caller_context_t *ct) 5032fa9e4066Sahrens { 5033fa9e4066Sahrens znode_t *zp = VTOZ(vp); 5034fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5035fa9e4066Sahrens int error; 5036da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 5037fa9e4066Sahrens 50383cb34c60Sahrens ZFS_ENTER(zfsvfs); 50393cb34c60Sahrens ZFS_VERIFY_ZP(zp); 5040da6c28aaSamw error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5041fa9e4066Sahrens ZFS_EXIT(zfsvfs); 5042fa9e4066Sahrens 5043fa9e4066Sahrens return (error); 5044fa9e4066Sahrens } 5045fa9e4066Sahrens 5046fa9e4066Sahrens /*ARGSUSED*/ 5047fa9e4066Sahrens static int 5048da6c28aaSamw zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5049da6c28aaSamw caller_context_t *ct) 5050fa9e4066Sahrens { 5051fa9e4066Sahrens znode_t *zp = VTOZ(vp); 5052fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5053fa9e4066Sahrens int error; 5054da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 505555da60b9SMark J Musante zilog_t *zilog = zfsvfs->z_log; 5056fa9e4066Sahrens 50573cb34c60Sahrens ZFS_ENTER(zfsvfs); 50583cb34c60Sahrens ZFS_VERIFY_ZP(zp); 505955da60b9SMark J Musante 5060da6c28aaSamw error = zfs_setacl(zp, vsecp, skipaclchk, cr); 506155da60b9SMark J Musante 506255da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 50635002558fSNeil Perrin zil_commit(zilog, 0); 506455da60b9SMark J Musante 5065fa9e4066Sahrens ZFS_EXIT(zfsvfs); 5066fa9e4066Sahrens return (error); 5067fa9e4066Sahrens } 5068fa9e4066Sahrens 5069c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5070f7170741SWill Andrews * The smallest read we may consider to loan out an arcbuf. 5071f7170741SWill Andrews * This must be a power of 2. 5072c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5073c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int zcr_blksz_min = (1 << 10); /* 1K */ 5074f7170741SWill Andrews /* 5075f7170741SWill Andrews * If set to less than the file block size, allow loaning out of an 5076f7170741SWill Andrews * arcbuf for a partial block read. This must be a power of 2. 
5077f7170741SWill Andrews */ 5078c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int zcr_blksz_max = (1 << 17); /* 128K */ 5079c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5080c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /*ARGSUSED*/ 5081c242f9a0Schunli zhang - Sun Microsystems - Irvine United States static int 5082c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 5083c242f9a0Schunli zhang - Sun Microsystems - Irvine United States caller_context_t *ct) 5084c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 5085c242f9a0Schunli zhang - Sun Microsystems - Irvine United States znode_t *zp = VTOZ(vp); 5086c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5087c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int max_blksz = zfsvfs->z_max_blksz; 5088c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio_t *uio = &xuio->xu_uio; 5089c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ssize_t size = uio->uio_resid; 5090c242f9a0Schunli zhang - Sun Microsystems - Irvine United States offset_t offset = uio->uio_loffset; 5091c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz; 5092c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int fullblk, i; 5093c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 5094c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ssize_t maxsize; 5095c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int preamble, postamble; 5096c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5097c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (xuio->xu_type != UIOTYPE_ZEROCOPY) 5098be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5099c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 
5100c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_ENTER(zfsvfs); 5101c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_VERIFY_ZP(zp); 5102c242f9a0Schunli zhang - Sun Microsystems - Irvine United States switch (ioflag) { 5103c242f9a0Schunli zhang - Sun Microsystems - Irvine United States case UIO_WRITE: 5104c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5105c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for write if write size is bigger than 5106c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * max_blksz, and the file's block size is also max_blksz. 5107c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5108c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = max_blksz; 5109c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size < blksz || zp->z_blksz != blksz) { 5110c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5111be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5112c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5113c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5114c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Caller requests buffers for write before knowing where the 5115c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * write offset might be (e.g. NFS TCP write). 
5116c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5117c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (offset == -1) { 5118c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = 0; 5119c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 5120c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = P2PHASE(offset, blksz); 5121c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (preamble) { 5122c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = blksz - preamble; 5123c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size -= preamble; 5124c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5125c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5126c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5127c242f9a0Schunli zhang - Sun Microsystems - Irvine United States postamble = P2PHASE(size, blksz); 5128c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size -= postamble; 5129c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5130c242f9a0Schunli zhang - Sun Microsystems - Irvine United States fullblk = size / blksz; 5131570de38fSSurya Prakki (void) dmu_xuio_init(xuio, 5132c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (preamble != 0) + fullblk + (postamble != 0)); 5133c242f9a0Schunli zhang - Sun Microsystems - Irvine United States DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5134c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int, postamble, int, 5135c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (preamble != 0) + fullblk + (postamble != 0)); 5136c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5137c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5138c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Have to fix iov base/len for partial buffers. 
They 5139c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * currently represent full arc_buf's. 5140c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5141c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (preamble) { 5142c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* data begins in the middle of the arc_buf */ 51430a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 51440a586ceaSMark Shellenbaum blksz); 5145c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5146570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 5147570de38fSSurya Prakki blksz - preamble, preamble); 5148c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5149c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5150c242f9a0Schunli zhang - Sun Microsystems - Irvine United States for (i = 0; i < fullblk; i++) { 51510a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 51520a586ceaSMark Shellenbaum blksz); 5153c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5154570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5155c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5156c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5157c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (postamble) { 5158c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* data ends in the middle of the arc_buf */ 51590a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 51600a586ceaSMark Shellenbaum blksz); 5161c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5162570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5163c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5164c242f9a0Schunli zhang - Sun Microsystems - Irvine United States break; 5165c242f9a0Schunli zhang - Sun 
Microsystems - Irvine United States case UIO_READ: 5166c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5167c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for read if the read size is larger than 5168c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * the current file block size. Block alignment is not 5169c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * considered. Partial arc_buf will be loaned out for read. 5170c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5171c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zp->z_blksz; 5172c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz < zcr_blksz_min) 5173c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zcr_blksz_min; 5174c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz > zcr_blksz_max) 5175c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zcr_blksz_max; 5176c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* avoid potential complexity of dealing with it */ 5177c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz > max_blksz) { 5178c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5179be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5180c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5181c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 51820a586ceaSMark Shellenbaum maxsize = zp->z_size - uio->uio_loffset; 5183c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size > maxsize) 5184c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size = maxsize; 5185c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5186c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size < blksz || vn_has_cached_data(vp)) { 5187c242f9a0Schunli zhang - Sun Microsystems - 
Irvine United States ZFS_EXIT(zfsvfs); 5188be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5189c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5190c242f9a0Schunli zhang - Sun Microsystems - Irvine United States break; 5191c242f9a0Schunli zhang - Sun Microsystems - Irvine United States default: 5192c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5193be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5194c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5195c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5196c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio->uio_extflg = UIO_XUIO; 5197c242f9a0Schunli zhang - Sun Microsystems - Irvine United States XUIO_XUZC_RW(xuio) = ioflag; 5198c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5199c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (0); 5200c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5201c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5202c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /*ARGSUSED*/ 5203c242f9a0Schunli zhang - Sun Microsystems - Irvine United States static int 5204c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5205c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 5206c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int i; 5207c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 5208c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int ioflag = XUIO_XUZC_RW(xuio); 5209c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5210c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5211c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5212c242f9a0Schunli zhang 
- Sun Microsystems - Irvine United States i = dmu_xuio_cnt(xuio); 5213c242f9a0Schunli zhang - Sun Microsystems - Irvine United States while (i-- > 0) { 5214c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = dmu_xuio_arcbuf(xuio, i); 5215c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5216c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * if abuf == NULL, it must be a write buffer 5217c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * that has been returned in zfs_write(). 5218c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5219c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (abuf) 5220c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_return_arcbuf(abuf); 5221c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf || ioflag == UIO_WRITE); 5222c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5223c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5224c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_xuio_fini(xuio); 5225c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (0); 5226c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5227c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5228fa9e4066Sahrens /* 5229fa9e4066Sahrens * Predeclare these here so that the compiler assumes that 5230fa9e4066Sahrens * this is an "old style" function declaration that does 5231fa9e4066Sahrens * not include arguments => we won't get type mismatch errors 5232fa9e4066Sahrens * in the initializations that follow. 
5233fa9e4066Sahrens */ 5234fa9e4066Sahrens static int zfs_inval(); 5235fa9e4066Sahrens static int zfs_isdir(); 5236fa9e4066Sahrens 5237fa9e4066Sahrens static int 5238fa9e4066Sahrens zfs_inval() 5239fa9e4066Sahrens { 5240be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5241fa9e4066Sahrens } 5242fa9e4066Sahrens 5243fa9e4066Sahrens static int 5244fa9e4066Sahrens zfs_isdir() 5245fa9e4066Sahrens { 5246be6fd75aSMatthew Ahrens return (SET_ERROR(EISDIR)); 5247fa9e4066Sahrens } 5248fa9e4066Sahrens /* 5249fa9e4066Sahrens * Directory vnode operations template 5250fa9e4066Sahrens */ 5251fa9e4066Sahrens vnodeops_t *zfs_dvnodeops; 5252fa9e4066Sahrens const fs_operation_def_t zfs_dvnodeops_template[] = { 5253aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5254aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5255aa59c4cbSrsb VOPNAME_READ, { .error = zfs_isdir }, 5256aa59c4cbSrsb VOPNAME_WRITE, { .error = zfs_isdir }, 5257aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5258aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5259aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5260aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5261aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5262aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 5263aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5264aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 5265aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5266aa59c4cbSrsb VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5267aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5268aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5269aa59c4cbSrsb VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5270aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5271aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5272aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5273aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 
5274aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5275aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5276aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 52774bb73804SMatthew Ahrens VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5278aa59c4cbSrsb NULL, NULL 5279fa9e4066Sahrens }; 5280fa9e4066Sahrens 5281fa9e4066Sahrens /* 5282fa9e4066Sahrens * Regular file vnode operations template 5283fa9e4066Sahrens */ 5284fa9e4066Sahrens vnodeops_t *zfs_fvnodeops; 5285fa9e4066Sahrens const fs_operation_def_t zfs_fvnodeops_template[] = { 5286aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5287aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5288aa59c4cbSrsb VOPNAME_READ, { .vop_read = zfs_read }, 5289aa59c4cbSrsb VOPNAME_WRITE, { .vop_write = zfs_write }, 5290aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5291aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5292aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5293aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5294aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5295aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5296aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5297aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5298aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5299aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5300aa59c4cbSrsb VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5301aa59c4cbSrsb VOPNAME_SPACE, { .vop_space = zfs_space }, 5302aa59c4cbSrsb VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5303aa59c4cbSrsb VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 5304aa59c4cbSrsb VOPNAME_MAP, { .vop_map = zfs_map }, 5305aa59c4cbSrsb VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5306aa59c4cbSrsb VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5307aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5308aa59c4cbSrsb VOPNAME_GETSECATTR, { 
.vop_getsecattr = zfs_getsecattr }, 5309aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5310aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 53114bb73804SMatthew Ahrens VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 53124bb73804SMatthew Ahrens VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5313aa59c4cbSrsb NULL, NULL 5314fa9e4066Sahrens }; 5315fa9e4066Sahrens 5316fa9e4066Sahrens /* 5317fa9e4066Sahrens * Symbolic link vnode operations template 5318fa9e4066Sahrens */ 5319fa9e4066Sahrens vnodeops_t *zfs_symvnodeops; 5320fa9e4066Sahrens const fs_operation_def_t zfs_symvnodeops_template[] = { 5321aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5322aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5323aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5324aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5325aa59c4cbSrsb VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5326aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5327aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5328aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5329aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5330aa59c4cbSrsb NULL, NULL 5331fa9e4066Sahrens }; 5332fa9e4066Sahrens 5333743a77edSAlan Wright /* 5334743a77edSAlan Wright * special share hidden files vnode operations template 5335743a77edSAlan Wright */ 5336743a77edSAlan Wright vnodeops_t *zfs_sharevnodeops; 5337743a77edSAlan Wright const fs_operation_def_t zfs_sharevnodeops_template[] = { 5338743a77edSAlan Wright VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5339743a77edSAlan Wright VOPNAME_ACCESS, { .vop_access = zfs_access }, 5340743a77edSAlan Wright VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5341743a77edSAlan Wright VOPNAME_FID, { .vop_fid = zfs_fid }, 5342743a77edSAlan Wright VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5343743a77edSAlan Wright VOPNAME_GETSECATTR, { 
.vop_getsecattr = zfs_getsecattr }, 5344743a77edSAlan Wright VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5345743a77edSAlan Wright VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5346743a77edSAlan Wright NULL, NULL 5347743a77edSAlan Wright }; 5348743a77edSAlan Wright 5349fa9e4066Sahrens /* 5350fa9e4066Sahrens * Extended attribute directory vnode operations template 5351f7170741SWill Andrews * 5352f7170741SWill Andrews * This template is identical to the directory vnodes 5353f7170741SWill Andrews * operation template except for restricted operations: 5354f7170741SWill Andrews * VOP_MKDIR() 5355f7170741SWill Andrews * VOP_SYMLINK() 5356f7170741SWill Andrews * 5357fa9e4066Sahrens * Note that there are other restrictions embedded in: 5358fa9e4066Sahrens * zfs_create() - restrict type to VREG 5359fa9e4066Sahrens * zfs_link() - no links into/out of attribute space 5360fa9e4066Sahrens * zfs_rename() - no moves into/out of attribute space 5361fa9e4066Sahrens */ 5362fa9e4066Sahrens vnodeops_t *zfs_xdvnodeops; 5363fa9e4066Sahrens const fs_operation_def_t zfs_xdvnodeops_template[] = { 5364aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5365aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5366aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5367aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5368aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5369aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5370aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5371aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 5372aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5373aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 5374aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5375aa59c4cbSrsb VOPNAME_MKDIR, { .error = zfs_inval }, 5376aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5377aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5378aa59c4cbSrsb 
VOPNAME_SYMLINK, { .error = zfs_inval }, 5379aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5380aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5381aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5382aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5383aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5384aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5385aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5386aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5387aa59c4cbSrsb NULL, NULL 5388fa9e4066Sahrens }; 5389fa9e4066Sahrens 5390fa9e4066Sahrens /* 5391fa9e4066Sahrens * Error vnode operations template 5392fa9e4066Sahrens */ 5393fa9e4066Sahrens vnodeops_t *zfs_evnodeops; 5394fa9e4066Sahrens const fs_operation_def_t zfs_evnodeops_template[] = { 5395aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5396aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5397aa59c4cbSrsb NULL, NULL 5398fa9e4066Sahrens }; 5399