1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5736b9155Smarks * Common Development and Distribution License (the "License"). 6736b9155Smarks * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 211c17160aSKevin Crowe 22fa9e4066Sahrens /* 23d39ee142SMark Shellenbaum * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24ade42b55SSebastien Roy * Copyright (c) 2012, 2017 by Delphix. All rights reserved. 25c3d26abcSMatthew Ahrens * Copyright (c) 2014 Integros [integros.com] 2654207fd2SJerry Jelinek * Copyright 2015 Joyent, Inc. 271c17160aSKevin Crowe * Copyright 2017 Nexenta Systems, Inc. 
28fa9e4066Sahrens */ 29fa9e4066Sahrens 3075c76197Speteh /* Portions Copyright 2007 Jeremy Teo */ 3155da60b9SMark J Musante /* Portions Copyright 2010 Robert Milkowski */ 3275c76197Speteh 33fa9e4066Sahrens #include <sys/types.h> 34fa9e4066Sahrens #include <sys/param.h> 35fa9e4066Sahrens #include <sys/time.h> 36fa9e4066Sahrens #include <sys/systm.h> 37fa9e4066Sahrens #include <sys/sysmacros.h> 38fa9e4066Sahrens #include <sys/resource.h> 39fa9e4066Sahrens #include <sys/vfs.h> 40aa59c4cbSrsb #include <sys/vfs_opreg.h> 41fa9e4066Sahrens #include <sys/vnode.h> 42fa9e4066Sahrens #include <sys/file.h> 43fa9e4066Sahrens #include <sys/stat.h> 44fa9e4066Sahrens #include <sys/kmem.h> 45fa9e4066Sahrens #include <sys/taskq.h> 46fa9e4066Sahrens #include <sys/uio.h> 47fa9e4066Sahrens #include <sys/vmsystm.h> 48fa9e4066Sahrens #include <sys/atomic.h> 4944eda4d7Smaybee #include <sys/vm.h> 50fa9e4066Sahrens #include <vm/seg_vn.h> 51fa9e4066Sahrens #include <vm/pvn.h> 52fa9e4066Sahrens #include <vm/as.h> 530fab61baSJonathan W Adams #include <vm/kpm.h> 540fab61baSJonathan W Adams #include <vm/seg_kpm.h> 55fa9e4066Sahrens #include <sys/mman.h> 56fa9e4066Sahrens #include <sys/pathname.h> 57fa9e4066Sahrens #include <sys/cmn_err.h> 58fa9e4066Sahrens #include <sys/errno.h> 59fa9e4066Sahrens #include <sys/unistd.h> 60fa9e4066Sahrens #include <sys/zfs_dir.h> 61fa9e4066Sahrens #include <sys/zfs_acl.h> 62fa9e4066Sahrens #include <sys/zfs_ioctl.h> 63fa9e4066Sahrens #include <sys/fs/zfs.h> 64fa9e4066Sahrens #include <sys/dmu.h> 6555da60b9SMark J Musante #include <sys/dmu_objset.h> 66fa9e4066Sahrens #include <sys/spa.h> 67fa9e4066Sahrens #include <sys/txg.h> 68fa9e4066Sahrens #include <sys/dbuf.h> 69fa9e4066Sahrens #include <sys/zap.h> 700a586ceaSMark Shellenbaum #include <sys/sa.h> 71fa9e4066Sahrens #include <sys/dirent.h> 72fa9e4066Sahrens #include <sys/policy.h> 73fa9e4066Sahrens #include <sys/sunddi.h> 74fa9e4066Sahrens #include <sys/filio.h> 75c1ce5987SMark Shellenbaum #include <sys/sid.h> 
76fa9e4066Sahrens #include "fs/fs_subr.h" 77fa9e4066Sahrens #include <sys/zfs_ctldir.h> 78da6c28aaSamw #include <sys/zfs_fuid.h> 790a586ceaSMark Shellenbaum #include <sys/zfs_sa.h> 80033f9833Sek #include <sys/dnlc.h> 81104e2ed7Sperrin #include <sys/zfs_rlock.h> 82da6c28aaSamw #include <sys/extdirent.h> 83da6c28aaSamw #include <sys/kidmap.h> 8467dbe2beSCasper H.S. Dik #include <sys/cred.h> 85b38f0970Sck #include <sys/attr.h> 86*1271e4b1SPrakash Surya #include <sys/zil.h> 87fa9e4066Sahrens 88fa9e4066Sahrens /* 89fa9e4066Sahrens * Programming rules. 90fa9e4066Sahrens * 91fa9e4066Sahrens * Each vnode op performs some logical unit of work. To do this, the ZPL must 92fa9e4066Sahrens * properly lock its in-core state, create a DMU transaction, do the work, 93fa9e4066Sahrens * record this work in the intent log (ZIL), commit the DMU transaction, 94da6c28aaSamw * and wait for the intent log to commit if it is a synchronous operation. 95da6c28aaSamw * Moreover, the vnode ops must work in both normal and log replay context. 96fa9e4066Sahrens * The ordering of events is important to avoid deadlocks and references 97fa9e4066Sahrens * to freed memory. The example below illustrates the following Big Rules: 98fa9e4066Sahrens * 99f7170741SWill Andrews * (1) A check must be made in each zfs thread for a mounted file system. 1003cb34c60Sahrens * This is done avoiding races using ZFS_ENTER(zfsvfs). 101f7170741SWill Andrews * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 102f7170741SWill Andrews * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 103f7170741SWill Andrews * can return EIO from the calling function. 104fa9e4066Sahrens * 105fa9e4066Sahrens * (2) VN_RELE() should always be the last thing except for zil_commit() 106b19a79ecSperrin * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 107fa9e4066Sahrens * First, if it's the last reference, the vnode/znode 108fa9e4066Sahrens * can be freed, so the zp may point to freed memory. 
Second, the last
 *	reference will call zfs_zinactive(), which may induce a lot of work --
 *	pushing cached pages (which acquires range locks) and syncing out
 *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
 *	which could deadlock the system if you were already holding one.
 *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 *
 *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
 *	as they can span dmu_tx_assign() calls.
 *
 *  (4)	If ZPL locks are held, pass TXG_NOWAIT as the second argument to
 *	dmu_tx_assign().  This is critical because we don't want to block
 *	while holding locks.
 *
 *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
 *	reduces lock contention and CPU usage when we must wait (note that if
 *	throughput is constrained by the storage, nearly every transaction
 *	must wait).
 *
 *	Note, in particular, that if a lock is sometimes acquired before
 *	the tx assigns, and sometimes after (e.g. z_lock), then failing
 *	to use a non-blocking assign can deadlock the system.  The scenario:
 *
 *	Thread A has grabbed a lock before calling dmu_tx_assign().
 *	Thread B is in an already-assigned tx, and blocks for this lock.
 *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If a non-blocking dmu_tx_assign() returns ERESTART, then drop all
 *	locks, call dmu_tx_wait(), and try again.  On subsequent
 *	calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
 *	to indicate that this operation has already called dmu_tx_wait().
 *	This will ensure that we don't retry forever, waiting a short bit
 *	each time.
 *
 *  (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks.  This ensures that the ordering of events
 *	in the intent log matches the order in which they actually occurred.
 *	During ZIL replay the zfs_log_* functions will update the sequence
 *	number to indicate the zil transaction has replayed.
 *
 *  (6)	At the end of each vnode op, the DMU tx must always commit,
 *	regardless of whether there were any errors.
 *
 *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
 *	to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	ZFS_ENTER(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lock(&dl, ...)	// lock directory entry (may VN_HOLD())
 *	rw_enter(...);			// grab any other locks you need
 *	tx = dmu_tx_create(...);	// get DMU tx
 *	dmu_tx_hold_*();		// hold each object you might modify
 *	error = dmu_tx_assign(tx, waited ?
TXG_WAITED : TXG_NOWAIT); 164fa9e4066Sahrens * if (error) { 165fa9e4066Sahrens * rw_exit(...); // drop locks 166fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 167fa9e4066Sahrens * VN_RELE(...); // release held vnodes 1681209a471SNeil Perrin * if (error == ERESTART) { 16969962b56SMatthew Ahrens * waited = B_TRUE; 1708a2f1b91Sahrens * dmu_tx_wait(tx); 1718a2f1b91Sahrens * dmu_tx_abort(tx); 172fa9e4066Sahrens * goto top; 173fa9e4066Sahrens * } 1748a2f1b91Sahrens * dmu_tx_abort(tx); // abort DMU tx 175fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 176fa9e4066Sahrens * return (error); // really out of space 177fa9e4066Sahrens * } 178fa9e4066Sahrens * error = do_real_work(); // do whatever this VOP does 179fa9e4066Sahrens * if (error == 0) 180b19a79ecSperrin * zfs_log_*(...); // on success, make ZIL entry 181fa9e4066Sahrens * dmu_tx_commit(tx); // commit DMU tx -- error or not 182fa9e4066Sahrens * rw_exit(...); // drop locks 183fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 184fa9e4066Sahrens * VN_RELE(...); // release held vnodes 1855002558fSNeil Perrin * zil_commit(zilog, foid); // synchronous when necessary 186fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 187fa9e4066Sahrens * return (error); // done, report error 188fa9e4066Sahrens */ 1893cb34c60Sahrens 190fa9e4066Sahrens /* ARGSUSED */ 191fa9e4066Sahrens static int 192da6c28aaSamw zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 193fa9e4066Sahrens { 19467bd71c6Sperrin znode_t *zp = VTOZ(*vpp); 195b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 196b614fdaaSMark Shellenbaum 197b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 198b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 19967bd71c6Sperrin 2000a586ceaSMark Shellenbaum if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 201da6c28aaSamw ((flag & FAPPEND) == 0)) { 202b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 203be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 204da6c28aaSamw } 
205da6c28aaSamw 206da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 207da6c28aaSamw ZTOV(zp)->v_type == VREG && 2080a586ceaSMark Shellenbaum !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 209b614fdaaSMark Shellenbaum if (fs_vscan(*vpp, cr, 0) != 0) { 210b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 211be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 212b614fdaaSMark Shellenbaum } 213b614fdaaSMark Shellenbaum } 214da6c28aaSamw 21567bd71c6Sperrin /* Keep a count of the synchronous opens in the znode */ 21667bd71c6Sperrin if (flag & (FSYNC | FDSYNC)) 21767bd71c6Sperrin atomic_inc_32(&zp->z_sync_cnt); 218da6c28aaSamw 219b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 220fa9e4066Sahrens return (0); 221fa9e4066Sahrens } 222fa9e4066Sahrens 223fa9e4066Sahrens /* ARGSUSED */ 224fa9e4066Sahrens static int 225da6c28aaSamw zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 226da6c28aaSamw caller_context_t *ct) 227fa9e4066Sahrens { 22867bd71c6Sperrin znode_t *zp = VTOZ(vp); 229b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 230b614fdaaSMark Shellenbaum 231ee8143cbSChris Kirby /* 232ee8143cbSChris Kirby * Clean up any locks held by this process on the vp. 
233ee8143cbSChris Kirby */ 234ee8143cbSChris Kirby cleanlocks(vp, ddi_get_pid(), 0); 235ee8143cbSChris Kirby cleanshares(vp, ddi_get_pid()); 236ee8143cbSChris Kirby 237b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 238b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 23967bd71c6Sperrin 24067bd71c6Sperrin /* Decrement the synchronous opens in the znode */ 241ecb72030Sperrin if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 24267bd71c6Sperrin atomic_dec_32(&zp->z_sync_cnt); 24367bd71c6Sperrin 244da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 245da6c28aaSamw ZTOV(zp)->v_type == VREG && 2460a586ceaSMark Shellenbaum !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 247da6c28aaSamw VERIFY(fs_vscan(vp, cr, 1) == 0); 248da6c28aaSamw 249b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 250fa9e4066Sahrens return (0); 251fa9e4066Sahrens } 252fa9e4066Sahrens 253fa9e4066Sahrens /* 254fa9e4066Sahrens * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 255fa9e4066Sahrens * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
256fa9e4066Sahrens */ 257fa9e4066Sahrens static int 258fa9e4066Sahrens zfs_holey(vnode_t *vp, int cmd, offset_t *off) 259fa9e4066Sahrens { 260fa9e4066Sahrens znode_t *zp = VTOZ(vp); 261fa9e4066Sahrens uint64_t noff = (uint64_t)*off; /* new offset */ 262fa9e4066Sahrens uint64_t file_sz; 263fa9e4066Sahrens int error; 264fa9e4066Sahrens boolean_t hole; 265fa9e4066Sahrens 2660a586ceaSMark Shellenbaum file_sz = zp->z_size; 267fa9e4066Sahrens if (noff >= file_sz) { 268be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 269fa9e4066Sahrens } 270fa9e4066Sahrens 271fa9e4066Sahrens if (cmd == _FIO_SEEK_HOLE) 272fa9e4066Sahrens hole = B_TRUE; 273fa9e4066Sahrens else 274fa9e4066Sahrens hole = B_FALSE; 275fa9e4066Sahrens 276fa9e4066Sahrens error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 277fa9e4066Sahrens 2780fbc0cd0SMatthew Ahrens if (error == ESRCH) 279be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 2800fbc0cd0SMatthew Ahrens 2810fbc0cd0SMatthew Ahrens /* 2820fbc0cd0SMatthew Ahrens * We could find a hole that begins after the logical end-of-file, 2830fbc0cd0SMatthew Ahrens * because dmu_offset_next() only works on whole blocks. If the 2840fbc0cd0SMatthew Ahrens * EOF falls mid-block, then indicate that the "virtual hole" 2850fbc0cd0SMatthew Ahrens * at the end of the file begins at the logical EOF, rather than 2860fbc0cd0SMatthew Ahrens * at the end of the last block. 
2870fbc0cd0SMatthew Ahrens */ 2880fbc0cd0SMatthew Ahrens if (noff > file_sz) { 2890fbc0cd0SMatthew Ahrens ASSERT(hole); 2900fbc0cd0SMatthew Ahrens noff = file_sz; 291fa9e4066Sahrens } 292fa9e4066Sahrens 293fa9e4066Sahrens if (noff < *off) 294fa9e4066Sahrens return (error); 295fa9e4066Sahrens *off = noff; 296fa9e4066Sahrens return (error); 297fa9e4066Sahrens } 298fa9e4066Sahrens 299fa9e4066Sahrens /* ARGSUSED */ 300fa9e4066Sahrens static int 301fa9e4066Sahrens zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, 302da6c28aaSamw int *rvalp, caller_context_t *ct) 303fa9e4066Sahrens { 304fa9e4066Sahrens offset_t off; 3052bcf0248SMax Grossman offset_t ndata; 3062bcf0248SMax Grossman dmu_object_info_t doi; 307fa9e4066Sahrens int error; 308fa9e4066Sahrens zfsvfs_t *zfsvfs; 309f18faf3fSek znode_t *zp; 310fa9e4066Sahrens 311fa9e4066Sahrens switch (com) { 312ecb72030Sperrin case _FIOFFS: 3132bcf0248SMax Grossman { 314fa9e4066Sahrens return (zfs_sync(vp->v_vfsp, 0, cred)); 315fa9e4066Sahrens 316ea8dc4b6Seschrock /* 317ea8dc4b6Seschrock * The following two ioctls are used by bfu. Faking out, 318ea8dc4b6Seschrock * necessary to avoid bfu errors. 
319ea8dc4b6Seschrock */ 3202bcf0248SMax Grossman } 321ecb72030Sperrin case _FIOGDIO: 322ecb72030Sperrin case _FIOSDIO: 3232bcf0248SMax Grossman { 324ea8dc4b6Seschrock return (0); 3252bcf0248SMax Grossman } 326ea8dc4b6Seschrock 327ecb72030Sperrin case _FIO_SEEK_DATA: 328ecb72030Sperrin case _FIO_SEEK_HOLE: 3292bcf0248SMax Grossman { 330fa9e4066Sahrens if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 331be6fd75aSMatthew Ahrens return (SET_ERROR(EFAULT)); 332fa9e4066Sahrens 333f18faf3fSek zp = VTOZ(vp); 334f18faf3fSek zfsvfs = zp->z_zfsvfs; 3353cb34c60Sahrens ZFS_ENTER(zfsvfs); 3363cb34c60Sahrens ZFS_VERIFY_ZP(zp); 337fa9e4066Sahrens 338fa9e4066Sahrens /* offset parameter is in/out */ 339fa9e4066Sahrens error = zfs_holey(vp, com, &off); 340fa9e4066Sahrens ZFS_EXIT(zfsvfs); 341fa9e4066Sahrens if (error) 342fa9e4066Sahrens return (error); 343fa9e4066Sahrens if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 344be6fd75aSMatthew Ahrens return (SET_ERROR(EFAULT)); 345fa9e4066Sahrens return (0); 346fa9e4066Sahrens } 3472bcf0248SMax Grossman case _FIO_COUNT_FILLED: 3482bcf0248SMax Grossman { 3492bcf0248SMax Grossman /* 3502bcf0248SMax Grossman * _FIO_COUNT_FILLED adds a new ioctl command which 3512bcf0248SMax Grossman * exposes the number of filled blocks in a 3522bcf0248SMax Grossman * ZFS object. 3532bcf0248SMax Grossman */ 3542bcf0248SMax Grossman zp = VTOZ(vp); 3552bcf0248SMax Grossman zfsvfs = zp->z_zfsvfs; 3562bcf0248SMax Grossman ZFS_ENTER(zfsvfs); 3572bcf0248SMax Grossman ZFS_VERIFY_ZP(zp); 3582bcf0248SMax Grossman 3592bcf0248SMax Grossman /* 3602bcf0248SMax Grossman * Wait for all dirty blocks for this object 3612bcf0248SMax Grossman * to get synced out to disk, and the DMU info 3622bcf0248SMax Grossman * updated. 
3632bcf0248SMax Grossman */ 3642bcf0248SMax Grossman error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id); 3652bcf0248SMax Grossman if (error) { 3662bcf0248SMax Grossman ZFS_EXIT(zfsvfs); 3672bcf0248SMax Grossman return (error); 3682bcf0248SMax Grossman } 3692bcf0248SMax Grossman 3702bcf0248SMax Grossman /* 3712bcf0248SMax Grossman * Retrieve fill count from DMU object. 3722bcf0248SMax Grossman */ 3732bcf0248SMax Grossman error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi); 3742bcf0248SMax Grossman if (error) { 3752bcf0248SMax Grossman ZFS_EXIT(zfsvfs); 3762bcf0248SMax Grossman return (error); 3772bcf0248SMax Grossman } 3782bcf0248SMax Grossman 3792bcf0248SMax Grossman ndata = doi.doi_fill_count; 3802bcf0248SMax Grossman 3812bcf0248SMax Grossman ZFS_EXIT(zfsvfs); 3822bcf0248SMax Grossman if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag)) 3832bcf0248SMax Grossman return (SET_ERROR(EFAULT)); 3842bcf0248SMax Grossman return (0); 3852bcf0248SMax Grossman } 3862bcf0248SMax Grossman } 387be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTTY)); 388fa9e4066Sahrens } 389fa9e4066Sahrens 3900fab61baSJonathan W Adams /* 3910fab61baSJonathan W Adams * Utility functions to map and unmap a single physical page. These 3920fab61baSJonathan W Adams * are used to manage the mappable copies of ZFS file data, and therefore 3930fab61baSJonathan W Adams * do not update ref/mod bits. 3940fab61baSJonathan W Adams */ 3950fab61baSJonathan W Adams caddr_t 3960fab61baSJonathan W Adams zfs_map_page(page_t *pp, enum seg_rw rw) 3970fab61baSJonathan W Adams { 3980fab61baSJonathan W Adams if (kpm_enable) 3990fab61baSJonathan W Adams return (hat_kpm_mapin(pp, 0)); 4000fab61baSJonathan W Adams ASSERT(rw == S_READ || rw == S_WRITE); 4010fab61baSJonathan W Adams return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? 
PROT_WRITE : 0), 4020fab61baSJonathan W Adams (caddr_t)-1)); 4030fab61baSJonathan W Adams } 4040fab61baSJonathan W Adams 4050fab61baSJonathan W Adams void 4060fab61baSJonathan W Adams zfs_unmap_page(page_t *pp, caddr_t addr) 4070fab61baSJonathan W Adams { 4080fab61baSJonathan W Adams if (kpm_enable) { 4090fab61baSJonathan W Adams hat_kpm_mapout(pp, 0, addr); 4100fab61baSJonathan W Adams } else { 4110fab61baSJonathan W Adams ppmapout(addr); 4120fab61baSJonathan W Adams } 4130fab61baSJonathan W Adams } 4140fab61baSJonathan W Adams 415fa9e4066Sahrens /* 416fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data synchronized 417fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 418fa9e4066Sahrens * 419fa9e4066Sahrens * On Write: If we find a memory mapped page, we write to *both* 420fa9e4066Sahrens * the page and the dmu buffer. 421fa9e4066Sahrens */ 422ac05c741SMark Maybee static void 423ac05c741SMark Maybee update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) 424fa9e4066Sahrens { 425ac05c741SMark Maybee int64_t off; 426fa9e4066Sahrens 427fa9e4066Sahrens off = start & PAGEOFFSET; 428fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 429fa9e4066Sahrens page_t *pp; 430ac05c741SMark Maybee uint64_t nbytes = MIN(PAGESIZE - off, len); 431fa9e4066Sahrens 432fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 433fa9e4066Sahrens caddr_t va; 434fa9e4066Sahrens 4350fab61baSJonathan W Adams va = zfs_map_page(pp, S_WRITE); 4367bfdf011SNeil Perrin (void) dmu_read(os, oid, start+off, nbytes, va+off, 4377bfdf011SNeil Perrin DMU_READ_PREFETCH); 4380fab61baSJonathan W Adams zfs_unmap_page(pp, va); 439fa9e4066Sahrens page_unlock(pp); 440fa9e4066Sahrens } 441ac05c741SMark Maybee len -= nbytes; 442fa9e4066Sahrens off = 0; 443fa9e4066Sahrens } 444fa9e4066Sahrens } 445fa9e4066Sahrens 446fa9e4066Sahrens /* 447fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data 
synchronized 448fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 449fa9e4066Sahrens * 450fa9e4066Sahrens * On Read: We "read" preferentially from memory mapped pages, 451fa9e4066Sahrens * else we default from the dmu buffer. 452fa9e4066Sahrens * 453fa9e4066Sahrens * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 454f7170741SWill Andrews * the file is memory mapped. 455fa9e4066Sahrens */ 456fa9e4066Sahrens static int 457feb08c6bSbillm mappedread(vnode_t *vp, int nbytes, uio_t *uio) 458fa9e4066Sahrens { 459feb08c6bSbillm znode_t *zp = VTOZ(vp); 460feb08c6bSbillm int64_t start, off; 461fa9e4066Sahrens int len = nbytes; 462fa9e4066Sahrens int error = 0; 463fa9e4066Sahrens 464fa9e4066Sahrens start = uio->uio_loffset; 465fa9e4066Sahrens off = start & PAGEOFFSET; 466fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 467fa9e4066Sahrens page_t *pp; 468feb08c6bSbillm uint64_t bytes = MIN(PAGESIZE - off, len); 469fa9e4066Sahrens 470fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 471fa9e4066Sahrens caddr_t va; 472fa9e4066Sahrens 4730fab61baSJonathan W Adams va = zfs_map_page(pp, S_READ); 474fa9e4066Sahrens error = uiomove(va + off, bytes, UIO_READ, uio); 4750fab61baSJonathan W Adams zfs_unmap_page(pp, va); 476fa9e4066Sahrens page_unlock(pp); 477fa9e4066Sahrens } else { 478f8554bb9SMatthew Ahrens error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 479f8554bb9SMatthew Ahrens uio, bytes); 480fa9e4066Sahrens } 481fa9e4066Sahrens len -= bytes; 482fa9e4066Sahrens off = 0; 483fa9e4066Sahrens if (error) 484fa9e4066Sahrens break; 485fa9e4066Sahrens } 486fa9e4066Sahrens return (error); 487fa9e4066Sahrens } 488fa9e4066Sahrens 489feb08c6bSbillm offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 490fa9e4066Sahrens 491fa9e4066Sahrens /* 492fa9e4066Sahrens * Read bytes from specified file into supplied buffer. 493fa9e4066Sahrens * 494fa9e4066Sahrens * IN: vp - vnode of file to be read from. 
495fa9e4066Sahrens * uio - structure supplying read location, range info, 496fa9e4066Sahrens * and return buffer. 497fa9e4066Sahrens * ioflag - SYNC flags; used to provide FRSYNC semantics. 498fa9e4066Sahrens * cr - credentials of caller. 499da6c28aaSamw * ct - caller context 500fa9e4066Sahrens * 501fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 502fa9e4066Sahrens * 503f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 504fa9e4066Sahrens * 505fa9e4066Sahrens * Side Effects: 506fa9e4066Sahrens * vp - atime updated if byte count > 0 507fa9e4066Sahrens */ 508fa9e4066Sahrens /* ARGSUSED */ 509fa9e4066Sahrens static int 510fa9e4066Sahrens zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 511fa9e4066Sahrens { 512fa9e4066Sahrens znode_t *zp = VTOZ(vp); 513fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 514feb08c6bSbillm ssize_t n, nbytes; 515d5285caeSGeorge Wilson int error = 0; 516104e2ed7Sperrin rl_t *rl; 517c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_t *xuio = NULL; 518fa9e4066Sahrens 5193cb34c60Sahrens ZFS_ENTER(zfsvfs); 5203cb34c60Sahrens ZFS_VERIFY_ZP(zp); 521fa9e4066Sahrens 5220a586ceaSMark Shellenbaum if (zp->z_pflags & ZFS_AV_QUARANTINED) { 5230616c50eSmarks ZFS_EXIT(zfsvfs); 524be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 5250616c50eSmarks } 5260616c50eSmarks 527fa9e4066Sahrens /* 528fa9e4066Sahrens * Validate file offset 529fa9e4066Sahrens */ 530fa9e4066Sahrens if (uio->uio_loffset < (offset_t)0) { 531fa9e4066Sahrens ZFS_EXIT(zfsvfs); 532be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 533fa9e4066Sahrens } 534fa9e4066Sahrens 535fa9e4066Sahrens /* 536fa9e4066Sahrens * Fasttrack empty reads 537fa9e4066Sahrens */ 538fa9e4066Sahrens if (uio->uio_resid == 0) { 539fa9e4066Sahrens ZFS_EXIT(zfsvfs); 540fa9e4066Sahrens return (0); 541fa9e4066Sahrens } 542fa9e4066Sahrens 543fa9e4066Sahrens /* 544104e2ed7Sperrin * Check for mandatory locks 545fa9e4066Sahrens */ 
5460a586ceaSMark Shellenbaum if (MANDMODE(zp->z_mode)) { 547fa9e4066Sahrens if (error = chklock(vp, FREAD, 548fa9e4066Sahrens uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 549fa9e4066Sahrens ZFS_EXIT(zfsvfs); 550fa9e4066Sahrens return (error); 551fa9e4066Sahrens } 552fa9e4066Sahrens } 553fa9e4066Sahrens 554fa9e4066Sahrens /* 555fa9e4066Sahrens * If we're in FRSYNC mode, sync out this znode before reading it. 556fa9e4066Sahrens */ 55755da60b9SMark J Musante if (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5585002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 559fa9e4066Sahrens 560fa9e4066Sahrens /* 561104e2ed7Sperrin * Lock the range against changes. 562fa9e4066Sahrens */ 563104e2ed7Sperrin rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 564104e2ed7Sperrin 565fa9e4066Sahrens /* 566fa9e4066Sahrens * If we are reading past end-of-file we can skip 567fa9e4066Sahrens * to the end; but we might still need to set atime. 568fa9e4066Sahrens */ 5690a586ceaSMark Shellenbaum if (uio->uio_loffset >= zp->z_size) { 570fa9e4066Sahrens error = 0; 571fa9e4066Sahrens goto out; 572fa9e4066Sahrens } 573fa9e4066Sahrens 5740a586ceaSMark Shellenbaum ASSERT(uio->uio_loffset < zp->z_size); 5750a586ceaSMark Shellenbaum n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 576feb08c6bSbillm 577c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((uio->uio_extflg == UIO_XUIO) && 578c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 579c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int nblk; 580c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz = zp->z_blksz; 581c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uint64_t offset = uio->uio_loffset; 582c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 583c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio = (xuio_t *)uio; 
584c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((ISP2(blksz))) { 585c242f9a0Schunli zhang - Sun Microsystems - Irvine United States nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 586c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz)) / blksz; 587c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 588c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(offset + n <= blksz); 589c242f9a0Schunli zhang - Sun Microsystems - Irvine United States nblk = 1; 590c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 591570de38fSSurya Prakki (void) dmu_xuio_init(xuio, nblk); 592c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 593c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (vn_has_cached_data(vp)) { 594c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 595c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * For simplicity, we always allocate a full buffer 596c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * even if we only expect to read a portion of a block. 
597c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 598c242f9a0Schunli zhang - Sun Microsystems - Irvine United States while (--nblk >= 0) { 599570de38fSSurya Prakki (void) dmu_xuio_add(xuio, 6000a586ceaSMark Shellenbaum dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 6010a586ceaSMark Shellenbaum blksz), 0, blksz); 602c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 603c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 604c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 605c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 606feb08c6bSbillm while (n > 0) { 607feb08c6bSbillm nbytes = MIN(n, zfs_read_chunk_size - 608feb08c6bSbillm P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 609fa9e4066Sahrens 610f8554bb9SMatthew Ahrens if (vn_has_cached_data(vp)) { 611feb08c6bSbillm error = mappedread(vp, nbytes, uio); 612f8554bb9SMatthew Ahrens } else { 613f8554bb9SMatthew Ahrens error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 614f8554bb9SMatthew Ahrens uio, nbytes); 615f8554bb9SMatthew Ahrens } 616b87f3af3Sperrin if (error) { 617b87f3af3Sperrin /* convert checksum errors into IO errors */ 618b87f3af3Sperrin if (error == ECKSUM) 619be6fd75aSMatthew Ahrens error = SET_ERROR(EIO); 620feb08c6bSbillm break; 621b87f3af3Sperrin } 622fa9e4066Sahrens 623feb08c6bSbillm n -= nbytes; 624fa9e4066Sahrens } 625fa9e4066Sahrens out: 626c5c6ffa0Smaybee zfs_range_unlock(rl); 627fa9e4066Sahrens 628fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 629fa9e4066Sahrens ZFS_EXIT(zfsvfs); 630fa9e4066Sahrens return (error); 631fa9e4066Sahrens } 632fa9e4066Sahrens 633fa9e4066Sahrens /* 634fa9e4066Sahrens * Write the bytes to a file. 635fa9e4066Sahrens * 636fa9e4066Sahrens * IN: vp - vnode of file to be written to. 637fa9e4066Sahrens * uio - structure supplying write location, range info, 638fa9e4066Sahrens * and data buffer. 639f7170741SWill Andrews * ioflag - FAPPEND, FSYNC, and/or FDSYNC. 
FAPPEND is 640f7170741SWill Andrews * set if in append mode. 641fa9e4066Sahrens * cr - credentials of caller. 642da6c28aaSamw * ct - caller context (NFS/CIFS fem monitor only) 643fa9e4066Sahrens * 644fa9e4066Sahrens * OUT: uio - updated offset and range. 645fa9e4066Sahrens * 646f7170741SWill Andrews * RETURN: 0 on success, error code on failure. * A write that transfers only part of the data before failing still * returns 0, except in replay mode (z_replay) where the error is returned. 647fa9e4066Sahrens * 648fa9e4066Sahrens * Timestamps: 649fa9e4066Sahrens * vp - ctime|mtime updated if byte count > 0 650fa9e4066Sahrens */ 6510a586ceaSMark Shellenbaum 652fa9e4066Sahrens /* ARGSUSED */ 653fa9e4066Sahrens static int 654fa9e4066Sahrens zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 655fa9e4066Sahrens { 656fa9e4066Sahrens znode_t *zp = VTOZ(vp); 657fa9e4066Sahrens rlim64_t limit = uio->uio_llimit; 658fa9e4066Sahrens ssize_t start_resid = uio->uio_resid; 659fa9e4066Sahrens ssize_t tx_bytes; 660fa9e4066Sahrens uint64_t end_size; 661fa9e4066Sahrens dmu_tx_t *tx; 662fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 663f18faf3fSek zilog_t *zilog; 664fa9e4066Sahrens offset_t woff; 665fa9e4066Sahrens ssize_t n, nbytes; 666104e2ed7Sperrin rl_t *rl; 667fa9e4066Sahrens int max_blksz = zfsvfs->z_max_blksz; 668d5285caeSGeorge Wilson int error = 0; 6692fdbea25SAleksandr Guzovskiy arc_buf_t *abuf; 670d5285caeSGeorge Wilson iovec_t *aiov = NULL; 671c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_t *xuio = NULL; 672c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int i_iov = 0; 673c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int iovcnt = uio->uio_iovcnt; 674c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *iovp = uio->uio_iov; 675c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int write_eof; 6760a586ceaSMark Shellenbaum int count = 0; 6770a586ceaSMark Shellenbaum 
sa_bulk_attr_t bulk[4]; 6780a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 679fa9e4066Sahrens 680fa9e4066Sahrens /* 
681fa9e4066Sahrens * Fasttrack empty write 682fa9e4066Sahrens */ 683104e2ed7Sperrin n = start_resid; 684fa9e4066Sahrens if (n == 0) 685fa9e4066Sahrens return (0); 686fa9e4066Sahrens 687104e2ed7Sperrin if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 688104e2ed7Sperrin limit = MAXOFFSET_T; 689104e2ed7Sperrin 6903cb34c60Sahrens ZFS_ENTER(zfsvfs); 6913cb34c60Sahrens ZFS_VERIFY_ZP(zp); 692c09193bfSmarks 6930a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 6940a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 6950a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 6960a586ceaSMark Shellenbaum &zp->z_size, 8); 6970a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 6980a586ceaSMark Shellenbaum &zp->z_pflags, 8); 6990a586ceaSMark Shellenbaum 7002144b121SMarcel Telka /* 7012144b121SMarcel Telka * In case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 7022144b121SMarcel Telka * callers might not be able to detect properly that we are read-only, 7032144b121SMarcel Telka * so check it explicitly here. 7042144b121SMarcel Telka */ 7052144b121SMarcel Telka if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 7062144b121SMarcel Telka ZFS_EXIT(zfsvfs); 7072144b121SMarcel Telka return (SET_ERROR(EROFS)); 7082144b121SMarcel Telka } 7092144b121SMarcel Telka 710c09193bfSmarks /* 7112889ec41SGordon Ross * If immutable, or append-only and this is a non-append write that * starts below EOF, then return EPERM. 7122889ec41SGordon Ross * Intentionally allow ZFS_READONLY through here. 
7132889ec41SGordon Ross * See zfs_zaccess_common() 714c09193bfSmarks */ 7152889ec41SGordon Ross if ((zp->z_pflags & ZFS_IMMUTABLE) || 7160a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 7170a586ceaSMark Shellenbaum (uio->uio_loffset < zp->z_size))) { 718c09193bfSmarks ZFS_EXIT(zfsvfs); 719be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 720c09193bfSmarks } 721c09193bfSmarks 722f18faf3fSek zilog = zfsvfs->z_log; 723fa9e4066Sahrens 72441865f27SWilliam Gorrell /* 72541865f27SWilliam Gorrell * Validate file offset 72641865f27SWilliam Gorrell */ 7270a586ceaSMark Shellenbaum woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 72841865f27SWilliam Gorrell if (woff < 0) { 72941865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 730be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 73141865f27SWilliam Gorrell } 73241865f27SWilliam Gorrell 73341865f27SWilliam Gorrell /* 73441865f27SWilliam Gorrell * Check for mandatory locks before calling zfs_range_lock() 73541865f27SWilliam Gorrell * in order to prevent a deadlock with locks set via fcntl(). 73641865f27SWilliam Gorrell */ 7370a586ceaSMark Shellenbaum if (MANDMODE((mode_t)zp->z_mode) && 73841865f27SWilliam Gorrell (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 73941865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 74041865f27SWilliam Gorrell return (error); 74141865f27SWilliam Gorrell } 74241865f27SWilliam Gorrell 743fa9e4066Sahrens /* 744c5c6ffa0Smaybee * Pre-fault the pages to ensure slow (e.g. NFS) pages 745104e2ed7Sperrin * don't hold up txg. 746c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Skip this if uio contains loaned arc_buf. 
747fa9e4066Sahrens */ 748c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((uio->uio_extflg == UIO_XUIO) && 749c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 750c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio = (xuio_t *)uio; 751c242f9a0Schunli zhang - Sun Microsystems - Irvine United States else 752ff866947SSanjeev Bagewadi uio_prefaultpages(MIN(n, max_blksz), uio); 753fa9e4066Sahrens 754fa9e4066Sahrens /* 755fa9e4066Sahrens * If in append mode, set the io offset pointer to eof. 756fa9e4066Sahrens */ 757104e2ed7Sperrin if (ioflag & FAPPEND) { 758104e2ed7Sperrin /* 75941865f27SWilliam Gorrell * Obtain an appending range lock to guarantee file append 76041865f27SWilliam Gorrell * semantics. We reset the write offset once we have the lock. 761104e2ed7Sperrin */ 762104e2ed7Sperrin rl = zfs_range_lock(zp, 0, n, RL_APPEND); 76341865f27SWilliam Gorrell woff = rl->r_off; 764104e2ed7Sperrin if (rl->r_len == UINT64_MAX) { 76541865f27SWilliam Gorrell /* 76641865f27SWilliam Gorrell * We overlocked the file because this write will cause 76741865f27SWilliam Gorrell * the file block size to increase. 76841865f27SWilliam Gorrell * Note that z_size cannot change with this lock held. 76941865f27SWilliam Gorrell */ 7700a586ceaSMark Shellenbaum woff = zp->z_size; 771104e2ed7Sperrin } 77241865f27SWilliam Gorrell uio->uio_loffset = woff; 773fa9e4066Sahrens } else { 774fa9e4066Sahrens /* 77541865f27SWilliam Gorrell * Note that if the file block size will change as a result of 77641865f27SWilliam Gorrell * this write, then this range lock will lock the entire file 77741865f27SWilliam Gorrell * so that we can re-write the block safely. 
778fa9e4066Sahrens */ 779104e2ed7Sperrin rl = zfs_range_lock(zp, woff, n, RL_WRITER); 780fa9e4066Sahrens } 781fa9e4066Sahrens 782fa9e4066Sahrens if (woff >= limit) { 783feb08c6bSbillm zfs_range_unlock(rl); 784feb08c6bSbillm ZFS_EXIT(zfsvfs); 785be6fd75aSMatthew Ahrens return (SET_ERROR(EFBIG)); 786fa9e4066Sahrens } 787fa9e4066Sahrens 788fa9e4066Sahrens if ((woff + n) > limit || woff > (limit - n)) 789fa9e4066Sahrens n = limit - woff; 790fa9e4066Sahrens 791c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* Will this write extend the file length? */ 7920a586ceaSMark Shellenbaum write_eof = (woff + n > zp->z_size); 793c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 7940a586ceaSMark Shellenbaum end_size = MAX(zp->z_size, woff + n); 795fa9e4066Sahrens 796104e2ed7Sperrin /* 797feb08c6bSbillm * Write the file in reasonable size chunks. Each chunk is written 798feb08c6bSbillm * in a separate transaction; this keeps the intent log records small 799feb08c6bSbillm * and allows us to do more fine-grained space accounting. 
800104e2ed7Sperrin */ 801feb08c6bSbillm while (n > 0) { 8022fdbea25SAleksandr Guzovskiy abuf = NULL; 8032fdbea25SAleksandr Guzovskiy woff = uio->uio_loffset; 8040a586ceaSMark Shellenbaum if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 8050a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 8062fdbea25SAleksandr Guzovskiy if (abuf != NULL) 8072fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 808be6fd75aSMatthew Ahrens error = SET_ERROR(EDQUOT); 80914843421SMatthew Ahrens break; 81014843421SMatthew Ahrens } 8112fdbea25SAleksandr Guzovskiy 812c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (xuio && abuf == NULL) { 813c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(i_iov < iovcnt); 814c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov = &iovp[i_iov]; 815c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = dmu_xuio_arcbuf(xuio, i_iov); 816c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_xuio_clear(xuio, i_iov); 817c242f9a0Schunli zhang - Sun Microsystems - Irvine United States DTRACE_PROBE3(zfs_cp_write, int, i_iov, 818c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *, aiov, arc_buf_t *, abuf); 819c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT((aiov->iov_base == abuf->b_data) || 820c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ((char *)aiov->iov_base - (char *)abuf->b_data + 821c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_len == arc_buf_size(abuf))); 822c242f9a0Schunli zhang - Sun Microsystems - Irvine United States i_iov++; 823c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else if (abuf == NULL && n >= max_blksz && 8240a586ceaSMark Shellenbaum woff >= zp->z_size && 8252fdbea25SAleksandr Guzovskiy P2PHASE(woff, max_blksz) == 0 && 8262fdbea25SAleksandr Guzovskiy zp->z_blksz == max_blksz) { 827c242f9a0Schunli zhang - Sun Microsystems - Irvine 
United States /* 828c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * This write covers a full block. "Borrow" a buffer 829c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * from the dmu so that we can fill it before we enter 830c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * a transaction. This avoids the possibility of 831c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * holding up the transaction if the data copy hangs 832c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * up on a pagefault (e.g., from an NFS server mapping). 833c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 8342fdbea25SAleksandr Guzovskiy size_t cbytes; 8352fdbea25SAleksandr Guzovskiy 8360a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 8370a586ceaSMark Shellenbaum max_blksz); 8382fdbea25SAleksandr Guzovskiy ASSERT(abuf != NULL); 8392fdbea25SAleksandr Guzovskiy ASSERT(arc_buf_size(abuf) == max_blksz); 8402fdbea25SAleksandr Guzovskiy if (error = uiocopy(abuf->b_data, max_blksz, 8412fdbea25SAleksandr Guzovskiy UIO_WRITE, uio, &cbytes)) { 8422fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 8432fdbea25SAleksandr Guzovskiy break; 8442fdbea25SAleksandr Guzovskiy } 8452fdbea25SAleksandr Guzovskiy ASSERT(cbytes == max_blksz); 8462fdbea25SAleksandr Guzovskiy } 8472fdbea25SAleksandr Guzovskiy 8482fdbea25SAleksandr Guzovskiy /* 8492fdbea25SAleksandr Guzovskiy * Start a transaction. 
8502fdbea25SAleksandr Guzovskiy */ 851feb08c6bSbillm tx = dmu_tx_create(zfsvfs->z_os); 8520a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 853feb08c6bSbillm dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 8540a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 855e722410cSMatthew Ahrens error = dmu_tx_assign(tx, TXG_WAIT); 856feb08c6bSbillm if (error) { 857feb08c6bSbillm dmu_tx_abort(tx); 8582fdbea25SAleksandr Guzovskiy if (abuf != NULL) 8592fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 860feb08c6bSbillm break; 861feb08c6bSbillm } 862104e2ed7Sperrin 863feb08c6bSbillm /* 864feb08c6bSbillm * If zfs_range_lock() over-locked we grow the blocksize 865feb08c6bSbillm * and then reduce the lock range. This will only happen 866feb08c6bSbillm * on the first iteration since zfs_range_reduce() will 867feb08c6bSbillm * shrink down r_len to the appropriate size. 868feb08c6bSbillm */ 869feb08c6bSbillm if (rl->r_len == UINT64_MAX) { 870feb08c6bSbillm uint64_t new_blksz; 871feb08c6bSbillm 872feb08c6bSbillm if (zp->z_blksz > max_blksz) { 873b5152584SMatthew Ahrens /* 874b5152584SMatthew Ahrens * File's blocksize is already larger than the 875b5152584SMatthew Ahrens * "recordsize" property. Only let it grow to 876b5152584SMatthew Ahrens * the next power of 2. 877b5152584SMatthew Ahrens */ 878feb08c6bSbillm ASSERT(!ISP2(zp->z_blksz)); 879b5152584SMatthew Ahrens new_blksz = MIN(end_size, 880b5152584SMatthew Ahrens 1 << highbit64(zp->z_blksz)); 881feb08c6bSbillm } else { 882feb08c6bSbillm new_blksz = MIN(end_size, max_blksz); 883feb08c6bSbillm } 884feb08c6bSbillm zfs_grow_blocksize(zp, new_blksz, tx); 885feb08c6bSbillm zfs_range_reduce(rl, woff, n); 886fa9e4066Sahrens } 887fa9e4066Sahrens 888fa9e4066Sahrens /* 889fa9e4066Sahrens * XXX - should we really limit each write to z_max_blksz? 890fa9e4066Sahrens * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
891fa9e4066Sahrens */ 892fa9e4066Sahrens nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 893fa9e4066Sahrens 8942fdbea25SAleksandr Guzovskiy if (abuf == NULL) { 8952fdbea25SAleksandr Guzovskiy tx_bytes = uio->uio_resid; 89694d1a210STim Haley error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 89794d1a210STim Haley uio, nbytes, tx); 8982fdbea25SAleksandr Guzovskiy tx_bytes -= uio->uio_resid; 8992fdbea25SAleksandr Guzovskiy } else { 9002fdbea25SAleksandr Guzovskiy tx_bytes = nbytes; 901c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 902c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 903c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * If this is not a full block write, but we are 904c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * extending the file past EOF and this data starts 905c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * block-aligned, use assign_arcbuf(). Otherwise, 906c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * write via dmu_write(). 
907c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 908c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (tx_bytes < max_blksz && (!write_eof || 909c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_base != abuf->b_data)) { 910c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio); 911c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_write(zfsvfs->z_os, zp->z_id, woff, 912c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_len, aiov->iov_base, tx); 913c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_return_arcbuf(abuf); 914c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_stat_wbuf_copied(); 915c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 916c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio || tx_bytes == max_blksz); 9170a586ceaSMark Shellenbaum dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 9180a586ceaSMark Shellenbaum woff, abuf, tx); 919c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 9202fdbea25SAleksandr Guzovskiy ASSERT(tx_bytes <= uio->uio_resid); 9212fdbea25SAleksandr Guzovskiy uioskip(uio, tx_bytes); 9222fdbea25SAleksandr Guzovskiy } 9232fdbea25SAleksandr Guzovskiy if (tx_bytes && vn_has_cached_data(vp)) { 924ac05c741SMark Maybee update_pages(vp, woff, 925ac05c741SMark Maybee tx_bytes, zfsvfs->z_os, zp->z_id); 9262fdbea25SAleksandr Guzovskiy } 927fa9e4066Sahrens 928feb08c6bSbillm /* 929feb08c6bSbillm * If we made no progress, we're done. If we made even 930feb08c6bSbillm * partial progress, update the znode and ZIL accordingly. 
931feb08c6bSbillm */ 932feb08c6bSbillm if (tx_bytes == 0) { 9330a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 9340a586ceaSMark Shellenbaum (void *)&zp->z_size, sizeof (uint64_t), tx); 935af2c4821Smaybee dmu_tx_commit(tx); 936feb08c6bSbillm ASSERT(error != 0); 937fa9e4066Sahrens break; 938fa9e4066Sahrens } 939fa9e4066Sahrens 940169cdae2Smarks /* 941169cdae2Smarks * Clear Set-UID/Set-GID bits on successful write if not 942169cdae2Smarks * privileged and at least one of the execute bits is set. 943169cdae2Smarks * 944169cdae2Smarks * It would be nice to do this after all writes have 945169cdae2Smarks * been done, but that would still expose the ISUID/ISGID 946169cdae2Smarks * to another app after the partial write is committed. 947da6c28aaSamw * 948f1696b23SMark Shellenbaum * Note: we don't call zfs_fuid_map_id() here because 949f1696b23SMark Shellenbaum * user 0 is not an ephemeral uid. 950169cdae2Smarks */ 951169cdae2Smarks mutex_enter(&zp->z_acl_lock); 9520a586ceaSMark Shellenbaum if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 953169cdae2Smarks (S_IXUSR >> 6))) != 0 && 9540a586ceaSMark Shellenbaum (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 955169cdae2Smarks secpolicy_vnode_setid_retain(cr, 9560a586ceaSMark Shellenbaum (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 9570a586ceaSMark Shellenbaum uint64_t newmode; 9580a586ceaSMark Shellenbaum zp->z_mode &= ~(S_ISUID | S_ISGID); 9590a586ceaSMark Shellenbaum newmode = zp->z_mode; 9600a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 9610a586ceaSMark Shellenbaum (void *)&newmode, sizeof (uint64_t), tx); 962169cdae2Smarks } 963169cdae2Smarks mutex_exit(&zp->z_acl_lock); 964169cdae2Smarks 9650a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 9660a586ceaSMark Shellenbaum B_TRUE); 967fa9e4066Sahrens 968fa9e4066Sahrens /* 969feb08c6bSbillm * Update the file size (z_size) if it has changed; 970feb08c6bSbillm * account for 
possible concurrent updates. 971fa9e4066Sahrens */ 9720a586ceaSMark Shellenbaum while ((end_size = zp->z_size) < uio->uio_loffset) { 9730a586ceaSMark Shellenbaum (void) atomic_cas_64(&zp->z_size, end_size, 974fa9e4066Sahrens uio->uio_loffset); 9750a586ceaSMark Shellenbaum ASSERT(error == 0); 9760a586ceaSMark Shellenbaum } 977c0e50c98SNeil Perrin /* 978c0e50c98SNeil Perrin * If we are replaying and eof is non zero then force 979c0e50c98SNeil Perrin * the file size to the specified eof. Note, there's no 980c0e50c98SNeil Perrin * concurrency during replay. 981c0e50c98SNeil Perrin */ 982c0e50c98SNeil Perrin if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 983c0e50c98SNeil Perrin zp->z_size = zfsvfs->z_replay_eof; 984c0e50c98SNeil Perrin 9850a586ceaSMark Shellenbaum error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 9860a586ceaSMark Shellenbaum 987feb08c6bSbillm zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 988feb08c6bSbillm dmu_tx_commit(tx); 989fa9e4066Sahrens 990feb08c6bSbillm if (error != 0) 991feb08c6bSbillm break; 992feb08c6bSbillm ASSERT(tx_bytes == nbytes); 993feb08c6bSbillm n -= nbytes; 994ff866947SSanjeev Bagewadi 995ff866947SSanjeev Bagewadi if (!xuio && n > 0) 996ff866947SSanjeev Bagewadi uio_prefaultpages(MIN(n, max_blksz), uio); 997feb08c6bSbillm } 998fa9e4066Sahrens 999c5c6ffa0Smaybee zfs_range_unlock(rl); 1000fa9e4066Sahrens 1001fa9e4066Sahrens /* 1002fa9e4066Sahrens * If we're in replay mode, or we made no progress, return error. 1003fa9e4066Sahrens * Otherwise, it's at least a partial write, so it's successful. 
1004fa9e4066Sahrens */ 10051209a471SNeil Perrin if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1006fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1007fa9e4066Sahrens return (error); 1008fa9e4066Sahrens } 1009fa9e4066Sahrens 101055da60b9SMark J Musante if (ioflag & (FSYNC | FDSYNC) || 101155da60b9SMark J Musante zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 10125002558fSNeil Perrin zil_commit(zilog, zp->z_id); 1013fa9e4066Sahrens 1014fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1015fa9e4066Sahrens return (0); 1016fa9e4066Sahrens } 1017fa9e4066Sahrens /* * Tear down a zgd_t built by zfs_get_data(): drop the dbuf hold if one was * taken, release the range lock, release the vnode asynchronously, pass * zgd_bp to zil_lwb_add_block() when error is 0, and free the zgd. * Called directly by zfs_get_data() and also handed to dmu_sync() as its * done callback. */ 1018c5c6ffa0Smaybee void 1019b24ab676SJeff Bonwick zfs_get_done(zgd_t *zgd, int error) 1020c5c6ffa0Smaybee { 1021b24ab676SJeff Bonwick znode_t *zp = zgd->zgd_private; 1022b24ab676SJeff Bonwick objset_t *os = zp->z_zfsvfs->z_os; 1023b24ab676SJeff Bonwick 1024b24ab676SJeff Bonwick if (zgd->zgd_db) 1025b24ab676SJeff Bonwick dmu_buf_rele(zgd->zgd_db, zgd); 1026b24ab676SJeff Bonwick 1027b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 1028c5c6ffa0Smaybee 10299d3574bfSNeil Perrin /* 10309d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 10319d3574bfSNeil Perrin * txg stopped from syncing. 
10329d3574bfSNeil Perrin */ 1033b24ab676SJeff Bonwick VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1034b24ab676SJeff Bonwick 1035b24ab676SJeff Bonwick if (error == 0 && zgd->zgd_bp) 1036*1271e4b1SPrakash Surya zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp); 1037b24ab676SJeff Bonwick 103867bd71c6Sperrin kmem_free(zgd, sizeof (zgd_t)); 1039c5c6ffa0Smaybee } 1040c5c6ffa0Smaybee 1041c87b8fc5SMark J Musante #ifdef DEBUG 1042c87b8fc5SMark J Musante static int zil_fault_io = 0; 1043c87b8fc5SMark J Musante #endif 1044c87b8fc5SMark J Musante 1045fa9e4066Sahrens /* 1046fa9e4066Sahrens * Get data to generate a TX_WRITE intent log record. 
1047fa9e4066Sahrens * * A non-NULL buf selects an immediate write: the data is read into buf * under a reader range lock. A NULL buf selects an indirect write: the * whole block is range-locked and synced with dmu_sync(), and * zfs_get_done() finishes up when that I/O completes. * * Returns 0 on success, ENOENT if the file has been removed or the record's * offset is at or beyond the current file size, otherwise an error code. */ 1048fa9e4066Sahrens int 1049*1271e4b1SPrakash Surya zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) 1050fa9e4066Sahrens { 1051fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 1052fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 1053fa9e4066Sahrens znode_t *zp; 1054b24ab676SJeff Bonwick uint64_t object = lr->lr_foid; 1055b24ab676SJeff Bonwick uint64_t offset = lr->lr_offset; 1056b24ab676SJeff Bonwick uint64_t size = lr->lr_length; 1057c5c6ffa0Smaybee dmu_buf_t *db; 105867bd71c6Sperrin zgd_t *zgd; 1059fa9e4066Sahrens int error = 0; 1060fa9e4066Sahrens 1061*1271e4b1SPrakash Surya ASSERT3P(lwb, !=, NULL); 1062*1271e4b1SPrakash Surya ASSERT3P(zio, !=, NULL); 1063*1271e4b1SPrakash Surya ASSERT3U(size, !=, 0); 1064fa9e4066Sahrens 1065fa9e4066Sahrens /* 1066104e2ed7Sperrin * Nothing to do if the file has been removed 1067fa9e4066Sahrens */ 1068b24ab676SJeff Bonwick if (zfs_zget(zfsvfs, object, &zp) != 0) 1069be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1070893a6d32Sahrens if (zp->z_unlinked) { 10719d3574bfSNeil Perrin /* 10729d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 10739d3574bfSNeil Perrin * txg stopped from syncing. 
10749d3574bfSNeil Perrin */ 10759d3574bfSNeil Perrin VN_RELE_ASYNC(ZTOV(zp), 10769d3574bfSNeil Perrin dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1077be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1078fa9e4066Sahrens } 1079fa9e4066Sahrens 1080b24ab676SJeff Bonwick zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1081*1271e4b1SPrakash Surya zgd->zgd_lwb = lwb; 1082b24ab676SJeff Bonwick zgd->zgd_private = zp; 1083b24ab676SJeff Bonwick 1084fa9e4066Sahrens /* 1085fa9e4066Sahrens * Write records come in two flavors: immediate and indirect. 
1086fa9e4066Sahrens * For small writes it's cheaper to store the data with the 1087fa9e4066Sahrens * log record (immediate); for large writes it's cheaper to 1088fa9e4066Sahrens * sync the data and get a pointer to it (indirect) so that 1089fa9e4066Sahrens * we don't have to write the data twice. 1090fa9e4066Sahrens */ 1091104e2ed7Sperrin if (buf != NULL) { /* immediate write */ 1092b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1093104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 10940a586ceaSMark Shellenbaum if (offset >= zp->z_size) { 1095be6fd75aSMatthew Ahrens error = SET_ERROR(ENOENT); 1096b24ab676SJeff Bonwick } else { 1097b24ab676SJeff Bonwick error = dmu_read(os, object, offset, size, buf, 1098b24ab676SJeff Bonwick DMU_READ_NO_PREFETCH); 1099104e2ed7Sperrin } 1100b24ab676SJeff Bonwick ASSERT(error == 0 || error == ENOENT); 1101104e2ed7Sperrin } else { /* indirect write */ 1102fa9e4066Sahrens /* 1103104e2ed7Sperrin * Have to lock the whole block to ensure when it's 1104104e2ed7Sperrin * written out and its checksum is being calculated 1105104e2ed7Sperrin * that no one can change the data. We need to re-check 1106104e2ed7Sperrin * blocksize after we get the lock in case it's changed! 1107fa9e4066Sahrens */ 1108104e2ed7Sperrin for (;;) { 1109b24ab676SJeff Bonwick uint64_t blkoff; 1110b24ab676SJeff Bonwick size = zp->z_blksz; 1111dfe73b3dSJeff Bonwick blkoff = ISP2(size) ? 
P2PHASE(offset, size) : offset; 1112b24ab676SJeff Bonwick offset -= blkoff; 1113b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1114b24ab676SJeff Bonwick RL_READER); 1115b24ab676SJeff Bonwick if (zp->z_blksz == size) 1116104e2ed7Sperrin break; 1117b24ab676SJeff Bonwick offset += blkoff; 1118b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 1119104e2ed7Sperrin } 1120104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 11210a586ceaSMark Shellenbaum if (lr->lr_offset >= zp->z_size) 1122be6fd75aSMatthew Ahrens error = SET_ERROR(ENOENT); 1123c87b8fc5SMark J Musante #ifdef DEBUG 1124c87b8fc5SMark J Musante if (zil_fault_io) { 1125be6fd75aSMatthew Ahrens error = SET_ERROR(EIO); 1126c87b8fc5SMark J Musante zil_fault_io = 0; 1127c87b8fc5SMark J Musante } 1128c87b8fc5SMark J Musante #endif 1129b24ab676SJeff Bonwick if (error == 0) 113047cb52daSJeff Bonwick error = dmu_buf_hold(os, object, offset, zgd, &db, 113147cb52daSJeff Bonwick DMU_READ_NO_PREFETCH); 1132c87b8fc5SMark J Musante 1133975c32a0SNeil Perrin if (error == 0) { 1134b7edcb94SMatthew Ahrens blkptr_t *bp = &lr->lr_blkptr; 113580901aeaSGeorge Wilson 1136b24ab676SJeff Bonwick zgd->zgd_db = db; 1137b24ab676SJeff Bonwick zgd->zgd_bp = bp; 1138b24ab676SJeff Bonwick 1139b24ab676SJeff Bonwick ASSERT(db->db_offset == offset); 1140b24ab676SJeff Bonwick ASSERT(db->db_size == size); 1141b24ab676SJeff Bonwick 1142b24ab676SJeff Bonwick error = dmu_sync(zio, lr->lr_common.lrc_txg, 1143b24ab676SJeff Bonwick zfs_get_done, zgd); 11440c94e1afSAndriy Gapon ASSERT(error || lr->lr_length <= size); 1145b24ab676SJeff Bonwick 1146975c32a0SNeil Perrin /* 1147b24ab676SJeff Bonwick * On success, we need to wait for the write I/O 1148b24ab676SJeff Bonwick * initiated by dmu_sync() to complete before we can 1149b24ab676SJeff Bonwick * release this dbuf. We will finish everything up 1150b24ab676SJeff Bonwick * in the zfs_get_done() callback. 
1151975c32a0SNeil Perrin */ 1152b24ab676SJeff Bonwick if (error == 0) 1153b24ab676SJeff Bonwick return (0); 1154975c32a0SNeil Perrin 1155b24ab676SJeff Bonwick if (error == EALREADY) { 1156b24ab676SJeff Bonwick lr->lr_common.lrc_txtype = TX_WRITE2; 1157b24ab676SJeff Bonwick error = 0; 1158b24ab676SJeff Bonwick } 1159975c32a0SNeil Perrin } 1160fa9e4066Sahrens } 1161b24ab676SJeff Bonwick 1162b24ab676SJeff Bonwick zfs_get_done(zgd, error); 1163b24ab676SJeff Bonwick 1164fa9e4066Sahrens return (error); 1165fa9e4066Sahrens } 1166fa9e4066Sahrens /* * Check the caller's (cr) access to vp for the requested mode. * If any V_ACE_MASK bit is set in flag the check is done by zfs_zaccess(), * otherwise by zfs_zaccess_rwx(). Returns the result of that check. */ 1167fa9e4066Sahrens /*ARGSUSED*/ 1168fa9e4066Sahrens static int 1169da6c28aaSamw zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1170da6c28aaSamw caller_context_t *ct) 1171fa9e4066Sahrens { 1172fa9e4066Sahrens znode_t *zp = VTOZ(vp); 1173fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1174fa9e4066Sahrens int error; 1175fa9e4066Sahrens 11763cb34c60Sahrens ZFS_ENTER(zfsvfs); 11773cb34c60Sahrens ZFS_VERIFY_ZP(zp); 1178da6c28aaSamw 1179da6c28aaSamw if (flag & V_ACE_MASK) 1180da6c28aaSamw error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1181da6c28aaSamw else 1182da6c28aaSamw error = zfs_zaccess_rwx(zp, mode, flag, cr); 1183da6c28aaSamw 1184fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1185fa9e4066Sahrens return (error); 1186fa9e4066Sahrens } 1187fa9e4066Sahrens 1188d47621a4STim Haley /* 1189d47621a4STim Haley * If vnode is for a device return a specfs vnode instead. 
1190d47621a4STim Haley * In that case the hold on the original vnode is always released; if * specvp() returns NULL, *vpp is set to NULL and ENOSYS is returned. * Non-device vnodes are left untouched and 0 is returned. */ 1191d47621a4STim Haley static int 1192d47621a4STim Haley specvp_check(vnode_t **vpp, cred_t *cr) 1193d47621a4STim Haley { 1194d47621a4STim Haley int error = 0; 1195d47621a4STim Haley 1196d47621a4STim Haley if (IS_DEVVP(*vpp)) { 1197d47621a4STim Haley struct vnode *svp; 1198d47621a4STim Haley 1199d47621a4STim Haley svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1200d47621a4STim Haley VN_RELE(*vpp); 1201d47621a4STim Haley if (svp == NULL) 1202be6fd75aSMatthew Ahrens error = SET_ERROR(ENOSYS); 1203d47621a4STim Haley *vpp = svp; 1204d47621a4STim Haley } 1205d47621a4STim Haley return (error); 1206d47621a4STim Haley } 1207d47621a4STim Haley 1208d47621a4STim Haley 1209fa9e4066Sahrens /* 1210fa9e4066Sahrens * Lookup an entry in a directory, or an extended attribute directory. 1211fa9e4066Sahrens * If it exists, return a held vnode reference for it. 1212fa9e4066Sahrens * 1213fa9e4066Sahrens * IN: dvp - vnode of directory to search. 1214fa9e4066Sahrens * nm - name of entry to lookup. 1215fa9e4066Sahrens * pnp - full pathname to lookup [UNUSED]. 1216fa9e4066Sahrens * flags - LOOKUP_XATTR set if looking for an attribute. 1217fa9e4066Sahrens * rdir - root directory vnode [UNUSED]. 1218fa9e4066Sahrens * cr - credentials of caller. 1219da6c28aaSamw * ct - caller context 1220da6c28aaSamw * direntflags - directory lookup flags 1221da6c28aaSamw * realpnp - returned pathname. 1222fa9e4066Sahrens * 1223fa9e4066Sahrens * OUT: vpp - vnode of located entry, NULL if not found. 1224fa9e4066Sahrens * 1225f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
1226fa9e4066Sahrens * 1227fa9e4066Sahrens * Timestamps: 1228fa9e4066Sahrens * NA 1229fa9e4066Sahrens */ 1230fa9e4066Sahrens /* ARGSUSED */ 1231fa9e4066Sahrens static int 1232fa9e4066Sahrens zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 1233da6c28aaSamw int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 1234da6c28aaSamw int *direntflags, pathname_t *realpnp) 1235fa9e4066Sahrens { 1236fa9e4066Sahrens znode_t *zdp = VTOZ(dvp); 1237fa9e4066Sahrens zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1238d47621a4STim Haley int error = 0; 1239d47621a4STim Haley 12401c17160aSKevin Crowe /* 12411c17160aSKevin Crowe * Fast path lookup, however we must skip DNLC lookup 12421c17160aSKevin Crowe * for case folding or normalizing lookups because the 12431c17160aSKevin Crowe * DNLC code only stores the passed in name. This means 12441c17160aSKevin Crowe * creating 'a' and removing 'A' on a case insensitive 12451c17160aSKevin Crowe * file system would work, but DNLC still thinks 'a' 12461c17160aSKevin Crowe * exists and won't let you create it again on the next 12471c17160aSKevin Crowe * pass through fast path. 12481c17160aSKevin Crowe */ 1249d47621a4STim Haley if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1250d47621a4STim Haley 1251d47621a4STim Haley if (dvp->v_type != VDIR) { 1252be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTDIR)); 12530a586ceaSMark Shellenbaum } else if (zdp->z_sa_hdl == NULL) { 1254be6fd75aSMatthew Ahrens return (SET_ERROR(EIO)); 1255d47621a4STim Haley } 1256d47621a4STim Haley 1257d47621a4STim Haley if (nm[0] == 0 || (nm[0] == '.' 
&& nm[1] == '\0')) { 1258d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1259d47621a4STim Haley if (!error) { 1260d47621a4STim Haley *vpp = dvp; 1261d47621a4STim Haley VN_HOLD(*vpp); 1262d47621a4STim Haley return (0); 1263d47621a4STim Haley } 1264d47621a4STim Haley return (error); 12651c17160aSKevin Crowe } else if (!zdp->z_zfsvfs->z_norm && 12661c17160aSKevin Crowe (zdp->z_zfsvfs->z_case == ZFS_CASE_SENSITIVE)) { 12671c17160aSKevin Crowe 1268d47621a4STim Haley vnode_t *tvp = dnlc_lookup(dvp, nm); 1269d47621a4STim Haley 1270d47621a4STim Haley if (tvp) { 1271d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1272d47621a4STim Haley if (error) { 1273d47621a4STim Haley VN_RELE(tvp); 1274d47621a4STim Haley return (error); 1275d47621a4STim Haley } /* a DNLC_NO_VNODE result is reported as ENOENT; any other vnode is returned to the caller */ 1276d47621a4STim Haley if (tvp == DNLC_NO_VNODE) { 1277d47621a4STim Haley VN_RELE(tvp); 1278be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1279d47621a4STim Haley } else { 1280d47621a4STim Haley *vpp = tvp; 1281d47621a4STim Haley return (specvp_check(vpp, cr)); 1282d47621a4STim Haley } 1283d47621a4STim Haley } 1284d47621a4STim Haley } 1285d47621a4STim Haley } 1286d47621a4STim Haley 1287d47621a4STim Haley DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1288fa9e4066Sahrens 12893cb34c60Sahrens ZFS_ENTER(zfsvfs); 12903cb34c60Sahrens ZFS_VERIFY_ZP(zdp); 1291fa9e4066Sahrens 1292fa9e4066Sahrens *vpp = NULL; 1293fa9e4066Sahrens 1294fa9e4066Sahrens if (flags & LOOKUP_XATTR) { 12957b55fa8eSck /* 12967b55fa8eSck * If the xattr property is off, refuse the lookup request. 12977b55fa8eSck */ 12987b55fa8eSck if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 12997b55fa8eSck ZFS_EXIT(zfsvfs); 1300be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 13017b55fa8eSck } 13027b55fa8eSck 1303fa9e4066Sahrens /* 1304fa9e4066Sahrens * We don't allow recursive attributes.. 1305fa9e4066Sahrens * Maybe someday we will. 
1306fa9e4066Sahrens */ 13070a586ceaSMark Shellenbaum if (zdp->z_pflags & ZFS_XATTR) { 1308fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1309be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1310fa9e4066Sahrens } 1311fa9e4066Sahrens 13123f063a9dSck if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1313fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1314fa9e4066Sahrens return (error); 1315fa9e4066Sahrens } 1316fa9e4066Sahrens 1317fa9e4066Sahrens /* 1318fa9e4066Sahrens * Do we have permission to get into attribute directory? 1319fa9e4066Sahrens */ 1320fa9e4066Sahrens 1321da6c28aaSamw if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1322da6c28aaSamw B_FALSE, cr)) { 1323fa9e4066Sahrens VN_RELE(*vpp); 1324da6c28aaSamw *vpp = NULL; 1325fa9e4066Sahrens } 1326fa9e4066Sahrens 1327fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1328fa9e4066Sahrens return (error); 1329fa9e4066Sahrens } 1330fa9e4066Sahrens 13310f2dc02eSek if (dvp->v_type != VDIR) { 13320f2dc02eSek ZFS_EXIT(zfsvfs); 1333be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTDIR)); 13340f2dc02eSek } 1335736b9155Smarks 1336fa9e4066Sahrens /* 1337fa9e4066Sahrens * Check accessibility of directory. 
1338fa9e4066Sahrens */ 1339fa9e4066Sahrens 1340da6c28aaSamw if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1341fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1342fa9e4066Sahrens return (error); 1343fa9e4066Sahrens } 1344fa9e4066Sahrens /* When z_utf8 is set, reject names that fail u8_validate(); the code it stores in 'error' is discarded and EILSEQ is returned instead. */ 1345de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1346da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1347da6c28aaSamw ZFS_EXIT(zfsvfs); 1348be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1349da6c28aaSamw } 1350fa9e4066Sahrens 1351da6c28aaSamw error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1352d47621a4STim Haley if (error == 0) 1353d47621a4STim Haley error = specvp_check(vpp, cr); 1354fa9e4066Sahrens 1355fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1356fa9e4066Sahrens return (error); 1357fa9e4066Sahrens } 1358fa9e4066Sahrens 1359fa9e4066Sahrens /* 1360fa9e4066Sahrens * Attempt to create a new entry in a directory. If the entry 1361fa9e4066Sahrens * already exists, truncate the file if permissible, else return 1362fa9e4066Sahrens * an error. Return the vp of the created or trunc'd file. 1363fa9e4066Sahrens * 1364fa9e4066Sahrens * IN: dvp - vnode of directory to put new file entry in. 1365fa9e4066Sahrens * name - name of new file entry. 1366fa9e4066Sahrens * vap - attributes of new file. 1367fa9e4066Sahrens * excl - flag indicating exclusive or non-exclusive mode. 1368fa9e4066Sahrens * mode - mode to open file with. 1369fa9e4066Sahrens * cr - credentials of caller. 1370fa9e4066Sahrens * flag - large file flag [UNUSED]. 1371da6c28aaSamw * ct - caller context 13724bb73804SMatthew Ahrens * vsecp - ACL to be set 1373fa9e4066Sahrens * 1374fa9e4066Sahrens * OUT: vpp - vnode of created or trunc'd entry. 1375fa9e4066Sahrens * 1376f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
1377fa9e4066Sahrens * 1378fa9e4066Sahrens * Timestamps: 1379fa9e4066Sahrens * dvp - ctime|mtime updated if new entry created 1380fa9e4066Sahrens * vp - ctime|mtime always, atime if new 1381fa9e4066Sahrens */ 1382da6c28aaSamw 1383fa9e4066Sahrens /* ARGSUSED */ 1384fa9e4066Sahrens static int 1385fa9e4066Sahrens zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl, 1386da6c28aaSamw int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct, 1387da6c28aaSamw vsecattr_t *vsecp) 1388fa9e4066Sahrens { 1389fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1390fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1391f18faf3fSek zilog_t *zilog; 1392f18faf3fSek objset_t *os; 1393fa9e4066Sahrens zfs_dirlock_t *dl; 1394fa9e4066Sahrens dmu_tx_t *tx; 1395fa9e4066Sahrens int error; 1396c1ce5987SMark Shellenbaum ksid_t *ksid; 1397c1ce5987SMark Shellenbaum uid_t uid; 1398c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 13990a586ceaSMark Shellenbaum zfs_acl_ids_t acl_ids; 140089459e17SMark Shellenbaum boolean_t fuid_dirtied; 1401c8c24165SMark Shellenbaum boolean_t have_acl = B_FALSE; 140269962b56SMatthew Ahrens boolean_t waited = B_FALSE; 1403da6c28aaSamw 1404da6c28aaSamw /* 1405da6c28aaSamw * If we have an ephemeral id, ACL, or XVATTR then 1406da6c28aaSamw * make sure file system is at proper version 1407da6c28aaSamw */ 1408da6c28aaSamw 1409c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1410c1ce5987SMark Shellenbaum if (ksid) 1411c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1412c1ce5987SMark Shellenbaum else 1413c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1414c1ce5987SMark Shellenbaum 1415da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1416da6c28aaSamw (vsecp || (vap->va_mask & AT_XVATTR) || 1417c1ce5987SMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1418be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1419fa9e4066Sahrens 14203cb34c60Sahrens ZFS_ENTER(zfsvfs); 14213cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1422f18faf3fSek os = 
zfsvfs->z_os; 1423f18faf3fSek zilog = zfsvfs->z_log; 1424fa9e4066Sahrens 1425de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1426da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1427da6c28aaSamw ZFS_EXIT(zfsvfs); 1428be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1429da6c28aaSamw } 1430da6c28aaSamw 1431da6c28aaSamw if (vap->va_mask & AT_XVATTR) { 1432da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1433da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1434da6c28aaSamw ZFS_EXIT(zfsvfs); 1435da6c28aaSamw return (error); 1436da6c28aaSamw } 1437da6c28aaSamw } 1438fa9e4066Sahrens top: 1439fa9e4066Sahrens *vpp = NULL; 1440fa9e4066Sahrens 1441fa9e4066Sahrens if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr)) 1442fa9e4066Sahrens vap->va_mode &= ~VSVTX; 1443fa9e4066Sahrens 1444fa9e4066Sahrens if (*name == '\0') { 1445fa9e4066Sahrens /* 1446fa9e4066Sahrens * Null component name refers to the directory itself. 1447fa9e4066Sahrens */ 1448fa9e4066Sahrens VN_HOLD(dvp); 1449fa9e4066Sahrens zp = dzp; 1450fa9e4066Sahrens dl = NULL; 1451fa9e4066Sahrens error = 0; 1452fa9e4066Sahrens } else { 1453fa9e4066Sahrens /* possible VN_HOLD(zp) */ 1454da6c28aaSamw int zflg = 0; 1455da6c28aaSamw 1456da6c28aaSamw if (flag & FIGNORECASE) 1457da6c28aaSamw zflg |= ZCILOOK; 1458da6c28aaSamw 1459da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1460da6c28aaSamw NULL, NULL); 1461da6c28aaSamw if (error) { 14620b2a8171SMark Shellenbaum if (have_acl) 14630b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1464fa9e4066Sahrens if (strcmp(name, "..") == 0) 1465be6fd75aSMatthew Ahrens error = SET_ERROR(EISDIR); 1466fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1467fa9e4066Sahrens return (error); 1468fa9e4066Sahrens } 1469fa9e4066Sahrens } 14700a586ceaSMark Shellenbaum 1471fa9e4066Sahrens if (zp == NULL) { 1472da6c28aaSamw uint64_t txtype; 1473da6c28aaSamw 1474fa9e4066Sahrens /* 1475fa9e4066Sahrens * Create a new file object and update the 
directory 1476fa9e4066Sahrens * to reference it. 1477fa9e4066Sahrens */ 1478da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 14790b2a8171SMark Shellenbaum if (have_acl) 14800b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1481fa9e4066Sahrens goto out; 1482fa9e4066Sahrens } 1483fa9e4066Sahrens 1484fa9e4066Sahrens /* 1485fa9e4066Sahrens * We only support the creation of regular files in 1486fa9e4066Sahrens * extended attribute directories. 1487fa9e4066Sahrens */ 14880a586ceaSMark Shellenbaum 14890a586ceaSMark Shellenbaum if ((dzp->z_pflags & ZFS_XATTR) && 1490fa9e4066Sahrens (vap->va_type != VREG)) { 14910b2a8171SMark Shellenbaum if (have_acl) 14920b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1493be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 1494fa9e4066Sahrens goto out; 1495fa9e4066Sahrens } 1496fa9e4066Sahrens 1497c8c24165SMark Shellenbaum if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 1498c8c24165SMark Shellenbaum cr, vsecp, &acl_ids)) != 0) 149989459e17SMark Shellenbaum goto out; 1500c8c24165SMark Shellenbaum have_acl = B_TRUE; 1501c8c24165SMark Shellenbaum 150214843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 15034929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 1504be6fd75aSMatthew Ahrens error = SET_ERROR(EDQUOT); 150514843421SMatthew Ahrens goto out; 150614843421SMatthew Ahrens } 150789459e17SMark Shellenbaum 1508fa9e4066Sahrens tx = dmu_tx_create(os); 15090a586ceaSMark Shellenbaum 15100a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 15110a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE); 15120a586ceaSMark Shellenbaum 151389459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 151414843421SMatthew Ahrens if (fuid_dirtied) 151514843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 1516ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 15170a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 15180a586ceaSMark Shellenbaum if 
(!zfsvfs->z_use_sa && 15190a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1520fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 15210a586ceaSMark Shellenbaum 0, acl_ids.z_aclp->z_acl_bytes); 1522da6c28aaSamw } 152369962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 1524fa9e4066Sahrens if (error) { 1525fa9e4066Sahrens zfs_dirent_unlock(dl); 15261209a471SNeil Perrin if (error == ERESTART) { 152769962b56SMatthew Ahrens waited = B_TRUE; 15288a2f1b91Sahrens dmu_tx_wait(tx); 15298a2f1b91Sahrens dmu_tx_abort(tx); 1530fa9e4066Sahrens goto top; 1531fa9e4066Sahrens } 1532c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 15338a2f1b91Sahrens dmu_tx_abort(tx); 1534fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1535fa9e4066Sahrens return (error); 1536fa9e4066Sahrens } 15370a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 153889459e17SMark Shellenbaum 153989459e17SMark Shellenbaum if (fuid_dirtied) 154089459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 154189459e17SMark Shellenbaum 1542fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 1543da6c28aaSamw txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1544da6c28aaSamw if (flag & FIGNORECASE) 1545da6c28aaSamw txtype |= TX_CI; 1546da6c28aaSamw zfs_log_create(zilog, tx, txtype, dzp, zp, name, 154789459e17SMark Shellenbaum vsecp, acl_ids.z_fuidp, vap); 154889459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1549fa9e4066Sahrens dmu_tx_commit(tx); 1550fa9e4066Sahrens } else { 1551da6c28aaSamw int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1552da6c28aaSamw 15530b2a8171SMark Shellenbaum if (have_acl) 15540b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 15550b2a8171SMark Shellenbaum have_acl = B_FALSE; 15560b2a8171SMark Shellenbaum 1557fa9e4066Sahrens /* 1558fa9e4066Sahrens * A directory entry already exists for this name. 1559fa9e4066Sahrens */ 1560fa9e4066Sahrens /* 1561fa9e4066Sahrens * Can't truncate an existing file if in exclusive mode. 
1562fa9e4066Sahrens */ 1563fa9e4066Sahrens if (excl == EXCL) { 1564be6fd75aSMatthew Ahrens error = SET_ERROR(EEXIST); 1565fa9e4066Sahrens goto out; 1566fa9e4066Sahrens } 1567fa9e4066Sahrens /* 1568fa9e4066Sahrens * Can't open a directory for writing. 1569fa9e4066Sahrens */ 1570fa9e4066Sahrens if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1571be6fd75aSMatthew Ahrens error = SET_ERROR(EISDIR); 1572fa9e4066Sahrens goto out; 1573fa9e4066Sahrens } 1574fa9e4066Sahrens /* 1575fa9e4066Sahrens * Verify requested access to file. 1576fa9e4066Sahrens */ 1577da6c28aaSamw if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1578fa9e4066Sahrens goto out; 1579fa9e4066Sahrens } 1580fa9e4066Sahrens 1581fa9e4066Sahrens mutex_enter(&dzp->z_lock); 1582fa9e4066Sahrens dzp->z_seq++; 1583fa9e4066Sahrens mutex_exit(&dzp->z_lock); 1584fa9e4066Sahrens 15855730cc9aSmaybee /* 15865730cc9aSmaybee * Truncate regular files if requested. 15875730cc9aSmaybee */ 15885730cc9aSmaybee if ((ZTOV(zp)->v_type == VREG) && 1589fa9e4066Sahrens (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1590cdb0ab79Smaybee /* we can't hold any locks when calling zfs_freesp() */ 1591cdb0ab79Smaybee zfs_dirent_unlock(dl); 1592cdb0ab79Smaybee dl = NULL; 15935730cc9aSmaybee error = zfs_freesp(zp, 0, 0, mode, TRUE); 1594df2381bfSpraks if (error == 0) { 1595da6c28aaSamw vnevent_create(ZTOV(zp), ct); 1596df2381bfSpraks } 1597fa9e4066Sahrens } 1598fa9e4066Sahrens } 1599fa9e4066Sahrens out: 1600fa9e4066Sahrens 1601fa9e4066Sahrens if (dl) 1602fa9e4066Sahrens zfs_dirent_unlock(dl); 1603fa9e4066Sahrens 1604fa9e4066Sahrens if (error) { 1605fa9e4066Sahrens if (zp) 1606fa9e4066Sahrens VN_RELE(ZTOV(zp)); 1607fa9e4066Sahrens } else { 1608fa9e4066Sahrens *vpp = ZTOV(zp); 1609d47621a4STim Haley error = specvp_check(vpp, cr); 1610fa9e4066Sahrens } 1611fa9e4066Sahrens 161255da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 16135002558fSNeil Perrin zil_commit(zilog, 0); 161455da60b9SMark J Musante 
1615fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1616fa9e4066Sahrens return (error); 1617fa9e4066Sahrens } 1618fa9e4066Sahrens 1619fa9e4066Sahrens /* 1620fa9e4066Sahrens * Remove an entry from a directory. 1621fa9e4066Sahrens * 1622fa9e4066Sahrens * IN: dvp - vnode of directory to remove entry from. 1623fa9e4066Sahrens * name - name of entry to remove. 1624fa9e4066Sahrens * cr - credentials of caller. 1625da6c28aaSamw * ct - caller context 1626da6c28aaSamw * flags - case flags 1627fa9e4066Sahrens * 1628f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 1629fa9e4066Sahrens * 1630fa9e4066Sahrens * Timestamps: 1631fa9e4066Sahrens * dvp - ctime|mtime 1632fa9e4066Sahrens * vp - ctime (if nlink > 0) 1633fa9e4066Sahrens */ 16340a586ceaSMark Shellenbaum 16350a586ceaSMark Shellenbaum uint64_t null_xattr = 0; 16360a586ceaSMark Shellenbaum 1637da6c28aaSamw /*ARGSUSED*/ 1638fa9e4066Sahrens static int 1639da6c28aaSamw zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1640da6c28aaSamw int flags) 1641fa9e4066Sahrens { 1642fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 16430b2a8171SMark Shellenbaum znode_t *xzp; 1644fa9e4066Sahrens vnode_t *vp; 1645fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1646f18faf3fSek zilog_t *zilog; 16470b2a8171SMark Shellenbaum uint64_t acl_obj, xattr_obj; 16484bb73804SMatthew Ahrens uint64_t xattr_obj_unlinked = 0; 164951bd2f97SNeil Perrin uint64_t obj = 0; 1650fa9e4066Sahrens zfs_dirlock_t *dl; 1651fa9e4066Sahrens dmu_tx_t *tx; 1652893a6d32Sahrens boolean_t may_delete_now, delete_now = FALSE; 1653cdb0ab79Smaybee boolean_t unlinked, toobig = FALSE; 1654da6c28aaSamw uint64_t txtype; 1655da6c28aaSamw pathname_t *realnmp = NULL; 1656da6c28aaSamw pathname_t realnm; 1657fa9e4066Sahrens int error; 1658da6c28aaSamw int zflg = ZEXISTS; 165969962b56SMatthew Ahrens boolean_t waited = B_FALSE; 1660fa9e4066Sahrens 16613cb34c60Sahrens ZFS_ENTER(zfsvfs); 16623cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1663f18faf3fSek zilog = zfsvfs->z_log; 
1664fa9e4066Sahrens 1665da6c28aaSamw if (flags & FIGNORECASE) { 1666da6c28aaSamw zflg |= ZCILOOK; 1667da6c28aaSamw pn_alloc(&realnm); 1668da6c28aaSamw realnmp = &realnm; 1669da6c28aaSamw } 1670da6c28aaSamw 1671fa9e4066Sahrens top: 16720b2a8171SMark Shellenbaum xattr_obj = 0; 16730b2a8171SMark Shellenbaum xzp = NULL; 1674fa9e4066Sahrens /* 1675fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 1676fa9e4066Sahrens */ 1677da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1678da6c28aaSamw NULL, realnmp)) { 1679da6c28aaSamw if (realnmp) 1680da6c28aaSamw pn_free(realnmp); 1681fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1682fa9e4066Sahrens return (error); 1683fa9e4066Sahrens } 1684fa9e4066Sahrens 1685fa9e4066Sahrens vp = ZTOV(zp); 1686fa9e4066Sahrens 1687fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1688fa9e4066Sahrens goto out; 1689fa9e4066Sahrens } 1690fa9e4066Sahrens 1691fa9e4066Sahrens /* 1692fa9e4066Sahrens * Need to use rmdir for removing directories. 1693fa9e4066Sahrens */ 1694fa9e4066Sahrens if (vp->v_type == VDIR) { 1695be6fd75aSMatthew Ahrens error = SET_ERROR(EPERM); 1696fa9e4066Sahrens goto out; 1697fa9e4066Sahrens } 1698fa9e4066Sahrens 1699da6c28aaSamw vnevent_remove(vp, dvp, name, ct); 1700fa9e4066Sahrens 1701da6c28aaSamw if (realnmp) 1702ab04eb8eStimh dnlc_remove(dvp, realnmp->pn_buf); 1703da6c28aaSamw else 1704da6c28aaSamw dnlc_remove(dvp, name); 1705033f9833Sek 1706fa9e4066Sahrens mutex_enter(&vp->v_lock); 1707fa9e4066Sahrens may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1708fa9e4066Sahrens mutex_exit(&vp->v_lock); 1709fa9e4066Sahrens 1710fa9e4066Sahrens /* 1711893a6d32Sahrens * We may delete the znode now, or we may put it in the unlinked set; 1712fa9e4066Sahrens * it depends on whether we're the last link, and on whether there are 1713fa9e4066Sahrens * other holds on the vnode. So we dmu_tx_hold() the right things to 1714fa9e4066Sahrens * allow for either case. 
1715fa9e4066Sahrens */ 171651bd2f97SNeil Perrin obj = zp->z_id; 1717fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1718ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 17190a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 17200a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 17210a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 1722cdb0ab79Smaybee if (may_delete_now) { 1723cdb0ab79Smaybee toobig = 17240a586ceaSMark Shellenbaum zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1725cdb0ab79Smaybee /* if the file is too big, only hold_free a token amount */ 1726cdb0ab79Smaybee dmu_tx_hold_free(tx, zp->z_id, 0, 1727cdb0ab79Smaybee (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); 1728cdb0ab79Smaybee } 1729fa9e4066Sahrens 1730fa9e4066Sahrens /* are there any extended attributes? */ 17310a586ceaSMark Shellenbaum error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 17320a586ceaSMark Shellenbaum &xattr_obj, sizeof (xattr_obj)); 17330b2a8171SMark Shellenbaum if (error == 0 && xattr_obj) { 17340a586ceaSMark Shellenbaum error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1735fb09f5aaSMadhav Suresh ASSERT0(error); 17360a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 17370a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1738fa9e4066Sahrens } 1739fa9e4066Sahrens 17401412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 17411412a1a2SMark Shellenbaum if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 1742fa9e4066Sahrens dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 17431412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 1744fa9e4066Sahrens 1745fa9e4066Sahrens /* charge as an update -- would be nice not to charge at all */ 1746893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1747fa9e4066Sahrens 17484bb73804SMatthew Ahrens /* 17496575bca0SSimon Klinkert * Mark this transaction as typically resulting in a net free of space 17504bb73804SMatthew Ahrens */ 17516575bca0SSimon 
Klinkert dmu_tx_mark_netfree(tx); 17524bb73804SMatthew Ahrens 175369962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 1754fa9e4066Sahrens if (error) { 1755fa9e4066Sahrens zfs_dirent_unlock(dl); 1756fa9e4066Sahrens VN_RELE(vp); 17570b2a8171SMark Shellenbaum if (xzp) 17580b2a8171SMark Shellenbaum VN_RELE(ZTOV(xzp)); 17591209a471SNeil Perrin if (error == ERESTART) { 176069962b56SMatthew Ahrens waited = B_TRUE; 17618a2f1b91Sahrens dmu_tx_wait(tx); 17628a2f1b91Sahrens dmu_tx_abort(tx); 1763fa9e4066Sahrens goto top; 1764fa9e4066Sahrens } 1765da6c28aaSamw if (realnmp) 1766da6c28aaSamw pn_free(realnmp); 17678a2f1b91Sahrens dmu_tx_abort(tx); 1768fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1769fa9e4066Sahrens return (error); 1770fa9e4066Sahrens } 1771fa9e4066Sahrens 1772fa9e4066Sahrens /* 1773fa9e4066Sahrens * Remove the directory entry. 1774fa9e4066Sahrens */ 1775da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1776fa9e4066Sahrens 1777fa9e4066Sahrens if (error) { 1778fa9e4066Sahrens dmu_tx_commit(tx); 1779fa9e4066Sahrens goto out; 1780fa9e4066Sahrens } 1781fa9e4066Sahrens 1782893a6d32Sahrens if (unlinked) { 17831412a1a2SMark Shellenbaum /* 17841412a1a2SMark Shellenbaum * Hold z_lock so that we can make sure that the ACL obj 17851412a1a2SMark Shellenbaum * hasn't changed. Could have been deleted due to 17861412a1a2SMark Shellenbaum * zfs_sa_upgrade(). 
17871412a1a2SMark Shellenbaum */ 17881412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 1789fa9e4066Sahrens mutex_enter(&vp->v_lock); 17900a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 17910a586ceaSMark Shellenbaum &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 1792cdb0ab79Smaybee delete_now = may_delete_now && !toobig && 1793fa9e4066Sahrens vp->v_count == 1 && !vn_has_cached_data(vp) && 17941412a1a2SMark Shellenbaum xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 17950a586ceaSMark Shellenbaum acl_obj; 1796fa9e4066Sahrens mutex_exit(&vp->v_lock); 1797fa9e4066Sahrens } 1798fa9e4066Sahrens 1799fa9e4066Sahrens if (delete_now) { 18000a586ceaSMark Shellenbaum if (xattr_obj_unlinked) { 18010a586ceaSMark Shellenbaum ASSERT3U(xzp->z_links, ==, 2); 1802fa9e4066Sahrens mutex_enter(&xzp->z_lock); 1803893a6d32Sahrens xzp->z_unlinked = 1; 18040a586ceaSMark Shellenbaum xzp->z_links = 0; 18050a586ceaSMark Shellenbaum error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 18060a586ceaSMark Shellenbaum &xzp->z_links, sizeof (xzp->z_links), tx); 18070a586ceaSMark Shellenbaum ASSERT3U(error, ==, 0); 1808fa9e4066Sahrens mutex_exit(&xzp->z_lock); 1809893a6d32Sahrens zfs_unlinked_add(xzp, tx); 18101412a1a2SMark Shellenbaum 18110a586ceaSMark Shellenbaum if (zp->z_is_sa) 18120a586ceaSMark Shellenbaum error = sa_remove(zp->z_sa_hdl, 18130a586ceaSMark Shellenbaum SA_ZPL_XATTR(zfsvfs), tx); 18140a586ceaSMark Shellenbaum else 18150a586ceaSMark Shellenbaum error = sa_update(zp->z_sa_hdl, 18160a586ceaSMark Shellenbaum SA_ZPL_XATTR(zfsvfs), &null_xattr, 18170a586ceaSMark Shellenbaum sizeof (uint64_t), tx); 1818fb09f5aaSMadhav Suresh ASSERT0(error); 1819fa9e4066Sahrens } 1820fa9e4066Sahrens mutex_enter(&vp->v_lock); 1821ade42b55SSebastien Roy VN_RELE_LOCKED(vp); 1822fb09f5aaSMadhav Suresh ASSERT0(vp->v_count); 1823fa9e4066Sahrens mutex_exit(&vp->v_lock); 1824fa9e4066Sahrens mutex_exit(&zp->z_lock); 1825fa9e4066Sahrens zfs_znode_delete(zp, tx); 
1826893a6d32Sahrens } else if (unlinked) { 18271412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 1828893a6d32Sahrens zfs_unlinked_add(zp, tx); 1829fa9e4066Sahrens } 1830fa9e4066Sahrens 1831da6c28aaSamw txtype = TX_REMOVE; 1832da6c28aaSamw if (flags & FIGNORECASE) 1833da6c28aaSamw txtype |= TX_CI; 183451bd2f97SNeil Perrin zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 1835fa9e4066Sahrens 1836fa9e4066Sahrens dmu_tx_commit(tx); 1837fa9e4066Sahrens out: 1838da6c28aaSamw if (realnmp) 1839da6c28aaSamw pn_free(realnmp); 1840da6c28aaSamw 1841fa9e4066Sahrens zfs_dirent_unlock(dl); 1842fa9e4066Sahrens 184306e0070dSMark Shellenbaum if (!delete_now) 1844fa9e4066Sahrens VN_RELE(vp); 184506e0070dSMark Shellenbaum if (xzp) 1846fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 1847fa9e4066Sahrens 184855da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 18495002558fSNeil Perrin zil_commit(zilog, 0); 185055da60b9SMark J Musante 1851fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1852fa9e4066Sahrens return (error); 1853fa9e4066Sahrens } 1854fa9e4066Sahrens 1855fa9e4066Sahrens /* 1856fa9e4066Sahrens * Create a new directory and insert it into dvp using the name 1857fa9e4066Sahrens * provided. Return a pointer to the inserted directory. 1858fa9e4066Sahrens * 1859fa9e4066Sahrens * IN: dvp - vnode of directory to add subdir to. 1860fa9e4066Sahrens * dirname - name of new directory. 1861fa9e4066Sahrens * vap - attributes of new directory. 1862fa9e4066Sahrens * cr - credentials of caller. 1863da6c28aaSamw * ct - caller context 1864f7170741SWill Andrews * flags - case flags 1865da6c28aaSamw * vsecp - ACL to be set 1866fa9e4066Sahrens * 1867fa9e4066Sahrens * OUT: vpp - vnode of created directory. 1868fa9e4066Sahrens * 1869f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
1870fa9e4066Sahrens * 1871fa9e4066Sahrens * Timestamps: 1872fa9e4066Sahrens * dvp - ctime|mtime updated 1873fa9e4066Sahrens * vp - ctime|mtime|atime updated 1874fa9e4066Sahrens */ 1875da6c28aaSamw /*ARGSUSED*/ 1876fa9e4066Sahrens static int 1877da6c28aaSamw zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 1878da6c28aaSamw caller_context_t *ct, int flags, vsecattr_t *vsecp) 1879fa9e4066Sahrens { 1880fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1881fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1882f18faf3fSek zilog_t *zilog; 1883fa9e4066Sahrens zfs_dirlock_t *dl; 1884da6c28aaSamw uint64_t txtype; 1885fa9e4066Sahrens dmu_tx_t *tx; 1886fa9e4066Sahrens int error; 1887da6c28aaSamw int zf = ZNEW; 1888c1ce5987SMark Shellenbaum ksid_t *ksid; 1889c1ce5987SMark Shellenbaum uid_t uid; 1890c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 18910a586ceaSMark Shellenbaum zfs_acl_ids_t acl_ids; 189289459e17SMark Shellenbaum boolean_t fuid_dirtied; 189369962b56SMatthew Ahrens boolean_t waited = B_FALSE; 1894fa9e4066Sahrens 1895fa9e4066Sahrens ASSERT(vap->va_type == VDIR); 1896fa9e4066Sahrens 1897da6c28aaSamw /* 1898da6c28aaSamw * If we have an ephemeral id, ACL, or XVATTR then 1899da6c28aaSamw * make sure file system is at proper version 1900da6c28aaSamw */ 1901da6c28aaSamw 1902c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1903c1ce5987SMark Shellenbaum if (ksid) 1904c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1905c1ce5987SMark Shellenbaum else 1906c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1907da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1908c1ce5987SMark Shellenbaum (vsecp || (vap->va_mask & AT_XVATTR) || 1909756962ecSMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1910be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1911da6c28aaSamw 19123cb34c60Sahrens ZFS_ENTER(zfsvfs); 19133cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1914f18faf3fSek zilog = zfsvfs->z_log; 1915fa9e4066Sahrens 19160a586ceaSMark Shellenbaum if 
(dzp->z_pflags & ZFS_XATTR) { 1917fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1918be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1919fa9e4066Sahrens } 1920da6c28aaSamw 1921de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(dirname, 1922da6c28aaSamw strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1923da6c28aaSamw ZFS_EXIT(zfsvfs); 1924be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1925da6c28aaSamw } 1926da6c28aaSamw if (flags & FIGNORECASE) 1927da6c28aaSamw zf |= ZCILOOK; 1928da6c28aaSamw 1929c8c24165SMark Shellenbaum if (vap->va_mask & AT_XVATTR) { 1930da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1931da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1932da6c28aaSamw ZFS_EXIT(zfsvfs); 1933da6c28aaSamw return (error); 1934da6c28aaSamw } 1935c8c24165SMark Shellenbaum } 1936fa9e4066Sahrens 1937c8c24165SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 1938c8c24165SMark Shellenbaum vsecp, &acl_ids)) != 0) { 1939c8c24165SMark Shellenbaum ZFS_EXIT(zfsvfs); 1940c8c24165SMark Shellenbaum return (error); 1941c8c24165SMark Shellenbaum } 1942fa9e4066Sahrens /* 1943fa9e4066Sahrens * First make sure the new directory doesn't exist. 1944c8c24165SMark Shellenbaum * 1945c8c24165SMark Shellenbaum * Existence is checked first to make sure we don't return 1946c8c24165SMark Shellenbaum * EACCES instead of EEXIST which can cause some applications 1947c8c24165SMark Shellenbaum * to fail. 
1948fa9e4066Sahrens */ 1949da6c28aaSamw top: 1950da6c28aaSamw *vpp = NULL; 1951da6c28aaSamw 1952da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 1953da6c28aaSamw NULL, NULL)) { 1954c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1955fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1956fa9e4066Sahrens return (error); 1957fa9e4066Sahrens } 1958fa9e4066Sahrens 1959da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 1960c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1961d2443e76Smarks zfs_dirent_unlock(dl); 1962d2443e76Smarks ZFS_EXIT(zfsvfs); 1963d2443e76Smarks return (error); 1964d2443e76Smarks } 1965d2443e76Smarks 196614843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 19674929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 196814843421SMatthew Ahrens zfs_dirent_unlock(dl); 196914843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 1970be6fd75aSMatthew Ahrens return (SET_ERROR(EDQUOT)); 197114843421SMatthew Ahrens } 197289459e17SMark Shellenbaum 1973fa9e4066Sahrens /* 1974fa9e4066Sahrens * Add a new entry to the directory. 
1975fa9e4066Sahrens */ 1976fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1977ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 1978ea8dc4b6Seschrock dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 197989459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 198014843421SMatthew Ahrens if (fuid_dirtied) 198114843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 19820a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 19830a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 19840a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes); 19850a586ceaSMark Shellenbaum } 19860a586ceaSMark Shellenbaum 19870a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 19880a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE); 19890a586ceaSMark Shellenbaum 199069962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 1991fa9e4066Sahrens if (error) { 1992fa9e4066Sahrens zfs_dirent_unlock(dl); 19931209a471SNeil Perrin if (error == ERESTART) { 199469962b56SMatthew Ahrens waited = B_TRUE; 19958a2f1b91Sahrens dmu_tx_wait(tx); 19968a2f1b91Sahrens dmu_tx_abort(tx); 1997fa9e4066Sahrens goto top; 1998fa9e4066Sahrens } 1999c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 20008a2f1b91Sahrens dmu_tx_abort(tx); 2001fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2002fa9e4066Sahrens return (error); 2003fa9e4066Sahrens } 2004fa9e4066Sahrens 2005fa9e4066Sahrens /* 2006fa9e4066Sahrens * Create new node. 2007fa9e4066Sahrens */ 20080a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2009fa9e4066Sahrens 201089459e17SMark Shellenbaum if (fuid_dirtied) 201189459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 20120a586ceaSMark Shellenbaum 2013fa9e4066Sahrens /* 2014fa9e4066Sahrens * Now put new name in parent dir. 
2015fa9e4066Sahrens */ 2016fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 2017fa9e4066Sahrens 2018fa9e4066Sahrens *vpp = ZTOV(zp); 2019fa9e4066Sahrens 2020da6c28aaSamw txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 2021da6c28aaSamw if (flags & FIGNORECASE) 2022da6c28aaSamw txtype |= TX_CI; 202389459e17SMark Shellenbaum zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 202489459e17SMark Shellenbaum acl_ids.z_fuidp, vap); 2025da6c28aaSamw 202689459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 20270a586ceaSMark Shellenbaum 2028fa9e4066Sahrens dmu_tx_commit(tx); 2029fa9e4066Sahrens 2030fa9e4066Sahrens zfs_dirent_unlock(dl); 2031fa9e4066Sahrens 203255da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 20335002558fSNeil Perrin zil_commit(zilog, 0); 203455da60b9SMark J Musante 2035fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2036fa9e4066Sahrens return (0); 2037fa9e4066Sahrens } 2038fa9e4066Sahrens 2039fa9e4066Sahrens /* 2040fa9e4066Sahrens * Remove a directory subdir entry. If the current working 2041fa9e4066Sahrens * directory is the same as the subdir to be removed, the 2042fa9e4066Sahrens * remove will fail. 2043fa9e4066Sahrens * 2044fa9e4066Sahrens * IN: dvp - vnode of directory to remove from. 2045fa9e4066Sahrens * name - name of directory to be removed. 2046fa9e4066Sahrens * cwd - vnode of current working directory. 2047fa9e4066Sahrens * cr - credentials of caller. 2048da6c28aaSamw * ct - caller context 2049da6c28aaSamw * flags - case flags 2050fa9e4066Sahrens * 2051f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
2052fa9e4066Sahrens * 2053fa9e4066Sahrens * Timestamps: 2054fa9e4066Sahrens * dvp - ctime|mtime updated 2055fa9e4066Sahrens */ 2056da6c28aaSamw /*ARGSUSED*/ 2057fa9e4066Sahrens static int 2058da6c28aaSamw zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 2059da6c28aaSamw caller_context_t *ct, int flags) 2060fa9e4066Sahrens { 2061fa9e4066Sahrens znode_t *dzp = VTOZ(dvp); 2062fa9e4066Sahrens znode_t *zp; 2063fa9e4066Sahrens vnode_t *vp; 2064fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2065f18faf3fSek zilog_t *zilog; 2066fa9e4066Sahrens zfs_dirlock_t *dl; 2067fa9e4066Sahrens dmu_tx_t *tx; 2068fa9e4066Sahrens int error; 2069da6c28aaSamw int zflg = ZEXISTS; 207069962b56SMatthew Ahrens boolean_t waited = B_FALSE; 2071fa9e4066Sahrens 20723cb34c60Sahrens ZFS_ENTER(zfsvfs); 20733cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 2074f18faf3fSek zilog = zfsvfs->z_log; 2075fa9e4066Sahrens 2076da6c28aaSamw if (flags & FIGNORECASE) 2077da6c28aaSamw zflg |= ZCILOOK; 2078fa9e4066Sahrens top: 2079fa9e4066Sahrens zp = NULL; 2080fa9e4066Sahrens 2081fa9e4066Sahrens /* 2082fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 
2083fa9e4066Sahrens */ 2084da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 2085da6c28aaSamw NULL, NULL)) { 2086fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2087fa9e4066Sahrens return (error); 2088fa9e4066Sahrens } 2089fa9e4066Sahrens 2090fa9e4066Sahrens vp = ZTOV(zp); 2091fa9e4066Sahrens 2092fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2093fa9e4066Sahrens goto out; 2094fa9e4066Sahrens } 2095fa9e4066Sahrens 2096fa9e4066Sahrens if (vp->v_type != VDIR) { 2097be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTDIR); 2098fa9e4066Sahrens goto out; 2099fa9e4066Sahrens } 2100fa9e4066Sahrens 2101fa9e4066Sahrens if (vp == cwd) { 2102be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 2103fa9e4066Sahrens goto out; 2104fa9e4066Sahrens } 2105fa9e4066Sahrens 2106da6c28aaSamw vnevent_rmdir(vp, dvp, name, ct); 2107fa9e4066Sahrens 2108fa9e4066Sahrens /* 2109af2c4821Smaybee * Grab a lock on the directory to make sure that noone is 2110af2c4821Smaybee * trying to add (or lookup) entries while we are removing it. 2111af2c4821Smaybee */ 2112af2c4821Smaybee rw_enter(&zp->z_name_lock, RW_WRITER); 2113af2c4821Smaybee 2114af2c4821Smaybee /* 2115af2c4821Smaybee * Grab a lock on the parent pointer to make sure we play well 2116fa9e4066Sahrens * with the treewalk and directory rename code. 2117fa9e4066Sahrens */ 2118fa9e4066Sahrens rw_enter(&zp->z_parent_lock, RW_WRITER); 2119fa9e4066Sahrens 2120fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 2121ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 21220a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2123893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 21240a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 21250a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 212699189164SSimon Klinkert dmu_tx_mark_netfree(tx); 212769962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 2128fa9e4066Sahrens if (error) { 2129fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 2130af2c4821Smaybee rw_exit(&zp->z_name_lock); 2131fa9e4066Sahrens zfs_dirent_unlock(dl); 2132fa9e4066Sahrens VN_RELE(vp); 21331209a471SNeil Perrin if (error == ERESTART) { 213469962b56SMatthew Ahrens waited = B_TRUE; 21358a2f1b91Sahrens dmu_tx_wait(tx); 21368a2f1b91Sahrens dmu_tx_abort(tx); 2137fa9e4066Sahrens goto top; 2138fa9e4066Sahrens } 21398a2f1b91Sahrens dmu_tx_abort(tx); 2140fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2141fa9e4066Sahrens return (error); 2142fa9e4066Sahrens } 2143fa9e4066Sahrens 2144da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2145fa9e4066Sahrens 2146da6c28aaSamw if (error == 0) { 2147da6c28aaSamw uint64_t txtype = TX_RMDIR; 2148da6c28aaSamw if (flags & FIGNORECASE) 2149da6c28aaSamw txtype |= TX_CI; 21505002558fSNeil Perrin zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2151da6c28aaSamw } 2152fa9e4066Sahrens 2153fa9e4066Sahrens dmu_tx_commit(tx); 2154fa9e4066Sahrens 2155fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 2156af2c4821Smaybee rw_exit(&zp->z_name_lock); 2157fa9e4066Sahrens out: 2158fa9e4066Sahrens zfs_dirent_unlock(dl); 2159fa9e4066Sahrens 2160fa9e4066Sahrens VN_RELE(vp); 2161fa9e4066Sahrens 216255da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 21635002558fSNeil Perrin zil_commit(zilog, 0); 216455da60b9SMark J Musante 2165fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2166fa9e4066Sahrens return (error); 2167fa9e4066Sahrens } 2168fa9e4066Sahrens 2169fa9e4066Sahrens /* 2170fa9e4066Sahrens * Read as many directory entries as will fit into the provided 2171fa9e4066Sahrens * buffer from the given directory cursor position (specified in 2172f7170741SWill Andrews * the uio structure). 2173fa9e4066Sahrens * 2174fa9e4066Sahrens * IN: vp - vnode of directory to read. 2175fa9e4066Sahrens * uio - structure supplying read location, range info, 2176fa9e4066Sahrens * and return buffer. 
2177fa9e4066Sahrens * cr - credentials of caller. 2178da6c28aaSamw * ct - caller context 2179da6c28aaSamw * flags - case flags 2180fa9e4066Sahrens * 2181fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 2182fa9e4066Sahrens * eofp - set to true if end-of-file detected. 2183fa9e4066Sahrens * 2184f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 2185fa9e4066Sahrens * 2186fa9e4066Sahrens * Timestamps: 2187fa9e4066Sahrens * vp - atime updated 2188fa9e4066Sahrens * 2189fa9e4066Sahrens * Note that the low 4 bits of the cookie returned by zap is always zero. 2190fa9e4066Sahrens * This allows us to use the low range for "special" directory entries: 2191fa9e4066Sahrens * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2192fa9e4066Sahrens * we use the offset 2 for the '.zfs' directory. 2193fa9e4066Sahrens */ 2194fa9e4066Sahrens /* ARGSUSED */ 2195fa9e4066Sahrens static int 2196da6c28aaSamw zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, 2197da6c28aaSamw caller_context_t *ct, int flags) 2198fa9e4066Sahrens { 2199fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2200fa9e4066Sahrens iovec_t *iovp; 2201da6c28aaSamw edirent_t *eodp; 2202fa9e4066Sahrens dirent64_t *odp; 2203fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 22047f6e3e7dSperrin objset_t *os; 2205fa9e4066Sahrens caddr_t outbuf; 2206fa9e4066Sahrens size_t bufsize; 2207fa9e4066Sahrens zap_cursor_t zc; 2208fa9e4066Sahrens zap_attribute_t zap; 2209fa9e4066Sahrens uint_t bytes_wanted; 2210fa9e4066Sahrens uint64_t offset; /* must be unsigned; checks for < 1 */ 22110a586ceaSMark Shellenbaum uint64_t parent; 2212fa9e4066Sahrens int local_eof; 22137f6e3e7dSperrin int outcount; 22147f6e3e7dSperrin int error; 22157f6e3e7dSperrin uint8_t prefetch; 2216b38f0970Sck boolean_t check_sysattrs; 2217fa9e4066Sahrens 22183cb34c60Sahrens ZFS_ENTER(zfsvfs); 22193cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2220fa9e4066Sahrens 22210a586ceaSMark Shellenbaum if ((error = 
sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 22220a586ceaSMark Shellenbaum &parent, sizeof (parent))) != 0) { 22230a586ceaSMark Shellenbaum ZFS_EXIT(zfsvfs); 22240a586ceaSMark Shellenbaum return (error); 22250a586ceaSMark Shellenbaum } 22260a586ceaSMark Shellenbaum 2227fa9e4066Sahrens /* 2228fa9e4066Sahrens * If we are not given an eof variable, 2229fa9e4066Sahrens * use a local one. 2230fa9e4066Sahrens */ 2231fa9e4066Sahrens if (eofp == NULL) 2232fa9e4066Sahrens eofp = &local_eof; 2233fa9e4066Sahrens 2234fa9e4066Sahrens /* 2235fa9e4066Sahrens * Check for valid iov_len. 2236fa9e4066Sahrens */ 2237fa9e4066Sahrens if (uio->uio_iov->iov_len <= 0) { 2238fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2239be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 2240fa9e4066Sahrens } 2241fa9e4066Sahrens 2242fa9e4066Sahrens /* 2243fa9e4066Sahrens * Quit if directory has been removed (posix) 2244fa9e4066Sahrens */ 2245893a6d32Sahrens if ((*eofp = zp->z_unlinked) != 0) { 2246fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2247fa9e4066Sahrens return (0); 2248fa9e4066Sahrens } 2249fa9e4066Sahrens 22507f6e3e7dSperrin error = 0; 22517f6e3e7dSperrin os = zfsvfs->z_os; 22527f6e3e7dSperrin offset = uio->uio_loffset; 22537f6e3e7dSperrin prefetch = zp->z_zn_prefetch; 22547f6e3e7dSperrin 2255fa9e4066Sahrens /* 2256fa9e4066Sahrens * Initialize the iterator cursor. 2257fa9e4066Sahrens */ 2258fa9e4066Sahrens if (offset <= 3) { 2259fa9e4066Sahrens /* 2260fa9e4066Sahrens * Start iteration from the beginning of the directory. 2261fa9e4066Sahrens */ 22627f6e3e7dSperrin zap_cursor_init(&zc, os, zp->z_id); 2263fa9e4066Sahrens } else { 2264fa9e4066Sahrens /* 2265fa9e4066Sahrens * The offset is a serialized cursor. 2266fa9e4066Sahrens */ 22677f6e3e7dSperrin zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2268fa9e4066Sahrens } 2269fa9e4066Sahrens 2270fa9e4066Sahrens /* 2271fa9e4066Sahrens * Get space to change directory entries into fs independent format. 
2272fa9e4066Sahrens */ 2273fa9e4066Sahrens iovp = uio->uio_iov; 2274fa9e4066Sahrens bytes_wanted = iovp->iov_len; 2275fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2276fa9e4066Sahrens bufsize = bytes_wanted; 2277fa9e4066Sahrens outbuf = kmem_alloc(bufsize, KM_SLEEP); 2278fa9e4066Sahrens odp = (struct dirent64 *)outbuf; 2279fa9e4066Sahrens } else { 2280fa9e4066Sahrens bufsize = bytes_wanted; 2281d5285caeSGeorge Wilson outbuf = NULL; 2282fa9e4066Sahrens odp = (struct dirent64 *)iovp->iov_base; 2283fa9e4066Sahrens } 2284da6c28aaSamw eodp = (struct edirent *)odp; 2285fa9e4066Sahrens 2286b38f0970Sck /* 22879660e5cbSJanice Chang * If this VFS supports the system attribute view interface; and 22889660e5cbSJanice Chang * we're looking at an extended attribute directory; and we care 22899660e5cbSJanice Chang * about normalization conflicts on this vfs; then we must check 22909660e5cbSJanice Chang * for normalization conflicts with the sysattr name space. 2291b38f0970Sck */ 22929660e5cbSJanice Chang check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2293b38f0970Sck (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2294b38f0970Sck (flags & V_RDDIR_ENTFLAGS); 2295b38f0970Sck 2296fa9e4066Sahrens /* 2297fa9e4066Sahrens * Transform to file-system independent format 2298fa9e4066Sahrens */ 2299fa9e4066Sahrens outcount = 0; 2300fa9e4066Sahrens while (outcount < bytes_wanted) { 2301b1b8ab34Slling ino64_t objnum; 2302b1b8ab34Slling ushort_t reclen; 230397f85387STim Haley off64_t *next = NULL; 2304b1b8ab34Slling 2305fa9e4066Sahrens /* 2306fa9e4066Sahrens * Special case `.', `..', and `.zfs'. 
2307fa9e4066Sahrens */ 2308fa9e4066Sahrens if (offset == 0) { 2309fa9e4066Sahrens (void) strcpy(zap.za_name, "."); 2310da6c28aaSamw zap.za_normalization_conflict = 0; 2311b1b8ab34Slling objnum = zp->z_id; 2312fa9e4066Sahrens } else if (offset == 1) { 2313fa9e4066Sahrens (void) strcpy(zap.za_name, ".."); 2314da6c28aaSamw zap.za_normalization_conflict = 0; 23150a586ceaSMark Shellenbaum objnum = parent; 2316fa9e4066Sahrens } else if (offset == 2 && zfs_show_ctldir(zp)) { 2317fa9e4066Sahrens (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2318da6c28aaSamw zap.za_normalization_conflict = 0; 2319b1b8ab34Slling objnum = ZFSCTL_INO_ROOT; 2320fa9e4066Sahrens } else { 2321fa9e4066Sahrens /* 2322fa9e4066Sahrens * Grab next entry. 2323fa9e4066Sahrens */ 2324fa9e4066Sahrens if (error = zap_cursor_retrieve(&zc, &zap)) { 2325fa9e4066Sahrens if ((*eofp = (error == ENOENT)) != 0) 2326fa9e4066Sahrens break; 2327fa9e4066Sahrens else 2328fa9e4066Sahrens goto update; 2329fa9e4066Sahrens } 2330fa9e4066Sahrens 2331fa9e4066Sahrens if (zap.za_integer_length != 8 || 2332fa9e4066Sahrens zap.za_num_integers != 1) { 2333fa9e4066Sahrens cmn_err(CE_WARN, "zap_readdir: bad directory " 2334fa9e4066Sahrens "entry, obj = %lld, offset = %lld\n", 2335fa9e4066Sahrens (u_longlong_t)zp->z_id, 2336fa9e4066Sahrens (u_longlong_t)offset); 2337be6fd75aSMatthew Ahrens error = SET_ERROR(ENXIO); 2338fa9e4066Sahrens goto update; 2339fa9e4066Sahrens } 2340b1b8ab34Slling 2341b1b8ab34Slling objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2342b1b8ab34Slling /* 2343b1b8ab34Slling * MacOS X can extract the object type here such as: 2344b1b8ab34Slling * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2345b1b8ab34Slling */ 2346b38f0970Sck 2347b38f0970Sck if (check_sysattrs && !zap.za_normalization_conflict) { 2348b38f0970Sck zap.za_normalization_conflict = 2349b38f0970Sck xattr_sysattr_casechk(zap.za_name); 2350b38f0970Sck } 2351fa9e4066Sahrens } 2352da6c28aaSamw 2353e802abbdSTim Haley if (flags & V_RDDIR_ACCFILTER) { 
2354e802abbdSTim Haley /* 2355e802abbdSTim Haley * If we have no access at all, don't include 2356e802abbdSTim Haley * this entry in the returned information 2357e802abbdSTim Haley */ 2358e802abbdSTim Haley znode_t *ezp; 2359e802abbdSTim Haley if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2360e802abbdSTim Haley goto skip_entry; 2361e802abbdSTim Haley if (!zfs_has_access(ezp, cr)) { 2362e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2363e802abbdSTim Haley goto skip_entry; 2364e802abbdSTim Haley } 2365e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2366e802abbdSTim Haley } 2367e802abbdSTim Haley 2368da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) 2369da6c28aaSamw reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2370da6c28aaSamw else 2371da6c28aaSamw reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2372fa9e4066Sahrens 2373fa9e4066Sahrens /* 2374fa9e4066Sahrens * Will this entry fit in the buffer? 2375fa9e4066Sahrens */ 2376b1b8ab34Slling if (outcount + reclen > bufsize) { 2377fa9e4066Sahrens /* 2378fa9e4066Sahrens * Did we manage to fit anything in the buffer? 2379fa9e4066Sahrens */ 2380fa9e4066Sahrens if (!outcount) { 2381be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 2382fa9e4066Sahrens goto update; 2383fa9e4066Sahrens } 2384fa9e4066Sahrens break; 2385fa9e4066Sahrens } 2386da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) { 2387da6c28aaSamw /* 2388da6c28aaSamw * Add extended flag entry: 2389da6c28aaSamw */ 2390da6c28aaSamw eodp->ed_ino = objnum; 2391da6c28aaSamw eodp->ed_reclen = reclen; 2392da6c28aaSamw /* NOTE: ed_off is the offset for the *next* entry */ 2393da6c28aaSamw next = &(eodp->ed_off); 2394da6c28aaSamw eodp->ed_eflags = zap.za_normalization_conflict ? 
2395da6c28aaSamw ED_CASE_CONFLICT : 0; 2396da6c28aaSamw (void) strncpy(eodp->ed_name, zap.za_name, 2397da6c28aaSamw EDIRENT_NAMELEN(reclen)); 2398da6c28aaSamw eodp = (edirent_t *)((intptr_t)eodp + reclen); 2399da6c28aaSamw } else { 2400da6c28aaSamw /* 2401da6c28aaSamw * Add normal entry: 2402da6c28aaSamw */ 2403da6c28aaSamw odp->d_ino = objnum; 2404da6c28aaSamw odp->d_reclen = reclen; 2405da6c28aaSamw /* NOTE: d_off is the offset for the *next* entry */ 2406da6c28aaSamw next = &(odp->d_off); 2407da6c28aaSamw (void) strncpy(odp->d_name, zap.za_name, 2408da6c28aaSamw DIRENT64_NAMELEN(reclen)); 2409da6c28aaSamw odp = (dirent64_t *)((intptr_t)odp + reclen); 2410da6c28aaSamw } 2411b1b8ab34Slling outcount += reclen; 2412fa9e4066Sahrens 2413fa9e4066Sahrens ASSERT(outcount <= bufsize); 2414fa9e4066Sahrens 2415fa9e4066Sahrens /* Prefetch znode */ 24167f6e3e7dSperrin if (prefetch) 2417a2cdcdd2SPaul Dagnelie dmu_prefetch(os, objnum, 0, 0, 0, 2418a2cdcdd2SPaul Dagnelie ZIO_PRIORITY_SYNC_READ); 2419fa9e4066Sahrens 2420e802abbdSTim Haley skip_entry: 2421fa9e4066Sahrens /* 2422fa9e4066Sahrens * Move to the next entry, fill in the previous offset. 
2423fa9e4066Sahrens */ 2424fa9e4066Sahrens if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2425fa9e4066Sahrens zap_cursor_advance(&zc); 2426fa9e4066Sahrens offset = zap_cursor_serialize(&zc); 2427fa9e4066Sahrens } else { 2428fa9e4066Sahrens offset += 1; 2429fa9e4066Sahrens } 243097f85387STim Haley if (next) 243197f85387STim Haley *next = offset; 2432fa9e4066Sahrens } 24337f6e3e7dSperrin zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2434fa9e4066Sahrens 2435fa9e4066Sahrens if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2436fa9e4066Sahrens iovp->iov_base += outcount; 2437fa9e4066Sahrens iovp->iov_len -= outcount; 2438fa9e4066Sahrens uio->uio_resid -= outcount; 2439fa9e4066Sahrens } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2440fa9e4066Sahrens /* 2441fa9e4066Sahrens * Reset the pointer. 2442fa9e4066Sahrens */ 2443fa9e4066Sahrens offset = uio->uio_loffset; 2444fa9e4066Sahrens } 2445fa9e4066Sahrens 2446fa9e4066Sahrens update: 244787e5029aSahrens zap_cursor_fini(&zc); 2448fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2449fa9e4066Sahrens kmem_free(outbuf, bufsize); 2450fa9e4066Sahrens 2451fa9e4066Sahrens if (error == ENOENT) 2452fa9e4066Sahrens error = 0; 2453fa9e4066Sahrens 2454fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2455fa9e4066Sahrens 2456fa9e4066Sahrens uio->uio_loffset = offset; 2457fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2458fa9e4066Sahrens return (error); 2459fa9e4066Sahrens } 2460fa9e4066Sahrens 2461ec533521Sfr ulong_t zfs_fsync_sync_cnt = 4; 2462ec533521Sfr 2463fa9e4066Sahrens static int 2464da6c28aaSamw zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2465fa9e4066Sahrens { 2466fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2467fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2468fa9e4066Sahrens 2469b468a217Seschrock /* 2470b468a217Seschrock * Regardless of whether this is required for standards conformance, 2471b468a217Seschrock * this 
is the logical behavior when fsync() is called on a file with 2472b468a217Seschrock * dirty pages. We use B_ASYNC since the ZIL transactions are already 2473b468a217Seschrock * going to be pushed out as part of the zil_commit(). 2474b468a217Seschrock */ 2475b468a217Seschrock if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) && 2476b468a217Seschrock (vp->v_type == VREG) && !(IS_SWAPVP(vp))) 2477da6c28aaSamw (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct); 2478b468a217Seschrock 2479ec533521Sfr (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2480ec533521Sfr 248155da60b9SMark J Musante if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 248255da60b9SMark J Musante ZFS_ENTER(zfsvfs); 248355da60b9SMark J Musante ZFS_VERIFY_ZP(zp); 24845002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 248555da60b9SMark J Musante ZFS_EXIT(zfsvfs); 248655da60b9SMark J Musante } 2487fa9e4066Sahrens return (0); 2488fa9e4066Sahrens } 2489fa9e4066Sahrens 2490da6c28aaSamw 2491fa9e4066Sahrens /* 2492fa9e4066Sahrens * Get the requested file attributes and place them in the provided 2493fa9e4066Sahrens * vattr structure. 2494fa9e4066Sahrens * 2495fa9e4066Sahrens * IN: vp - vnode of file. 2496fa9e4066Sahrens * vap - va_mask identifies requested attributes. 2497da6c28aaSamw * If AT_XVATTR set, then optional attrs are requested 2498da6c28aaSamw * flags - ATTR_NOACLCHECK (CIFS server context) 2499fa9e4066Sahrens * cr - credentials of caller. 2500da6c28aaSamw * ct - caller context 2501fa9e4066Sahrens * 2502fa9e4066Sahrens * OUT: vap - attribute values. 2503fa9e4066Sahrens * 2504f7170741SWill Andrews * RETURN: 0 (always succeeds). 
2505fa9e4066Sahrens */ 2506fa9e4066Sahrens /* ARGSUSED */ 2507fa9e4066Sahrens static int 2508da6c28aaSamw zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2509da6c28aaSamw caller_context_t *ct) 2510fa9e4066Sahrens { 2511fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2512fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2513da6c28aaSamw int error = 0; 2514ecd6cf80Smarks uint64_t links; 25150a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 2516da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2517da6c28aaSamw xoptattr_t *xoap = NULL; 2518da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 25190a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[2]; 25200a586ceaSMark Shellenbaum int count = 0; 2521fa9e4066Sahrens 25223cb34c60Sahrens ZFS_ENTER(zfsvfs); 25233cb34c60Sahrens ZFS_VERIFY_ZP(zp); 25240a586ceaSMark Shellenbaum 2525f1696b23SMark Shellenbaum zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2526f1696b23SMark Shellenbaum 25270a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 25280a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 25290a586ceaSMark Shellenbaum 25300a586ceaSMark Shellenbaum if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 25310a586ceaSMark Shellenbaum ZFS_EXIT(zfsvfs); 25320a586ceaSMark Shellenbaum return (error); 25330a586ceaSMark Shellenbaum } 2534fa9e4066Sahrens 2535da6c28aaSamw /* 2536da6c28aaSamw * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2537da6c28aaSamw * Also, if we are the owner don't bother, since owner should 2538da6c28aaSamw * always be allowed to read basic attributes of file. 
2539da6c28aaSamw */ 2540f1696b23SMark Shellenbaum if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2541f1696b23SMark Shellenbaum (vap->va_uid != crgetuid(cr))) { 2542da6c28aaSamw if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2543da6c28aaSamw skipaclchk, cr)) { 2544da6c28aaSamw ZFS_EXIT(zfsvfs); 2545da6c28aaSamw return (error); 2546da6c28aaSamw } 2547da6c28aaSamw } 2548da6c28aaSamw 2549fa9e4066Sahrens /* 2550fa9e4066Sahrens * Return all attributes. It's cheaper to provide the answer 2551fa9e4066Sahrens * than to determine whether we were asked the question. 2552fa9e4066Sahrens */ 2553fa9e4066Sahrens 255434f345efSRay Hassan mutex_enter(&zp->z_lock); 2555fa9e4066Sahrens vap->va_type = vp->v_type; 25560a586ceaSMark Shellenbaum vap->va_mode = zp->z_mode & MODEMASK; 2557fa9e4066Sahrens vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2558fa9e4066Sahrens vap->va_nodeid = zp->z_id; 2559ecd6cf80Smarks if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 25600a586ceaSMark Shellenbaum links = zp->z_links + 1; 2561ecd6cf80Smarks else 25620a586ceaSMark Shellenbaum links = zp->z_links; 2563ecd6cf80Smarks vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 25640a586ceaSMark Shellenbaum vap->va_size = zp->z_size; 256572fc53bcSmarks vap->va_rdev = vp->v_rdev; 2566fa9e4066Sahrens vap->va_seq = zp->z_seq; 2567fa9e4066Sahrens 2568fa9e4066Sahrens /* 2569da6c28aaSamw * Add in any requested optional attributes and the create time. 2570da6c28aaSamw * Also set the corresponding bits in the returned attribute bitmap. 
2571fa9e4066Sahrens */ 2572da6c28aaSamw if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2573da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2574da6c28aaSamw xoap->xoa_archive = 25750a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2576da6c28aaSamw XVA_SET_RTN(xvap, XAT_ARCHIVE); 2577da6c28aaSamw } 2578da6c28aaSamw 2579da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2580da6c28aaSamw xoap->xoa_readonly = 25810a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_READONLY) != 0); 2582da6c28aaSamw XVA_SET_RTN(xvap, XAT_READONLY); 2583da6c28aaSamw } 2584da6c28aaSamw 2585da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2586da6c28aaSamw xoap->xoa_system = 25870a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_SYSTEM) != 0); 2588da6c28aaSamw XVA_SET_RTN(xvap, XAT_SYSTEM); 2589da6c28aaSamw } 2590da6c28aaSamw 2591da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2592da6c28aaSamw xoap->xoa_hidden = 25930a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_HIDDEN) != 0); 2594da6c28aaSamw XVA_SET_RTN(xvap, XAT_HIDDEN); 2595da6c28aaSamw } 2596da6c28aaSamw 2597da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2598da6c28aaSamw xoap->xoa_nounlink = 25990a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2600da6c28aaSamw XVA_SET_RTN(xvap, XAT_NOUNLINK); 2601da6c28aaSamw } 2602da6c28aaSamw 2603da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2604da6c28aaSamw xoap->xoa_immutable = 26050a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2606da6c28aaSamw XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2607da6c28aaSamw } 2608da6c28aaSamw 2609da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2610da6c28aaSamw xoap->xoa_appendonly = 26110a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2612da6c28aaSamw XVA_SET_RTN(xvap, XAT_APPENDONLY); 2613da6c28aaSamw } 2614da6c28aaSamw 2615da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2616da6c28aaSamw xoap->xoa_nodump = 26170a586ceaSMark Shellenbaum ((zp->z_pflags & 
ZFS_NODUMP) != 0); 2618da6c28aaSamw XVA_SET_RTN(xvap, XAT_NODUMP); 2619da6c28aaSamw } 2620da6c28aaSamw 2621da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2622da6c28aaSamw xoap->xoa_opaque = 26230a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_OPAQUE) != 0); 2624da6c28aaSamw XVA_SET_RTN(xvap, XAT_OPAQUE); 2625da6c28aaSamw } 2626da6c28aaSamw 2627da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2628da6c28aaSamw xoap->xoa_av_quarantined = 26290a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2630da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2631da6c28aaSamw } 2632da6c28aaSamw 2633da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2634da6c28aaSamw xoap->xoa_av_modified = 26350a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2636da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2637da6c28aaSamw } 2638da6c28aaSamw 2639da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 26400a586ceaSMark Shellenbaum vp->v_type == VREG) { 26410a586ceaSMark Shellenbaum zfs_sa_get_scanstamp(zp, xvap); 2642da6c28aaSamw } 2643da6c28aaSamw 2644da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 26450a586ceaSMark Shellenbaum uint64_t times[2]; 26460a586ceaSMark Shellenbaum 26470a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 26480a586ceaSMark Shellenbaum times, sizeof (times)); 26490a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2650da6c28aaSamw XVA_SET_RTN(xvap, XAT_CREATETIME); 2651fa9e4066Sahrens } 26527a286c47SDai Ngo 26537a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 26540a586ceaSMark Shellenbaum xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 26557a286c47SDai Ngo XVA_SET_RTN(xvap, XAT_REPARSE); 26567a286c47SDai Ngo } 265799d5e173STim Haley if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 265899d5e173STim Haley xoap->xoa_generation = zp->z_gen; 265999d5e173STim Haley XVA_SET_RTN(xvap, XAT_GEN); 266099d5e173STim Haley } 2661fd9ee8b5Sjoyce mcintosh 
2662fd9ee8b5Sjoyce mcintosh if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2663fd9ee8b5Sjoyce mcintosh xoap->xoa_offline = 2664fd9ee8b5Sjoyce mcintosh ((zp->z_pflags & ZFS_OFFLINE) != 0); 2665fd9ee8b5Sjoyce mcintosh XVA_SET_RTN(xvap, XAT_OFFLINE); 2666fd9ee8b5Sjoyce mcintosh } 2667fd9ee8b5Sjoyce mcintosh 2668fd9ee8b5Sjoyce mcintosh if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2669fd9ee8b5Sjoyce mcintosh xoap->xoa_sparse = 2670fd9ee8b5Sjoyce mcintosh ((zp->z_pflags & ZFS_SPARSE) != 0); 2671fd9ee8b5Sjoyce mcintosh XVA_SET_RTN(xvap, XAT_SPARSE); 2672fd9ee8b5Sjoyce mcintosh } 2673fa9e4066Sahrens } 2674fa9e4066Sahrens 26750a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 26760a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_mtime, mtime); 26770a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2678da6c28aaSamw 2679fa9e4066Sahrens mutex_exit(&zp->z_lock); 2680fa9e4066Sahrens 26810a586ceaSMark Shellenbaum sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks); 2682fa9e4066Sahrens 2683fa9e4066Sahrens if (zp->z_blksz == 0) { 2684fa9e4066Sahrens /* 2685fa9e4066Sahrens * Block size hasn't been set; suggest maximal I/O transfers. 2686fa9e4066Sahrens */ 2687fa9e4066Sahrens vap->va_blksize = zfsvfs->z_max_blksz; 2688fa9e4066Sahrens } 2689fa9e4066Sahrens 2690fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2691fa9e4066Sahrens return (0); 2692fa9e4066Sahrens } 2693fa9e4066Sahrens 2694fa9e4066Sahrens /* 2695fa9e4066Sahrens * Set the file attributes to the values contained in the 2696fa9e4066Sahrens * vattr structure. 2697fa9e4066Sahrens * 2698fa9e4066Sahrens * IN: vp - vnode of file to be modified. 2699fa9e4066Sahrens * vap - new attribute values. 2700da6c28aaSamw * If AT_XVATTR set, then optional attrs are being set 2701fa9e4066Sahrens * flags - ATTR_UTIME set if non-default time values provided. 2702da6c28aaSamw * - ATTR_NOACLCHECK (CIFS context only). 2703fa9e4066Sahrens * cr - credentials of caller. 
2704da6c28aaSamw * ct - caller context 2705fa9e4066Sahrens * 2706f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 2707fa9e4066Sahrens * 2708fa9e4066Sahrens * Timestamps: 2709fa9e4066Sahrens * vp - ctime updated, mtime updated if size changed. 2710fa9e4066Sahrens */ 2711fa9e4066Sahrens /* ARGSUSED */ 2712fa9e4066Sahrens static int 2713fa9e4066Sahrens zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2714f7170741SWill Andrews caller_context_t *ct) 2715fa9e4066Sahrens { 2716f18faf3fSek znode_t *zp = VTOZ(vp); 2717fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2718f18faf3fSek zilog_t *zilog; 2719fa9e4066Sahrens dmu_tx_t *tx; 2720fa9e4066Sahrens vattr_t oldva; 2721ae4caef8SMark Shellenbaum xvattr_t tmpxvattr; 27225730cc9aSmaybee uint_t mask = vap->va_mask; 2723d5285caeSGeorge Wilson uint_t saved_mask = 0; 2724f92daba9Smarks int trim_mask = 0; 2725fa9e4066Sahrens uint64_t new_mode; 272689459e17SMark Shellenbaum uint64_t new_uid, new_gid; 27270b2a8171SMark Shellenbaum uint64_t xattr_obj; 27280a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 2729d2443e76Smarks znode_t *attrzp; 2730fa9e4066Sahrens int need_policy = FALSE; 27310a586ceaSMark Shellenbaum int err, err2; 2732da6c28aaSamw zfs_fuid_info_t *fuidp = NULL; 2733da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2734da6c28aaSamw xoptattr_t *xoap; 27350b2a8171SMark Shellenbaum zfs_acl_t *aclp; 2736da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 27370a586ceaSMark Shellenbaum boolean_t fuid_dirtied = B_FALSE; 27380a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[7], xattr_bulk[7]; 27390a586ceaSMark Shellenbaum int count = 0, xattr_count = 0; 2740fa9e4066Sahrens 2741fa9e4066Sahrens if (mask == 0) 2742fa9e4066Sahrens return (0); 2743fa9e4066Sahrens 2744fa9e4066Sahrens if (mask & AT_NOSET) 2745be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 2746fa9e4066Sahrens 27473cb34c60Sahrens ZFS_ENTER(zfsvfs); 27483cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2749da6c28aaSamw 2750da6c28aaSamw zilog = zfsvfs->z_log; 2751da6c28aaSamw 2752da6c28aaSamw /* 2753da6c28aaSamw * Make sure that if we have ephemeral uid/gid or xvattr specified 2754da6c28aaSamw * that file system is at proper version level 2755da6c28aaSamw */ 2756da6c28aaSamw 2757da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 2758da6c28aaSamw (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2759da6c28aaSamw ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 276002dcba3bStimh (mask & AT_XVATTR))) { 276102dcba3bStimh ZFS_EXIT(zfsvfs); 2762be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 276302dcba3bStimh } 2764da6c28aaSamw 276502dcba3bStimh if (mask & AT_SIZE && vp->v_type == VDIR) { 276602dcba3bStimh ZFS_EXIT(zfsvfs); 2767be6fd75aSMatthew Ahrens return (SET_ERROR(EISDIR)); 276802dcba3bStimh } 2769fa9e4066Sahrens 277002dcba3bStimh if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 277102dcba3bStimh ZFS_EXIT(zfsvfs); 2772be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 277302dcba3bStimh } 277484c5a155Smarks 2775da6c28aaSamw /* 2776da6c28aaSamw * If this is an xvattr_t, then get a pointer to the structure of 2777da6c28aaSamw * optional attributes. If this is NULL, then we have a vattr_t. 
2778da6c28aaSamw */ 2779da6c28aaSamw xoap = xva_getxoptattr(xvap); 2780da6c28aaSamw 2781ae4caef8SMark Shellenbaum xva_init(&tmpxvattr); 2782ae4caef8SMark Shellenbaum 2783da6c28aaSamw /* 2784da6c28aaSamw * Immutable files can only alter immutable bit and atime 2785da6c28aaSamw */ 27860a586ceaSMark Shellenbaum if ((zp->z_pflags & ZFS_IMMUTABLE) && 2787da6c28aaSamw ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 278802dcba3bStimh ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 278902dcba3bStimh ZFS_EXIT(zfsvfs); 2790be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 279102dcba3bStimh } 2792da6c28aaSamw 27932889ec41SGordon Ross /* 27942889ec41SGordon Ross * Note: ZFS_READONLY is handled in zfs_zaccess_common. 27952889ec41SGordon Ross */ 2796fa9e4066Sahrens 279793129341Smarks /* 279893129341Smarks * Verify timestamps doesn't overflow 32 bits. 279993129341Smarks * ZFS can handle large timestamps, but 32bit syscalls can't 280093129341Smarks * handle times greater than 2039. This check should be removed 280193129341Smarks * once large timestamps are fully supported. 280293129341Smarks */ 280393129341Smarks if (mask & (AT_ATIME | AT_MTIME)) { 280493129341Smarks if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 280593129341Smarks ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 280693129341Smarks ZFS_EXIT(zfsvfs); 2807be6fd75aSMatthew Ahrens return (SET_ERROR(EOVERFLOW)); 280893129341Smarks } 280993129341Smarks } 281093129341Smarks 2811fa9e4066Sahrens top: 2812d2443e76Smarks attrzp = NULL; 28130b2a8171SMark Shellenbaum aclp = NULL; 2814fa9e4066Sahrens 2815d47621a4STim Haley /* Can this be moved to before the top label? 
*/ 2816fa9e4066Sahrens if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2817fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2818be6fd75aSMatthew Ahrens return (SET_ERROR(EROFS)); 2819fa9e4066Sahrens } 2820fa9e4066Sahrens 2821fa9e4066Sahrens /* 2822fa9e4066Sahrens * First validate permissions 2823fa9e4066Sahrens */ 2824fa9e4066Sahrens 2825fa9e4066Sahrens if (mask & AT_SIZE) { 2826da6c28aaSamw err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); 2827fa9e4066Sahrens if (err) { 2828fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2829fa9e4066Sahrens return (err); 2830fa9e4066Sahrens } 28315730cc9aSmaybee /* 28325730cc9aSmaybee * XXX - Note, we are not providing any open 28335730cc9aSmaybee * mode flags here (like FNDELAY), so we may 28345730cc9aSmaybee * block if there are locks present... this 28355730cc9aSmaybee * should be addressed in openat(). 28365730cc9aSmaybee */ 2837cdb0ab79Smaybee /* XXX - would it be OK to generate a log record here? */ 2838cdb0ab79Smaybee err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 28395730cc9aSmaybee if (err) { 28405730cc9aSmaybee ZFS_EXIT(zfsvfs); 28415730cc9aSmaybee return (err); 28425730cc9aSmaybee } 284372102e74SBryan Cantrill 284472102e74SBryan Cantrill if (vap->va_size == 0) 284572102e74SBryan Cantrill vnevent_truncate(ZTOV(zp), ct); 2846fa9e4066Sahrens } 2847fa9e4066Sahrens 2848da6c28aaSamw if (mask & (AT_ATIME|AT_MTIME) || 2849da6c28aaSamw ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2850da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_READONLY) || 2851da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2852fd9ee8b5Sjoyce mcintosh XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 2853fd9ee8b5Sjoyce mcintosh XVA_ISSET_REQ(xvap, XAT_SPARSE) || 2854da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 28550a586ceaSMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2856da6c28aaSamw need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2857da6c28aaSamw skipaclchk, cr); 28580a586ceaSMark Shellenbaum } 2859fa9e4066Sahrens 2860fa9e4066Sahrens if (mask & (AT_UID|AT_GID)) { 
2861fa9e4066Sahrens int idmask = (mask & (AT_UID|AT_GID)); 2862fa9e4066Sahrens int take_owner; 2863fa9e4066Sahrens int take_group; 2864fa9e4066Sahrens 2865a933bc41Smarks /* 2866a933bc41Smarks * NOTE: even if a new mode is being set, 2867a933bc41Smarks * we may clear S_ISUID/S_ISGID bits. 2868a933bc41Smarks */ 2869a933bc41Smarks 2870a933bc41Smarks if (!(mask & AT_MODE)) 28710a586ceaSMark Shellenbaum vap->va_mode = zp->z_mode; 2872a933bc41Smarks 2873fa9e4066Sahrens /* 2874fa9e4066Sahrens * Take ownership or chgrp to group we are a member of 2875fa9e4066Sahrens */ 2876fa9e4066Sahrens 2877fa9e4066Sahrens take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2878da6c28aaSamw take_group = (mask & AT_GID) && 2879da6c28aaSamw zfs_groupmember(zfsvfs, vap->va_gid, cr); 2880fa9e4066Sahrens 2881fa9e4066Sahrens /* 2882fa9e4066Sahrens * If both AT_UID and AT_GID are set then take_owner and 2883fa9e4066Sahrens * take_group must both be set in order to allow taking 2884fa9e4066Sahrens * ownership. 
2885fa9e4066Sahrens * 2886fa9e4066Sahrens * Otherwise, send the check through secpolicy_vnode_setattr() 2887fa9e4066Sahrens * 2888fa9e4066Sahrens */ 2889fa9e4066Sahrens 2890fa9e4066Sahrens if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2891fa9e4066Sahrens ((idmask == AT_UID) && take_owner) || 2892fa9e4066Sahrens ((idmask == AT_GID) && take_group)) { 2893da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2894da6c28aaSamw skipaclchk, cr) == 0) { 2895fa9e4066Sahrens /* 2896fa9e4066Sahrens * Remove setuid/setgid for non-privileged users 2897fa9e4066Sahrens */ 289813f9f30eSmarks secpolicy_setid_clear(vap, cr); 2899f92daba9Smarks trim_mask = (mask & (AT_UID|AT_GID)); 2900fa9e4066Sahrens } else { 2901fa9e4066Sahrens need_policy = TRUE; 2902fa9e4066Sahrens } 2903fa9e4066Sahrens } else { 2904fa9e4066Sahrens need_policy = TRUE; 2905fa9e4066Sahrens } 2906fa9e4066Sahrens } 2907fa9e4066Sahrens 2908f92daba9Smarks mutex_enter(&zp->z_lock); 29090a586ceaSMark Shellenbaum oldva.va_mode = zp->z_mode; 2910f1696b23SMark Shellenbaum zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2911da6c28aaSamw if (mask & AT_XVATTR) { 2912ae4caef8SMark Shellenbaum /* 2913ae4caef8SMark Shellenbaum * Update xvattr mask to include only those attributes 2914ae4caef8SMark Shellenbaum * that are actually changing. 2915ae4caef8SMark Shellenbaum * 2916ae4caef8SMark Shellenbaum * the bits will be restored prior to actually setting 2917ae4caef8SMark Shellenbaum * the attributes so the caller thinks they were set. 
2918ae4caef8SMark Shellenbaum */ 2919ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2920ae4caef8SMark Shellenbaum if (xoap->xoa_appendonly != 29210a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 2922ae4caef8SMark Shellenbaum need_policy = TRUE; 2923ae4caef8SMark Shellenbaum } else { 2924ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2925ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2926ae4caef8SMark Shellenbaum } 2927ae4caef8SMark Shellenbaum } 2928ae4caef8SMark Shellenbaum 2929ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2930ae4caef8SMark Shellenbaum if (xoap->xoa_nounlink != 29310a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 2932ae4caef8SMark Shellenbaum need_policy = TRUE; 2933ae4caef8SMark Shellenbaum } else { 2934ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2935ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2936ae4caef8SMark Shellenbaum } 2937ae4caef8SMark Shellenbaum } 2938ae4caef8SMark Shellenbaum 2939ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2940ae4caef8SMark Shellenbaum if (xoap->xoa_immutable != 29410a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 2942ae4caef8SMark Shellenbaum need_policy = TRUE; 2943ae4caef8SMark Shellenbaum } else { 2944ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2945ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 2946ae4caef8SMark Shellenbaum } 2947ae4caef8SMark Shellenbaum } 2948ae4caef8SMark Shellenbaum 2949ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2950ae4caef8SMark Shellenbaum if (xoap->xoa_nodump != 29510a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NODUMP) != 0)) { 2952ae4caef8SMark Shellenbaum need_policy = TRUE; 2953ae4caef8SMark Shellenbaum } else { 2954ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NODUMP); 2955ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 
2956ae4caef8SMark Shellenbaum } 2957ae4caef8SMark Shellenbaum } 2958ae4caef8SMark Shellenbaum 2959ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2960ae4caef8SMark Shellenbaum if (xoap->xoa_av_modified != 29610a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 2962ae4caef8SMark Shellenbaum need_policy = TRUE; 2963ae4caef8SMark Shellenbaum } else { 2964ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2965ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2966ae4caef8SMark Shellenbaum } 2967ae4caef8SMark Shellenbaum } 2968ae4caef8SMark Shellenbaum 2969ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2970ae4caef8SMark Shellenbaum if ((vp->v_type != VREG && 2971ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined) || 2972ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined != 29730a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 2974ae4caef8SMark Shellenbaum need_policy = TRUE; 2975ae4caef8SMark Shellenbaum } else { 2976ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2977ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2978ae4caef8SMark Shellenbaum } 2979ae4caef8SMark Shellenbaum } 2980ae4caef8SMark Shellenbaum 29817a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 29827a286c47SDai Ngo mutex_exit(&zp->z_lock); 29837a286c47SDai Ngo ZFS_EXIT(zfsvfs); 2984be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 29857a286c47SDai Ngo } 29867a286c47SDai Ngo 2987ae4caef8SMark Shellenbaum if (need_policy == FALSE && 2988ae4caef8SMark Shellenbaum (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 2989ae4caef8SMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2990da6c28aaSamw need_policy = TRUE; 2991da6c28aaSamw } 2992da6c28aaSamw } 2993da6c28aaSamw 2994f92daba9Smarks mutex_exit(&zp->z_lock); 2995fa9e4066Sahrens 2996f92daba9Smarks if (mask & AT_MODE) { 2997da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 
2998f92daba9Smarks err = secpolicy_setid_setsticky_clear(vp, vap, 2999f92daba9Smarks &oldva, cr); 3000f92daba9Smarks if (err) { 3001f92daba9Smarks ZFS_EXIT(zfsvfs); 3002f92daba9Smarks return (err); 3003f92daba9Smarks } 3004f92daba9Smarks trim_mask |= AT_MODE; 3005f92daba9Smarks } else { 3006f92daba9Smarks need_policy = TRUE; 3007f92daba9Smarks } 3008f92daba9Smarks } 300913f9f30eSmarks 3010f92daba9Smarks if (need_policy) { 301113f9f30eSmarks /* 301213f9f30eSmarks * If trim_mask is set then take ownership 3013f92daba9Smarks * has been granted or write_acl is present and user 3014f92daba9Smarks * has the ability to modify mode. In that case remove 3015f92daba9Smarks * UID|GID and or MODE from mask so that 301613f9f30eSmarks * secpolicy_vnode_setattr() doesn't revoke it. 301713f9f30eSmarks */ 301813f9f30eSmarks 3019f92daba9Smarks if (trim_mask) { 3020f92daba9Smarks saved_mask = vap->va_mask; 3021f92daba9Smarks vap->va_mask &= ~trim_mask; 3022f92daba9Smarks } 3023fa9e4066Sahrens err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3024da6c28aaSamw (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3025fa9e4066Sahrens if (err) { 3026fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3027fa9e4066Sahrens return (err); 3028fa9e4066Sahrens } 302913f9f30eSmarks 303013f9f30eSmarks if (trim_mask) 3031f92daba9Smarks vap->va_mask |= saved_mask; 3032fa9e4066Sahrens } 3033fa9e4066Sahrens 3034fa9e4066Sahrens /* 3035fa9e4066Sahrens * secpolicy_vnode_setattr, or take ownership may have 3036fa9e4066Sahrens * changed va_mask 3037fa9e4066Sahrens */ 3038fa9e4066Sahrens mask = vap->va_mask; 3039fa9e4066Sahrens 30400a586ceaSMark Shellenbaum if ((mask & (AT_UID | AT_GID))) { 30410b2a8171SMark Shellenbaum err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 30420b2a8171SMark Shellenbaum &xattr_obj, sizeof (xattr_obj)); 30430a586ceaSMark Shellenbaum 30440b2a8171SMark Shellenbaum if (err == 0 && xattr_obj) { 30450a586ceaSMark Shellenbaum err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 
30460a586ceaSMark Shellenbaum if (err) 30470a586ceaSMark Shellenbaum goto out2; 30480a586ceaSMark Shellenbaum } 30490a586ceaSMark Shellenbaum if (mask & AT_UID) { 30500a586ceaSMark Shellenbaum new_uid = zfs_fuid_create(zfsvfs, 30510a586ceaSMark Shellenbaum (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3052f1696b23SMark Shellenbaum if (new_uid != zp->z_uid && 30530a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 30540b2a8171SMark Shellenbaum if (attrzp) 30550b2a8171SMark Shellenbaum VN_RELE(ZTOV(attrzp)); 3056be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 30570a586ceaSMark Shellenbaum goto out2; 30580a586ceaSMark Shellenbaum } 30590a586ceaSMark Shellenbaum } 30600a586ceaSMark Shellenbaum 30610a586ceaSMark Shellenbaum if (mask & AT_GID) { 30620a586ceaSMark Shellenbaum new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 30630a586ceaSMark Shellenbaum cr, ZFS_GROUP, &fuidp); 30640a586ceaSMark Shellenbaum if (new_gid != zp->z_gid && 30650a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 30660b2a8171SMark Shellenbaum if (attrzp) 30670b2a8171SMark Shellenbaum VN_RELE(ZTOV(attrzp)); 3068be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 30690a586ceaSMark Shellenbaum goto out2; 30700a586ceaSMark Shellenbaum } 30710a586ceaSMark Shellenbaum } 30720a586ceaSMark Shellenbaum } 3073fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 3074fa9e4066Sahrens 3075fa9e4066Sahrens if (mask & AT_MODE) { 30760a586ceaSMark Shellenbaum uint64_t pmode = zp->z_mode; 30771412a1a2SMark Shellenbaum uint64_t acl_obj; 3078169cdae2Smarks new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 3079fa9e4066Sahrens 308071dbfc28SPaul B. Henson if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 308171dbfc28SPaul B. Henson !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3082be6fd75aSMatthew Ahrens err = SET_ERROR(EPERM); 308371dbfc28SPaul B. Henson goto out; 308471dbfc28SPaul B. Henson } 308571dbfc28SPaul B. 
Henson 3086a3c49ce1SAlbert Lee if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3087a3c49ce1SAlbert Lee goto out; 30880a586ceaSMark Shellenbaum 30891412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 30901412a1a2SMark Shellenbaum if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 30910a586ceaSMark Shellenbaum /* 30920a586ceaSMark Shellenbaum * Are we upgrading ACL from old V0 format 30930a586ceaSMark Shellenbaum * to V1 format? 30940a586ceaSMark Shellenbaum */ 30952bd6c4deSMark Shellenbaum if (zfsvfs->z_version >= ZPL_VERSION_FUID && 30961412a1a2SMark Shellenbaum zfs_znode_acl_version(zp) == 3097da6c28aaSamw ZFS_ACL_VERSION_INITIAL) { 30981412a1a2SMark Shellenbaum dmu_tx_hold_free(tx, acl_obj, 0, 3099da6c28aaSamw DMU_OBJECT_END); 3100da6c28aaSamw dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 31014c841f60Smarks 0, aclp->z_acl_bytes); 3102da6c28aaSamw } else { 31031412a1a2SMark Shellenbaum dmu_tx_hold_write(tx, acl_obj, 0, 31044c841f60Smarks aclp->z_acl_bytes); 31054c841f60Smarks } 31060a586ceaSMark Shellenbaum } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 31076d38e247Smarks dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 31086d38e247Smarks 0, aclp->z_acl_bytes); 3109da6c28aaSamw } 31101412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 31110a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 31120a586ceaSMark Shellenbaum } else { 31130a586ceaSMark Shellenbaum if ((mask & AT_XVATTR) && 31140a586ceaSMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 31150a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 31160a586ceaSMark Shellenbaum else 31170a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3118fa9e4066Sahrens } 3119fa9e4066Sahrens 31200a586ceaSMark Shellenbaum if (attrzp) { 31210a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3122d2443e76Smarks } 3123d2443e76Smarks 31240a586ceaSMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 31250a586ceaSMark Shellenbaum if 
(fuid_dirtied) 31260a586ceaSMark Shellenbaum zfs_fuid_txhold(zfsvfs, tx); 31270a586ceaSMark Shellenbaum 31280a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 31290a586ceaSMark Shellenbaum 3130e722410cSMatthew Ahrens err = dmu_tx_assign(tx, TXG_WAIT); 3131e722410cSMatthew Ahrens if (err) 313214843421SMatthew Ahrens goto out; 3133fa9e4066Sahrens 31340a586ceaSMark Shellenbaum count = 0; 3135fa9e4066Sahrens /* 3136fa9e4066Sahrens * Set each attribute requested. 3137fa9e4066Sahrens * We group settings according to the locks they need to acquire. 3138fa9e4066Sahrens * 3139fa9e4066Sahrens * Note: you cannot set ctime directly, although it will be 3140fa9e4066Sahrens * updated as a side-effect of calling this function. 3141fa9e4066Sahrens */ 3142fa9e4066Sahrens 31431412a1a2SMark Shellenbaum 31441412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 31451412a1a2SMark Shellenbaum mutex_enter(&zp->z_acl_lock); 3146fa9e4066Sahrens mutex_enter(&zp->z_lock); 3147fa9e4066Sahrens 3148db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3149db9986c7SMark Shellenbaum &zp->z_pflags, sizeof (zp->z_pflags)); 3150db9986c7SMark Shellenbaum 3151db9986c7SMark Shellenbaum if (attrzp) { 31521412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 31531412a1a2SMark Shellenbaum mutex_enter(&attrzp->z_acl_lock); 31540a586ceaSMark Shellenbaum mutex_enter(&attrzp->z_lock); 3155db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3156db9986c7SMark Shellenbaum SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3157db9986c7SMark Shellenbaum sizeof (attrzp->z_pflags)); 3158db9986c7SMark Shellenbaum } 31590a586ceaSMark Shellenbaum 316027dd1e87SMark Shellenbaum if (mask & (AT_UID|AT_GID)) { 316127dd1e87SMark Shellenbaum 316227dd1e87SMark Shellenbaum if (mask & AT_UID) { 316327dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 316427dd1e87SMark Shellenbaum &new_uid, sizeof (new_uid)); 3165f1696b23SMark Shellenbaum 
zp->z_uid = new_uid; 316627dd1e87SMark Shellenbaum if (attrzp) { 316727dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 316827dd1e87SMark Shellenbaum SA_ZPL_UID(zfsvfs), NULL, &new_uid, 316927dd1e87SMark Shellenbaum sizeof (new_uid)); 3170f1696b23SMark Shellenbaum attrzp->z_uid = new_uid; 317127dd1e87SMark Shellenbaum } 31720a586ceaSMark Shellenbaum } 31730a586ceaSMark Shellenbaum 317427dd1e87SMark Shellenbaum if (mask & AT_GID) { 317527dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 317627dd1e87SMark Shellenbaum NULL, &new_gid, sizeof (new_gid)); 3177f1696b23SMark Shellenbaum zp->z_gid = new_gid; 317827dd1e87SMark Shellenbaum if (attrzp) { 317927dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 318027dd1e87SMark Shellenbaum SA_ZPL_GID(zfsvfs), NULL, &new_gid, 318127dd1e87SMark Shellenbaum sizeof (new_gid)); 3182f1696b23SMark Shellenbaum attrzp->z_gid = new_gid; 318327dd1e87SMark Shellenbaum } 318427dd1e87SMark Shellenbaum } 318527dd1e87SMark Shellenbaum if (!(mask & AT_MODE)) { 318627dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 318727dd1e87SMark Shellenbaum NULL, &new_mode, sizeof (new_mode)); 318827dd1e87SMark Shellenbaum new_mode = zp->z_mode; 318927dd1e87SMark Shellenbaum } 319027dd1e87SMark Shellenbaum err = zfs_acl_chown_setattr(zp); 319127dd1e87SMark Shellenbaum ASSERT(err == 0); 31920a586ceaSMark Shellenbaum if (attrzp) { 319327dd1e87SMark Shellenbaum err = zfs_acl_chown_setattr(attrzp); 319427dd1e87SMark Shellenbaum ASSERT(err == 0); 31950a586ceaSMark Shellenbaum } 31960a586ceaSMark Shellenbaum } 31970a586ceaSMark Shellenbaum 3198fa9e4066Sahrens if (mask & AT_MODE) { 31990a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 32000a586ceaSMark Shellenbaum &new_mode, sizeof (new_mode)); 32010a586ceaSMark Shellenbaum zp->z_mode = new_mode; 320227dd1e87SMark Shellenbaum ASSERT3U((uintptr_t)aclp, !=, NULL); 320389459e17SMark Shellenbaum err = 
zfs_aclset_common(zp, aclp, cr, tx); 3204fb09f5aaSMadhav Suresh ASSERT0(err); 32050b2a8171SMark Shellenbaum if (zp->z_acl_cached) 32060b2a8171SMark Shellenbaum zfs_acl_free(zp->z_acl_cached); 32074929fd5eSTim Haley zp->z_acl_cached = aclp; 32084929fd5eSTim Haley aclp = NULL; 3209fa9e4066Sahrens } 3210fa9e4066Sahrens 3211d2443e76Smarks 32120a586ceaSMark Shellenbaum if (mask & AT_ATIME) { 32130a586ceaSMark Shellenbaum ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 32140a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 32150a586ceaSMark Shellenbaum &zp->z_atime, sizeof (zp->z_atime)); 3216d2443e76Smarks } 3217fa9e4066Sahrens 32180a586ceaSMark Shellenbaum if (mask & AT_MTIME) { 32190a586ceaSMark Shellenbaum ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 32200a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 32210a586ceaSMark Shellenbaum mtime, sizeof (mtime)); 3222d2443e76Smarks } 3223d2443e76Smarks 3224cdb0ab79Smaybee /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 32250a586ceaSMark Shellenbaum if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3226db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3227db9986c7SMark Shellenbaum NULL, mtime, sizeof (mtime)); 32280a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 32290a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 32300a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 32310a586ceaSMark Shellenbaum B_TRUE); 32320a586ceaSMark Shellenbaum } else if (mask != 0) { 32330a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 32340a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 32350a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 32360a586ceaSMark Shellenbaum B_TRUE); 32370a586ceaSMark Shellenbaum if (attrzp) { 32380a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 32390a586ceaSMark Shellenbaum SA_ZPL_CTIME(zfsvfs), NULL, 32400a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 32410a586ceaSMark Shellenbaum zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 32420a586ceaSMark Shellenbaum mtime, ctime, B_TRUE); 32430a586ceaSMark Shellenbaum } 32440a586ceaSMark Shellenbaum } 3245da6c28aaSamw /* 3246da6c28aaSamw * Do this after setting timestamps to prevent timestamp 3247da6c28aaSamw * update from toggling bit 3248da6c28aaSamw */ 3249da6c28aaSamw 3250da6c28aaSamw if (xoap && (mask & AT_XVATTR)) { 3251ae4caef8SMark Shellenbaum 3252ae4caef8SMark Shellenbaum /* 3253ae4caef8SMark Shellenbaum * restore trimmed off masks 3254ae4caef8SMark Shellenbaum * so that return masks can be set for caller. 
3255ae4caef8SMark Shellenbaum */ 3256ae4caef8SMark Shellenbaum 3257ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3258ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_APPENDONLY); 3259ae4caef8SMark Shellenbaum } 3260ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3261ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NOUNLINK); 3262ae4caef8SMark Shellenbaum } 3263ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3264ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3265ae4caef8SMark Shellenbaum } 3266ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3267ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NODUMP); 3268ae4caef8SMark Shellenbaum } 3269ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3270ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3271ae4caef8SMark Shellenbaum } 3272ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3273ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3274ae4caef8SMark Shellenbaum } 3275ae4caef8SMark Shellenbaum 32760a586ceaSMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3277da6c28aaSamw ASSERT(vp->v_type == VREG); 3278da6c28aaSamw 32790a586ceaSMark Shellenbaum zfs_xvattr_set(zp, xvap, tx); 3280da6c28aaSamw } 3281fa9e4066Sahrens 328289459e17SMark Shellenbaum if (fuid_dirtied) 328389459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 328489459e17SMark Shellenbaum 32855730cc9aSmaybee if (mask != 0) 3286da6c28aaSamw zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3287fa9e4066Sahrens 3288fa9e4066Sahrens mutex_exit(&zp->z_lock); 32891412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 32901412a1a2SMark Shellenbaum mutex_exit(&zp->z_acl_lock); 3291fa9e4066Sahrens 32921412a1a2SMark Shellenbaum if (attrzp) { 32931412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 32941412a1a2SMark Shellenbaum 
mutex_exit(&attrzp->z_acl_lock); 32951412a1a2SMark Shellenbaum mutex_exit(&attrzp->z_lock); 32961412a1a2SMark Shellenbaum } 329714843421SMatthew Ahrens out: 32980a586ceaSMark Shellenbaum if (err == 0 && attrzp) { 32990a586ceaSMark Shellenbaum err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 33000a586ceaSMark Shellenbaum xattr_count, tx); 33010a586ceaSMark Shellenbaum ASSERT(err2 == 0); 33020a586ceaSMark Shellenbaum } 33030a586ceaSMark Shellenbaum 3304d2443e76Smarks if (attrzp) 3305d2443e76Smarks VN_RELE(ZTOV(attrzp)); 3306f7170741SWill Andrews 33074929fd5eSTim Haley if (aclp) 33084929fd5eSTim Haley zfs_acl_free(aclp); 33094929fd5eSTim Haley 331014843421SMatthew Ahrens if (fuidp) { 331114843421SMatthew Ahrens zfs_fuid_info_free(fuidp); 331214843421SMatthew Ahrens fuidp = NULL; 331314843421SMatthew Ahrens } 331414843421SMatthew Ahrens 33150a586ceaSMark Shellenbaum if (err) { 331614843421SMatthew Ahrens dmu_tx_abort(tx); 33170a586ceaSMark Shellenbaum if (err == ERESTART) 33180a586ceaSMark Shellenbaum goto top; 33190a586ceaSMark Shellenbaum } else { 33200a586ceaSMark Shellenbaum err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 332114843421SMatthew Ahrens dmu_tx_commit(tx); 33220a586ceaSMark Shellenbaum } 332314843421SMatthew Ahrens 33240a586ceaSMark Shellenbaum out2: 332555da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 33265002558fSNeil Perrin zil_commit(zilog, 0); 332755da60b9SMark J Musante 3328fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3329fa9e4066Sahrens return (err); 3330fa9e4066Sahrens } 3331fa9e4066Sahrens 3332fa9e4066Sahrens typedef struct zfs_zlock { 3333fa9e4066Sahrens krwlock_t *zl_rwlock; /* lock we acquired */ 3334fa9e4066Sahrens znode_t *zl_znode; /* znode we held */ 3335fa9e4066Sahrens struct zfs_zlock *zl_next; /* next in list */ 3336fa9e4066Sahrens } zfs_zlock_t; 3337fa9e4066Sahrens 3338ff008e00Smaybee /* 3339ff008e00Smaybee * Drop locks and release vnodes that were held by zfs_rename_lock(). 
3340ff008e00Smaybee */ 3341ff008e00Smaybee static void 3342ff008e00Smaybee zfs_rename_unlock(zfs_zlock_t **zlpp) 3343ff008e00Smaybee { 3344ff008e00Smaybee zfs_zlock_t *zl; 3345ff008e00Smaybee 3346ff008e00Smaybee while ((zl = *zlpp) != NULL) { 3347ff008e00Smaybee if (zl->zl_znode != NULL) 3348ff008e00Smaybee VN_RELE(ZTOV(zl->zl_znode)); 3349ff008e00Smaybee rw_exit(zl->zl_rwlock); 3350ff008e00Smaybee *zlpp = zl->zl_next; 3351ff008e00Smaybee kmem_free(zl, sizeof (*zl)); 3352ff008e00Smaybee } 3353ff008e00Smaybee } 3354ff008e00Smaybee 3355ff008e00Smaybee /* 3356ff008e00Smaybee * Search back through the directory tree, using the ".." entries. 3357ff008e00Smaybee * Lock each directory in the chain to prevent concurrent renames. 3358ff008e00Smaybee * Fail any attempt to move a directory into one of its own descendants. 3359ff008e00Smaybee * XXX - z_parent_lock can overlap with map or grow locks 3360ff008e00Smaybee */ 3361fa9e4066Sahrens static int 3362fa9e4066Sahrens zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3363fa9e4066Sahrens { 3364fa9e4066Sahrens zfs_zlock_t *zl; 3365feb08c6bSbillm znode_t *zp = tdzp; 3366fa9e4066Sahrens uint64_t rootid = zp->z_zfsvfs->z_root; 33670a586ceaSMark Shellenbaum uint64_t oidp = zp->z_id; 3368fa9e4066Sahrens krwlock_t *rwlp = &szp->z_parent_lock; 3369fa9e4066Sahrens krw_t rw = RW_WRITER; 3370fa9e4066Sahrens 3371fa9e4066Sahrens /* 3372fa9e4066Sahrens * First pass write-locks szp and compares to zp->z_id. 3373fa9e4066Sahrens * Later passes read-lock zp and compare to zp->z_parent. 3374fa9e4066Sahrens */ 3375fa9e4066Sahrens do { 3376ff008e00Smaybee if (!rw_tryenter(rwlp, rw)) { 3377ff008e00Smaybee /* 3378ff008e00Smaybee * Another thread is renaming in this path. 3379ff008e00Smaybee * Note that if we are a WRITER, we don't have any 3380ff008e00Smaybee * parent_locks held yet. 
3381ff008e00Smaybee */ 3382ff008e00Smaybee if (rw == RW_READER && zp->z_id > szp->z_id) { 3383ff008e00Smaybee /* 3384ff008e00Smaybee * Drop our locks and restart 3385ff008e00Smaybee */ 3386ff008e00Smaybee zfs_rename_unlock(&zl); 3387ff008e00Smaybee *zlpp = NULL; 3388ff008e00Smaybee zp = tdzp; 33890a586ceaSMark Shellenbaum oidp = zp->z_id; 3390ff008e00Smaybee rwlp = &szp->z_parent_lock; 3391ff008e00Smaybee rw = RW_WRITER; 3392ff008e00Smaybee continue; 3393ff008e00Smaybee } else { 3394ff008e00Smaybee /* 3395ff008e00Smaybee * Wait for other thread to drop its locks 3396ff008e00Smaybee */ 3397ff008e00Smaybee rw_enter(rwlp, rw); 3398ff008e00Smaybee } 3399ff008e00Smaybee } 3400ff008e00Smaybee 3401fa9e4066Sahrens zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3402fa9e4066Sahrens zl->zl_rwlock = rwlp; 3403fa9e4066Sahrens zl->zl_znode = NULL; 3404fa9e4066Sahrens zl->zl_next = *zlpp; 3405fa9e4066Sahrens *zlpp = zl; 3406fa9e4066Sahrens 34070a586ceaSMark Shellenbaum if (oidp == szp->z_id) /* We're a descendant of szp */ 3408be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 3409fa9e4066Sahrens 34100a586ceaSMark Shellenbaum if (oidp == rootid) /* We've hit the top */ 3411fa9e4066Sahrens return (0); 3412fa9e4066Sahrens 3413fa9e4066Sahrens if (rw == RW_READER) { /* i.e. 
not the first pass */ 34140a586ceaSMark Shellenbaum int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); 3415fa9e4066Sahrens if (error) 3416fa9e4066Sahrens return (error); 3417fa9e4066Sahrens zl->zl_znode = zp; 3418fa9e4066Sahrens } 34190a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), 34200a586ceaSMark Shellenbaum &oidp, sizeof (oidp)); 3421fa9e4066Sahrens rwlp = &zp->z_parent_lock; 3422fa9e4066Sahrens rw = RW_READER; 3423fa9e4066Sahrens 3424fa9e4066Sahrens } while (zp->z_id != sdzp->z_id); 3425fa9e4066Sahrens 3426fa9e4066Sahrens return (0); 3427fa9e4066Sahrens } 3428fa9e4066Sahrens 3429fa9e4066Sahrens /* 3430fa9e4066Sahrens * Move an entry from the provided source directory to the target 3431fa9e4066Sahrens * directory. Change the entry name as indicated. 3432fa9e4066Sahrens * 3433fa9e4066Sahrens * IN: sdvp - Source directory containing the "old entry". 3434fa9e4066Sahrens * snm - Old entry name. 3435fa9e4066Sahrens * tdvp - Target directory to contain the "new entry". 3436fa9e4066Sahrens * tnm - New entry name. 3437fa9e4066Sahrens * cr - credentials of caller. 3438da6c28aaSamw * ct - caller context 3439da6c28aaSamw * flags - case flags 3440fa9e4066Sahrens * 3441f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
3442fa9e4066Sahrens * 3443fa9e4066Sahrens * Timestamps: 3444fa9e4066Sahrens * sdvp,tdvp - ctime|mtime updated 3445fa9e4066Sahrens */ 3446da6c28aaSamw /*ARGSUSED*/ 3447fa9e4066Sahrens static int 3448da6c28aaSamw zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3449da6c28aaSamw caller_context_t *ct, int flags) 3450fa9e4066Sahrens { 3451fa9e4066Sahrens znode_t *tdzp, *szp, *tzp; 3452fa9e4066Sahrens znode_t *sdzp = VTOZ(sdvp); 3453fa9e4066Sahrens zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 3454f18faf3fSek zilog_t *zilog; 3455fa9e4066Sahrens vnode_t *realvp; 3456fa9e4066Sahrens zfs_dirlock_t *sdl, *tdl; 3457fa9e4066Sahrens dmu_tx_t *tx; 3458fa9e4066Sahrens zfs_zlock_t *zl; 3459da6c28aaSamw int cmp, serr, terr; 346054207fd2SJerry Jelinek int error = 0, rm_err = 0; 3461da6c28aaSamw int zflg = 0; 346269962b56SMatthew Ahrens boolean_t waited = B_FALSE; 3463fa9e4066Sahrens 34643cb34c60Sahrens ZFS_ENTER(zfsvfs); 34653cb34c60Sahrens ZFS_VERIFY_ZP(sdzp); 3466f18faf3fSek zilog = zfsvfs->z_log; 3467fa9e4066Sahrens 3468fa9e4066Sahrens /* 3469fa9e4066Sahrens * Make sure we have the real vp for the target directory. 3470fa9e4066Sahrens */ 3471da6c28aaSamw if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3472fa9e4066Sahrens tdvp = realvp; 3473fa9e4066Sahrens 347418e64978SMarcel Telka tdzp = VTOZ(tdvp); 347518e64978SMarcel Telka ZFS_VERIFY_ZP(tdzp); 347618e64978SMarcel Telka 347718e64978SMarcel Telka /* 347818e64978SMarcel Telka * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 347918e64978SMarcel Telka * ctldir appear to have the same v_vfsp. 
348018e64978SMarcel Telka */ 348118e64978SMarcel Telka if (tdzp->z_zfsvfs != zfsvfs || zfsctl_is_node(tdvp)) { 3482fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3483be6fd75aSMatthew Ahrens return (SET_ERROR(EXDEV)); 3484fa9e4066Sahrens } 3485fa9e4066Sahrens 3486de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(tnm, 3487da6c28aaSamw strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3488da6c28aaSamw ZFS_EXIT(zfsvfs); 3489be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 3490da6c28aaSamw } 3491da6c28aaSamw 3492da6c28aaSamw if (flags & FIGNORECASE) 3493da6c28aaSamw zflg |= ZCILOOK; 3494da6c28aaSamw 3495fa9e4066Sahrens top: 3496fa9e4066Sahrens szp = NULL; 3497fa9e4066Sahrens tzp = NULL; 3498fa9e4066Sahrens zl = NULL; 3499fa9e4066Sahrens 3500fa9e4066Sahrens /* 3501fa9e4066Sahrens * This is to prevent the creation of links into attribute space 3502fa9e4066Sahrens * by renaming a linked file into/outof an attribute directory. 3503fa9e4066Sahrens * See the comment in zfs_link() for why this is considered bad. 3504fa9e4066Sahrens */ 35050a586ceaSMark Shellenbaum if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3506fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3507be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 3508fa9e4066Sahrens } 3509fa9e4066Sahrens 3510fa9e4066Sahrens /* 3511fa9e4066Sahrens * Lock source and target directory entries. To prevent deadlock, 3512fa9e4066Sahrens * a lock ordering must be defined. We lock the directory with 3513fa9e4066Sahrens * the smallest object id first, or if it's a tie, the one with 3514fa9e4066Sahrens * the lexically first name. 3515fa9e4066Sahrens */ 3516fa9e4066Sahrens if (sdzp->z_id < tdzp->z_id) { 3517fa9e4066Sahrens cmp = -1; 3518fa9e4066Sahrens } else if (sdzp->z_id > tdzp->z_id) { 3519fa9e4066Sahrens cmp = 1; 3520fa9e4066Sahrens } else { 3521da6c28aaSamw /* 3522da6c28aaSamw * First compare the two name arguments without 3523da6c28aaSamw * considering any case folding. 
3524da6c28aaSamw */ 3525da6c28aaSamw int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3526da6c28aaSamw 3527da6c28aaSamw cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3528de8267e0Stimh ASSERT(error == 0 || !zfsvfs->z_utf8); 3529fa9e4066Sahrens if (cmp == 0) { 3530fa9e4066Sahrens /* 3531fa9e4066Sahrens * POSIX: "If the old argument and the new argument 3532fa9e4066Sahrens * both refer to links to the same existing file, 3533fa9e4066Sahrens * the rename() function shall return successfully 3534fa9e4066Sahrens * and perform no other action." 3535fa9e4066Sahrens */ 3536fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3537fa9e4066Sahrens return (0); 3538fa9e4066Sahrens } 3539da6c28aaSamw /* 3540da6c28aaSamw * If the file system is case-folding, then we may 3541da6c28aaSamw * have some more checking to do. A case-folding file 3542da6c28aaSamw * system is either supporting mixed case sensitivity 3543da6c28aaSamw * access or is completely case-insensitive. Note 3544da6c28aaSamw * that the file system is always case preserving. 3545da6c28aaSamw * 3546da6c28aaSamw * In mixed sensitivity mode case sensitive behavior 3547da6c28aaSamw * is the default. FIGNORECASE must be used to 3548da6c28aaSamw * explicitly request case insensitive behavior. 3549da6c28aaSamw * 3550da6c28aaSamw * If the source and target names provided differ only 3551da6c28aaSamw * by case (e.g., a request to rename 'tim' to 'Tim'), 3552da6c28aaSamw * we will treat this as a special case in the 3553da6c28aaSamw * case-insensitive mode: as long as the source name 3554da6c28aaSamw * is an exact match, we will allow this to proceed as 3555da6c28aaSamw * a name-change request. 
3556da6c28aaSamw */ 3557de8267e0Stimh if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3558de8267e0Stimh (zfsvfs->z_case == ZFS_CASE_MIXED && 3559de8267e0Stimh flags & FIGNORECASE)) && 3560da6c28aaSamw u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3561da6c28aaSamw &error) == 0) { 3562da6c28aaSamw /* 3563da6c28aaSamw * case preserving rename request, require exact 3564da6c28aaSamw * name matches 3565da6c28aaSamw */ 3566da6c28aaSamw zflg |= ZCIEXACT; 3567da6c28aaSamw zflg &= ~ZCILOOK; 3568da6c28aaSamw } 3569fa9e4066Sahrens } 3570da6c28aaSamw 3571afefc7e4SSanjeev Bagewadi /* 3572afefc7e4SSanjeev Bagewadi * If the source and destination directories are the same, we should 3573afefc7e4SSanjeev Bagewadi * grab the z_name_lock of that directory only once. 3574afefc7e4SSanjeev Bagewadi */ 3575afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) { 3576afefc7e4SSanjeev Bagewadi zflg |= ZHAVELOCK; 3577afefc7e4SSanjeev Bagewadi rw_enter(&sdzp->z_name_lock, RW_READER); 3578afefc7e4SSanjeev Bagewadi } 3579afefc7e4SSanjeev Bagewadi 3580fa9e4066Sahrens if (cmp < 0) { 3581da6c28aaSamw serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3582da6c28aaSamw ZEXISTS | zflg, NULL, NULL); 3583da6c28aaSamw terr = zfs_dirent_lock(&tdl, 3584da6c28aaSamw tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3585fa9e4066Sahrens } else { 3586da6c28aaSamw terr = zfs_dirent_lock(&tdl, 3587da6c28aaSamw tdzp, tnm, &tzp, zflg, NULL, NULL); 3588da6c28aaSamw serr = zfs_dirent_lock(&sdl, 3589da6c28aaSamw sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3590da6c28aaSamw NULL, NULL); 3591fa9e4066Sahrens } 3592fa9e4066Sahrens 3593fa9e4066Sahrens if (serr) { 3594fa9e4066Sahrens /* 3595fa9e4066Sahrens * Source entry invalid or not there. 
3596fa9e4066Sahrens */ 3597fa9e4066Sahrens if (!terr) { 3598fa9e4066Sahrens zfs_dirent_unlock(tdl); 3599fa9e4066Sahrens if (tzp) 3600fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 3601fa9e4066Sahrens } 3602afefc7e4SSanjeev Bagewadi 3603afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3604afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3605afefc7e4SSanjeev Bagewadi 3606fa9e4066Sahrens if (strcmp(snm, "..") == 0) 3607be6fd75aSMatthew Ahrens serr = SET_ERROR(EINVAL); 3608fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3609fa9e4066Sahrens return (serr); 3610fa9e4066Sahrens } 3611fa9e4066Sahrens if (terr) { 3612fa9e4066Sahrens zfs_dirent_unlock(sdl); 3613fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3614afefc7e4SSanjeev Bagewadi 3615afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3616afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3617afefc7e4SSanjeev Bagewadi 3618fa9e4066Sahrens if (strcmp(tnm, "..") == 0) 3619be6fd75aSMatthew Ahrens terr = SET_ERROR(EINVAL); 3620fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3621fa9e4066Sahrens return (terr); 3622fa9e4066Sahrens } 3623fa9e4066Sahrens 3624fa9e4066Sahrens /* 3625fa9e4066Sahrens * Must have write access at the source to remove the old entry 3626fa9e4066Sahrens * and write access at the target to create the new entry. 3627fa9e4066Sahrens * Note that if target and source are the same, this can be 3628fa9e4066Sahrens * done in a single check. 3629fa9e4066Sahrens */ 3630fa9e4066Sahrens 3631fa9e4066Sahrens if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3632fa9e4066Sahrens goto out; 3633fa9e4066Sahrens 3634fa9e4066Sahrens if (ZTOV(szp)->v_type == VDIR) { 3635fa9e4066Sahrens /* 3636fa9e4066Sahrens * Check to make sure rename is valid. 3637fa9e4066Sahrens * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3638fa9e4066Sahrens */ 3639fa9e4066Sahrens if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3640fa9e4066Sahrens goto out; 3641fa9e4066Sahrens } 3642fa9e4066Sahrens 3643fa9e4066Sahrens /* 3644fa9e4066Sahrens * Does target exist? 
3645fa9e4066Sahrens */ 3646fa9e4066Sahrens if (tzp) { 3647fa9e4066Sahrens /* 3648fa9e4066Sahrens * Source and target must be the same type. 3649fa9e4066Sahrens */ 3650fa9e4066Sahrens if (ZTOV(szp)->v_type == VDIR) { 3651fa9e4066Sahrens if (ZTOV(tzp)->v_type != VDIR) { 3652be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTDIR); 3653fa9e4066Sahrens goto out; 3654fa9e4066Sahrens } 3655fa9e4066Sahrens } else { 3656fa9e4066Sahrens if (ZTOV(tzp)->v_type == VDIR) { 3657be6fd75aSMatthew Ahrens error = SET_ERROR(EISDIR); 3658fa9e4066Sahrens goto out; 3659fa9e4066Sahrens } 3660fa9e4066Sahrens } 3661fa9e4066Sahrens /* 3662fa9e4066Sahrens * POSIX dictates that when the source and target 3663fa9e4066Sahrens * entries refer to the same file object, rename 3664fa9e4066Sahrens * must do nothing and exit without error. 3665fa9e4066Sahrens */ 3666fa9e4066Sahrens if (szp->z_id == tzp->z_id) { 3667fa9e4066Sahrens error = 0; 3668fa9e4066Sahrens goto out; 3669fa9e4066Sahrens } 3670fa9e4066Sahrens } 3671fa9e4066Sahrens 367254207fd2SJerry Jelinek vnevent_pre_rename_src(ZTOV(szp), sdvp, snm, ct); 3673fa9e4066Sahrens if (tzp) 367454207fd2SJerry Jelinek vnevent_pre_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3675df2381bfSpraks 3676df2381bfSpraks /* 3677df2381bfSpraks * notify the target directory if it is not the same 3678df2381bfSpraks * as source directory. 
3679df2381bfSpraks */ 3680df2381bfSpraks if (tdvp != sdvp) { 368154207fd2SJerry Jelinek vnevent_pre_rename_dest_dir(tdvp, ZTOV(szp), tnm, ct); 3682df2381bfSpraks } 3683fa9e4066Sahrens 3684fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 36850a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 36860a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3687ea8dc4b6Seschrock dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3688ea8dc4b6Seschrock dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 36890a586ceaSMark Shellenbaum if (sdzp != tdzp) { 36900a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 36910a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, tdzp); 36920a586ceaSMark Shellenbaum } 36930a586ceaSMark Shellenbaum if (tzp) { 36940a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 36950a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, tzp); 36960a586ceaSMark Shellenbaum } 36970a586ceaSMark Shellenbaum 36980a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, szp); 3699893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 370069962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 3701fa9e4066Sahrens if (error) { 3702fa9e4066Sahrens if (zl != NULL) 3703fa9e4066Sahrens zfs_rename_unlock(&zl); 3704fa9e4066Sahrens zfs_dirent_unlock(sdl); 3705fa9e4066Sahrens zfs_dirent_unlock(tdl); 3706afefc7e4SSanjeev Bagewadi 3707afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3708afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3709afefc7e4SSanjeev Bagewadi 3710fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3711fa9e4066Sahrens if (tzp) 3712fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 37131209a471SNeil Perrin if (error == ERESTART) { 371469962b56SMatthew Ahrens waited = B_TRUE; 37158a2f1b91Sahrens dmu_tx_wait(tx); 37168a2f1b91Sahrens dmu_tx_abort(tx); 3717fa9e4066Sahrens goto top; 3718fa9e4066Sahrens } 37198a2f1b91Sahrens dmu_tx_abort(tx); 3720fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3721fa9e4066Sahrens return (error); 3722fa9e4066Sahrens } 3723fa9e4066Sahrens 3724fa9e4066Sahrens if (tzp) /* Attempt to remove the existing target */ 372554207fd2SJerry Jelinek error = rm_err = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3726fa9e4066Sahrens 3727fa9e4066Sahrens if (error == 0) { 3728fa9e4066Sahrens error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3729fa9e4066Sahrens if (error == 0) { 37300a586ceaSMark Shellenbaum szp->z_pflags |= ZFS_AV_MODIFIED; 37310a586ceaSMark Shellenbaum 37320a586ceaSMark Shellenbaum error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 37330a586ceaSMark Shellenbaum (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3734fb09f5aaSMadhav Suresh ASSERT0(error); 3735da6c28aaSamw 3736fa9e4066Sahrens error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 37376ed5e6abSSam Falkner if (error == 0) { 37386ed5e6abSSam Falkner zfs_log_rename(zilog, tx, TX_RENAME | 373991de656bSNeil Perrin (flags & FIGNORECASE ? 
TX_CI : 0), sdzp, 374091de656bSNeil Perrin sdl->dl_name, tdzp, tdl->dl_name, szp); 374151ece835Seschrock 37426ed5e6abSSam Falkner /* 37436ed5e6abSSam Falkner * Update path information for the target vnode 37446ed5e6abSSam Falkner */ 37456ed5e6abSSam Falkner vn_renamepath(tdvp, ZTOV(szp), tnm, 37466ed5e6abSSam Falkner strlen(tnm)); 37476ed5e6abSSam Falkner } else { 37486ed5e6abSSam Falkner /* 37496ed5e6abSSam Falkner * At this point, we have successfully created 37506ed5e6abSSam Falkner * the target name, but have failed to remove 37516ed5e6abSSam Falkner * the source name. Since the create was done 37526ed5e6abSSam Falkner * with the ZRENAMING flag, there are 37536ed5e6abSSam Falkner * complications; for one, the link count is 37546ed5e6abSSam Falkner * wrong. The easiest way to deal with this 37556ed5e6abSSam Falkner * is to remove the newly created target, and 37566ed5e6abSSam Falkner * return the original error. This must 37576ed5e6abSSam Falkner * succeed; fortunately, it is very unlikely to 37586ed5e6abSSam Falkner * fail, since we just created it. 
37596ed5e6abSSam Falkner */ 37606ed5e6abSSam Falkner VERIFY3U(zfs_link_destroy(tdl, szp, tx, 37616ed5e6abSSam Falkner ZRENAMING, NULL), ==, 0); 37626ed5e6abSSam Falkner } 3763fa9e4066Sahrens } 3764fa9e4066Sahrens } 3765fa9e4066Sahrens 3766fa9e4066Sahrens dmu_tx_commit(tx); 376754207fd2SJerry Jelinek 376854207fd2SJerry Jelinek if (tzp && rm_err == 0) 376954207fd2SJerry Jelinek vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 377054207fd2SJerry Jelinek 377154207fd2SJerry Jelinek if (error == 0) { 377254207fd2SJerry Jelinek vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 377354207fd2SJerry Jelinek /* notify the target dir if it is not the same as source dir */ 377454207fd2SJerry Jelinek if (tdvp != sdvp) 377554207fd2SJerry Jelinek vnevent_rename_dest_dir(tdvp, ct); 377654207fd2SJerry Jelinek } 3777fa9e4066Sahrens out: 3778fa9e4066Sahrens if (zl != NULL) 3779fa9e4066Sahrens zfs_rename_unlock(&zl); 3780fa9e4066Sahrens 3781fa9e4066Sahrens zfs_dirent_unlock(sdl); 3782fa9e4066Sahrens zfs_dirent_unlock(tdl); 3783fa9e4066Sahrens 3784afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3785afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3786afefc7e4SSanjeev Bagewadi 3787afefc7e4SSanjeev Bagewadi 3788fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3789fa9e4066Sahrens if (tzp) 3790fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 3791fa9e4066Sahrens 379255da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 37935002558fSNeil Perrin zil_commit(zilog, 0); 379455da60b9SMark J Musante 3795fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3796fa9e4066Sahrens return (error); 3797fa9e4066Sahrens } 3798fa9e4066Sahrens 3799fa9e4066Sahrens /* 3800fa9e4066Sahrens * Insert the indicated symbolic reference entry into the directory. 3801fa9e4066Sahrens * 3802fa9e4066Sahrens * IN: dvp - Directory to contain new symbolic link. 3803fa9e4066Sahrens * link - Name for new symlink entry. 3804fa9e4066Sahrens * vap - Attributes of new entry. 3805fa9e4066Sahrens * cr - credentials of caller. 
3806da6c28aaSamw * ct - caller context 3807da6c28aaSamw * flags - case flags 3808fa9e4066Sahrens * 3809f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 3810fa9e4066Sahrens * 3811fa9e4066Sahrens * Timestamps: 3812fa9e4066Sahrens * dvp - ctime|mtime updated 3813fa9e4066Sahrens */ 3814da6c28aaSamw /*ARGSUSED*/ 3815fa9e4066Sahrens static int 3816da6c28aaSamw zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr, 3817da6c28aaSamw caller_context_t *ct, int flags) 3818fa9e4066Sahrens { 3819fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 3820fa9e4066Sahrens zfs_dirlock_t *dl; 3821fa9e4066Sahrens dmu_tx_t *tx; 3822fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3823f18faf3fSek zilog_t *zilog; 38240a586ceaSMark Shellenbaum uint64_t len = strlen(link); 3825fa9e4066Sahrens int error; 3826da6c28aaSamw int zflg = ZNEW; 382789459e17SMark Shellenbaum zfs_acl_ids_t acl_ids; 382889459e17SMark Shellenbaum boolean_t fuid_dirtied; 38290a586ceaSMark Shellenbaum uint64_t txtype = TX_SYMLINK; 383069962b56SMatthew Ahrens boolean_t waited = B_FALSE; 3831fa9e4066Sahrens 3832fa9e4066Sahrens ASSERT(vap->va_type == VLNK); 3833fa9e4066Sahrens 38343cb34c60Sahrens ZFS_ENTER(zfsvfs); 38353cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 3836f18faf3fSek zilog = zfsvfs->z_log; 3837da6c28aaSamw 3838de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 3839da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3840da6c28aaSamw ZFS_EXIT(zfsvfs); 3841be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 3842da6c28aaSamw } 3843da6c28aaSamw if (flags & FIGNORECASE) 3844da6c28aaSamw zflg |= ZCILOOK; 3845fa9e4066Sahrens 3846fa9e4066Sahrens if (len > MAXPATHLEN) { 3847fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3848be6fd75aSMatthew Ahrens return (SET_ERROR(ENAMETOOLONG)); 3849fa9e4066Sahrens } 3850fa9e4066Sahrens 3851c8c24165SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, 3852c8c24165SMark Shellenbaum vap, cr, NULL, &acl_ids)) != 0) { 3853c8c24165SMark Shellenbaum 
ZFS_EXIT(zfsvfs); 3854c8c24165SMark Shellenbaum return (error); 3855c8c24165SMark Shellenbaum } 3856c8c24165SMark Shellenbaum top: 3857fa9e4066Sahrens /* 3858fa9e4066Sahrens * Attempt to lock directory; fail if entry already exists. 3859fa9e4066Sahrens */ 3860da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 3861da6c28aaSamw if (error) { 3862c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 3863c8c24165SMark Shellenbaum ZFS_EXIT(zfsvfs); 3864c8c24165SMark Shellenbaum return (error); 3865c8c24165SMark Shellenbaum } 3866c8c24165SMark Shellenbaum 3867c8c24165SMark Shellenbaum if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3868c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 38698e303ae0SMark Shellenbaum zfs_dirent_unlock(dl); 3870fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3871fa9e4066Sahrens return (error); 3872fa9e4066Sahrens } 3873fa9e4066Sahrens 387414843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 387514843421SMatthew Ahrens zfs_acl_ids_free(&acl_ids); 387614843421SMatthew Ahrens zfs_dirent_unlock(dl); 387714843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 3878be6fd75aSMatthew Ahrens return (SET_ERROR(EDQUOT)); 387914843421SMatthew Ahrens } 3880fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 388189459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 3882fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 3883ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 38840a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 38850a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE + len); 38860a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 38870a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 38880a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 38890a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes); 38900a586ceaSMark Shellenbaum } 389114843421SMatthew Ahrens if 
(fuid_dirtied) 389214843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 389369962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 3894fa9e4066Sahrens if (error) { 3895fa9e4066Sahrens zfs_dirent_unlock(dl); 38961209a471SNeil Perrin if (error == ERESTART) { 389769962b56SMatthew Ahrens waited = B_TRUE; 38988a2f1b91Sahrens dmu_tx_wait(tx); 38998a2f1b91Sahrens dmu_tx_abort(tx); 3900fa9e4066Sahrens goto top; 3901fa9e4066Sahrens } 3902c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 39038a2f1b91Sahrens dmu_tx_abort(tx); 3904fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3905fa9e4066Sahrens return (error); 3906fa9e4066Sahrens } 3907fa9e4066Sahrens 3908fa9e4066Sahrens /* 3909fa9e4066Sahrens * Create a new object for the symlink. 39100a586ceaSMark Shellenbaum * for version 4 ZPL datsets the symlink will be an SA attribute 3911fa9e4066Sahrens */ 39120a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 3913fa9e4066Sahrens 39140a586ceaSMark Shellenbaum if (fuid_dirtied) 39150a586ceaSMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 3916fa9e4066Sahrens 39171412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 39180a586ceaSMark Shellenbaum if (zp->z_is_sa) 39190a586ceaSMark Shellenbaum error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 39200a586ceaSMark Shellenbaum link, len, tx); 39210a586ceaSMark Shellenbaum else 39220a586ceaSMark Shellenbaum zfs_sa_symlink(zp, link, len, tx); 39231412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 3924fa9e4066Sahrens 39250a586ceaSMark Shellenbaum zp->z_size = len; 39260a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 39270a586ceaSMark Shellenbaum &zp->z_size, sizeof (zp->z_size), tx); 3928fa9e4066Sahrens /* 3929fa9e4066Sahrens * Insert the new object into the directory. 
3930fa9e4066Sahrens */ 3931fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 39320a586ceaSMark Shellenbaum 39330a586ceaSMark Shellenbaum if (flags & FIGNORECASE) 39340a586ceaSMark Shellenbaum txtype |= TX_CI; 39350a586ceaSMark Shellenbaum zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 393689459e17SMark Shellenbaum 393789459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 3938fa9e4066Sahrens 3939fa9e4066Sahrens dmu_tx_commit(tx); 3940fa9e4066Sahrens 3941fa9e4066Sahrens zfs_dirent_unlock(dl); 3942fa9e4066Sahrens 3943fa9e4066Sahrens VN_RELE(ZTOV(zp)); 3944fa9e4066Sahrens 394555da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 39465002558fSNeil Perrin zil_commit(zilog, 0); 394755da60b9SMark J Musante 3948fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3949fa9e4066Sahrens return (error); 3950fa9e4066Sahrens } 3951fa9e4066Sahrens 3952fa9e4066Sahrens /* 3953fa9e4066Sahrens * Return, in the buffer contained in the provided uio structure, 3954fa9e4066Sahrens * the symbolic path referred to by vp. 3955fa9e4066Sahrens * 3956fa9e4066Sahrens * IN: vp - vnode of symbolic link. 3957f7170741SWill Andrews * uio - structure to contain the link path. 3958fa9e4066Sahrens * cr - credentials of caller. 3959da6c28aaSamw * ct - caller context 3960fa9e4066Sahrens * 3961f7170741SWill Andrews * OUT: uio - structure containing the link path. 3962fa9e4066Sahrens * 3963f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
3964fa9e4066Sahrens * 3965fa9e4066Sahrens * Timestamps: 3966fa9e4066Sahrens * vp - atime updated 3967fa9e4066Sahrens */ 3968fa9e4066Sahrens /* ARGSUSED */ 3969fa9e4066Sahrens static int 3970da6c28aaSamw zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 3971fa9e4066Sahrens { 3972fa9e4066Sahrens znode_t *zp = VTOZ(vp); 3973fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3974fa9e4066Sahrens int error; 3975fa9e4066Sahrens 39763cb34c60Sahrens ZFS_ENTER(zfsvfs); 39773cb34c60Sahrens ZFS_VERIFY_ZP(zp); 3978fa9e4066Sahrens 39791412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 39800a586ceaSMark Shellenbaum if (zp->z_is_sa) 39810a586ceaSMark Shellenbaum error = sa_lookup_uio(zp->z_sa_hdl, 39820a586ceaSMark Shellenbaum SA_ZPL_SYMLINK(zfsvfs), uio); 39830a586ceaSMark Shellenbaum else 39840a586ceaSMark Shellenbaum error = zfs_sa_readlink(zp, uio); 39851412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 3986fa9e4066Sahrens 3987fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 39880a586ceaSMark Shellenbaum 3989fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3990fa9e4066Sahrens return (error); 3991fa9e4066Sahrens } 3992fa9e4066Sahrens 3993fa9e4066Sahrens /* 3994fa9e4066Sahrens * Insert a new entry into directory tdvp referencing svp. 3995fa9e4066Sahrens * 3996fa9e4066Sahrens * IN: tdvp - Directory to contain new entry. 3997fa9e4066Sahrens * svp - vnode of new entry. 3998fa9e4066Sahrens * name - name of new entry. 3999fa9e4066Sahrens * cr - credentials of caller. 4000da6c28aaSamw * ct - caller context 4001fa9e4066Sahrens * 4002f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
4003fa9e4066Sahrens * 4004fa9e4066Sahrens * Timestamps: 4005fa9e4066Sahrens * tdvp - ctime|mtime updated 4006fa9e4066Sahrens * svp - ctime updated 4007fa9e4066Sahrens */ 4008fa9e4066Sahrens /* ARGSUSED */ 4009fa9e4066Sahrens static int 4010da6c28aaSamw zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 4011da6c28aaSamw caller_context_t *ct, int flags) 4012fa9e4066Sahrens { 4013fa9e4066Sahrens znode_t *dzp = VTOZ(tdvp); 4014fa9e4066Sahrens znode_t *tzp, *szp; 4015fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 4016f18faf3fSek zilog_t *zilog; 4017fa9e4066Sahrens zfs_dirlock_t *dl; 4018fa9e4066Sahrens dmu_tx_t *tx; 4019fa9e4066Sahrens vnode_t *realvp; 4020fa9e4066Sahrens int error; 4021da6c28aaSamw int zf = ZNEW; 4022d39ee142SMark Shellenbaum uint64_t parent; 4023f1696b23SMark Shellenbaum uid_t owner; 402469962b56SMatthew Ahrens boolean_t waited = B_FALSE; 4025fa9e4066Sahrens 4026fa9e4066Sahrens ASSERT(tdvp->v_type == VDIR); 4027fa9e4066Sahrens 40283cb34c60Sahrens ZFS_ENTER(zfsvfs); 40293cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 4030f18faf3fSek zilog = zfsvfs->z_log; 4031fa9e4066Sahrens 4032da6c28aaSamw if (VOP_REALVP(svp, &realvp, ct) == 0) 4033fa9e4066Sahrens svp = realvp; 4034fa9e4066Sahrens 4035d39ee142SMark Shellenbaum /* 4036d39ee142SMark Shellenbaum * POSIX dictates that we return EPERM here. 4037d39ee142SMark Shellenbaum * Better choices include ENOTSUP or EISDIR. 4038d39ee142SMark Shellenbaum */ 4039d39ee142SMark Shellenbaum if (svp->v_type == VDIR) { 4040d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 4041be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 4042d39ee142SMark Shellenbaum } 4043d39ee142SMark Shellenbaum 404418e64978SMarcel Telka szp = VTOZ(svp); 404518e64978SMarcel Telka ZFS_VERIFY_ZP(szp); 404618e64978SMarcel Telka 404718e64978SMarcel Telka /* 404818e64978SMarcel Telka * We check z_zfsvfs rather than v_vfsp here, because snapshots and the 404918e64978SMarcel Telka * ctldir appear to have the same v_vfsp. 
405018e64978SMarcel Telka */ 405118e64978SMarcel Telka if (szp->z_zfsvfs != zfsvfs || zfsctl_is_node(svp)) { 4052fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4053be6fd75aSMatthew Ahrens return (SET_ERROR(EXDEV)); 4054fa9e4066Sahrens } 4055d39ee142SMark Shellenbaum 4056d39ee142SMark Shellenbaum /* Prevent links to .zfs/shares files */ 4057d39ee142SMark Shellenbaum 4058d39ee142SMark Shellenbaum if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 4059d39ee142SMark Shellenbaum &parent, sizeof (uint64_t))) != 0) { 4060d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 4061d39ee142SMark Shellenbaum return (error); 4062d39ee142SMark Shellenbaum } 4063d39ee142SMark Shellenbaum if (parent == zfsvfs->z_shares_dir) { 4064d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 4065be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 4066d39ee142SMark Shellenbaum } 4067d39ee142SMark Shellenbaum 4068de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, 4069da6c28aaSamw strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 4070da6c28aaSamw ZFS_EXIT(zfsvfs); 4071be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 4072da6c28aaSamw } 4073da6c28aaSamw if (flags & FIGNORECASE) 4074da6c28aaSamw zf |= ZCILOOK; 4075da6c28aaSamw 4076fa9e4066Sahrens /* 4077fa9e4066Sahrens * We do not support links between attributes and non-attributes 4078fa9e4066Sahrens * because of the potential security risk of creating links 4079fa9e4066Sahrens * into "normal" file space in order to circumvent restrictions 4080fa9e4066Sahrens * imposed in attribute space. 
4081fa9e4066Sahrens */ 40820a586ceaSMark Shellenbaum if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 4083fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4084be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4085fa9e4066Sahrens } 4086fa9e4066Sahrens 4087fa9e4066Sahrens 4088f1696b23SMark Shellenbaum owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 4089f1696b23SMark Shellenbaum if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) { 4090fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4091be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 4092fa9e4066Sahrens } 4093fa9e4066Sahrens 4094da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 4095fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4096fa9e4066Sahrens return (error); 4097fa9e4066Sahrens } 4098fa9e4066Sahrens 4099d39ee142SMark Shellenbaum top: 4100fa9e4066Sahrens /* 4101fa9e4066Sahrens * Attempt to lock directory; fail if entry already exists. 4102fa9e4066Sahrens */ 4103da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 4104da6c28aaSamw if (error) { 4105fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4106fa9e4066Sahrens return (error); 4107fa9e4066Sahrens } 4108fa9e4066Sahrens 4109fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 41100a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4111ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 41120a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, szp); 41130a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 411469962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 4115fa9e4066Sahrens if (error) { 4116fa9e4066Sahrens zfs_dirent_unlock(dl); 41171209a471SNeil Perrin if (error == ERESTART) { 411869962b56SMatthew Ahrens waited = B_TRUE; 41198a2f1b91Sahrens dmu_tx_wait(tx); 41208a2f1b91Sahrens dmu_tx_abort(tx); 4121fa9e4066Sahrens goto top; 4122fa9e4066Sahrens } 41238a2f1b91Sahrens dmu_tx_abort(tx); 4124fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4125fa9e4066Sahrens return (error); 4126fa9e4066Sahrens } 4127fa9e4066Sahrens 4128fa9e4066Sahrens error = zfs_link_create(dl, szp, tx, 0); 4129fa9e4066Sahrens 4130da6c28aaSamw if (error == 0) { 4131da6c28aaSamw uint64_t txtype = TX_LINK; 4132da6c28aaSamw if (flags & FIGNORECASE) 4133da6c28aaSamw txtype |= TX_CI; 4134da6c28aaSamw zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4135da6c28aaSamw } 4136fa9e4066Sahrens 4137fa9e4066Sahrens dmu_tx_commit(tx); 4138fa9e4066Sahrens 4139fa9e4066Sahrens zfs_dirent_unlock(dl); 4140fa9e4066Sahrens 4141df2381bfSpraks if (error == 0) { 4142da6c28aaSamw vnevent_link(svp, ct); 4143df2381bfSpraks } 4144df2381bfSpraks 414555da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 41465002558fSNeil Perrin zil_commit(zilog, 0); 414755da60b9SMark J Musante 4148fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4149fa9e4066Sahrens return (error); 4150fa9e4066Sahrens } 4151fa9e4066Sahrens 4152fa9e4066Sahrens /* 4153fa9e4066Sahrens * zfs_null_putapage() is used when the file system has been force 4154fa9e4066Sahrens * unmounted. It just drops the pages. 4155fa9e4066Sahrens */ 4156fa9e4066Sahrens /* ARGSUSED */ 4157fa9e4066Sahrens static int 4158fa9e4066Sahrens zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 41599a686fbcSPaul Dagnelie size_t *lenp, int flags, cred_t *cr) 4160fa9e4066Sahrens { 4161fa9e4066Sahrens pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 4162fa9e4066Sahrens return (0); 4163fa9e4066Sahrens } 4164fa9e4066Sahrens 416544eda4d7Smaybee /* 416644eda4d7Smaybee * Push a page out to disk, klustering if possible. 
416744eda4d7Smaybee * 416844eda4d7Smaybee * IN: vp - file to push page to. 416944eda4d7Smaybee * pp - page to push. 417044eda4d7Smaybee * flags - additional flags. 417144eda4d7Smaybee * cr - credentials of caller. 417244eda4d7Smaybee * 417344eda4d7Smaybee * OUT: offp - start of range pushed. 417444eda4d7Smaybee * lenp - len of range pushed. 417544eda4d7Smaybee * 4176f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 417744eda4d7Smaybee * 417844eda4d7Smaybee * NOTE: callers must have locked the page to be pushed. On 417944eda4d7Smaybee * exit, the page (and all other pages in the kluster) must be 418044eda4d7Smaybee * unlocked. 418144eda4d7Smaybee */ 4182fa9e4066Sahrens /* ARGSUSED */ 4183fa9e4066Sahrens static int 4184fa9e4066Sahrens zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 41859a686fbcSPaul Dagnelie size_t *lenp, int flags, cred_t *cr) 4186fa9e4066Sahrens { 4187fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4188fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4189fa9e4066Sahrens dmu_tx_t *tx; 419044eda4d7Smaybee u_offset_t off, koff; 419144eda4d7Smaybee size_t len, klen; 4192fa9e4066Sahrens int err; 4193fa9e4066Sahrens 4194fa9e4066Sahrens off = pp->p_offset; 419544eda4d7Smaybee len = PAGESIZE; 419644eda4d7Smaybee /* 419744eda4d7Smaybee * If our blocksize is bigger than the page size, try to kluster 41981209a471SNeil Perrin * multiple pages so that we write a full block (thus avoiding 419944eda4d7Smaybee * a read-modify-write). 420044eda4d7Smaybee */ 42010a586ceaSMark Shellenbaum if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 4202ac05c741SMark Maybee klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4203ac05c741SMark Maybee koff = ISP2(klen) ? 
P2ALIGN(off, (u_offset_t)klen) : 0; 42040a586ceaSMark Shellenbaum ASSERT(koff <= zp->z_size); 42050a586ceaSMark Shellenbaum if (koff + klen > zp->z_size) 42060a586ceaSMark Shellenbaum klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 420744eda4d7Smaybee pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 420844eda4d7Smaybee } 420944eda4d7Smaybee ASSERT3U(btop(len), ==, btopr(len)); 4210ac05c741SMark Maybee 4211dd6ef538Smaybee /* 4212dd6ef538Smaybee * Can't push pages past end-of-file. 4213dd6ef538Smaybee */ 42140a586ceaSMark Shellenbaum if (off >= zp->z_size) { 4215f4d2e9e6Smaybee /* ignore all pages */ 421644eda4d7Smaybee err = 0; 421744eda4d7Smaybee goto out; 42180a586ceaSMark Shellenbaum } else if (off + len > zp->z_size) { 42190a586ceaSMark Shellenbaum int npages = btopr(zp->z_size - off); 422044eda4d7Smaybee page_t *trunc; 422144eda4d7Smaybee 422244eda4d7Smaybee page_list_break(&pp, &trunc, npages); 4223f4d2e9e6Smaybee /* ignore pages past end of file */ 422444eda4d7Smaybee if (trunc) 4225f4d2e9e6Smaybee pvn_write_done(trunc, flags); 42260a586ceaSMark Shellenbaum len = zp->z_size - off; 4227dd6ef538Smaybee } 422814843421SMatthew Ahrens 42290a586ceaSMark Shellenbaum if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 42300a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4231be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 423214843421SMatthew Ahrens goto out; 423314843421SMatthew Ahrens } 4234fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 4235fa9e4066Sahrens dmu_tx_hold_write(tx, zp->z_id, off, len); 42360a586ceaSMark Shellenbaum 42370a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 42380a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 4239e722410cSMatthew Ahrens err = dmu_tx_assign(tx, TXG_WAIT); 4240fa9e4066Sahrens if (err != 0) { 42418a2f1b91Sahrens dmu_tx_abort(tx); 4242fa9e4066Sahrens goto out; 4243fa9e4066Sahrens } 4244fa9e4066Sahrens 424544eda4d7Smaybee if (zp->z_blksz <= PAGESIZE) { 
42460fab61baSJonathan W Adams caddr_t va = zfs_map_page(pp, S_READ); 424744eda4d7Smaybee ASSERT3U(len, <=, PAGESIZE); 424844eda4d7Smaybee dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 42490fab61baSJonathan W Adams zfs_unmap_page(pp, va); 425044eda4d7Smaybee } else { 425144eda4d7Smaybee err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 425244eda4d7Smaybee } 4253fa9e4066Sahrens 425444eda4d7Smaybee if (err == 0) { 42550a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 4256db9986c7SMark Shellenbaum sa_bulk_attr_t bulk[3]; 42570a586ceaSMark Shellenbaum int count = 0; 42580a586ceaSMark Shellenbaum 42590a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 42600a586ceaSMark Shellenbaum &mtime, 16); 42610a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 42620a586ceaSMark Shellenbaum &ctime, 16); 4263db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4264db9986c7SMark Shellenbaum &zp->z_pflags, 8); 42650a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 42660a586ceaSMark Shellenbaum B_TRUE); 426780e10fd0SAndriy Gapon err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 426880e10fd0SAndriy Gapon ASSERT0(err); 4269ac05c741SMark Maybee zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 427044eda4d7Smaybee } 427168857716SLin Ling dmu_tx_commit(tx); 4272fa9e4066Sahrens 427344eda4d7Smaybee out: 4274f4d2e9e6Smaybee pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4275fa9e4066Sahrens if (offp) 4276fa9e4066Sahrens *offp = off; 4277fa9e4066Sahrens if (lenp) 4278fa9e4066Sahrens *lenp = len; 4279fa9e4066Sahrens 4280fa9e4066Sahrens return (err); 4281fa9e4066Sahrens } 4282fa9e4066Sahrens 4283fa9e4066Sahrens /* 4284fa9e4066Sahrens * Copy the portion of the file indicated from pages into the file. 4285fa9e4066Sahrens * The pages are stored in a page list attached to the files vnode. 
4286fa9e4066Sahrens * 4287fa9e4066Sahrens * IN: vp - vnode of file to push page data to. 4288fa9e4066Sahrens * off - position in file to put data. 4289fa9e4066Sahrens * len - amount of data to write. 4290fa9e4066Sahrens * flags - flags to control the operation. 4291fa9e4066Sahrens * cr - credentials of caller. 4292da6c28aaSamw * ct - caller context. 4293fa9e4066Sahrens * 4294f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 4295fa9e4066Sahrens * 4296fa9e4066Sahrens * Timestamps: 4297fa9e4066Sahrens * vp - ctime|mtime updated 4298fa9e4066Sahrens */ 4299da6c28aaSamw /*ARGSUSED*/ 4300fa9e4066Sahrens static int 4301da6c28aaSamw zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4302da6c28aaSamw caller_context_t *ct) 4303fa9e4066Sahrens { 4304fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4305fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4306fa9e4066Sahrens page_t *pp; 4307fa9e4066Sahrens size_t io_len; 4308fa9e4066Sahrens u_offset_t io_off; 4309ac05c741SMark Maybee uint_t blksz; 4310ac05c741SMark Maybee rl_t *rl; 4311fa9e4066Sahrens int error = 0; 4312fa9e4066Sahrens 43133cb34c60Sahrens ZFS_ENTER(zfsvfs); 43143cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4315fa9e4066Sahrens 4316c4fc6b21SGarrett D'Amore /* 4317c4fc6b21SGarrett D'Amore * There's nothing to do if no data is cached. 4318c4fc6b21SGarrett D'Amore */ 4319c4fc6b21SGarrett D'Amore if (!vn_has_cached_data(vp)) { 4320c4fc6b21SGarrett D'Amore ZFS_EXIT(zfsvfs); 4321c4fc6b21SGarrett D'Amore return (0); 4322c4fc6b21SGarrett D'Amore } 4323c4fc6b21SGarrett D'Amore 4324ac05c741SMark Maybee /* 4325ac05c741SMark Maybee * Align this request to the file block size in case we kluster. 4326ac05c741SMark Maybee * XXX - this can result in pretty aggresive locking, which can 4327ac05c741SMark Maybee * impact simultanious read/write access. 
One option might be 4328ac05c741SMark Maybee * to break up long requests (len == 0) into block-by-block 4329ac05c741SMark Maybee * operations to get narrower locking. 4330ac05c741SMark Maybee */ 4331ac05c741SMark Maybee blksz = zp->z_blksz; 4332ac05c741SMark Maybee if (ISP2(blksz)) 4333ac05c741SMark Maybee io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4334ac05c741SMark Maybee else 4335ac05c741SMark Maybee io_off = 0; 4336ac05c741SMark Maybee if (len > 0 && ISP2(blksz)) 43375a6f5619SMark Maybee io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4338ac05c741SMark Maybee else 4339ac05c741SMark Maybee io_len = 0; 4340ac05c741SMark Maybee 4341ac05c741SMark Maybee if (io_len == 0) { 4342fa9e4066Sahrens /* 4343ac05c741SMark Maybee * Search the entire vp list for pages >= io_off. 4344fa9e4066Sahrens */ 4345ac05c741SMark Maybee rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4346ac05c741SMark Maybee error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4347fe9cf88cSperrin goto out; 4348fa9e4066Sahrens } 4349ac05c741SMark Maybee rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4350fa9e4066Sahrens 43510a586ceaSMark Shellenbaum if (off > zp->z_size) { 4352fa9e4066Sahrens /* past end of file */ 4353ac05c741SMark Maybee zfs_range_unlock(rl); 4354fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4355fa9e4066Sahrens return (0); 4356fa9e4066Sahrens } 4357fa9e4066Sahrens 43580a586ceaSMark Shellenbaum len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4359fa9e4066Sahrens 4360ac05c741SMark Maybee for (off = io_off; io_off < off + len; io_off += io_len) { 4361fa9e4066Sahrens if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4362104e2ed7Sperrin pp = page_lookup(vp, io_off, 4363ecb72030Sperrin (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4364fa9e4066Sahrens } else { 4365fa9e4066Sahrens pp = page_lookup_nowait(vp, io_off, 4366ecb72030Sperrin (flags & B_FREE) ? 
SE_EXCL : SE_SHARED); 4367fa9e4066Sahrens } 4368fa9e4066Sahrens 4369fa9e4066Sahrens if (pp != NULL && pvn_getdirty(pp, flags)) { 4370fa9e4066Sahrens int err; 4371fa9e4066Sahrens 4372fa9e4066Sahrens /* 4373fa9e4066Sahrens * Found a dirty page to push 4374fa9e4066Sahrens */ 4375104e2ed7Sperrin err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4376104e2ed7Sperrin if (err) 4377fa9e4066Sahrens error = err; 4378fa9e4066Sahrens } else { 4379fa9e4066Sahrens io_len = PAGESIZE; 4380fa9e4066Sahrens } 4381fa9e4066Sahrens } 4382fe9cf88cSperrin out: 4383ac05c741SMark Maybee zfs_range_unlock(rl); 438455da60b9SMark J Musante if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 43855002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 4386fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4387fa9e4066Sahrens return (error); 4388fa9e4066Sahrens } 4389fa9e4066Sahrens 4390da6c28aaSamw /*ARGSUSED*/ 4391fa9e4066Sahrens void 4392da6c28aaSamw zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4393fa9e4066Sahrens { 4394fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4395fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4396fa9e4066Sahrens int error; 4397fa9e4066Sahrens 4398f18faf3fSek rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 43990a586ceaSMark Shellenbaum if (zp->z_sa_hdl == NULL) { 44004ccbb6e7Sahrens /* 4401874395d5Smaybee * The fs has been unmounted, or we did a 4402874395d5Smaybee * suspend/resume and this file no longer exists. 
44034ccbb6e7Sahrens */ 4404fa9e4066Sahrens if (vn_has_cached_data(vp)) { 4405fa9e4066Sahrens (void) pvn_vplist_dirty(vp, 0, zfs_null_putapage, 4406fa9e4066Sahrens B_INVAL, cr); 4407fa9e4066Sahrens } 4408fa9e4066Sahrens 4409ea8dc4b6Seschrock mutex_enter(&zp->z_lock); 4410cd2adeceSChris Kirby mutex_enter(&vp->v_lock); 4411cd2adeceSChris Kirby ASSERT(vp->v_count == 1); 4412ade42b55SSebastien Roy VN_RELE_LOCKED(vp); 4413cd2adeceSChris Kirby mutex_exit(&vp->v_lock); 44144ccbb6e7Sahrens mutex_exit(&zp->z_lock); 4415f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 4416874395d5Smaybee zfs_znode_free(zp); 4417fa9e4066Sahrens return; 4418fa9e4066Sahrens } 4419fa9e4066Sahrens 4420fa9e4066Sahrens /* 4421fa9e4066Sahrens * Attempt to push any data in the page cache. If this fails 4422fa9e4066Sahrens * we will get kicked out later in zfs_zinactive(). 4423fa9e4066Sahrens */ 44248afd4dd6Sperrin if (vn_has_cached_data(vp)) { 44258afd4dd6Sperrin (void) pvn_vplist_dirty(vp, 0, zfs_putapage, B_INVAL|B_ASYNC, 44268afd4dd6Sperrin cr); 44278afd4dd6Sperrin } 4428fa9e4066Sahrens 4429893a6d32Sahrens if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4430fa9e4066Sahrens dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4431fa9e4066Sahrens 44320a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 44330a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 4434fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 4435fa9e4066Sahrens if (error) { 4436fa9e4066Sahrens dmu_tx_abort(tx); 4437fa9e4066Sahrens } else { 4438fa9e4066Sahrens mutex_enter(&zp->z_lock); 44390a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 44400a586ceaSMark Shellenbaum (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4441fa9e4066Sahrens zp->z_atime_dirty = 0; 4442fa9e4066Sahrens mutex_exit(&zp->z_lock); 4443fa9e4066Sahrens dmu_tx_commit(tx); 4444fa9e4066Sahrens } 4445fa9e4066Sahrens } 4446fa9e4066Sahrens 4447fa9e4066Sahrens zfs_zinactive(zp); 4448f18faf3fSek 
rw_exit(&zfsvfs->z_teardown_inactive_lock); 4449fa9e4066Sahrens } 4450fa9e4066Sahrens 4451fa9e4066Sahrens /* 4452fa9e4066Sahrens * Bounds-check the seek operation. 4453fa9e4066Sahrens * 4454fa9e4066Sahrens * IN: vp - vnode seeking within 4455fa9e4066Sahrens * ooff - old file offset 4456fa9e4066Sahrens * noffp - pointer to new file offset 4457da6c28aaSamw * ct - caller context 4458fa9e4066Sahrens * 4459f7170741SWill Andrews * RETURN: 0 on success, EINVAL if new offset invalid. 4460fa9e4066Sahrens */ 4461fa9e4066Sahrens /* ARGSUSED */ 4462fa9e4066Sahrens static int 4463da6c28aaSamw zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4464da6c28aaSamw caller_context_t *ct) 4465fa9e4066Sahrens { 4466fa9e4066Sahrens if (vp->v_type == VDIR) 4467fa9e4066Sahrens return (0); 4468fa9e4066Sahrens return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4469fa9e4066Sahrens } 4470fa9e4066Sahrens 4471fa9e4066Sahrens /* 4472fa9e4066Sahrens * Pre-filter the generic locking function to trap attempts to place 4473fa9e4066Sahrens * a mandatory lock on a memory mapped file. 4474fa9e4066Sahrens */ 4475fa9e4066Sahrens static int 4476fa9e4066Sahrens zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4477da6c28aaSamw flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4478fa9e4066Sahrens { 4479fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4480fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4481fa9e4066Sahrens 44823cb34c60Sahrens ZFS_ENTER(zfsvfs); 44833cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4484fa9e4066Sahrens 4485fa9e4066Sahrens /* 4486ea8dc4b6Seschrock * We are following the UFS semantics with respect to mapcnt 4487ea8dc4b6Seschrock * here: If we see that the file is mapped already, then we will 4488ea8dc4b6Seschrock * return an error, but we don't worry about races between this 4489ea8dc4b6Seschrock * function and zfs_map(). 
4490fa9e4066Sahrens */ 44910a586ceaSMark Shellenbaum if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4492fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4493be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 4494fa9e4066Sahrens } 4495fa9e4066Sahrens ZFS_EXIT(zfsvfs); 449604ce3d0bSMark Shellenbaum return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4497fa9e4066Sahrens } 4498fa9e4066Sahrens 4499fa9e4066Sahrens /* 4500fa9e4066Sahrens * If we can't find a page in the cache, we will create a new page 4501fa9e4066Sahrens * and fill it with file data. For efficiency, we may try to fill 4502ac05c741SMark Maybee * multiple pages at once (klustering) to fill up the supplied page 4503ed886187SMark Maybee * list. Note that the pages to be filled are held with an exclusive 4504ed886187SMark Maybee * lock to prevent access by other threads while they are being filled. 4505fa9e4066Sahrens */ 4506fa9e4066Sahrens static int 4507fa9e4066Sahrens zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4508fa9e4066Sahrens caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4509fa9e4066Sahrens { 4510fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4511fa9e4066Sahrens page_t *pp, *cur_pp; 4512fa9e4066Sahrens objset_t *os = zp->z_zfsvfs->z_os; 4513fa9e4066Sahrens u_offset_t io_off, total; 4514fa9e4066Sahrens size_t io_len; 4515fa9e4066Sahrens int err; 4516fa9e4066Sahrens 451744eda4d7Smaybee if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4518ac05c741SMark Maybee /* 4519ac05c741SMark Maybee * We only have a single page, don't bother klustering 4520ac05c741SMark Maybee */ 4521fa9e4066Sahrens io_off = off; 4522fa9e4066Sahrens io_len = PAGESIZE; 4523ed886187SMark Maybee pp = page_create_va(vp, io_off, io_len, 4524ed886187SMark Maybee PG_EXCL | PG_WAIT, seg, addr); 4525fa9e4066Sahrens } else { 4526fa9e4066Sahrens /* 4527ac05c741SMark Maybee * Try to find enough pages to fill the page list 4528fa9e4066Sahrens */ 4529fa9e4066Sahrens pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 
4530ac05c741SMark Maybee &io_len, off, plsz, 0); 4531fa9e4066Sahrens } 4532fa9e4066Sahrens if (pp == NULL) { 4533fa9e4066Sahrens /* 4534ac05c741SMark Maybee * The page already exists, nothing to do here. 4535fa9e4066Sahrens */ 4536fa9e4066Sahrens *pl = NULL; 4537fa9e4066Sahrens return (0); 4538fa9e4066Sahrens } 4539fa9e4066Sahrens 4540fa9e4066Sahrens /* 4541fa9e4066Sahrens * Fill the pages in the kluster. 4542fa9e4066Sahrens */ 4543fa9e4066Sahrens cur_pp = pp; 4544fa9e4066Sahrens for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4545ac05c741SMark Maybee caddr_t va; 4546ac05c741SMark Maybee 454744eda4d7Smaybee ASSERT3U(io_off, ==, cur_pp->p_offset); 45480fab61baSJonathan W Adams va = zfs_map_page(cur_pp, S_WRITE); 45497bfdf011SNeil Perrin err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 45507bfdf011SNeil Perrin DMU_READ_PREFETCH); 45510fab61baSJonathan W Adams zfs_unmap_page(cur_pp, va); 4552fa9e4066Sahrens if (err) { 4553fa9e4066Sahrens /* On error, toss the entire kluster */ 4554fa9e4066Sahrens pvn_read_done(pp, B_ERROR); 4555b87f3af3Sperrin /* convert checksum errors into IO errors */ 4556b87f3af3Sperrin if (err == ECKSUM) 4557be6fd75aSMatthew Ahrens err = SET_ERROR(EIO); 4558fa9e4066Sahrens return (err); 4559fa9e4066Sahrens } 4560fa9e4066Sahrens cur_pp = cur_pp->p_next; 4561fa9e4066Sahrens } 4562ac05c741SMark Maybee 4563fa9e4066Sahrens /* 4564ac05c741SMark Maybee * Fill in the page list array from the kluster starting 4565ac05c741SMark Maybee * from the desired offset `off'. 4566fa9e4066Sahrens * NOTE: the page list will always be null terminated. 
4567fa9e4066Sahrens */ 4568fa9e4066Sahrens pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4569ac05c741SMark Maybee ASSERT(pl == NULL || (*pl)->p_offset == off); 4570fa9e4066Sahrens 4571fa9e4066Sahrens return (0); 4572fa9e4066Sahrens } 4573fa9e4066Sahrens 4574fa9e4066Sahrens /* 4575fa9e4066Sahrens * Return pointers to the pages for the file region [off, off + len] 4576fa9e4066Sahrens * in the pl array. If plsz is greater than len, this function may 4577ac05c741SMark Maybee * also return page pointers from after the specified region 4578ac05c741SMark Maybee * (i.e. the region [off, off + plsz]). These additional pages are 4579ac05c741SMark Maybee * only returned if they are already in the cache, or were created as 4580ac05c741SMark Maybee * part of a klustered read. 4581fa9e4066Sahrens * 4582fa9e4066Sahrens * IN: vp - vnode of file to get data from. 4583fa9e4066Sahrens * off - position in file to get data from. 4584fa9e4066Sahrens * len - amount of data to retrieve. 4585fa9e4066Sahrens * plsz - length of provided page list. 4586fa9e4066Sahrens * seg - segment to obtain pages for. 4587fa9e4066Sahrens * addr - virtual address of fault. 4588fa9e4066Sahrens * rw - mode of created pages. 4589fa9e4066Sahrens * cr - credentials of caller. 4590da6c28aaSamw * ct - caller context. 4591fa9e4066Sahrens * 4592fa9e4066Sahrens * OUT: protp - protection mode of created pages. 4593fa9e4066Sahrens * pl - list of pages created. 4594fa9e4066Sahrens * 4595f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
4596fa9e4066Sahrens * 4597fa9e4066Sahrens * Timestamps: 4598fa9e4066Sahrens * vp - atime updated 4599fa9e4066Sahrens */ 4600fa9e4066Sahrens /* ARGSUSED */ 4601fa9e4066Sahrens static int 4602fa9e4066Sahrens zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4603f7170741SWill Andrews page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4604f7170741SWill Andrews enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4605fa9e4066Sahrens { 4606fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4607fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4608ac05c741SMark Maybee page_t **pl0 = pl; 4609ac05c741SMark Maybee int err = 0; 4610ac05c741SMark Maybee 4611ac05c741SMark Maybee /* we do our own caching, faultahead is unnecessary */ 4612ac05c741SMark Maybee if (pl == NULL) 4613ac05c741SMark Maybee return (0); 4614ac05c741SMark Maybee else if (len > plsz) 4615ac05c741SMark Maybee len = plsz; 461627bd165aSMark Maybee else 461727bd165aSMark Maybee len = P2ROUNDUP(len, PAGESIZE); 4618ac05c741SMark Maybee ASSERT(plsz >= len); 4619fa9e4066Sahrens 46203cb34c60Sahrens ZFS_ENTER(zfsvfs); 46213cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4622fa9e4066Sahrens 4623fa9e4066Sahrens if (protp) 4624fa9e4066Sahrens *protp = PROT_ALL; 4625fa9e4066Sahrens 4626fa9e4066Sahrens /* 4627ed886187SMark Maybee * Loop through the requested range [off, off + len) looking 4628fa9e4066Sahrens * for pages. If we don't find a page, we will need to create 4629fa9e4066Sahrens * a new page and fill it with data from the file. 
4630fa9e4066Sahrens */ 4631fa9e4066Sahrens while (len > 0) { 4632ac05c741SMark Maybee if (*pl = page_lookup(vp, off, SE_SHARED)) 4633ac05c741SMark Maybee *(pl+1) = NULL; 4634ac05c741SMark Maybee else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4635ac05c741SMark Maybee goto out; 4636ac05c741SMark Maybee while (*pl) { 4637ac05c741SMark Maybee ASSERT3U((*pl)->p_offset, ==, off); 4638fa9e4066Sahrens off += PAGESIZE; 4639fa9e4066Sahrens addr += PAGESIZE; 464027bd165aSMark Maybee if (len > 0) { 464127bd165aSMark Maybee ASSERT3U(len, >=, PAGESIZE); 4642ac05c741SMark Maybee len -= PAGESIZE; 464327bd165aSMark Maybee } 4644ac05c741SMark Maybee ASSERT3U(plsz, >=, PAGESIZE); 4645fa9e4066Sahrens plsz -= PAGESIZE; 4646ac05c741SMark Maybee pl++; 4647fa9e4066Sahrens } 4648fa9e4066Sahrens } 4649fa9e4066Sahrens 4650fa9e4066Sahrens /* 4651fa9e4066Sahrens * Fill out the page array with any pages already in the cache. 4652fa9e4066Sahrens */ 4653ac05c741SMark Maybee while (plsz > 0 && 4654ac05c741SMark Maybee (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4655ac05c741SMark Maybee off += PAGESIZE; 4656ac05c741SMark Maybee plsz -= PAGESIZE; 4657fa9e4066Sahrens } 4658fa9e4066Sahrens out: 4659fe2f476aSperrin if (err) { 4660fe2f476aSperrin /* 4661fe2f476aSperrin * Release any pages we have previously locked. 4662fe2f476aSperrin */ 4663fe2f476aSperrin while (pl > pl0) 4664fe2f476aSperrin page_unlock(*--pl); 4665ac05c741SMark Maybee } else { 4666ac05c741SMark Maybee ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4667fe2f476aSperrin } 4668fe2f476aSperrin 4669fa9e4066Sahrens *pl = NULL; 4670fa9e4066Sahrens 4671fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4672fa9e4066Sahrens return (err); 4673fa9e4066Sahrens } 4674fa9e4066Sahrens 4675ea8dc4b6Seschrock /* 4676ea8dc4b6Seschrock * Request a memory map for a section of a file. 
This code interacts 4677ea8dc4b6Seschrock * with common code and the VM system as follows: 4678ea8dc4b6Seschrock * 4679f7170741SWill Andrews * - common code calls mmap(), which ends up in smmap_common() 4680f7170741SWill Andrews * - this calls VOP_MAP(), which takes you into (say) zfs 4681f7170741SWill Andrews * - zfs_map() calls as_map(), passing segvn_create() as the callback 4682f7170741SWill Andrews * - segvn_create() creates the new segment and calls VOP_ADDMAP() 4683f7170741SWill Andrews * - zfs_addmap() updates z_mapcnt 4684ea8dc4b6Seschrock */ 4685da6c28aaSamw /*ARGSUSED*/ 4686fa9e4066Sahrens static int 4687fa9e4066Sahrens zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4688da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4689da6c28aaSamw caller_context_t *ct) 4690fa9e4066Sahrens { 4691fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4692fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4693fa9e4066Sahrens segvn_crargs_t vn_a; 4694fa9e4066Sahrens int error; 4695fa9e4066Sahrens 46960616c50eSmarks ZFS_ENTER(zfsvfs); 46970616c50eSmarks ZFS_VERIFY_ZP(zp); 46980616c50eSmarks 46992889ec41SGordon Ross /* 47002889ec41SGordon Ross * Note: ZFS_READONLY is handled in zfs_zaccess_common. 
47012889ec41SGordon Ross */ 47022889ec41SGordon Ross 47030a586ceaSMark Shellenbaum if ((prot & PROT_WRITE) && (zp->z_pflags & 47042889ec41SGordon Ross (ZFS_IMMUTABLE | ZFS_APPENDONLY))) { 47050616c50eSmarks ZFS_EXIT(zfsvfs); 4706be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 47070616c50eSmarks } 4708da6c28aaSamw 47090616c50eSmarks if ((prot & (PROT_READ | PROT_EXEC)) && 47100a586ceaSMark Shellenbaum (zp->z_pflags & ZFS_AV_QUARANTINED)) { 47110616c50eSmarks ZFS_EXIT(zfsvfs); 4712be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 47130616c50eSmarks } 4714fa9e4066Sahrens 4715fa9e4066Sahrens if (vp->v_flag & VNOMAP) { 4716fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4717be6fd75aSMatthew Ahrens return (SET_ERROR(ENOSYS)); 4718fa9e4066Sahrens } 4719fa9e4066Sahrens 4720fa9e4066Sahrens if (off < 0 || len > MAXOFFSET_T - off) { 4721fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4722be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 4723fa9e4066Sahrens } 4724fa9e4066Sahrens 4725fa9e4066Sahrens if (vp->v_type != VREG) { 4726fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4727be6fd75aSMatthew Ahrens return (SET_ERROR(ENODEV)); 4728fa9e4066Sahrens } 4729fa9e4066Sahrens 4730fa9e4066Sahrens /* 4731fa9e4066Sahrens * If file is locked, disallow mapping. 
4732fa9e4066Sahrens */ 47330a586ceaSMark Shellenbaum if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 4734ea8dc4b6Seschrock ZFS_EXIT(zfsvfs); 4735be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 4736fa9e4066Sahrens } 4737fa9e4066Sahrens 4738fa9e4066Sahrens as_rangelock(as); 473960946fe0Smec error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 474060946fe0Smec if (error != 0) { 474160946fe0Smec as_rangeunlock(as); 474260946fe0Smec ZFS_EXIT(zfsvfs); 474360946fe0Smec return (error); 4744fa9e4066Sahrens } 4745fa9e4066Sahrens 4746fa9e4066Sahrens vn_a.vp = vp; 4747fa9e4066Sahrens vn_a.offset = (u_offset_t)off; 4748fa9e4066Sahrens vn_a.type = flags & MAP_TYPE; 4749fa9e4066Sahrens vn_a.prot = prot; 4750fa9e4066Sahrens vn_a.maxprot = maxprot; 4751fa9e4066Sahrens vn_a.cred = cr; 4752fa9e4066Sahrens vn_a.amp = NULL; 4753fa9e4066Sahrens vn_a.flags = flags & ~MAP_TYPE; 47544944b02eSkchow vn_a.szc = 0; 47554944b02eSkchow vn_a.lgrp_mem_policy_flags = 0; 4756fa9e4066Sahrens 4757fa9e4066Sahrens error = as_map(as, *addrp, len, segvn_create, &vn_a); 4758fa9e4066Sahrens 4759fa9e4066Sahrens as_rangeunlock(as); 4760fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4761fa9e4066Sahrens return (error); 4762fa9e4066Sahrens } 4763fa9e4066Sahrens 4764fa9e4066Sahrens /* ARGSUSED */ 4765fa9e4066Sahrens static int 4766fa9e4066Sahrens zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4767da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4768da6c28aaSamw caller_context_t *ct) 4769fa9e4066Sahrens { 4770ea8dc4b6Seschrock uint64_t pages = btopr(len); 4771ea8dc4b6Seschrock 4772ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 4773fa9e4066Sahrens return (0); 4774fa9e4066Sahrens } 4775fa9e4066Sahrens 4776b468a217Seschrock /* 4777b468a217Seschrock * The reason we push dirty pages as part of zfs_delmap() is so that we get a 4778b468a217Seschrock * more accurate mtime for the associated file. 
Since we don't have a way of 4779b468a217Seschrock * detecting when the data was actually modified, we have to resort to 4780b468a217Seschrock * heuristics. If an explicit msync() is done, then we mark the mtime when the 4781b468a217Seschrock * last page is pushed. The problem occurs when the msync() call is omitted, 4782b468a217Seschrock * which by far the most common case: 4783b468a217Seschrock * 47844bb73804SMatthew Ahrens * open() 47854bb73804SMatthew Ahrens * mmap() 47864bb73804SMatthew Ahrens * <modify memory> 47874bb73804SMatthew Ahrens * munmap() 47884bb73804SMatthew Ahrens * close() 47894bb73804SMatthew Ahrens * <time lapse> 47904bb73804SMatthew Ahrens * putpage() via fsflush 4791b468a217Seschrock * 4792b468a217Seschrock * If we wait until fsflush to come along, we can have a modification time that 4793b468a217Seschrock * is some arbitrary point in the future. In order to prevent this in the 4794b468a217Seschrock * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 4795b468a217Seschrock * torn down. 4796b468a217Seschrock */ 4797fa9e4066Sahrens /* ARGSUSED */ 4798fa9e4066Sahrens static int 4799fa9e4066Sahrens zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4800da6c28aaSamw size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 4801da6c28aaSamw caller_context_t *ct) 4802fa9e4066Sahrens { 4803ea8dc4b6Seschrock uint64_t pages = btopr(len); 4804ea8dc4b6Seschrock 4805ea8dc4b6Seschrock ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 4806ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 4807b468a217Seschrock 4808b468a217Seschrock if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 4809b468a217Seschrock vn_has_cached_data(vp)) 4810da6c28aaSamw (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 4811b468a217Seschrock 4812fa9e4066Sahrens return (0); 4813fa9e4066Sahrens } 4814fa9e4066Sahrens 4815fa9e4066Sahrens /* 4816fa9e4066Sahrens * Free or allocate space in a file. 
Currently, this function only 4817fa9e4066Sahrens * supports the `F_FREESP' command. However, this command is somewhat 4818fa9e4066Sahrens * misnamed, as its functionality includes the ability to allocate as 4819fa9e4066Sahrens * well as free space. 4820fa9e4066Sahrens * 4821fa9e4066Sahrens * IN: vp - vnode of file to free data in. 4822fa9e4066Sahrens * cmd - action to take (only F_FREESP supported). 4823fa9e4066Sahrens * bfp - section of file to free/alloc. 4824fa9e4066Sahrens * flag - current file open mode flags. 4825fa9e4066Sahrens * offset - current file offset. 4826fa9e4066Sahrens * cr - credentials of caller [UNUSED]. 4827da6c28aaSamw * ct - caller context. 4828fa9e4066Sahrens * 4829f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 4830fa9e4066Sahrens * 4831fa9e4066Sahrens * Timestamps: 4832fa9e4066Sahrens * vp - ctime|mtime updated 4833fa9e4066Sahrens */ 4834fa9e4066Sahrens /* ARGSUSED */ 4835fa9e4066Sahrens static int 4836fa9e4066Sahrens zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 4837fa9e4066Sahrens offset_t offset, cred_t *cr, caller_context_t *ct) 4838fa9e4066Sahrens { 4839fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4840fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4841fa9e4066Sahrens uint64_t off, len; 4842fa9e4066Sahrens int error; 4843fa9e4066Sahrens 48443cb34c60Sahrens ZFS_ENTER(zfsvfs); 48453cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4846fa9e4066Sahrens 4847fa9e4066Sahrens if (cmd != F_FREESP) { 4848fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4849be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4850fa9e4066Sahrens } 4851fa9e4066Sahrens 48522144b121SMarcel Telka /* 48532144b121SMarcel Telka * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 48542144b121SMarcel Telka * callers might not be able to detect properly that we are read-only, 48552144b121SMarcel Telka * so check it explicitly here. 
48562144b121SMarcel Telka */ 48572144b121SMarcel Telka if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 48582144b121SMarcel Telka ZFS_EXIT(zfsvfs); 48592144b121SMarcel Telka return (SET_ERROR(EROFS)); 48602144b121SMarcel Telka } 48612144b121SMarcel Telka 4862fa9e4066Sahrens if (error = convoff(vp, bfp, 0, offset)) { 4863fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4864fa9e4066Sahrens return (error); 4865fa9e4066Sahrens } 4866fa9e4066Sahrens 4867fa9e4066Sahrens if (bfp->l_len < 0) { 4868fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4869be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4870fa9e4066Sahrens } 4871fa9e4066Sahrens 4872fa9e4066Sahrens off = bfp->l_start; 4873104e2ed7Sperrin len = bfp->l_len; /* 0 means from off to end of file */ 4874104e2ed7Sperrin 4875cdb0ab79Smaybee error = zfs_freesp(zp, off, len, flag, TRUE); 4876fa9e4066Sahrens 487772102e74SBryan Cantrill if (error == 0 && off == 0 && len == 0) 487872102e74SBryan Cantrill vnevent_truncate(ZTOV(zp), ct); 487972102e74SBryan Cantrill 4880fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4881fa9e4066Sahrens return (error); 4882fa9e4066Sahrens } 4883fa9e4066Sahrens 4884da6c28aaSamw /*ARGSUSED*/ 4885fa9e4066Sahrens static int 4886da6c28aaSamw zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4887fa9e4066Sahrens { 4888fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4889fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4890f18faf3fSek uint32_t gen; 48910a586ceaSMark Shellenbaum uint64_t gen64; 4892fa9e4066Sahrens uint64_t object = zp->z_id; 4893fa9e4066Sahrens zfid_short_t *zfid; 48940a586ceaSMark Shellenbaum int size, i, error; 4895fa9e4066Sahrens 48963cb34c60Sahrens ZFS_ENTER(zfsvfs); 48973cb34c60Sahrens ZFS_VERIFY_ZP(zp); 48980a586ceaSMark Shellenbaum 48990a586ceaSMark Shellenbaum if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4900f3e6fb2fSMark Shellenbaum &gen64, sizeof (uint64_t))) != 0) { 4901f3e6fb2fSMark Shellenbaum ZFS_EXIT(zfsvfs); 49020a586ceaSMark Shellenbaum return (error); 4903f3e6fb2fSMark Shellenbaum } 49040a586ceaSMark 
Shellenbaum 49050a586ceaSMark Shellenbaum gen = (uint32_t)gen64; 4906fa9e4066Sahrens 4907fa9e4066Sahrens size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4908fa9e4066Sahrens if (fidp->fid_len < size) { 4909fa9e4066Sahrens fidp->fid_len = size; 49100f2dc02eSek ZFS_EXIT(zfsvfs); 4911be6fd75aSMatthew Ahrens return (SET_ERROR(ENOSPC)); 4912fa9e4066Sahrens } 4913fa9e4066Sahrens 4914fa9e4066Sahrens zfid = (zfid_short_t *)fidp; 4915fa9e4066Sahrens 4916fa9e4066Sahrens zfid->zf_len = size; 4917fa9e4066Sahrens 4918fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 4919fa9e4066Sahrens zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4920fa9e4066Sahrens 4921fa9e4066Sahrens /* Must have a non-zero generation number to distinguish from .zfs */ 4922fa9e4066Sahrens if (gen == 0) 4923fa9e4066Sahrens gen = 1; 4924fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 4925fa9e4066Sahrens zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4926fa9e4066Sahrens 4927fa9e4066Sahrens if (size == LONG_FID_LEN) { 4928fa9e4066Sahrens uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4929fa9e4066Sahrens zfid_long_t *zlfid; 4930fa9e4066Sahrens 4931fa9e4066Sahrens zlfid = (zfid_long_t *)fidp; 4932fa9e4066Sahrens 4933fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4934fa9e4066Sahrens zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4935fa9e4066Sahrens 4936fa9e4066Sahrens /* XXX - this should be the generation number for the objset */ 4937fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4938fa9e4066Sahrens zlfid->zf_setgen[i] = 0; 4939fa9e4066Sahrens } 4940fa9e4066Sahrens 4941fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4942fa9e4066Sahrens return (0); 4943fa9e4066Sahrens } 4944fa9e4066Sahrens 4945fa9e4066Sahrens static int 4946da6c28aaSamw zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4947da6c28aaSamw caller_context_t *ct) 4948fa9e4066Sahrens { 4949fa9e4066Sahrens znode_t *zp, *xzp; 4950fa9e4066Sahrens zfsvfs_t *zfsvfs; 
4951fa9e4066Sahrens zfs_dirlock_t *dl; 4952fa9e4066Sahrens int error; 4953fa9e4066Sahrens 4954fa9e4066Sahrens switch (cmd) { 4955fa9e4066Sahrens case _PC_LINK_MAX: 4956fa9e4066Sahrens *valp = ULONG_MAX; 4957fa9e4066Sahrens return (0); 4958fa9e4066Sahrens 4959fa9e4066Sahrens case _PC_FILESIZEBITS: 4960fa9e4066Sahrens *valp = 64; 4961fa9e4066Sahrens return (0); 4962fa9e4066Sahrens 4963fa9e4066Sahrens case _PC_XATTR_EXISTS: 4964fa9e4066Sahrens zp = VTOZ(vp); 4965fa9e4066Sahrens zfsvfs = zp->z_zfsvfs; 49663cb34c60Sahrens ZFS_ENTER(zfsvfs); 49673cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4968fa9e4066Sahrens *valp = 0; 4969fa9e4066Sahrens error = zfs_dirent_lock(&dl, zp, "", &xzp, 4970da6c28aaSamw ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 4971fa9e4066Sahrens if (error == 0) { 4972fa9e4066Sahrens zfs_dirent_unlock(dl); 4973fa9e4066Sahrens if (!zfs_dirempty(xzp)) 4974fa9e4066Sahrens *valp = 1; 4975fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 4976fa9e4066Sahrens } else if (error == ENOENT) { 4977fa9e4066Sahrens /* 4978fa9e4066Sahrens * If there aren't extended attributes, it's the 4979fa9e4066Sahrens * same as having zero of them. 
4980fa9e4066Sahrens */ 4981fa9e4066Sahrens error = 0; 4982fa9e4066Sahrens } 4983fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4984fa9e4066Sahrens return (error); 4985fa9e4066Sahrens 4986da6c28aaSamw case _PC_SATTR_ENABLED: 4987da6c28aaSamw case _PC_SATTR_EXISTS: 49889660e5cbSJanice Chang *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4989da6c28aaSamw (vp->v_type == VREG || vp->v_type == VDIR); 4990da6c28aaSamw return (0); 4991da6c28aaSamw 4992e802abbdSTim Haley case _PC_ACCESS_FILTERING: 4993e802abbdSTim Haley *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4994e802abbdSTim Haley vp->v_type == VDIR; 4995e802abbdSTim Haley return (0); 4996e802abbdSTim Haley 4997fa9e4066Sahrens case _PC_ACL_ENABLED: 4998fa9e4066Sahrens *valp = _ACL_ACE_ENABLED; 4999fa9e4066Sahrens return (0); 5000fa9e4066Sahrens 5001fa9e4066Sahrens case _PC_MIN_HOLE_SIZE: 5002fa9e4066Sahrens *valp = (ulong_t)SPA_MINBLOCKSIZE; 5003fa9e4066Sahrens return (0); 5004fa9e4066Sahrens 50053b862e9aSRoger A. Faulkner case _PC_TIMESTAMP_RESOLUTION: 50063b862e9aSRoger A. Faulkner /* nanosecond timestamp resolution */ 50073b862e9aSRoger A. Faulkner *valp = 1L; 50083b862e9aSRoger A. Faulkner return (0); 50093b862e9aSRoger A. Faulkner 5010fa9e4066Sahrens default: 5011da6c28aaSamw return (fs_pathconf(vp, cmd, valp, cr, ct)); 5012fa9e4066Sahrens } 5013fa9e4066Sahrens } 5014fa9e4066Sahrens 5015fa9e4066Sahrens /*ARGSUSED*/ 5016fa9e4066Sahrens static int 5017da6c28aaSamw zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5018da6c28aaSamw caller_context_t *ct) 5019fa9e4066Sahrens { 5020fa9e4066Sahrens znode_t *zp = VTOZ(vp); 5021fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5022fa9e4066Sahrens int error; 5023da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 5024fa9e4066Sahrens 50253cb34c60Sahrens ZFS_ENTER(zfsvfs); 50263cb34c60Sahrens ZFS_VERIFY_ZP(zp); 5027da6c28aaSamw error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5028fa9e4066Sahrens ZFS_EXIT(zfsvfs); 5029fa9e4066Sahrens 5030fa9e4066Sahrens return (error); 5031fa9e4066Sahrens } 5032fa9e4066Sahrens 5033fa9e4066Sahrens /*ARGSUSED*/ 5034fa9e4066Sahrens static int 5035da6c28aaSamw zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5036da6c28aaSamw caller_context_t *ct) 5037fa9e4066Sahrens { 5038fa9e4066Sahrens znode_t *zp = VTOZ(vp); 5039fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5040fa9e4066Sahrens int error; 5041da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 504255da60b9SMark J Musante zilog_t *zilog = zfsvfs->z_log; 5043fa9e4066Sahrens 50443cb34c60Sahrens ZFS_ENTER(zfsvfs); 50453cb34c60Sahrens ZFS_VERIFY_ZP(zp); 504655da60b9SMark J Musante 5047da6c28aaSamw error = zfs_setacl(zp, vsecp, skipaclchk, cr); 504855da60b9SMark J Musante 504955da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 50505002558fSNeil Perrin zil_commit(zilog, 0); 505155da60b9SMark J Musante 5052fa9e4066Sahrens ZFS_EXIT(zfsvfs); 5053fa9e4066Sahrens return (error); 5054fa9e4066Sahrens } 5055fa9e4066Sahrens 5056c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5057f7170741SWill Andrews * The smallest read we may consider to loan out an arcbuf. 5058f7170741SWill Andrews * This must be a power of 2. 5059c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5060c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int zcr_blksz_min = (1 << 10); /* 1K */ 5061f7170741SWill Andrews /* 5062f7170741SWill Andrews * If set to less than the file block size, allow loaning out of an 5063f7170741SWill Andrews * arcbuf for a partial block read. This must be a power of 2. 
5064f7170741SWill Andrews */ 5065c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int zcr_blksz_max = (1 << 17); /* 128K */ 5066c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5067c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /*ARGSUSED*/ 5068c242f9a0Schunli zhang - Sun Microsystems - Irvine United States static int 5069c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 5070c242f9a0Schunli zhang - Sun Microsystems - Irvine United States caller_context_t *ct) 5071c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 5072c242f9a0Schunli zhang - Sun Microsystems - Irvine United States znode_t *zp = VTOZ(vp); 5073c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5074c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int max_blksz = zfsvfs->z_max_blksz; 5075c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio_t *uio = &xuio->xu_uio; 5076c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ssize_t size = uio->uio_resid; 5077c242f9a0Schunli zhang - Sun Microsystems - Irvine United States offset_t offset = uio->uio_loffset; 5078c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz; 5079c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int fullblk, i; 5080c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 5081c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ssize_t maxsize; 5082c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int preamble, postamble; 5083c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5084c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (xuio->xu_type != UIOTYPE_ZEROCOPY) 5085be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5086c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 
5087c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_ENTER(zfsvfs); 5088c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_VERIFY_ZP(zp); 5089c242f9a0Schunli zhang - Sun Microsystems - Irvine United States switch (ioflag) { 5090c242f9a0Schunli zhang - Sun Microsystems - Irvine United States case UIO_WRITE: 5091c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5092c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for write if write size is bigger than 5093c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * max_blksz, and the file's block size is also max_blksz. 5094c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5095c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = max_blksz; 5096c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size < blksz || zp->z_blksz != blksz) { 5097c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5098be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5099c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5100c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5101c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Caller requests buffers for write before knowing where the 5102c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * write offset might be (e.g. NFS TCP write). 
5103c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5104c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (offset == -1) { 5105c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = 0; 5106c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 5107c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = P2PHASE(offset, blksz); 5108c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (preamble) { 5109c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = blksz - preamble; 5110c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size -= preamble; 5111c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5112c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5113c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5114c242f9a0Schunli zhang - Sun Microsystems - Irvine United States postamble = P2PHASE(size, blksz); 5115c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size -= postamble; 5116c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5117c242f9a0Schunli zhang - Sun Microsystems - Irvine United States fullblk = size / blksz; 5118570de38fSSurya Prakki (void) dmu_xuio_init(xuio, 5119c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (preamble != 0) + fullblk + (postamble != 0)); 5120c242f9a0Schunli zhang - Sun Microsystems - Irvine United States DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5121c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int, postamble, int, 5122c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (preamble != 0) + fullblk + (postamble != 0)); 5123c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5124c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5125c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Have to fix iov base/len for partial buffers. 
They 5126c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * currently represent full arc_buf's. 5127c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5128c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (preamble) { 5129c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* data begins in the middle of the arc_buf */ 51300a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 51310a586ceaSMark Shellenbaum blksz); 5132c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5133570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 5134570de38fSSurya Prakki blksz - preamble, preamble); 5135c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5136c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5137c242f9a0Schunli zhang - Sun Microsystems - Irvine United States for (i = 0; i < fullblk; i++) { 51380a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 51390a586ceaSMark Shellenbaum blksz); 5140c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5141570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5142c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5143c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5144c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (postamble) { 5145c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* data ends in the middle of the arc_buf */ 51460a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 51470a586ceaSMark Shellenbaum blksz); 5148c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5149570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5150c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5151c242f9a0Schunli zhang - Sun Microsystems - Irvine United States break; 5152c242f9a0Schunli zhang - Sun 
Microsystems - Irvine United States case UIO_READ: 5153c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5154c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for read if the read size is larger than 5155c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * the current file block size. Block alignment is not 5156c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * considered. Partial arc_buf will be loaned out for read. 5157c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5158c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zp->z_blksz; 5159c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz < zcr_blksz_min) 5160c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zcr_blksz_min; 5161c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz > zcr_blksz_max) 5162c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zcr_blksz_max; 5163c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* avoid potential complexity of dealing with it */ 5164c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz > max_blksz) { 5165c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5166be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5167c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5168c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 51690a586ceaSMark Shellenbaum maxsize = zp->z_size - uio->uio_loffset; 5170c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size > maxsize) 5171c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size = maxsize; 5172c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5173c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size < blksz || vn_has_cached_data(vp)) { 5174c242f9a0Schunli zhang - Sun Microsystems - 
Irvine United States ZFS_EXIT(zfsvfs); 5175be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5176c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5177c242f9a0Schunli zhang - Sun Microsystems - Irvine United States break; 5178c242f9a0Schunli zhang - Sun Microsystems - Irvine United States default: 5179c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5180be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5181c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5182c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5183c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio->uio_extflg = UIO_XUIO; 5184c242f9a0Schunli zhang - Sun Microsystems - Irvine United States XUIO_XUZC_RW(xuio) = ioflag; 5185c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5186c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (0); 5187c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5188c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5189c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /*ARGSUSED*/ 5190c242f9a0Schunli zhang - Sun Microsystems - Irvine United States static int 5191c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5192c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 5193c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int i; 5194c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 5195c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int ioflag = XUIO_XUZC_RW(xuio); 5196c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5197c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5198c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5199c242f9a0Schunli zhang 
- Sun Microsystems - Irvine United States i = dmu_xuio_cnt(xuio); 5200c242f9a0Schunli zhang - Sun Microsystems - Irvine United States while (i-- > 0) { 5201c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = dmu_xuio_arcbuf(xuio, i); 5202c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5203c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * if abuf == NULL, it must be a write buffer 5204c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * that has been returned in zfs_write(). 5205c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5206c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (abuf) 5207c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_return_arcbuf(abuf); 5208c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf || ioflag == UIO_WRITE); 5209c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5210c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5211c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_xuio_fini(xuio); 5212c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (0); 5213c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5214c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5215fa9e4066Sahrens /* 5216fa9e4066Sahrens * Predeclare these here so that the compiler assumes that 5217fa9e4066Sahrens * this is an "old style" function declaration that does 5218fa9e4066Sahrens * not include arguments => we won't get type mismatch errors 5219fa9e4066Sahrens * in the initializations that follow. 
5220fa9e4066Sahrens */ 5221fa9e4066Sahrens static int zfs_inval(); 5222fa9e4066Sahrens static int zfs_isdir(); 5223fa9e4066Sahrens 5224fa9e4066Sahrens static int 5225fa9e4066Sahrens zfs_inval() 5226fa9e4066Sahrens { 5227be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5228fa9e4066Sahrens } 5229fa9e4066Sahrens 5230fa9e4066Sahrens static int 5231fa9e4066Sahrens zfs_isdir() 5232fa9e4066Sahrens { 5233be6fd75aSMatthew Ahrens return (SET_ERROR(EISDIR)); 5234fa9e4066Sahrens } 5235fa9e4066Sahrens /* 5236fa9e4066Sahrens * Directory vnode operations template 5237fa9e4066Sahrens */ 5238fa9e4066Sahrens vnodeops_t *zfs_dvnodeops; 5239fa9e4066Sahrens const fs_operation_def_t zfs_dvnodeops_template[] = { 5240aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5241aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5242aa59c4cbSrsb VOPNAME_READ, { .error = zfs_isdir }, 5243aa59c4cbSrsb VOPNAME_WRITE, { .error = zfs_isdir }, 5244aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5245aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5246aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5247aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5248aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5249aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 5250aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5251aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 5252aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5253aa59c4cbSrsb VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5254aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5255aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5256aa59c4cbSrsb VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5257aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5258aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5259aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5260aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 
5261aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5262aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5263aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 52644bb73804SMatthew Ahrens VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5265aa59c4cbSrsb NULL, NULL 5266fa9e4066Sahrens }; 5267fa9e4066Sahrens 5268fa9e4066Sahrens /* 5269fa9e4066Sahrens * Regular file vnode operations template 5270fa9e4066Sahrens */ 5271fa9e4066Sahrens vnodeops_t *zfs_fvnodeops; 5272fa9e4066Sahrens const fs_operation_def_t zfs_fvnodeops_template[] = { 5273aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5274aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5275aa59c4cbSrsb VOPNAME_READ, { .vop_read = zfs_read }, 5276aa59c4cbSrsb VOPNAME_WRITE, { .vop_write = zfs_write }, 5277aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5278aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5279aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5280aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5281aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5282aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5283aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5284aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5285aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5286aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5287aa59c4cbSrsb VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5288aa59c4cbSrsb VOPNAME_SPACE, { .vop_space = zfs_space }, 5289aa59c4cbSrsb VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5290aa59c4cbSrsb VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 5291aa59c4cbSrsb VOPNAME_MAP, { .vop_map = zfs_map }, 5292aa59c4cbSrsb VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5293aa59c4cbSrsb VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5294aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5295aa59c4cbSrsb VOPNAME_GETSECATTR, { 
.vop_getsecattr = zfs_getsecattr }, 5296aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5297aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 52984bb73804SMatthew Ahrens VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 52994bb73804SMatthew Ahrens VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5300aa59c4cbSrsb NULL, NULL 5301fa9e4066Sahrens }; 5302fa9e4066Sahrens 5303fa9e4066Sahrens /* 5304fa9e4066Sahrens * Symbolic link vnode operations template 5305fa9e4066Sahrens */ 5306fa9e4066Sahrens vnodeops_t *zfs_symvnodeops; 5307fa9e4066Sahrens const fs_operation_def_t zfs_symvnodeops_template[] = { 5308aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5309aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5310aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5311aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5312aa59c4cbSrsb VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5313aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5314aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5315aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5316aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5317aa59c4cbSrsb NULL, NULL 5318fa9e4066Sahrens }; 5319fa9e4066Sahrens 5320743a77edSAlan Wright /* 5321743a77edSAlan Wright * special share hidden files vnode operations template 5322743a77edSAlan Wright */ 5323743a77edSAlan Wright vnodeops_t *zfs_sharevnodeops; 5324743a77edSAlan Wright const fs_operation_def_t zfs_sharevnodeops_template[] = { 5325743a77edSAlan Wright VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5326743a77edSAlan Wright VOPNAME_ACCESS, { .vop_access = zfs_access }, 5327743a77edSAlan Wright VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5328743a77edSAlan Wright VOPNAME_FID, { .vop_fid = zfs_fid }, 5329743a77edSAlan Wright VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5330743a77edSAlan Wright VOPNAME_GETSECATTR, { 
.vop_getsecattr = zfs_getsecattr }, 5331743a77edSAlan Wright VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5332743a77edSAlan Wright VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5333743a77edSAlan Wright NULL, NULL 5334743a77edSAlan Wright }; 5335743a77edSAlan Wright 5336fa9e4066Sahrens /* 5337fa9e4066Sahrens * Extended attribute directory vnode operations template 5338f7170741SWill Andrews * 5339f7170741SWill Andrews * This template is identical to the directory vnodes 5340f7170741SWill Andrews * operation template except for restricted operations: 5341f7170741SWill Andrews * VOP_MKDIR() 5342f7170741SWill Andrews * VOP_SYMLINK() 5343f7170741SWill Andrews * 5344fa9e4066Sahrens * Note that there are other restrictions embedded in: 5345fa9e4066Sahrens * zfs_create() - restrict type to VREG 5346fa9e4066Sahrens * zfs_link() - no links into/out of attribute space 5347fa9e4066Sahrens * zfs_rename() - no moves into/out of attribute space 5348fa9e4066Sahrens */ 5349fa9e4066Sahrens vnodeops_t *zfs_xdvnodeops; 5350fa9e4066Sahrens const fs_operation_def_t zfs_xdvnodeops_template[] = { 5351aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5352aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5353aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5354aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5355aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5356aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5357aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5358aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 5359aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5360aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 5361aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5362aa59c4cbSrsb VOPNAME_MKDIR, { .error = zfs_inval }, 5363aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5364aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5365aa59c4cbSrsb 
VOPNAME_SYMLINK, { .error = zfs_inval }, 5366aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5367aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5368aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5369aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5370aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5371aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5372aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5373aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5374aa59c4cbSrsb NULL, NULL 5375fa9e4066Sahrens }; 5376fa9e4066Sahrens 5377fa9e4066Sahrens /* 5378fa9e4066Sahrens * Error vnode operations template 5379fa9e4066Sahrens */ 5380fa9e4066Sahrens vnodeops_t *zfs_evnodeops; 5381fa9e4066Sahrens const fs_operation_def_t zfs_evnodeops_template[] = { 5382aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5383aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5384aa59c4cbSrsb NULL, NULL 5385fa9e4066Sahrens }; 5386