1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5736b9155Smarks * Common Development and Distribution License (the "License"). 6736b9155Smarks * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22d39ee142SMark Shellenbaum * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23*be6fd75aSMatthew Ahrens * Copyright (c) 2013 by Delphix. All rights reserved. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 2675c76197Speteh /* Portions Copyright 2007 Jeremy Teo */ 2755da60b9SMark J Musante /* Portions Copyright 2010 Robert Milkowski */ 2875c76197Speteh 29fa9e4066Sahrens #include <sys/types.h> 30fa9e4066Sahrens #include <sys/param.h> 31fa9e4066Sahrens #include <sys/time.h> 32fa9e4066Sahrens #include <sys/systm.h> 33fa9e4066Sahrens #include <sys/sysmacros.h> 34fa9e4066Sahrens #include <sys/resource.h> 35fa9e4066Sahrens #include <sys/vfs.h> 36aa59c4cbSrsb #include <sys/vfs_opreg.h> 37fa9e4066Sahrens #include <sys/vnode.h> 38fa9e4066Sahrens #include <sys/file.h> 39fa9e4066Sahrens #include <sys/stat.h> 40fa9e4066Sahrens #include <sys/kmem.h> 41fa9e4066Sahrens #include <sys/taskq.h> 42fa9e4066Sahrens #include <sys/uio.h> 43fa9e4066Sahrens #include <sys/vmsystm.h> 44fa9e4066Sahrens #include <sys/atomic.h> 4544eda4d7Smaybee #include <sys/vm.h> 46fa9e4066Sahrens #include <vm/seg_vn.h> 47fa9e4066Sahrens #include <vm/pvn.h> 48fa9e4066Sahrens #include <vm/as.h> 490fab61baSJonathan W Adams #include <vm/kpm.h> 500fab61baSJonathan W Adams #include <vm/seg_kpm.h> 51fa9e4066Sahrens #include <sys/mman.h> 52fa9e4066Sahrens #include <sys/pathname.h> 53fa9e4066Sahrens #include <sys/cmn_err.h> 54fa9e4066Sahrens #include <sys/errno.h> 55fa9e4066Sahrens #include <sys/unistd.h> 56fa9e4066Sahrens #include <sys/zfs_dir.h> 57fa9e4066Sahrens #include <sys/zfs_acl.h> 58fa9e4066Sahrens #include <sys/zfs_ioctl.h> 59fa9e4066Sahrens #include <sys/fs/zfs.h> 60fa9e4066Sahrens #include <sys/dmu.h> 6155da60b9SMark J Musante #include <sys/dmu_objset.h> 62fa9e4066Sahrens #include <sys/spa.h> 63fa9e4066Sahrens #include <sys/txg.h> 64fa9e4066Sahrens #include <sys/dbuf.h> 65fa9e4066Sahrens #include <sys/zap.h> 660a586ceaSMark Shellenbaum #include <sys/sa.h> 67fa9e4066Sahrens #include <sys/dirent.h> 68fa9e4066Sahrens #include <sys/policy.h> 69fa9e4066Sahrens #include <sys/sunddi.h> 70fa9e4066Sahrens #include <sys/filio.h> 71c1ce5987SMark Shellenbaum #include <sys/sid.h> 72fa9e4066Sahrens #include "fs/fs_subr.h" 73fa9e4066Sahrens #include <sys/zfs_ctldir.h> 74da6c28aaSamw #include <sys/zfs_fuid.h> 750a586ceaSMark Shellenbaum #include <sys/zfs_sa.h> 76033f9833Sek #include <sys/dnlc.h> 77104e2ed7Sperrin #include <sys/zfs_rlock.h> 78da6c28aaSamw #include <sys/extdirent.h> 79da6c28aaSamw #include <sys/kidmap.h> 8067dbe2beSCasper H.S. Dik #include <sys/cred.h> 81b38f0970Sck #include <sys/attr.h> 82fa9e4066Sahrens 83fa9e4066Sahrens /* 84fa9e4066Sahrens * Programming rules. 85fa9e4066Sahrens * 86fa9e4066Sahrens * Each vnode op performs some logical unit of work. To do this, the ZPL must 87fa9e4066Sahrens * properly lock its in-core state, create a DMU transaction, do the work, 88fa9e4066Sahrens * record this work in the intent log (ZIL), commit the DMU transaction, 89da6c28aaSamw * and wait for the intent log to commit if it is a synchronous operation. 90da6c28aaSamw * Moreover, the vnode ops must work in both normal and log replay context. 91fa9e4066Sahrens * The ordering of events is important to avoid deadlocks and references 92fa9e4066Sahrens * to freed memory. The example below illustrates the following Big Rules: 93fa9e4066Sahrens * 94fa9e4066Sahrens * (1) A check must be made in each zfs thread for a mounted file system. 953cb34c60Sahrens * This is done avoiding races using ZFS_ENTER(zfsvfs). 963cb34c60Sahrens * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 973cb34c60Sahrens * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 983cb34c60Sahrens * can return EIO from the calling function. 99fa9e4066Sahrens * 100fa9e4066Sahrens * (2) VN_RELE() should always be the last thing except for zil_commit() 101b19a79ecSperrin * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 102fa9e4066Sahrens * First, if it's the last reference, the vnode/znode 103fa9e4066Sahrens * can be freed, so the zp may point to freed memory. Second, the last 104fa9e4066Sahrens * reference will call zfs_zinactive(), which may induce a lot of work -- 105104e2ed7Sperrin * pushing cached pages (which acquires range locks) and syncing out 106fa9e4066Sahrens * cached atime changes. Third, zfs_zinactive() may require a new tx, 107fa9e4066Sahrens * which could deadlock the system if you were already holding one. 1089d3574bfSNeil Perrin * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 109fa9e4066Sahrens * 1107885c754Sperrin * (3) All range locks must be grabbed before calling dmu_tx_assign(), 1117885c754Sperrin * as they can span dmu_tx_assign() calls. 1127885c754Sperrin * 1131209a471SNeil Perrin * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). 114fa9e4066Sahrens * This is critical because we don't want to block while holding locks. 115fa9e4066Sahrens * Note, in particular, that if a lock is sometimes acquired before 116fa9e4066Sahrens * the tx assigns, and sometimes after (e.g. z_lock), then failing to 117fa9e4066Sahrens * use a non-blocking assign can deadlock the system. The scenario: 118fa9e4066Sahrens * 119fa9e4066Sahrens * Thread A has grabbed a lock before calling dmu_tx_assign(). 120fa9e4066Sahrens * Thread B is in an already-assigned tx, and blocks for this lock. 121fa9e4066Sahrens * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 122fa9e4066Sahrens * forever, because the previous txg can't quiesce until B's tx commits. 123fa9e4066Sahrens * 124fa9e4066Sahrens * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 1258a2f1b91Sahrens * then drop all locks, call dmu_tx_wait(), and try again. 126fa9e4066Sahrens * 1277885c754Sperrin * (5) If the operation succeeded, generate the intent log entry for it 128fa9e4066Sahrens * before dropping locks. This ensures that the ordering of events 129fa9e4066Sahrens * in the intent log matches the order in which they actually occurred. 1301209a471SNeil Perrin * During ZIL replay the zfs_log_* functions will update the sequence 1311209a471SNeil Perrin * number to indicate the zil transaction has replayed. 132fa9e4066Sahrens * 1337885c754Sperrin * (6) At the end of each vnode op, the DMU tx must always commit, 134fa9e4066Sahrens * regardless of whether there were any errors. 135fa9e4066Sahrens * 1365002558fSNeil Perrin * (7) After dropping all locks, invoke zil_commit(zilog, foid) 137fa9e4066Sahrens * to ensure that synchronous semantics are provided when necessary. 138fa9e4066Sahrens * 139fa9e4066Sahrens * In general, this is how things should be ordered in each vnode op: 140fa9e4066Sahrens * 141fa9e4066Sahrens * ZFS_ENTER(zfsvfs); // exit if unmounted 142fa9e4066Sahrens * top: 143fa9e4066Sahrens * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD()) 144fa9e4066Sahrens * rw_enter(...); // grab any other locks you need 145fa9e4066Sahrens * tx = dmu_tx_create(...); // get DMU tx 146fa9e4066Sahrens * dmu_tx_hold_*(); // hold each object you might modify 1471209a471SNeil Perrin * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 148fa9e4066Sahrens * if (error) { 149fa9e4066Sahrens * rw_exit(...); // drop locks 150fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 151fa9e4066Sahrens * VN_RELE(...); // release held vnodes 1521209a471SNeil Perrin * if (error == ERESTART) { 1538a2f1b91Sahrens * dmu_tx_wait(tx); 1548a2f1b91Sahrens * dmu_tx_abort(tx); 155fa9e4066Sahrens * goto top; 156fa9e4066Sahrens * } 1578a2f1b91Sahrens * dmu_tx_abort(tx); // abort DMU tx 158fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 159fa9e4066Sahrens * return (error); // really out of space 160fa9e4066Sahrens * } 161fa9e4066Sahrens * error = do_real_work(); // do whatever this VOP does 162fa9e4066Sahrens * if (error == 0) 163b19a79ecSperrin * zfs_log_*(...); // on success, make ZIL entry 164fa9e4066Sahrens * dmu_tx_commit(tx); // commit DMU tx -- error or not 165fa9e4066Sahrens * rw_exit(...); // drop locks 166fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 167fa9e4066Sahrens * VN_RELE(...); // release held vnodes 1685002558fSNeil Perrin * zil_commit(zilog, foid); // synchronous when necessary 169fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 170fa9e4066Sahrens * return (error); // done, report error 171fa9e4066Sahrens */ 1723cb34c60Sahrens 173fa9e4066Sahrens /* ARGSUSED */ 174fa9e4066Sahrens static int 175da6c28aaSamw zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 176fa9e4066Sahrens { 17767bd71c6Sperrin znode_t *zp = VTOZ(*vpp); 178b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 179b614fdaaSMark Shellenbaum 180b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 181b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 18267bd71c6Sperrin 1830a586ceaSMark Shellenbaum if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 184da6c28aaSamw ((flag & FAPPEND) == 0)) { 185b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 186*be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 187da6c28aaSamw } 188da6c28aaSamw 189da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 190da6c28aaSamw ZTOV(zp)->v_type == VREG && 1910a586ceaSMark Shellenbaum !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 192b614fdaaSMark Shellenbaum if (fs_vscan(*vpp, cr, 0) != 0) { 193b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 194*be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 195b614fdaaSMark Shellenbaum } 196b614fdaaSMark Shellenbaum } 197da6c28aaSamw 19867bd71c6Sperrin /* Keep a count of the synchronous opens in the znode */ 19967bd71c6Sperrin if (flag & (FSYNC | FDSYNC)) 20067bd71c6Sperrin atomic_inc_32(&zp->z_sync_cnt); 201da6c28aaSamw 202b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 203fa9e4066Sahrens return (0); 204fa9e4066Sahrens } 205fa9e4066Sahrens 206fa9e4066Sahrens /* ARGSUSED */ 207fa9e4066Sahrens static int 208da6c28aaSamw zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 209da6c28aaSamw caller_context_t *ct) 210fa9e4066Sahrens { 21167bd71c6Sperrin znode_t *zp = VTOZ(vp); 212b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 213b614fdaaSMark Shellenbaum 214ee8143cbSChris Kirby /* 215ee8143cbSChris Kirby * Clean up any locks held by this process on the vp. 216ee8143cbSChris Kirby */ 217ee8143cbSChris Kirby cleanlocks(vp, ddi_get_pid(), 0); 218ee8143cbSChris Kirby cleanshares(vp, ddi_get_pid()); 219ee8143cbSChris Kirby 220b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 221b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 22267bd71c6Sperrin 22367bd71c6Sperrin /* Decrement the synchronous opens in the znode */ 224ecb72030Sperrin if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 22567bd71c6Sperrin atomic_dec_32(&zp->z_sync_cnt); 22667bd71c6Sperrin 227da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 228da6c28aaSamw ZTOV(zp)->v_type == VREG && 2290a586ceaSMark Shellenbaum !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 230da6c28aaSamw VERIFY(fs_vscan(vp, cr, 1) == 0); 231da6c28aaSamw 232b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 233fa9e4066Sahrens return (0); 234fa9e4066Sahrens } 235fa9e4066Sahrens 236fa9e4066Sahrens /* 237fa9e4066Sahrens * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 238fa9e4066Sahrens * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 239fa9e4066Sahrens */ 240fa9e4066Sahrens static int 241fa9e4066Sahrens zfs_holey(vnode_t *vp, int cmd, offset_t *off) 242fa9e4066Sahrens { 243fa9e4066Sahrens znode_t *zp = VTOZ(vp); 244fa9e4066Sahrens uint64_t noff = (uint64_t)*off; /* new offset */ 245fa9e4066Sahrens uint64_t file_sz; 246fa9e4066Sahrens int error; 247fa9e4066Sahrens boolean_t hole; 248fa9e4066Sahrens 2490a586ceaSMark Shellenbaum file_sz = zp->z_size; 250fa9e4066Sahrens if (noff >= file_sz) { 251*be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 252fa9e4066Sahrens } 253fa9e4066Sahrens 254fa9e4066Sahrens if (cmd == _FIO_SEEK_HOLE) 255fa9e4066Sahrens hole = B_TRUE; 256fa9e4066Sahrens else 257fa9e4066Sahrens hole = B_FALSE; 258fa9e4066Sahrens 259fa9e4066Sahrens error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 260fa9e4066Sahrens 261fa9e4066Sahrens /* end of file? */ 262fa9e4066Sahrens if ((error == ESRCH) || (noff > file_sz)) { 263fa9e4066Sahrens /* 264fa9e4066Sahrens * Handle the virtual hole at the end of file. 265fa9e4066Sahrens */ 266fa9e4066Sahrens if (hole) { 267fa9e4066Sahrens *off = file_sz; 268fa9e4066Sahrens return (0); 269fa9e4066Sahrens } 270*be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 271fa9e4066Sahrens } 272fa9e4066Sahrens 273fa9e4066Sahrens if (noff < *off) 274fa9e4066Sahrens return (error); 275fa9e4066Sahrens *off = noff; 276fa9e4066Sahrens return (error); 277fa9e4066Sahrens } 278fa9e4066Sahrens 279fa9e4066Sahrens /* ARGSUSED */ 280fa9e4066Sahrens static int 281fa9e4066Sahrens zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, 282da6c28aaSamw int *rvalp, caller_context_t *ct) 283fa9e4066Sahrens { 284fa9e4066Sahrens offset_t off; 285fa9e4066Sahrens int error; 286fa9e4066Sahrens zfsvfs_t *zfsvfs; 287f18faf3fSek znode_t *zp; 288fa9e4066Sahrens 289fa9e4066Sahrens switch (com) { 290ecb72030Sperrin case _FIOFFS: 291fa9e4066Sahrens return (zfs_sync(vp->v_vfsp, 0, cred)); 292fa9e4066Sahrens 293ea8dc4b6Seschrock /* 294ea8dc4b6Seschrock * The following two ioctls are used by bfu. Faking out, 295ea8dc4b6Seschrock * necessary to avoid bfu errors. 296ea8dc4b6Seschrock */ 297ecb72030Sperrin case _FIOGDIO: 298ecb72030Sperrin case _FIOSDIO: 299ea8dc4b6Seschrock return (0); 300ea8dc4b6Seschrock 301ecb72030Sperrin case _FIO_SEEK_DATA: 302ecb72030Sperrin case _FIO_SEEK_HOLE: 303fa9e4066Sahrens if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 304*be6fd75aSMatthew Ahrens return (SET_ERROR(EFAULT)); 305fa9e4066Sahrens 306f18faf3fSek zp = VTOZ(vp); 307f18faf3fSek zfsvfs = zp->z_zfsvfs; 3083cb34c60Sahrens ZFS_ENTER(zfsvfs); 3093cb34c60Sahrens ZFS_VERIFY_ZP(zp); 310fa9e4066Sahrens 311fa9e4066Sahrens /* offset parameter is in/out */ 312fa9e4066Sahrens error = zfs_holey(vp, com, &off); 313fa9e4066Sahrens ZFS_EXIT(zfsvfs); 314fa9e4066Sahrens if (error) 315fa9e4066Sahrens return (error); 316fa9e4066Sahrens if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 317*be6fd75aSMatthew Ahrens return (SET_ERROR(EFAULT)); 318fa9e4066Sahrens return (0); 319fa9e4066Sahrens } 320*be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTTY)); 321fa9e4066Sahrens } 322fa9e4066Sahrens 3230fab61baSJonathan W Adams /* 3240fab61baSJonathan W Adams * Utility functions to map and unmap a single physical page. These 3250fab61baSJonathan W Adams * are used to manage the mappable copies of ZFS file data, and therefore 3260fab61baSJonathan W Adams * do not update ref/mod bits. 3270fab61baSJonathan W Adams */ 3280fab61baSJonathan W Adams caddr_t 3290fab61baSJonathan W Adams zfs_map_page(page_t *pp, enum seg_rw rw) 3300fab61baSJonathan W Adams { 3310fab61baSJonathan W Adams if (kpm_enable) 3320fab61baSJonathan W Adams return (hat_kpm_mapin(pp, 0)); 3330fab61baSJonathan W Adams ASSERT(rw == S_READ || rw == S_WRITE); 3340fab61baSJonathan W Adams return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? PROT_WRITE : 0), 3350fab61baSJonathan W Adams (caddr_t)-1)); 3360fab61baSJonathan W Adams } 3370fab61baSJonathan W Adams 3380fab61baSJonathan W Adams void 3390fab61baSJonathan W Adams zfs_unmap_page(page_t *pp, caddr_t addr) 3400fab61baSJonathan W Adams { 3410fab61baSJonathan W Adams if (kpm_enable) { 3420fab61baSJonathan W Adams hat_kpm_mapout(pp, 0, addr); 3430fab61baSJonathan W Adams } else { 3440fab61baSJonathan W Adams ppmapout(addr); 3450fab61baSJonathan W Adams } 3460fab61baSJonathan W Adams } 3470fab61baSJonathan W Adams 348fa9e4066Sahrens /* 349fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data synchronized 350fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 351fa9e4066Sahrens * 352fa9e4066Sahrens * On Write: If we find a memory mapped page, we write to *both* 353fa9e4066Sahrens * the page and the dmu buffer. 354fa9e4066Sahrens */ 355ac05c741SMark Maybee static void 356ac05c741SMark Maybee update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) 357fa9e4066Sahrens { 358ac05c741SMark Maybee int64_t off; 359fa9e4066Sahrens 360fa9e4066Sahrens off = start & PAGEOFFSET; 361fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 362fa9e4066Sahrens page_t *pp; 363ac05c741SMark Maybee uint64_t nbytes = MIN(PAGESIZE - off, len); 364fa9e4066Sahrens 365fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 366fa9e4066Sahrens caddr_t va; 367fa9e4066Sahrens 3680fab61baSJonathan W Adams va = zfs_map_page(pp, S_WRITE); 3697bfdf011SNeil Perrin (void) dmu_read(os, oid, start+off, nbytes, va+off, 3707bfdf011SNeil Perrin DMU_READ_PREFETCH); 3710fab61baSJonathan W Adams zfs_unmap_page(pp, va); 372fa9e4066Sahrens page_unlock(pp); 373fa9e4066Sahrens } 374ac05c741SMark Maybee len -= nbytes; 375fa9e4066Sahrens off = 0; 376fa9e4066Sahrens } 377fa9e4066Sahrens } 378fa9e4066Sahrens 379fa9e4066Sahrens /* 380fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data synchronized 381fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 382fa9e4066Sahrens * 383fa9e4066Sahrens * On Read: We "read" preferentially from memory mapped pages, 384fa9e4066Sahrens * else we default from the dmu buffer. 385fa9e4066Sahrens * 386fa9e4066Sahrens * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 387fa9e4066Sahrens * the file is memory mapped. 388fa9e4066Sahrens */ 389fa9e4066Sahrens static int 390feb08c6bSbillm mappedread(vnode_t *vp, int nbytes, uio_t *uio) 391fa9e4066Sahrens { 392feb08c6bSbillm znode_t *zp = VTOZ(vp); 393feb08c6bSbillm objset_t *os = zp->z_zfsvfs->z_os; 394feb08c6bSbillm int64_t start, off; 395fa9e4066Sahrens int len = nbytes; 396fa9e4066Sahrens int error = 0; 397fa9e4066Sahrens 398fa9e4066Sahrens start = uio->uio_loffset; 399fa9e4066Sahrens off = start & PAGEOFFSET; 400fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 401fa9e4066Sahrens page_t *pp; 402feb08c6bSbillm uint64_t bytes = MIN(PAGESIZE - off, len); 403fa9e4066Sahrens 404fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 405fa9e4066Sahrens caddr_t va; 406fa9e4066Sahrens 4070fab61baSJonathan W Adams va = zfs_map_page(pp, S_READ); 408fa9e4066Sahrens error = uiomove(va + off, bytes, UIO_READ, uio); 4090fab61baSJonathan W Adams zfs_unmap_page(pp, va); 410fa9e4066Sahrens page_unlock(pp); 411fa9e4066Sahrens } else { 412feb08c6bSbillm error = dmu_read_uio(os, zp->z_id, uio, bytes); 413fa9e4066Sahrens } 414fa9e4066Sahrens len -= bytes; 415fa9e4066Sahrens off = 0; 416fa9e4066Sahrens if (error) 417fa9e4066Sahrens break; 418fa9e4066Sahrens } 419fa9e4066Sahrens return (error); 420fa9e4066Sahrens } 421fa9e4066Sahrens 422feb08c6bSbillm offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 423fa9e4066Sahrens 424fa9e4066Sahrens /* 425fa9e4066Sahrens * Read bytes from specified file into supplied buffer. 426fa9e4066Sahrens * 427fa9e4066Sahrens * IN: vp - vnode of file to be read from. 428fa9e4066Sahrens * uio - structure supplying read location, range info, 429fa9e4066Sahrens * and return buffer. 430fa9e4066Sahrens * ioflag - SYNC flags; used to provide FRSYNC semantics. 431fa9e4066Sahrens * cr - credentials of caller. 432da6c28aaSamw * ct - caller context 433fa9e4066Sahrens * 434fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 435fa9e4066Sahrens * 436fa9e4066Sahrens * RETURN: 0 if success 437fa9e4066Sahrens * error code if failure 438fa9e4066Sahrens * 439fa9e4066Sahrens * Side Effects: 440fa9e4066Sahrens * vp - atime updated if byte count > 0 441fa9e4066Sahrens */ 442fa9e4066Sahrens /* ARGSUSED */ 443fa9e4066Sahrens static int 444fa9e4066Sahrens zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 445fa9e4066Sahrens { 446fa9e4066Sahrens znode_t *zp = VTOZ(vp); 447fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 448f18faf3fSek objset_t *os; 449feb08c6bSbillm ssize_t n, nbytes; 450d5285caeSGeorge Wilson int error = 0; 451104e2ed7Sperrin rl_t *rl; 452c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_t *xuio = NULL; 453fa9e4066Sahrens 4543cb34c60Sahrens ZFS_ENTER(zfsvfs); 4553cb34c60Sahrens ZFS_VERIFY_ZP(zp); 456f18faf3fSek os = zfsvfs->z_os; 457fa9e4066Sahrens 4580a586ceaSMark Shellenbaum if (zp->z_pflags & ZFS_AV_QUARANTINED) { 4590616c50eSmarks ZFS_EXIT(zfsvfs); 460*be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 4610616c50eSmarks } 4620616c50eSmarks 463fa9e4066Sahrens /* 464fa9e4066Sahrens * Validate file offset 465fa9e4066Sahrens */ 466fa9e4066Sahrens if (uio->uio_loffset < (offset_t)0) { 467fa9e4066Sahrens ZFS_EXIT(zfsvfs); 468*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 469fa9e4066Sahrens } 470fa9e4066Sahrens 471fa9e4066Sahrens /* 472fa9e4066Sahrens * Fasttrack empty reads 473fa9e4066Sahrens */ 474fa9e4066Sahrens if (uio->uio_resid == 0) { 475fa9e4066Sahrens ZFS_EXIT(zfsvfs); 476fa9e4066Sahrens return (0); 477fa9e4066Sahrens } 478fa9e4066Sahrens 479fa9e4066Sahrens /* 480104e2ed7Sperrin * Check for mandatory locks 481fa9e4066Sahrens */ 4820a586ceaSMark Shellenbaum if (MANDMODE(zp->z_mode)) { 483fa9e4066Sahrens if (error = chklock(vp, FREAD, 484fa9e4066Sahrens uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 485fa9e4066Sahrens ZFS_EXIT(zfsvfs); 486fa9e4066Sahrens return (error); 487fa9e4066Sahrens } 488fa9e4066Sahrens } 489fa9e4066Sahrens 490fa9e4066Sahrens /* 491fa9e4066Sahrens * If we're in FRSYNC mode, sync out this znode before reading it. 492fa9e4066Sahrens */ 49355da60b9SMark J Musante if (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4945002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 495fa9e4066Sahrens 496fa9e4066Sahrens /* 497104e2ed7Sperrin * Lock the range against changes. 498fa9e4066Sahrens */ 499104e2ed7Sperrin rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 500104e2ed7Sperrin 501fa9e4066Sahrens /* 502fa9e4066Sahrens * If we are reading past end-of-file we can skip 503fa9e4066Sahrens * to the end; but we might still need to set atime. 504fa9e4066Sahrens */ 5050a586ceaSMark Shellenbaum if (uio->uio_loffset >= zp->z_size) { 506fa9e4066Sahrens error = 0; 507fa9e4066Sahrens goto out; 508fa9e4066Sahrens } 509fa9e4066Sahrens 5100a586ceaSMark Shellenbaum ASSERT(uio->uio_loffset < zp->z_size); 5110a586ceaSMark Shellenbaum n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 512feb08c6bSbillm 513c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((uio->uio_extflg == UIO_XUIO) && 514c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 515c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int nblk; 516c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz = zp->z_blksz; 517c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uint64_t offset = uio->uio_loffset; 518c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 519c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio = (xuio_t *)uio; 520c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((ISP2(blksz))) { 521c242f9a0Schunli zhang - Sun Microsystems - Irvine United States nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 522c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz)) / blksz; 523c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 524c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(offset + n <= blksz); 525c242f9a0Schunli zhang - Sun Microsystems - Irvine United States nblk = 1; 526c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 527570de38fSSurya Prakki (void) dmu_xuio_init(xuio, nblk); 528c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 529c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (vn_has_cached_data(vp)) { 530c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 531c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * For simplicity, we always allocate a full buffer 532c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * even if we only expect to read a portion of a block. 533c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 534c242f9a0Schunli zhang - Sun Microsystems - Irvine United States while (--nblk >= 0) { 535570de38fSSurya Prakki (void) dmu_xuio_add(xuio, 5360a586ceaSMark Shellenbaum dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5370a586ceaSMark Shellenbaum blksz), 0, blksz); 538c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 539c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 540c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 541c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 542feb08c6bSbillm while (n > 0) { 543feb08c6bSbillm nbytes = MIN(n, zfs_read_chunk_size - 544feb08c6bSbillm P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 545fa9e4066Sahrens 546feb08c6bSbillm if (vn_has_cached_data(vp)) 547feb08c6bSbillm error = mappedread(vp, nbytes, uio); 548feb08c6bSbillm else 549feb08c6bSbillm error = dmu_read_uio(os, zp->z_id, uio, nbytes); 550b87f3af3Sperrin if (error) { 551b87f3af3Sperrin /* convert checksum errors into IO errors */ 552b87f3af3Sperrin if (error == ECKSUM) 553*be6fd75aSMatthew Ahrens error = SET_ERROR(EIO); 554feb08c6bSbillm break; 555b87f3af3Sperrin } 556fa9e4066Sahrens 557feb08c6bSbillm n -= nbytes; 558fa9e4066Sahrens } 559fa9e4066Sahrens out: 560c5c6ffa0Smaybee zfs_range_unlock(rl); 561fa9e4066Sahrens 562fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 563fa9e4066Sahrens ZFS_EXIT(zfsvfs); 564fa9e4066Sahrens return (error); 565fa9e4066Sahrens } 566fa9e4066Sahrens 567fa9e4066Sahrens /* 568fa9e4066Sahrens * Write the bytes to a file. 569fa9e4066Sahrens * 570fa9e4066Sahrens * IN: vp - vnode of file to be written to. 571fa9e4066Sahrens * uio - structure supplying write location, range info, 572fa9e4066Sahrens * and data buffer. 573fa9e4066Sahrens * ioflag - FAPPEND flag set if in append mode. 574fa9e4066Sahrens * cr - credentials of caller. 575da6c28aaSamw * ct - caller context (NFS/CIFS fem monitor only) 576fa9e4066Sahrens * 577fa9e4066Sahrens * OUT: uio - updated offset and range. 578fa9e4066Sahrens * 579fa9e4066Sahrens * RETURN: 0 if success 580fa9e4066Sahrens * error code if failure 581fa9e4066Sahrens * 582fa9e4066Sahrens * Timestamps: 583fa9e4066Sahrens * vp - ctime|mtime updated if byte count > 0 584fa9e4066Sahrens */ 5850a586ceaSMark Shellenbaum 586fa9e4066Sahrens /* ARGSUSED */ 587fa9e4066Sahrens static int 588fa9e4066Sahrens zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 589fa9e4066Sahrens { 590fa9e4066Sahrens znode_t *zp = VTOZ(vp); 591fa9e4066Sahrens rlim64_t limit = uio->uio_llimit; 592fa9e4066Sahrens ssize_t start_resid = uio->uio_resid; 593fa9e4066Sahrens ssize_t tx_bytes; 594fa9e4066Sahrens uint64_t end_size; 595fa9e4066Sahrens dmu_tx_t *tx; 596fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 597f18faf3fSek zilog_t *zilog; 598fa9e4066Sahrens offset_t woff; 599fa9e4066Sahrens ssize_t n, nbytes; 600104e2ed7Sperrin rl_t *rl; 601fa9e4066Sahrens int max_blksz = zfsvfs->z_max_blksz; 602d5285caeSGeorge Wilson int error = 0; 6032fdbea25SAleksandr Guzovskiy arc_buf_t *abuf; 604d5285caeSGeorge Wilson iovec_t *aiov = NULL; 605c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_t *xuio = NULL; 606c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int i_iov = 0; 607c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int iovcnt = uio->uio_iovcnt; 608c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *iovp = uio->uio_iov; 609c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int write_eof; 6100a586ceaSMark Shellenbaum int count = 0; 6110a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[4]; 6120a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 613fa9e4066Sahrens 614fa9e4066Sahrens /* 615fa9e4066Sahrens * Fasttrack empty write 616fa9e4066Sahrens */ 617104e2ed7Sperrin n = start_resid; 618fa9e4066Sahrens if (n == 0) 619fa9e4066Sahrens return (0); 620fa9e4066Sahrens 621104e2ed7Sperrin if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 622104e2ed7Sperrin limit = MAXOFFSET_T; 623104e2ed7Sperrin 6243cb34c60Sahrens ZFS_ENTER(zfsvfs); 6253cb34c60Sahrens ZFS_VERIFY_ZP(zp); 626c09193bfSmarks 6270a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 6280a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 6290a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 6300a586ceaSMark Shellenbaum &zp->z_size, 8); 6310a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 6320a586ceaSMark Shellenbaum &zp->z_pflags, 8); 6330a586ceaSMark Shellenbaum 634c09193bfSmarks /* 635c09193bfSmarks * If immutable or not appending then return EPERM 636c09193bfSmarks */ 6370a586ceaSMark Shellenbaum if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 6380a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 6390a586ceaSMark Shellenbaum (uio->uio_loffset < zp->z_size))) { 640c09193bfSmarks ZFS_EXIT(zfsvfs); 641*be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 642c09193bfSmarks } 643c09193bfSmarks 644f18faf3fSek zilog = zfsvfs->z_log; 645fa9e4066Sahrens 64641865f27SWilliam Gorrell /* 64741865f27SWilliam Gorrell * Validate file offset 64841865f27SWilliam Gorrell */ 6490a586ceaSMark Shellenbaum woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 65041865f27SWilliam Gorrell if (woff < 0) { 65141865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 652*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 65341865f27SWilliam Gorrell } 65441865f27SWilliam Gorrell 65541865f27SWilliam Gorrell /* 65641865f27SWilliam Gorrell * Check for mandatory locks before calling zfs_range_lock() 65741865f27SWilliam Gorrell * in order to prevent a deadlock with locks set via fcntl(). 65841865f27SWilliam Gorrell */ 6590a586ceaSMark Shellenbaum if (MANDMODE((mode_t)zp->z_mode) && 66041865f27SWilliam Gorrell (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 66141865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 66241865f27SWilliam Gorrell return (error); 66341865f27SWilliam Gorrell } 66441865f27SWilliam Gorrell 665fa9e4066Sahrens /* 666c5c6ffa0Smaybee * Pre-fault the pages to ensure slow (eg NFS) pages 667104e2ed7Sperrin * don't hold up txg. 668c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Skip this if uio contains loaned arc_buf. 669fa9e4066Sahrens */ 670c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((uio->uio_extflg == UIO_XUIO) && 671c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 672c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio = (xuio_t *)uio; 673c242f9a0Schunli zhang - Sun Microsystems - Irvine United States else 674ff866947SSanjeev Bagewadi uio_prefaultpages(MIN(n, max_blksz), uio); 675fa9e4066Sahrens 676fa9e4066Sahrens /* 677fa9e4066Sahrens * If in append mode, set the io offset pointer to eof. 678fa9e4066Sahrens */ 679104e2ed7Sperrin if (ioflag & FAPPEND) { 680104e2ed7Sperrin /* 68141865f27SWilliam Gorrell * Obtain an appending range lock to guarantee file append 68241865f27SWilliam Gorrell * semantics. We reset the write offset once we have the lock. 683104e2ed7Sperrin */ 684104e2ed7Sperrin rl = zfs_range_lock(zp, 0, n, RL_APPEND); 68541865f27SWilliam Gorrell woff = rl->r_off; 686104e2ed7Sperrin if (rl->r_len == UINT64_MAX) { 68741865f27SWilliam Gorrell /* 68841865f27SWilliam Gorrell * We overlocked the file because this write will cause 68941865f27SWilliam Gorrell * the file block size to increase. 69041865f27SWilliam Gorrell * Note that zp_size cannot change with this lock held. 69141865f27SWilliam Gorrell */ 6920a586ceaSMark Shellenbaum woff = zp->z_size; 693104e2ed7Sperrin } 69441865f27SWilliam Gorrell uio->uio_loffset = woff; 695fa9e4066Sahrens } else { 696fa9e4066Sahrens /* 69741865f27SWilliam Gorrell * Note that if the file block size will change as a result of 69841865f27SWilliam Gorrell * this write, then this range lock will lock the entire file 69941865f27SWilliam Gorrell * so that we can re-write the block safely. 700fa9e4066Sahrens */ 701104e2ed7Sperrin rl = zfs_range_lock(zp, woff, n, RL_WRITER); 702fa9e4066Sahrens } 703fa9e4066Sahrens 704fa9e4066Sahrens if (woff >= limit) { 705feb08c6bSbillm zfs_range_unlock(rl); 706feb08c6bSbillm ZFS_EXIT(zfsvfs); 707*be6fd75aSMatthew Ahrens return (SET_ERROR(EFBIG)); 708fa9e4066Sahrens } 709fa9e4066Sahrens 710fa9e4066Sahrens if ((woff + n) > limit || woff > (limit - n)) 711fa9e4066Sahrens n = limit - woff; 712fa9e4066Sahrens 713c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* Will this write extend the file length? */ 7140a586ceaSMark Shellenbaum write_eof = (woff + n > zp->z_size); 715c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 7160a586ceaSMark Shellenbaum end_size = MAX(zp->z_size, woff + n); 717fa9e4066Sahrens 718104e2ed7Sperrin /* 719feb08c6bSbillm * Write the file in reasonable size chunks. Each chunk is written 720feb08c6bSbillm * in a separate transaction; this keeps the intent log records small 721feb08c6bSbillm * and allows us to do more fine-grained space accounting. 722104e2ed7Sperrin */ 723feb08c6bSbillm while (n > 0) { 7242fdbea25SAleksandr Guzovskiy abuf = NULL; 7252fdbea25SAleksandr Guzovskiy woff = uio->uio_loffset; 7262fdbea25SAleksandr Guzovskiy again: 7270a586ceaSMark Shellenbaum if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 7280a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 7292fdbea25SAleksandr Guzovskiy if (abuf != NULL) 7302fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 731*be6fd75aSMatthew Ahrens error = SET_ERROR(EDQUOT); 73214843421SMatthew Ahrens break; 73314843421SMatthew Ahrens } 7342fdbea25SAleksandr Guzovskiy 735c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (xuio && abuf == NULL) { 736c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(i_iov < iovcnt); 737c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov = &iovp[i_iov]; 738c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = dmu_xuio_arcbuf(xuio, i_iov); 739c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_xuio_clear(xuio, i_iov); 740c242f9a0Schunli zhang - Sun Microsystems - Irvine United States DTRACE_PROBE3(zfs_cp_write, int, i_iov, 741c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *, aiov, arc_buf_t *, abuf); 742c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT((aiov->iov_base == abuf->b_data) || 743c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ((char *)aiov->iov_base - (char *)abuf->b_data + 744c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_len == arc_buf_size(abuf))); 745c242f9a0Schunli zhang - Sun Microsystems - Irvine United States i_iov++; 746c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else if (abuf == NULL && n >= max_blksz && 7470a586ceaSMark Shellenbaum woff >= zp->z_size && 7482fdbea25SAleksandr Guzovskiy P2PHASE(woff, max_blksz) == 0 && 7492fdbea25SAleksandr Guzovskiy zp->z_blksz == max_blksz) { 750c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 751c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * This write covers a full block. "Borrow" a buffer 752c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * from the dmu so that we can fill it before we enter 753c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * a transaction. This avoids the possibility of 754c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * holding up the transaction if the data copy hangs 755c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * up on a pagefault (e.g., from an NFS server mapping). 756c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 7572fdbea25SAleksandr Guzovskiy size_t cbytes; 7582fdbea25SAleksandr Guzovskiy 7590a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 7600a586ceaSMark Shellenbaum max_blksz); 7612fdbea25SAleksandr Guzovskiy ASSERT(abuf != NULL); 7622fdbea25SAleksandr Guzovskiy ASSERT(arc_buf_size(abuf) == max_blksz); 7632fdbea25SAleksandr Guzovskiy if (error = uiocopy(abuf->b_data, max_blksz, 7642fdbea25SAleksandr Guzovskiy UIO_WRITE, uio, &cbytes)) { 7652fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 7662fdbea25SAleksandr Guzovskiy break; 7672fdbea25SAleksandr Guzovskiy } 7682fdbea25SAleksandr Guzovskiy ASSERT(cbytes == max_blksz); 7692fdbea25SAleksandr Guzovskiy } 7702fdbea25SAleksandr Guzovskiy 7712fdbea25SAleksandr Guzovskiy /* 7722fdbea25SAleksandr Guzovskiy * Start a transaction. 7732fdbea25SAleksandr Guzovskiy */ 774feb08c6bSbillm tx = dmu_tx_create(zfsvfs->z_os); 7750a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 776feb08c6bSbillm dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 7770a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 7781209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 779feb08c6bSbillm if (error) { 7801209a471SNeil Perrin if (error == ERESTART) { 781feb08c6bSbillm dmu_tx_wait(tx); 782feb08c6bSbillm dmu_tx_abort(tx); 7832fdbea25SAleksandr Guzovskiy goto again; 784feb08c6bSbillm } 785feb08c6bSbillm dmu_tx_abort(tx); 7862fdbea25SAleksandr Guzovskiy if (abuf != NULL) 7872fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 788feb08c6bSbillm break; 789feb08c6bSbillm } 790104e2ed7Sperrin 791feb08c6bSbillm /* 792feb08c6bSbillm * If zfs_range_lock() over-locked we grow the blocksize 793feb08c6bSbillm * and then reduce the lock range. This will only happen 794feb08c6bSbillm * on the first iteration since zfs_range_reduce() will 795feb08c6bSbillm * shrink down r_len to the appropriate size. 796feb08c6bSbillm */ 797feb08c6bSbillm if (rl->r_len == UINT64_MAX) { 798feb08c6bSbillm uint64_t new_blksz; 799feb08c6bSbillm 800feb08c6bSbillm if (zp->z_blksz > max_blksz) { 801feb08c6bSbillm ASSERT(!ISP2(zp->z_blksz)); 802feb08c6bSbillm new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 803feb08c6bSbillm } else { 804feb08c6bSbillm new_blksz = MIN(end_size, max_blksz); 805feb08c6bSbillm } 806feb08c6bSbillm zfs_grow_blocksize(zp, new_blksz, tx); 807feb08c6bSbillm zfs_range_reduce(rl, woff, n); 808fa9e4066Sahrens } 809fa9e4066Sahrens 810fa9e4066Sahrens /* 811fa9e4066Sahrens * XXX - should we really limit each write to z_max_blksz? 812fa9e4066Sahrens * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 813fa9e4066Sahrens */ 814fa9e4066Sahrens nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 815fa9e4066Sahrens 8162fdbea25SAleksandr Guzovskiy if (abuf == NULL) { 8172fdbea25SAleksandr Guzovskiy tx_bytes = uio->uio_resid; 81894d1a210STim Haley error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 81994d1a210STim Haley uio, nbytes, tx); 8202fdbea25SAleksandr Guzovskiy tx_bytes -= uio->uio_resid; 8212fdbea25SAleksandr Guzovskiy } else { 8222fdbea25SAleksandr Guzovskiy tx_bytes = nbytes; 823c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 824c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 825c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * If this is not a full block write, but we are 826c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * extending the file past EOF and this data starts 827c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * block-aligned, use assign_arcbuf(). Otherwise, 828c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * write via dmu_write(). 829c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 830c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (tx_bytes < max_blksz && (!write_eof || 831c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_base != abuf->b_data)) { 832c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio); 833c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_write(zfsvfs->z_os, zp->z_id, woff, 834c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_len, aiov->iov_base, tx); 835c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_return_arcbuf(abuf); 836c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_stat_wbuf_copied(); 837c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 838c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio || tx_bytes == max_blksz); 8390a586ceaSMark Shellenbaum dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 8400a586ceaSMark Shellenbaum woff, abuf, tx); 841c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 8422fdbea25SAleksandr Guzovskiy ASSERT(tx_bytes <= uio->uio_resid); 8432fdbea25SAleksandr Guzovskiy uioskip(uio, tx_bytes); 8442fdbea25SAleksandr Guzovskiy } 8452fdbea25SAleksandr Guzovskiy if (tx_bytes && vn_has_cached_data(vp)) { 846ac05c741SMark Maybee update_pages(vp, woff, 847ac05c741SMark Maybee tx_bytes, zfsvfs->z_os, zp->z_id); 8482fdbea25SAleksandr Guzovskiy } 849fa9e4066Sahrens 850feb08c6bSbillm /* 851feb08c6bSbillm * If we made no progress, we're done. If we made even 852feb08c6bSbillm * partial progress, update the znode and ZIL accordingly. 853feb08c6bSbillm */ 854feb08c6bSbillm if (tx_bytes == 0) { 8550a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 8560a586ceaSMark Shellenbaum (void *)&zp->z_size, sizeof (uint64_t), tx); 857af2c4821Smaybee dmu_tx_commit(tx); 858feb08c6bSbillm ASSERT(error != 0); 859fa9e4066Sahrens break; 860fa9e4066Sahrens } 861fa9e4066Sahrens 862169cdae2Smarks /* 863169cdae2Smarks * Clear Set-UID/Set-GID bits on successful write if not 864169cdae2Smarks * privileged and at least one of the excute bits is set. 865169cdae2Smarks * 866169cdae2Smarks * It would be nice to to this after all writes have 867169cdae2Smarks * been done, but that would still expose the ISUID/ISGID 868169cdae2Smarks * to another app after the partial write is committed. 869da6c28aaSamw * 870f1696b23SMark Shellenbaum * Note: we don't call zfs_fuid_map_id() here because 871f1696b23SMark Shellenbaum * user 0 is not an ephemeral uid. 872169cdae2Smarks */ 873169cdae2Smarks mutex_enter(&zp->z_acl_lock); 8740a586ceaSMark Shellenbaum if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 875169cdae2Smarks (S_IXUSR >> 6))) != 0 && 8760a586ceaSMark Shellenbaum (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 877169cdae2Smarks secpolicy_vnode_setid_retain(cr, 8780a586ceaSMark Shellenbaum (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 8790a586ceaSMark Shellenbaum uint64_t newmode; 8800a586ceaSMark Shellenbaum zp->z_mode &= ~(S_ISUID | S_ISGID); 8810a586ceaSMark Shellenbaum newmode = zp->z_mode; 8820a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 8830a586ceaSMark Shellenbaum (void *)&newmode, sizeof (uint64_t), tx); 884169cdae2Smarks } 885169cdae2Smarks mutex_exit(&zp->z_acl_lock); 886169cdae2Smarks 8870a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 8880a586ceaSMark Shellenbaum B_TRUE); 889fa9e4066Sahrens 890fa9e4066Sahrens /* 891feb08c6bSbillm * Update the file size (zp_size) if it has changed; 892feb08c6bSbillm * account for possible concurrent updates. 893fa9e4066Sahrens */ 8940a586ceaSMark Shellenbaum while ((end_size = zp->z_size) < uio->uio_loffset) { 8950a586ceaSMark Shellenbaum (void) atomic_cas_64(&zp->z_size, end_size, 896fa9e4066Sahrens uio->uio_loffset); 8970a586ceaSMark Shellenbaum ASSERT(error == 0); 8980a586ceaSMark Shellenbaum } 899c0e50c98SNeil Perrin /* 900c0e50c98SNeil Perrin * If we are replaying and eof is non zero then force 901c0e50c98SNeil Perrin * the file size to the specified eof. Note, there's no 902c0e50c98SNeil Perrin * concurrency during replay. 903c0e50c98SNeil Perrin */ 904c0e50c98SNeil Perrin if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 905c0e50c98SNeil Perrin zp->z_size = zfsvfs->z_replay_eof; 906c0e50c98SNeil Perrin 9070a586ceaSMark Shellenbaum error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 9080a586ceaSMark Shellenbaum 909feb08c6bSbillm zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 910feb08c6bSbillm dmu_tx_commit(tx); 911fa9e4066Sahrens 912feb08c6bSbillm if (error != 0) 913feb08c6bSbillm break; 914feb08c6bSbillm ASSERT(tx_bytes == nbytes); 915feb08c6bSbillm n -= nbytes; 916ff866947SSanjeev Bagewadi 917ff866947SSanjeev Bagewadi if (!xuio && n > 0) 918ff866947SSanjeev Bagewadi uio_prefaultpages(MIN(n, max_blksz), uio); 919feb08c6bSbillm } 920fa9e4066Sahrens 921c5c6ffa0Smaybee zfs_range_unlock(rl); 922fa9e4066Sahrens 923fa9e4066Sahrens /* 924fa9e4066Sahrens * If we're in replay mode, or we made no progress, return error. 925fa9e4066Sahrens * Otherwise, it's at least a partial write, so it's successful. 926fa9e4066Sahrens */ 9271209a471SNeil Perrin if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 928fa9e4066Sahrens ZFS_EXIT(zfsvfs); 929fa9e4066Sahrens return (error); 930fa9e4066Sahrens } 931fa9e4066Sahrens 93255da60b9SMark J Musante if (ioflag & (FSYNC | FDSYNC) || 93355da60b9SMark J Musante zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 9345002558fSNeil Perrin zil_commit(zilog, zp->z_id); 935fa9e4066Sahrens 936fa9e4066Sahrens ZFS_EXIT(zfsvfs); 937fa9e4066Sahrens return (0); 938fa9e4066Sahrens } 939fa9e4066Sahrens 940c5c6ffa0Smaybee void 941b24ab676SJeff Bonwick zfs_get_done(zgd_t *zgd, int error) 942c5c6ffa0Smaybee { 943b24ab676SJeff Bonwick znode_t *zp = zgd->zgd_private; 944b24ab676SJeff Bonwick objset_t *os = zp->z_zfsvfs->z_os; 945b24ab676SJeff Bonwick 946b24ab676SJeff Bonwick if (zgd->zgd_db) 947b24ab676SJeff Bonwick dmu_buf_rele(zgd->zgd_db, zgd); 948b24ab676SJeff Bonwick 949b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 950c5c6ffa0Smaybee 9519d3574bfSNeil Perrin /* 9529d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 9539d3574bfSNeil Perrin * txg stopped from syncing. 9549d3574bfSNeil Perrin */ 955b24ab676SJeff Bonwick VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 956b24ab676SJeff Bonwick 957b24ab676SJeff Bonwick if (error == 0 && zgd->zgd_bp) 958b24ab676SJeff Bonwick zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 959b24ab676SJeff Bonwick 96067bd71c6Sperrin kmem_free(zgd, sizeof (zgd_t)); 961c5c6ffa0Smaybee } 962c5c6ffa0Smaybee 963c87b8fc5SMark J Musante #ifdef DEBUG 964c87b8fc5SMark J Musante static int zil_fault_io = 0; 965c87b8fc5SMark J Musante #endif 966c87b8fc5SMark J Musante 967fa9e4066Sahrens /* 968fa9e4066Sahrens * Get data to generate a TX_WRITE intent log record. 969fa9e4066Sahrens */ 970fa9e4066Sahrens int 971c5c6ffa0Smaybee zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 972fa9e4066Sahrens { 973fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 974fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 975fa9e4066Sahrens znode_t *zp; 976b24ab676SJeff Bonwick uint64_t object = lr->lr_foid; 977b24ab676SJeff Bonwick uint64_t offset = lr->lr_offset; 978b24ab676SJeff Bonwick uint64_t size = lr->lr_length; 979b24ab676SJeff Bonwick blkptr_t *bp = &lr->lr_blkptr; 980c5c6ffa0Smaybee dmu_buf_t *db; 98167bd71c6Sperrin zgd_t *zgd; 982fa9e4066Sahrens int error = 0; 983fa9e4066Sahrens 984b24ab676SJeff Bonwick ASSERT(zio != NULL); 985b24ab676SJeff Bonwick ASSERT(size != 0); 986fa9e4066Sahrens 987fa9e4066Sahrens /* 988104e2ed7Sperrin * Nothing to do if the file has been removed 989fa9e4066Sahrens */ 990b24ab676SJeff Bonwick if (zfs_zget(zfsvfs, object, &zp) != 0) 991*be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 992893a6d32Sahrens if (zp->z_unlinked) { 9939d3574bfSNeil Perrin /* 9949d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 9959d3574bfSNeil Perrin * txg stopped from syncing. 9969d3574bfSNeil Perrin */ 9979d3574bfSNeil Perrin VN_RELE_ASYNC(ZTOV(zp), 9989d3574bfSNeil Perrin dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 999*be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1000fa9e4066Sahrens } 1001fa9e4066Sahrens 1002b24ab676SJeff Bonwick zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1003b24ab676SJeff Bonwick zgd->zgd_zilog = zfsvfs->z_log; 1004b24ab676SJeff Bonwick zgd->zgd_private = zp; 1005b24ab676SJeff Bonwick 1006fa9e4066Sahrens /* 1007fa9e4066Sahrens * Write records come in two flavors: immediate and indirect. 1008fa9e4066Sahrens * For small writes it's cheaper to store the data with the 1009fa9e4066Sahrens * log record (immediate); for large writes it's cheaper to 1010fa9e4066Sahrens * sync the data and get a pointer to it (indirect) so that 1011fa9e4066Sahrens * we don't have to write the data twice. 1012fa9e4066Sahrens */ 1013104e2ed7Sperrin if (buf != NULL) { /* immediate write */ 1014b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1015104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 10160a586ceaSMark Shellenbaum if (offset >= zp->z_size) { 1017*be6fd75aSMatthew Ahrens error = SET_ERROR(ENOENT); 1018b24ab676SJeff Bonwick } else { 1019b24ab676SJeff Bonwick error = dmu_read(os, object, offset, size, buf, 1020b24ab676SJeff Bonwick DMU_READ_NO_PREFETCH); 1021104e2ed7Sperrin } 1022b24ab676SJeff Bonwick ASSERT(error == 0 || error == ENOENT); 1023104e2ed7Sperrin } else { /* indirect write */ 1024fa9e4066Sahrens /* 1025104e2ed7Sperrin * Have to lock the whole block to ensure when it's 1026104e2ed7Sperrin * written out and it's checksum is being calculated 1027104e2ed7Sperrin * that no one can change the data. We need to re-check 1028104e2ed7Sperrin * blocksize after we get the lock in case it's changed! 1029fa9e4066Sahrens */ 1030104e2ed7Sperrin for (;;) { 1031b24ab676SJeff Bonwick uint64_t blkoff; 1032b24ab676SJeff Bonwick size = zp->z_blksz; 1033dfe73b3dSJeff Bonwick blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; 1034b24ab676SJeff Bonwick offset -= blkoff; 1035b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1036b24ab676SJeff Bonwick RL_READER); 1037b24ab676SJeff Bonwick if (zp->z_blksz == size) 1038104e2ed7Sperrin break; 1039b24ab676SJeff Bonwick offset += blkoff; 1040b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 1041104e2ed7Sperrin } 1042104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 10430a586ceaSMark Shellenbaum if (lr->lr_offset >= zp->z_size) 1044*be6fd75aSMatthew Ahrens error = SET_ERROR(ENOENT); 1045c87b8fc5SMark J Musante #ifdef DEBUG 1046c87b8fc5SMark J Musante if (zil_fault_io) { 1047*be6fd75aSMatthew Ahrens error = SET_ERROR(EIO); 1048c87b8fc5SMark J Musante zil_fault_io = 0; 1049c87b8fc5SMark J Musante } 1050c87b8fc5SMark J Musante #endif 1051b24ab676SJeff Bonwick if (error == 0) 105247cb52daSJeff Bonwick error = dmu_buf_hold(os, object, offset, zgd, &db, 105347cb52daSJeff Bonwick DMU_READ_NO_PREFETCH); 1054c87b8fc5SMark J Musante 1055975c32a0SNeil Perrin if (error == 0) { 105680901aeaSGeorge Wilson blkptr_t *obp = dmu_buf_get_blkptr(db); 105780901aeaSGeorge Wilson if (obp) { 105880901aeaSGeorge Wilson ASSERT(BP_IS_HOLE(bp)); 105980901aeaSGeorge Wilson *bp = *obp; 106080901aeaSGeorge Wilson } 106180901aeaSGeorge Wilson 1062b24ab676SJeff Bonwick zgd->zgd_db = db; 1063b24ab676SJeff Bonwick zgd->zgd_bp = bp; 1064b24ab676SJeff Bonwick 1065b24ab676SJeff Bonwick ASSERT(db->db_offset == offset); 1066b24ab676SJeff Bonwick ASSERT(db->db_size == size); 1067b24ab676SJeff Bonwick 1068b24ab676SJeff Bonwick error = dmu_sync(zio, lr->lr_common.lrc_txg, 1069b24ab676SJeff Bonwick zfs_get_done, zgd); 1070b24ab676SJeff Bonwick ASSERT(error || lr->lr_length <= zp->z_blksz); 1071b24ab676SJeff Bonwick 1072975c32a0SNeil Perrin /* 1073b24ab676SJeff Bonwick * On success, we need to wait for the write I/O 1074b24ab676SJeff Bonwick * initiated by dmu_sync() to complete before we can 1075b24ab676SJeff Bonwick * release this dbuf. We will finish everything up 1076b24ab676SJeff Bonwick * in the zfs_get_done() callback. 1077975c32a0SNeil Perrin */ 1078b24ab676SJeff Bonwick if (error == 0) 1079b24ab676SJeff Bonwick return (0); 1080975c32a0SNeil Perrin 1081b24ab676SJeff Bonwick if (error == EALREADY) { 1082b24ab676SJeff Bonwick lr->lr_common.lrc_txtype = TX_WRITE2; 1083b24ab676SJeff Bonwick error = 0; 1084b24ab676SJeff Bonwick } 1085975c32a0SNeil Perrin } 1086fa9e4066Sahrens } 1087b24ab676SJeff Bonwick 1088b24ab676SJeff Bonwick zfs_get_done(zgd, error); 1089b24ab676SJeff Bonwick 1090fa9e4066Sahrens return (error); 1091fa9e4066Sahrens } 1092fa9e4066Sahrens 1093fa9e4066Sahrens /*ARGSUSED*/ 1094fa9e4066Sahrens static int 1095da6c28aaSamw zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1096da6c28aaSamw caller_context_t *ct) 1097fa9e4066Sahrens { 1098fa9e4066Sahrens znode_t *zp = VTOZ(vp); 1099fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1100fa9e4066Sahrens int error; 1101fa9e4066Sahrens 11023cb34c60Sahrens ZFS_ENTER(zfsvfs); 11033cb34c60Sahrens ZFS_VERIFY_ZP(zp); 1104da6c28aaSamw 1105da6c28aaSamw if (flag & V_ACE_MASK) 1106da6c28aaSamw error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1107da6c28aaSamw else 1108da6c28aaSamw error = zfs_zaccess_rwx(zp, mode, flag, cr); 1109da6c28aaSamw 1110fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1111fa9e4066Sahrens return (error); 1112fa9e4066Sahrens } 1113fa9e4066Sahrens 1114d47621a4STim Haley /* 1115d47621a4STim Haley * If vnode is for a device return a specfs vnode instead. 1116d47621a4STim Haley */ 1117d47621a4STim Haley static int 1118d47621a4STim Haley specvp_check(vnode_t **vpp, cred_t *cr) 1119d47621a4STim Haley { 1120d47621a4STim Haley int error = 0; 1121d47621a4STim Haley 1122d47621a4STim Haley if (IS_DEVVP(*vpp)) { 1123d47621a4STim Haley struct vnode *svp; 1124d47621a4STim Haley 1125d47621a4STim Haley svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1126d47621a4STim Haley VN_RELE(*vpp); 1127d47621a4STim Haley if (svp == NULL) 1128*be6fd75aSMatthew Ahrens error = SET_ERROR(ENOSYS); 1129d47621a4STim Haley *vpp = svp; 1130d47621a4STim Haley } 1131d47621a4STim Haley return (error); 1132d47621a4STim Haley } 1133d47621a4STim Haley 1134d47621a4STim Haley 1135fa9e4066Sahrens /* 1136fa9e4066Sahrens * Lookup an entry in a directory, or an extended attribute directory. 1137fa9e4066Sahrens * If it exists, return a held vnode reference for it. 1138fa9e4066Sahrens * 1139fa9e4066Sahrens * IN: dvp - vnode of directory to search. 1140fa9e4066Sahrens * nm - name of entry to lookup. 1141fa9e4066Sahrens * pnp - full pathname to lookup [UNUSED]. 1142fa9e4066Sahrens * flags - LOOKUP_XATTR set if looking for an attribute. 1143fa9e4066Sahrens * rdir - root directory vnode [UNUSED]. 1144fa9e4066Sahrens * cr - credentials of caller. 1145da6c28aaSamw * ct - caller context 1146da6c28aaSamw * direntflags - directory lookup flags 1147da6c28aaSamw * realpnp - returned pathname. 1148fa9e4066Sahrens * 1149fa9e4066Sahrens * OUT: vpp - vnode of located entry, NULL if not found. 1150fa9e4066Sahrens * 1151fa9e4066Sahrens * RETURN: 0 if success 1152fa9e4066Sahrens * error code if failure 1153fa9e4066Sahrens * 1154fa9e4066Sahrens * Timestamps: 1155fa9e4066Sahrens * NA 1156fa9e4066Sahrens */ 1157fa9e4066Sahrens /* ARGSUSED */ 1158fa9e4066Sahrens static int 1159fa9e4066Sahrens zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 1160da6c28aaSamw int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 1161da6c28aaSamw int *direntflags, pathname_t *realpnp) 1162fa9e4066Sahrens { 1163fa9e4066Sahrens znode_t *zdp = VTOZ(dvp); 1164fa9e4066Sahrens zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1165d47621a4STim Haley int error = 0; 1166d47621a4STim Haley 1167d47621a4STim Haley /* fast path */ 1168d47621a4STim Haley if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1169d47621a4STim Haley 1170d47621a4STim Haley if (dvp->v_type != VDIR) { 1171*be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTDIR)); 11720a586ceaSMark Shellenbaum } else if (zdp->z_sa_hdl == NULL) { 1173*be6fd75aSMatthew Ahrens return (SET_ERROR(EIO)); 1174d47621a4STim Haley } 1175d47621a4STim Haley 1176d47621a4STim Haley if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1177d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1178d47621a4STim Haley if (!error) { 1179d47621a4STim Haley *vpp = dvp; 1180d47621a4STim Haley VN_HOLD(*vpp); 1181d47621a4STim Haley return (0); 1182d47621a4STim Haley } 1183d47621a4STim Haley return (error); 1184d47621a4STim Haley } else { 1185d47621a4STim Haley vnode_t *tvp = dnlc_lookup(dvp, nm); 1186d47621a4STim Haley 1187d47621a4STim Haley if (tvp) { 1188d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1189d47621a4STim Haley if (error) { 1190d47621a4STim Haley VN_RELE(tvp); 1191d47621a4STim Haley return (error); 1192d47621a4STim Haley } 1193d47621a4STim Haley if (tvp == DNLC_NO_VNODE) { 1194d47621a4STim Haley VN_RELE(tvp); 1195*be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1196d47621a4STim Haley } else { 1197d47621a4STim Haley *vpp = tvp; 1198d47621a4STim Haley return (specvp_check(vpp, cr)); 1199d47621a4STim Haley } 1200d47621a4STim Haley } 1201d47621a4STim Haley } 1202d47621a4STim Haley } 1203d47621a4STim Haley 1204d47621a4STim Haley DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1205fa9e4066Sahrens 12063cb34c60Sahrens ZFS_ENTER(zfsvfs); 12073cb34c60Sahrens ZFS_VERIFY_ZP(zdp); 1208fa9e4066Sahrens 1209fa9e4066Sahrens *vpp = NULL; 1210fa9e4066Sahrens 1211fa9e4066Sahrens if (flags & LOOKUP_XATTR) { 12127b55fa8eSck /* 12137b55fa8eSck * If the xattr property is off, refuse the lookup request. 12147b55fa8eSck */ 12157b55fa8eSck if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 12167b55fa8eSck ZFS_EXIT(zfsvfs); 1217*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 12187b55fa8eSck } 12197b55fa8eSck 1220fa9e4066Sahrens /* 1221fa9e4066Sahrens * We don't allow recursive attributes.. 1222fa9e4066Sahrens * Maybe someday we will. 1223fa9e4066Sahrens */ 12240a586ceaSMark Shellenbaum if (zdp->z_pflags & ZFS_XATTR) { 1225fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1226*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1227fa9e4066Sahrens } 1228fa9e4066Sahrens 12293f063a9dSck if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1230fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1231fa9e4066Sahrens return (error); 1232fa9e4066Sahrens } 1233fa9e4066Sahrens 1234fa9e4066Sahrens /* 1235fa9e4066Sahrens * Do we have permission to get into attribute directory? 1236fa9e4066Sahrens */ 1237fa9e4066Sahrens 1238da6c28aaSamw if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1239da6c28aaSamw B_FALSE, cr)) { 1240fa9e4066Sahrens VN_RELE(*vpp); 1241da6c28aaSamw *vpp = NULL; 1242fa9e4066Sahrens } 1243fa9e4066Sahrens 1244fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1245fa9e4066Sahrens return (error); 1246fa9e4066Sahrens } 1247fa9e4066Sahrens 12480f2dc02eSek if (dvp->v_type != VDIR) { 12490f2dc02eSek ZFS_EXIT(zfsvfs); 1250*be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTDIR)); 12510f2dc02eSek } 1252736b9155Smarks 1253fa9e4066Sahrens /* 1254fa9e4066Sahrens * Check accessibility of directory. 1255fa9e4066Sahrens */ 1256fa9e4066Sahrens 1257da6c28aaSamw if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1258fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1259fa9e4066Sahrens return (error); 1260fa9e4066Sahrens } 1261fa9e4066Sahrens 1262de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1263da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1264da6c28aaSamw ZFS_EXIT(zfsvfs); 1265*be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1266da6c28aaSamw } 1267fa9e4066Sahrens 1268da6c28aaSamw error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1269d47621a4STim Haley if (error == 0) 1270d47621a4STim Haley error = specvp_check(vpp, cr); 1271fa9e4066Sahrens 1272fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1273fa9e4066Sahrens return (error); 1274fa9e4066Sahrens } 1275fa9e4066Sahrens 1276fa9e4066Sahrens /* 1277fa9e4066Sahrens * Attempt to create a new entry in a directory. If the entry 1278fa9e4066Sahrens * already exists, truncate the file if permissible, else return 1279fa9e4066Sahrens * an error. Return the vp of the created or trunc'd file. 1280fa9e4066Sahrens * 1281fa9e4066Sahrens * IN: dvp - vnode of directory to put new file entry in. 1282fa9e4066Sahrens * name - name of new file entry. 1283fa9e4066Sahrens * vap - attributes of new file. 1284fa9e4066Sahrens * excl - flag indicating exclusive or non-exclusive mode. 1285fa9e4066Sahrens * mode - mode to open file with. 1286fa9e4066Sahrens * cr - credentials of caller. 1287fa9e4066Sahrens * flag - large file flag [UNUSED]. 1288da6c28aaSamw * ct - caller context 1289da6c28aaSamw * vsecp - ACL to be set 1290fa9e4066Sahrens * 1291fa9e4066Sahrens * OUT: vpp - vnode of created or trunc'd entry. 1292fa9e4066Sahrens * 1293fa9e4066Sahrens * RETURN: 0 if success 1294fa9e4066Sahrens * error code if failure 1295fa9e4066Sahrens * 1296fa9e4066Sahrens * Timestamps: 1297fa9e4066Sahrens * dvp - ctime|mtime updated if new entry created 1298fa9e4066Sahrens * vp - ctime|mtime always, atime if new 1299fa9e4066Sahrens */ 1300da6c28aaSamw 1301fa9e4066Sahrens /* ARGSUSED */ 1302fa9e4066Sahrens static int 1303fa9e4066Sahrens zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl, 1304da6c28aaSamw int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct, 1305da6c28aaSamw vsecattr_t *vsecp) 1306fa9e4066Sahrens { 1307fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1308fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1309f18faf3fSek zilog_t *zilog; 1310f18faf3fSek objset_t *os; 1311fa9e4066Sahrens zfs_dirlock_t *dl; 1312fa9e4066Sahrens dmu_tx_t *tx; 1313fa9e4066Sahrens int error; 1314c1ce5987SMark Shellenbaum ksid_t *ksid; 1315c1ce5987SMark Shellenbaum uid_t uid; 1316c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 13170a586ceaSMark Shellenbaum zfs_acl_ids_t acl_ids; 131889459e17SMark Shellenbaum boolean_t fuid_dirtied; 1319c8c24165SMark Shellenbaum boolean_t have_acl = B_FALSE; 1320da6c28aaSamw 1321da6c28aaSamw /* 1322da6c28aaSamw * If we have an ephemeral id, ACL, or XVATTR then 1323da6c28aaSamw * make sure file system is at proper version 1324da6c28aaSamw */ 1325da6c28aaSamw 1326c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1327c1ce5987SMark Shellenbaum if (ksid) 1328c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1329c1ce5987SMark Shellenbaum else 1330c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1331c1ce5987SMark Shellenbaum 1332da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1333da6c28aaSamw (vsecp || (vap->va_mask & AT_XVATTR) || 1334c1ce5987SMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1335*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1336fa9e4066Sahrens 13373cb34c60Sahrens ZFS_ENTER(zfsvfs); 13383cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1339f18faf3fSek os = zfsvfs->z_os; 1340f18faf3fSek zilog = zfsvfs->z_log; 1341fa9e4066Sahrens 1342de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1343da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1344da6c28aaSamw ZFS_EXIT(zfsvfs); 1345*be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1346da6c28aaSamw } 1347da6c28aaSamw 1348da6c28aaSamw if (vap->va_mask & AT_XVATTR) { 1349da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1350da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1351da6c28aaSamw ZFS_EXIT(zfsvfs); 1352da6c28aaSamw return (error); 1353da6c28aaSamw } 1354da6c28aaSamw } 1355fa9e4066Sahrens top: 1356fa9e4066Sahrens *vpp = NULL; 1357fa9e4066Sahrens 1358fa9e4066Sahrens if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr)) 1359fa9e4066Sahrens vap->va_mode &= ~VSVTX; 1360fa9e4066Sahrens 1361fa9e4066Sahrens if (*name == '\0') { 1362fa9e4066Sahrens /* 1363fa9e4066Sahrens * Null component name refers to the directory itself. 1364fa9e4066Sahrens */ 1365fa9e4066Sahrens VN_HOLD(dvp); 1366fa9e4066Sahrens zp = dzp; 1367fa9e4066Sahrens dl = NULL; 1368fa9e4066Sahrens error = 0; 1369fa9e4066Sahrens } else { 1370fa9e4066Sahrens /* possible VN_HOLD(zp) */ 1371da6c28aaSamw int zflg = 0; 1372da6c28aaSamw 1373da6c28aaSamw if (flag & FIGNORECASE) 1374da6c28aaSamw zflg |= ZCILOOK; 1375da6c28aaSamw 1376da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1377da6c28aaSamw NULL, NULL); 1378da6c28aaSamw if (error) { 13790b2a8171SMark Shellenbaum if (have_acl) 13800b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1381fa9e4066Sahrens if (strcmp(name, "..") == 0) 1382*be6fd75aSMatthew Ahrens error = SET_ERROR(EISDIR); 1383fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1384fa9e4066Sahrens return (error); 1385fa9e4066Sahrens } 1386fa9e4066Sahrens } 13870a586ceaSMark Shellenbaum 1388fa9e4066Sahrens if (zp == NULL) { 1389da6c28aaSamw uint64_t txtype; 1390da6c28aaSamw 1391fa9e4066Sahrens /* 1392fa9e4066Sahrens * Create a new file object and update the directory 1393fa9e4066Sahrens * to reference it. 1394fa9e4066Sahrens */ 1395da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 13960b2a8171SMark Shellenbaum if (have_acl) 13970b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1398fa9e4066Sahrens goto out; 1399fa9e4066Sahrens } 1400fa9e4066Sahrens 1401fa9e4066Sahrens /* 1402fa9e4066Sahrens * We only support the creation of regular files in 1403fa9e4066Sahrens * extended attribute directories. 1404fa9e4066Sahrens */ 14050a586ceaSMark Shellenbaum 14060a586ceaSMark Shellenbaum if ((dzp->z_pflags & ZFS_XATTR) && 1407fa9e4066Sahrens (vap->va_type != VREG)) { 14080b2a8171SMark Shellenbaum if (have_acl) 14090b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1410*be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 1411fa9e4066Sahrens goto out; 1412fa9e4066Sahrens } 1413fa9e4066Sahrens 1414c8c24165SMark Shellenbaum if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 1415c8c24165SMark Shellenbaum cr, vsecp, &acl_ids)) != 0) 141689459e17SMark Shellenbaum goto out; 1417c8c24165SMark Shellenbaum have_acl = B_TRUE; 1418c8c24165SMark Shellenbaum 141914843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 14204929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 1421*be6fd75aSMatthew Ahrens error = SET_ERROR(EDQUOT); 142214843421SMatthew Ahrens goto out; 142314843421SMatthew Ahrens } 142489459e17SMark Shellenbaum 1425fa9e4066Sahrens tx = dmu_tx_create(os); 14260a586ceaSMark Shellenbaum 14270a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 14280a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE); 14290a586ceaSMark Shellenbaum 143089459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 143114843421SMatthew Ahrens if (fuid_dirtied) 143214843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 1433ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 14340a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 14350a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && 14360a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1437fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 14380a586ceaSMark Shellenbaum 0, acl_ids.z_aclp->z_acl_bytes); 1439da6c28aaSamw } 14401209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 1441fa9e4066Sahrens if (error) { 1442fa9e4066Sahrens zfs_dirent_unlock(dl); 14431209a471SNeil Perrin if (error == ERESTART) { 14448a2f1b91Sahrens dmu_tx_wait(tx); 14458a2f1b91Sahrens dmu_tx_abort(tx); 1446fa9e4066Sahrens goto top; 1447fa9e4066Sahrens } 1448c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 14498a2f1b91Sahrens dmu_tx_abort(tx); 1450fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1451fa9e4066Sahrens return (error); 1452fa9e4066Sahrens } 14530a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 145489459e17SMark Shellenbaum 145589459e17SMark Shellenbaum if (fuid_dirtied) 145689459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 145789459e17SMark Shellenbaum 1458fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 1459da6c28aaSamw txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1460da6c28aaSamw if (flag & FIGNORECASE) 1461da6c28aaSamw txtype |= TX_CI; 1462da6c28aaSamw zfs_log_create(zilog, tx, txtype, dzp, zp, name, 146389459e17SMark Shellenbaum vsecp, acl_ids.z_fuidp, vap); 146489459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1465fa9e4066Sahrens dmu_tx_commit(tx); 1466fa9e4066Sahrens } else { 1467da6c28aaSamw int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1468da6c28aaSamw 14690b2a8171SMark Shellenbaum if (have_acl) 14700b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 14710b2a8171SMark Shellenbaum have_acl = B_FALSE; 14720b2a8171SMark Shellenbaum 1473fa9e4066Sahrens /* 1474fa9e4066Sahrens * A directory entry already exists for this name. 1475fa9e4066Sahrens */ 1476fa9e4066Sahrens /* 1477fa9e4066Sahrens * Can't truncate an existing file if in exclusive mode. 1478fa9e4066Sahrens */ 1479fa9e4066Sahrens if (excl == EXCL) { 1480*be6fd75aSMatthew Ahrens error = SET_ERROR(EEXIST); 1481fa9e4066Sahrens goto out; 1482fa9e4066Sahrens } 1483fa9e4066Sahrens /* 1484fa9e4066Sahrens * Can't open a directory for writing. 1485fa9e4066Sahrens */ 1486fa9e4066Sahrens if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1487*be6fd75aSMatthew Ahrens error = SET_ERROR(EISDIR); 1488fa9e4066Sahrens goto out; 1489fa9e4066Sahrens } 1490fa9e4066Sahrens /* 1491fa9e4066Sahrens * Verify requested access to file. 1492fa9e4066Sahrens */ 1493da6c28aaSamw if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1494fa9e4066Sahrens goto out; 1495fa9e4066Sahrens } 1496fa9e4066Sahrens 1497fa9e4066Sahrens mutex_enter(&dzp->z_lock); 1498fa9e4066Sahrens dzp->z_seq++; 1499fa9e4066Sahrens mutex_exit(&dzp->z_lock); 1500fa9e4066Sahrens 15015730cc9aSmaybee /* 15025730cc9aSmaybee * Truncate regular files if requested. 15035730cc9aSmaybee */ 15045730cc9aSmaybee if ((ZTOV(zp)->v_type == VREG) && 1505fa9e4066Sahrens (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1506cdb0ab79Smaybee /* we can't hold any locks when calling zfs_freesp() */ 1507cdb0ab79Smaybee zfs_dirent_unlock(dl); 1508cdb0ab79Smaybee dl = NULL; 15095730cc9aSmaybee error = zfs_freesp(zp, 0, 0, mode, TRUE); 1510df2381bfSpraks if (error == 0) { 1511da6c28aaSamw vnevent_create(ZTOV(zp), ct); 1512df2381bfSpraks } 1513fa9e4066Sahrens } 1514fa9e4066Sahrens } 1515fa9e4066Sahrens out: 1516fa9e4066Sahrens 1517fa9e4066Sahrens if (dl) 1518fa9e4066Sahrens zfs_dirent_unlock(dl); 1519fa9e4066Sahrens 1520fa9e4066Sahrens if (error) { 1521fa9e4066Sahrens if (zp) 1522fa9e4066Sahrens VN_RELE(ZTOV(zp)); 1523fa9e4066Sahrens } else { 1524fa9e4066Sahrens *vpp = ZTOV(zp); 1525d47621a4STim Haley error = specvp_check(vpp, cr); 1526fa9e4066Sahrens } 1527fa9e4066Sahrens 152855da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 15295002558fSNeil Perrin zil_commit(zilog, 0); 153055da60b9SMark J Musante 1531fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1532fa9e4066Sahrens return (error); 1533fa9e4066Sahrens } 1534fa9e4066Sahrens 1535fa9e4066Sahrens /* 1536fa9e4066Sahrens * Remove an entry from a directory. 1537fa9e4066Sahrens * 1538fa9e4066Sahrens * IN: dvp - vnode of directory to remove entry from. 1539fa9e4066Sahrens * name - name of entry to remove. 1540fa9e4066Sahrens * cr - credentials of caller. 1541da6c28aaSamw * ct - caller context 1542da6c28aaSamw * flags - case flags 1543fa9e4066Sahrens * 1544fa9e4066Sahrens * RETURN: 0 if success 1545fa9e4066Sahrens * error code if failure 1546fa9e4066Sahrens * 1547fa9e4066Sahrens * Timestamps: 1548fa9e4066Sahrens * dvp - ctime|mtime 1549fa9e4066Sahrens * vp - ctime (if nlink > 0) 1550fa9e4066Sahrens */ 15510a586ceaSMark Shellenbaum 15520a586ceaSMark Shellenbaum uint64_t null_xattr = 0; 15530a586ceaSMark Shellenbaum 1554da6c28aaSamw /*ARGSUSED*/ 1555fa9e4066Sahrens static int 1556da6c28aaSamw zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1557da6c28aaSamw int flags) 1558fa9e4066Sahrens { 1559fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 15600b2a8171SMark Shellenbaum znode_t *xzp; 1561fa9e4066Sahrens vnode_t *vp; 1562fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1563f18faf3fSek zilog_t *zilog; 15640b2a8171SMark Shellenbaum uint64_t acl_obj, xattr_obj; 15650a586ceaSMark Shellenbaum uint64_t xattr_obj_unlinked = 0; 156651bd2f97SNeil Perrin uint64_t obj = 0; 1567fa9e4066Sahrens zfs_dirlock_t *dl; 1568fa9e4066Sahrens dmu_tx_t *tx; 1569893a6d32Sahrens boolean_t may_delete_now, delete_now = FALSE; 1570cdb0ab79Smaybee boolean_t unlinked, toobig = FALSE; 1571da6c28aaSamw uint64_t txtype; 1572da6c28aaSamw pathname_t *realnmp = NULL; 1573da6c28aaSamw pathname_t realnm; 1574fa9e4066Sahrens int error; 1575da6c28aaSamw int zflg = ZEXISTS; 1576fa9e4066Sahrens 15773cb34c60Sahrens ZFS_ENTER(zfsvfs); 15783cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1579f18faf3fSek zilog = zfsvfs->z_log; 1580fa9e4066Sahrens 1581da6c28aaSamw if (flags & FIGNORECASE) { 1582da6c28aaSamw zflg |= ZCILOOK; 1583da6c28aaSamw pn_alloc(&realnm); 1584da6c28aaSamw realnmp = &realnm; 1585da6c28aaSamw } 1586da6c28aaSamw 1587fa9e4066Sahrens top: 15880b2a8171SMark Shellenbaum xattr_obj = 0; 15890b2a8171SMark Shellenbaum xzp = NULL; 1590fa9e4066Sahrens /* 1591fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 1592fa9e4066Sahrens */ 1593da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1594da6c28aaSamw NULL, realnmp)) { 1595da6c28aaSamw if (realnmp) 1596da6c28aaSamw pn_free(realnmp); 1597fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1598fa9e4066Sahrens return (error); 1599fa9e4066Sahrens } 1600fa9e4066Sahrens 1601fa9e4066Sahrens vp = ZTOV(zp); 1602fa9e4066Sahrens 1603fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1604fa9e4066Sahrens goto out; 1605fa9e4066Sahrens } 1606fa9e4066Sahrens 1607fa9e4066Sahrens /* 1608fa9e4066Sahrens * Need to use rmdir for removing directories. 1609fa9e4066Sahrens */ 1610fa9e4066Sahrens if (vp->v_type == VDIR) { 1611*be6fd75aSMatthew Ahrens error = SET_ERROR(EPERM); 1612fa9e4066Sahrens goto out; 1613fa9e4066Sahrens } 1614fa9e4066Sahrens 1615da6c28aaSamw vnevent_remove(vp, dvp, name, ct); 1616fa9e4066Sahrens 1617da6c28aaSamw if (realnmp) 1618ab04eb8eStimh dnlc_remove(dvp, realnmp->pn_buf); 1619da6c28aaSamw else 1620da6c28aaSamw dnlc_remove(dvp, name); 1621033f9833Sek 1622fa9e4066Sahrens mutex_enter(&vp->v_lock); 1623fa9e4066Sahrens may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1624fa9e4066Sahrens mutex_exit(&vp->v_lock); 1625fa9e4066Sahrens 1626fa9e4066Sahrens /* 1627893a6d32Sahrens * We may delete the znode now, or we may put it in the unlinked set; 1628fa9e4066Sahrens * it depends on whether we're the last link, and on whether there are 1629fa9e4066Sahrens * other holds on the vnode. So we dmu_tx_hold() the right things to 1630fa9e4066Sahrens * allow for either case. 1631fa9e4066Sahrens */ 163251bd2f97SNeil Perrin obj = zp->z_id; 1633fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1634ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 16350a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 16360a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 16370a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 1638cdb0ab79Smaybee if (may_delete_now) { 1639cdb0ab79Smaybee toobig = 16400a586ceaSMark Shellenbaum zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1641cdb0ab79Smaybee /* if the file is too big, only hold_free a token amount */ 1642cdb0ab79Smaybee dmu_tx_hold_free(tx, zp->z_id, 0, 1643cdb0ab79Smaybee (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); 1644cdb0ab79Smaybee } 1645fa9e4066Sahrens 1646fa9e4066Sahrens /* are there any extended attributes? */ 16470a586ceaSMark Shellenbaum error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 16480a586ceaSMark Shellenbaum &xattr_obj, sizeof (xattr_obj)); 16490b2a8171SMark Shellenbaum if (error == 0 && xattr_obj) { 16500a586ceaSMark Shellenbaum error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1651fb09f5aaSMadhav Suresh ASSERT0(error); 16520a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 16530a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1654fa9e4066Sahrens } 1655fa9e4066Sahrens 16561412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 16571412a1a2SMark Shellenbaum if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 1658fa9e4066Sahrens dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 16591412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 1660fa9e4066Sahrens 1661fa9e4066Sahrens /* charge as an update -- would be nice not to charge at all */ 1662893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1663fa9e4066Sahrens 16641209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 1665fa9e4066Sahrens if (error) { 1666fa9e4066Sahrens zfs_dirent_unlock(dl); 1667fa9e4066Sahrens VN_RELE(vp); 16680b2a8171SMark Shellenbaum if (xzp) 16690b2a8171SMark Shellenbaum VN_RELE(ZTOV(xzp)); 16701209a471SNeil Perrin if (error == ERESTART) { 16718a2f1b91Sahrens dmu_tx_wait(tx); 16728a2f1b91Sahrens dmu_tx_abort(tx); 1673fa9e4066Sahrens goto top; 1674fa9e4066Sahrens } 1675da6c28aaSamw if (realnmp) 1676da6c28aaSamw pn_free(realnmp); 16778a2f1b91Sahrens dmu_tx_abort(tx); 1678fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1679fa9e4066Sahrens return (error); 1680fa9e4066Sahrens } 1681fa9e4066Sahrens 1682fa9e4066Sahrens /* 1683fa9e4066Sahrens * Remove the directory entry. 1684fa9e4066Sahrens */ 1685da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1686fa9e4066Sahrens 1687fa9e4066Sahrens if (error) { 1688fa9e4066Sahrens dmu_tx_commit(tx); 1689fa9e4066Sahrens goto out; 1690fa9e4066Sahrens } 1691fa9e4066Sahrens 1692893a6d32Sahrens if (unlinked) { 16930a586ceaSMark Shellenbaum 16941412a1a2SMark Shellenbaum /* 16951412a1a2SMark Shellenbaum * Hold z_lock so that we can make sure that the ACL obj 16961412a1a2SMark Shellenbaum * hasn't changed. Could have been deleted due to 16971412a1a2SMark Shellenbaum * zfs_sa_upgrade(). 16981412a1a2SMark Shellenbaum */ 16991412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 1700fa9e4066Sahrens mutex_enter(&vp->v_lock); 17010a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 17020a586ceaSMark Shellenbaum &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 1703cdb0ab79Smaybee delete_now = may_delete_now && !toobig && 1704fa9e4066Sahrens vp->v_count == 1 && !vn_has_cached_data(vp) && 17051412a1a2SMark Shellenbaum xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 17060a586ceaSMark Shellenbaum acl_obj; 1707fa9e4066Sahrens mutex_exit(&vp->v_lock); 1708fa9e4066Sahrens } 1709fa9e4066Sahrens 1710fa9e4066Sahrens if (delete_now) { 17110a586ceaSMark Shellenbaum if (xattr_obj_unlinked) { 17120a586ceaSMark Shellenbaum ASSERT3U(xzp->z_links, ==, 2); 1713fa9e4066Sahrens mutex_enter(&xzp->z_lock); 1714893a6d32Sahrens xzp->z_unlinked = 1; 17150a586ceaSMark Shellenbaum xzp->z_links = 0; 17160a586ceaSMark Shellenbaum error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 17170a586ceaSMark Shellenbaum &xzp->z_links, sizeof (xzp->z_links), tx); 17180a586ceaSMark Shellenbaum ASSERT3U(error, ==, 0); 1719fa9e4066Sahrens mutex_exit(&xzp->z_lock); 1720893a6d32Sahrens zfs_unlinked_add(xzp, tx); 17211412a1a2SMark Shellenbaum 17220a586ceaSMark Shellenbaum if (zp->z_is_sa) 17230a586ceaSMark Shellenbaum error = sa_remove(zp->z_sa_hdl, 17240a586ceaSMark Shellenbaum SA_ZPL_XATTR(zfsvfs), tx); 17250a586ceaSMark Shellenbaum else 17260a586ceaSMark Shellenbaum error = sa_update(zp->z_sa_hdl, 17270a586ceaSMark Shellenbaum SA_ZPL_XATTR(zfsvfs), &null_xattr, 17280a586ceaSMark Shellenbaum sizeof (uint64_t), tx); 1729fb09f5aaSMadhav Suresh ASSERT0(error); 1730fa9e4066Sahrens } 1731fa9e4066Sahrens mutex_enter(&vp->v_lock); 1732fa9e4066Sahrens vp->v_count--; 1733fb09f5aaSMadhav Suresh ASSERT0(vp->v_count); 1734fa9e4066Sahrens mutex_exit(&vp->v_lock); 1735fa9e4066Sahrens mutex_exit(&zp->z_lock); 1736fa9e4066Sahrens zfs_znode_delete(zp, tx); 1737893a6d32Sahrens } else if (unlinked) { 17381412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 1739893a6d32Sahrens zfs_unlinked_add(zp, tx); 1740fa9e4066Sahrens } 1741fa9e4066Sahrens 1742da6c28aaSamw txtype = TX_REMOVE; 1743da6c28aaSamw if (flags & FIGNORECASE) 1744da6c28aaSamw txtype |= TX_CI; 174551bd2f97SNeil Perrin zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 1746fa9e4066Sahrens 1747fa9e4066Sahrens dmu_tx_commit(tx); 1748fa9e4066Sahrens out: 1749da6c28aaSamw if (realnmp) 1750da6c28aaSamw pn_free(realnmp); 1751da6c28aaSamw 1752fa9e4066Sahrens zfs_dirent_unlock(dl); 1753fa9e4066Sahrens 175406e0070dSMark Shellenbaum if (!delete_now) 1755fa9e4066Sahrens VN_RELE(vp); 175606e0070dSMark Shellenbaum if (xzp) 1757fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 1758fa9e4066Sahrens 175955da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 17605002558fSNeil Perrin zil_commit(zilog, 0); 176155da60b9SMark J Musante 1762fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1763fa9e4066Sahrens return (error); 1764fa9e4066Sahrens } 1765fa9e4066Sahrens 1766fa9e4066Sahrens /* 1767fa9e4066Sahrens * Create a new directory and insert it into dvp using the name 1768fa9e4066Sahrens * provided. Return a pointer to the inserted directory. 1769fa9e4066Sahrens * 1770fa9e4066Sahrens * IN: dvp - vnode of directory to add subdir to. 1771fa9e4066Sahrens * dirname - name of new directory. 1772fa9e4066Sahrens * vap - attributes of new directory. 1773fa9e4066Sahrens * cr - credentials of caller. 1774da6c28aaSamw * ct - caller context 1775da6c28aaSamw * vsecp - ACL to be set 1776fa9e4066Sahrens * 1777fa9e4066Sahrens * OUT: vpp - vnode of created directory. 1778fa9e4066Sahrens * 1779fa9e4066Sahrens * RETURN: 0 if success 1780fa9e4066Sahrens * error code if failure 1781fa9e4066Sahrens * 1782fa9e4066Sahrens * Timestamps: 1783fa9e4066Sahrens * dvp - ctime|mtime updated 1784fa9e4066Sahrens * vp - ctime|mtime|atime updated 1785fa9e4066Sahrens */ 1786da6c28aaSamw /*ARGSUSED*/ 1787fa9e4066Sahrens static int 1788da6c28aaSamw zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 1789da6c28aaSamw caller_context_t *ct, int flags, vsecattr_t *vsecp) 1790fa9e4066Sahrens { 1791fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1792fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1793f18faf3fSek zilog_t *zilog; 1794fa9e4066Sahrens zfs_dirlock_t *dl; 1795da6c28aaSamw uint64_t txtype; 1796fa9e4066Sahrens dmu_tx_t *tx; 1797fa9e4066Sahrens int error; 1798da6c28aaSamw int zf = ZNEW; 1799c1ce5987SMark Shellenbaum ksid_t *ksid; 1800c1ce5987SMark Shellenbaum uid_t uid; 1801c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 18020a586ceaSMark Shellenbaum zfs_acl_ids_t acl_ids; 180389459e17SMark Shellenbaum boolean_t fuid_dirtied; 1804fa9e4066Sahrens 1805fa9e4066Sahrens ASSERT(vap->va_type == VDIR); 1806fa9e4066Sahrens 1807da6c28aaSamw /* 1808da6c28aaSamw * If we have an ephemeral id, ACL, or XVATTR then 1809da6c28aaSamw * make sure file system is at proper version 1810da6c28aaSamw */ 1811da6c28aaSamw 1812c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1813c1ce5987SMark Shellenbaum if (ksid) 1814c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1815c1ce5987SMark Shellenbaum else 1816c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1817da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1818c1ce5987SMark Shellenbaum (vsecp || (vap->va_mask & AT_XVATTR) || 1819756962ecSMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1820*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1821da6c28aaSamw 18223cb34c60Sahrens ZFS_ENTER(zfsvfs); 18233cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1824f18faf3fSek zilog = zfsvfs->z_log; 1825fa9e4066Sahrens 18260a586ceaSMark Shellenbaum if (dzp->z_pflags & ZFS_XATTR) { 1827fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1828*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1829fa9e4066Sahrens } 1830da6c28aaSamw 1831de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(dirname, 1832da6c28aaSamw strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1833da6c28aaSamw ZFS_EXIT(zfsvfs); 1834*be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1835da6c28aaSamw } 1836da6c28aaSamw if (flags & FIGNORECASE) 1837da6c28aaSamw zf |= ZCILOOK; 1838da6c28aaSamw 1839c8c24165SMark Shellenbaum if (vap->va_mask & AT_XVATTR) { 1840da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1841da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1842da6c28aaSamw ZFS_EXIT(zfsvfs); 1843da6c28aaSamw return (error); 1844da6c28aaSamw } 1845c8c24165SMark Shellenbaum } 1846fa9e4066Sahrens 1847c8c24165SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 1848c8c24165SMark Shellenbaum vsecp, &acl_ids)) != 0) { 1849c8c24165SMark Shellenbaum ZFS_EXIT(zfsvfs); 1850c8c24165SMark Shellenbaum return (error); 1851c8c24165SMark Shellenbaum } 1852fa9e4066Sahrens /* 1853fa9e4066Sahrens * First make sure the new directory doesn't exist. 1854c8c24165SMark Shellenbaum * 1855c8c24165SMark Shellenbaum * Existence is checked first to make sure we don't return 1856c8c24165SMark Shellenbaum * EACCES instead of EEXIST which can cause some applications 1857c8c24165SMark Shellenbaum * to fail. 1858fa9e4066Sahrens */ 1859da6c28aaSamw top: 1860da6c28aaSamw *vpp = NULL; 1861da6c28aaSamw 1862da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 1863da6c28aaSamw NULL, NULL)) { 1864c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1865fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1866fa9e4066Sahrens return (error); 1867fa9e4066Sahrens } 1868fa9e4066Sahrens 1869da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 1870c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1871d2443e76Smarks zfs_dirent_unlock(dl); 1872d2443e76Smarks ZFS_EXIT(zfsvfs); 1873d2443e76Smarks return (error); 1874d2443e76Smarks } 1875d2443e76Smarks 187614843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 18774929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 187814843421SMatthew Ahrens zfs_dirent_unlock(dl); 187914843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 1880*be6fd75aSMatthew Ahrens return (SET_ERROR(EDQUOT)); 188114843421SMatthew Ahrens } 188289459e17SMark Shellenbaum 1883fa9e4066Sahrens /* 1884fa9e4066Sahrens * Add a new entry to the directory. 1885fa9e4066Sahrens */ 1886fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1887ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 1888ea8dc4b6Seschrock dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 188989459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 189014843421SMatthew Ahrens if (fuid_dirtied) 189114843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 18920a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 18930a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 18940a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes); 18950a586ceaSMark Shellenbaum } 18960a586ceaSMark Shellenbaum 18970a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 18980a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE); 18990a586ceaSMark Shellenbaum 19001209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 1901fa9e4066Sahrens if (error) { 1902fa9e4066Sahrens zfs_dirent_unlock(dl); 19031209a471SNeil Perrin if (error == ERESTART) { 19048a2f1b91Sahrens dmu_tx_wait(tx); 19058a2f1b91Sahrens dmu_tx_abort(tx); 1906fa9e4066Sahrens goto top; 1907fa9e4066Sahrens } 1908c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 19098a2f1b91Sahrens dmu_tx_abort(tx); 1910fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1911fa9e4066Sahrens return (error); 1912fa9e4066Sahrens } 1913fa9e4066Sahrens 1914fa9e4066Sahrens /* 1915fa9e4066Sahrens * Create new node. 1916fa9e4066Sahrens */ 19170a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1918fa9e4066Sahrens 191989459e17SMark Shellenbaum if (fuid_dirtied) 192089459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 19210a586ceaSMark Shellenbaum 1922fa9e4066Sahrens /* 1923fa9e4066Sahrens * Now put new name in parent dir. 1924fa9e4066Sahrens */ 1925fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 1926fa9e4066Sahrens 1927fa9e4066Sahrens *vpp = ZTOV(zp); 1928fa9e4066Sahrens 1929da6c28aaSamw txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 1930da6c28aaSamw if (flags & FIGNORECASE) 1931da6c28aaSamw txtype |= TX_CI; 193289459e17SMark Shellenbaum zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 193389459e17SMark Shellenbaum acl_ids.z_fuidp, vap); 1934da6c28aaSamw 193589459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 19360a586ceaSMark Shellenbaum 1937fa9e4066Sahrens dmu_tx_commit(tx); 1938fa9e4066Sahrens 1939fa9e4066Sahrens zfs_dirent_unlock(dl); 1940fa9e4066Sahrens 194155da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 19425002558fSNeil Perrin zil_commit(zilog, 0); 194355da60b9SMark J Musante 1944fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1945fa9e4066Sahrens return (0); 1946fa9e4066Sahrens } 1947fa9e4066Sahrens 1948fa9e4066Sahrens /* 1949fa9e4066Sahrens * Remove a directory subdir entry. If the current working 1950fa9e4066Sahrens * directory is the same as the subdir to be removed, the 1951fa9e4066Sahrens * remove will fail. 1952fa9e4066Sahrens * 1953fa9e4066Sahrens * IN: dvp - vnode of directory to remove from. 1954fa9e4066Sahrens * name - name of directory to be removed. 1955fa9e4066Sahrens * cwd - vnode of current working directory. 1956fa9e4066Sahrens * cr - credentials of caller. 1957da6c28aaSamw * ct - caller context 1958da6c28aaSamw * flags - case flags 1959fa9e4066Sahrens * 1960fa9e4066Sahrens * RETURN: 0 if success 1961fa9e4066Sahrens * error code if failure 1962fa9e4066Sahrens * 1963fa9e4066Sahrens * Timestamps: 1964fa9e4066Sahrens * dvp - ctime|mtime updated 1965fa9e4066Sahrens */ 1966da6c28aaSamw /*ARGSUSED*/ 1967fa9e4066Sahrens static int 1968da6c28aaSamw zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 1969da6c28aaSamw caller_context_t *ct, int flags) 1970fa9e4066Sahrens { 1971fa9e4066Sahrens znode_t *dzp = VTOZ(dvp); 1972fa9e4066Sahrens znode_t *zp; 1973fa9e4066Sahrens vnode_t *vp; 1974fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1975f18faf3fSek zilog_t *zilog; 1976fa9e4066Sahrens zfs_dirlock_t *dl; 1977fa9e4066Sahrens dmu_tx_t *tx; 1978fa9e4066Sahrens int error; 1979da6c28aaSamw int zflg = ZEXISTS; 1980fa9e4066Sahrens 19813cb34c60Sahrens ZFS_ENTER(zfsvfs); 19823cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1983f18faf3fSek zilog = zfsvfs->z_log; 1984fa9e4066Sahrens 1985da6c28aaSamw if (flags & FIGNORECASE) 1986da6c28aaSamw zflg |= ZCILOOK; 1987fa9e4066Sahrens top: 1988fa9e4066Sahrens zp = NULL; 1989fa9e4066Sahrens 1990fa9e4066Sahrens /* 1991fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 1992fa9e4066Sahrens */ 1993da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1994da6c28aaSamw NULL, NULL)) { 1995fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1996fa9e4066Sahrens return (error); 1997fa9e4066Sahrens } 1998fa9e4066Sahrens 1999fa9e4066Sahrens vp = ZTOV(zp); 2000fa9e4066Sahrens 2001fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2002fa9e4066Sahrens goto out; 2003fa9e4066Sahrens } 2004fa9e4066Sahrens 2005fa9e4066Sahrens if (vp->v_type != VDIR) { 2006*be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTDIR); 2007fa9e4066Sahrens goto out; 2008fa9e4066Sahrens } 2009fa9e4066Sahrens 2010fa9e4066Sahrens if (vp == cwd) { 2011*be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 2012fa9e4066Sahrens goto out; 2013fa9e4066Sahrens } 2014fa9e4066Sahrens 2015da6c28aaSamw vnevent_rmdir(vp, dvp, name, ct); 2016fa9e4066Sahrens 2017fa9e4066Sahrens /* 2018af2c4821Smaybee * Grab a lock on the directory to make sure that noone is 2019af2c4821Smaybee * trying to add (or lookup) entries while we are removing it. 2020af2c4821Smaybee */ 2021af2c4821Smaybee rw_enter(&zp->z_name_lock, RW_WRITER); 2022af2c4821Smaybee 2023af2c4821Smaybee /* 2024af2c4821Smaybee * Grab a lock on the parent pointer to make sure we play well 2025fa9e4066Sahrens * with the treewalk and directory rename code. 2026fa9e4066Sahrens */ 2027fa9e4066Sahrens rw_enter(&zp->z_parent_lock, RW_WRITER); 2028fa9e4066Sahrens 2029fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 2030ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 20310a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2032893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 20330a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 20340a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 20351209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 2036fa9e4066Sahrens if (error) { 2037fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 2038af2c4821Smaybee rw_exit(&zp->z_name_lock); 2039fa9e4066Sahrens zfs_dirent_unlock(dl); 2040fa9e4066Sahrens VN_RELE(vp); 20411209a471SNeil Perrin if (error == ERESTART) { 20428a2f1b91Sahrens dmu_tx_wait(tx); 20438a2f1b91Sahrens dmu_tx_abort(tx); 2044fa9e4066Sahrens goto top; 2045fa9e4066Sahrens } 20468a2f1b91Sahrens dmu_tx_abort(tx); 2047fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2048fa9e4066Sahrens return (error); 2049fa9e4066Sahrens } 2050fa9e4066Sahrens 2051da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2052fa9e4066Sahrens 2053da6c28aaSamw if (error == 0) { 2054da6c28aaSamw uint64_t txtype = TX_RMDIR; 2055da6c28aaSamw if (flags & FIGNORECASE) 2056da6c28aaSamw txtype |= TX_CI; 20575002558fSNeil Perrin zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2058da6c28aaSamw } 2059fa9e4066Sahrens 2060fa9e4066Sahrens dmu_tx_commit(tx); 2061fa9e4066Sahrens 2062fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 2063af2c4821Smaybee rw_exit(&zp->z_name_lock); 2064fa9e4066Sahrens out: 2065fa9e4066Sahrens zfs_dirent_unlock(dl); 2066fa9e4066Sahrens 2067fa9e4066Sahrens VN_RELE(vp); 2068fa9e4066Sahrens 206955da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 20705002558fSNeil Perrin zil_commit(zilog, 0); 207155da60b9SMark J Musante 2072fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2073fa9e4066Sahrens return (error); 2074fa9e4066Sahrens } 2075fa9e4066Sahrens 2076fa9e4066Sahrens /* 2077fa9e4066Sahrens * Read as many directory entries as will fit into the provided 2078fa9e4066Sahrens * buffer from the given directory cursor position (specified in 2079fa9e4066Sahrens * the uio structure. 2080fa9e4066Sahrens * 2081fa9e4066Sahrens * IN: vp - vnode of directory to read. 2082fa9e4066Sahrens * uio - structure supplying read location, range info, 2083fa9e4066Sahrens * and return buffer. 2084fa9e4066Sahrens * cr - credentials of caller. 2085da6c28aaSamw * ct - caller context 2086da6c28aaSamw * flags - case flags 2087fa9e4066Sahrens * 2088fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 2089fa9e4066Sahrens * eofp - set to true if end-of-file detected. 2090fa9e4066Sahrens * 2091fa9e4066Sahrens * RETURN: 0 if success 2092fa9e4066Sahrens * error code if failure 2093fa9e4066Sahrens * 2094fa9e4066Sahrens * Timestamps: 2095fa9e4066Sahrens * vp - atime updated 2096fa9e4066Sahrens * 2097fa9e4066Sahrens * Note that the low 4 bits of the cookie returned by zap is always zero. 2098fa9e4066Sahrens * This allows us to use the low range for "special" directory entries: 2099fa9e4066Sahrens * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2100fa9e4066Sahrens * we use the offset 2 for the '.zfs' directory. 2101fa9e4066Sahrens */ 2102fa9e4066Sahrens /* ARGSUSED */ 2103fa9e4066Sahrens static int 2104da6c28aaSamw zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, 2105da6c28aaSamw caller_context_t *ct, int flags) 2106fa9e4066Sahrens { 2107fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2108fa9e4066Sahrens iovec_t *iovp; 2109da6c28aaSamw edirent_t *eodp; 2110fa9e4066Sahrens dirent64_t *odp; 2111fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 21127f6e3e7dSperrin objset_t *os; 2113fa9e4066Sahrens caddr_t outbuf; 2114fa9e4066Sahrens size_t bufsize; 2115fa9e4066Sahrens zap_cursor_t zc; 2116fa9e4066Sahrens zap_attribute_t zap; 2117fa9e4066Sahrens uint_t bytes_wanted; 2118fa9e4066Sahrens uint64_t offset; /* must be unsigned; checks for < 1 */ 21190a586ceaSMark Shellenbaum uint64_t parent; 2120fa9e4066Sahrens int local_eof; 21217f6e3e7dSperrin int outcount; 21227f6e3e7dSperrin int error; 21237f6e3e7dSperrin uint8_t prefetch; 2124b38f0970Sck boolean_t check_sysattrs; 2125fa9e4066Sahrens 21263cb34c60Sahrens ZFS_ENTER(zfsvfs); 21273cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2128fa9e4066Sahrens 21290a586ceaSMark Shellenbaum if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 21300a586ceaSMark Shellenbaum &parent, sizeof (parent))) != 0) { 21310a586ceaSMark Shellenbaum ZFS_EXIT(zfsvfs); 21320a586ceaSMark Shellenbaum return (error); 21330a586ceaSMark Shellenbaum } 21340a586ceaSMark Shellenbaum 2135fa9e4066Sahrens /* 2136fa9e4066Sahrens * If we are not given an eof variable, 2137fa9e4066Sahrens * use a local one. 2138fa9e4066Sahrens */ 2139fa9e4066Sahrens if (eofp == NULL) 2140fa9e4066Sahrens eofp = &local_eof; 2141fa9e4066Sahrens 2142fa9e4066Sahrens /* 2143fa9e4066Sahrens * Check for valid iov_len. 2144fa9e4066Sahrens */ 2145fa9e4066Sahrens if (uio->uio_iov->iov_len <= 0) { 2146fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2147*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 2148fa9e4066Sahrens } 2149fa9e4066Sahrens 2150fa9e4066Sahrens /* 2151fa9e4066Sahrens * Quit if directory has been removed (posix) 2152fa9e4066Sahrens */ 2153893a6d32Sahrens if ((*eofp = zp->z_unlinked) != 0) { 2154fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2155fa9e4066Sahrens return (0); 2156fa9e4066Sahrens } 2157fa9e4066Sahrens 21587f6e3e7dSperrin error = 0; 21597f6e3e7dSperrin os = zfsvfs->z_os; 21607f6e3e7dSperrin offset = uio->uio_loffset; 21617f6e3e7dSperrin prefetch = zp->z_zn_prefetch; 21627f6e3e7dSperrin 2163fa9e4066Sahrens /* 2164fa9e4066Sahrens * Initialize the iterator cursor. 2165fa9e4066Sahrens */ 2166fa9e4066Sahrens if (offset <= 3) { 2167fa9e4066Sahrens /* 2168fa9e4066Sahrens * Start iteration from the beginning of the directory. 2169fa9e4066Sahrens */ 21707f6e3e7dSperrin zap_cursor_init(&zc, os, zp->z_id); 2171fa9e4066Sahrens } else { 2172fa9e4066Sahrens /* 2173fa9e4066Sahrens * The offset is a serialized cursor. 2174fa9e4066Sahrens */ 21757f6e3e7dSperrin zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2176fa9e4066Sahrens } 2177fa9e4066Sahrens 2178fa9e4066Sahrens /* 2179fa9e4066Sahrens * Get space to change directory entries into fs independent format. 2180fa9e4066Sahrens */ 2181fa9e4066Sahrens iovp = uio->uio_iov; 2182fa9e4066Sahrens bytes_wanted = iovp->iov_len; 2183fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2184fa9e4066Sahrens bufsize = bytes_wanted; 2185fa9e4066Sahrens outbuf = kmem_alloc(bufsize, KM_SLEEP); 2186fa9e4066Sahrens odp = (struct dirent64 *)outbuf; 2187fa9e4066Sahrens } else { 2188fa9e4066Sahrens bufsize = bytes_wanted; 2189d5285caeSGeorge Wilson outbuf = NULL; 2190fa9e4066Sahrens odp = (struct dirent64 *)iovp->iov_base; 2191fa9e4066Sahrens } 2192da6c28aaSamw eodp = (struct edirent *)odp; 2193fa9e4066Sahrens 2194b38f0970Sck /* 21959660e5cbSJanice Chang * If this VFS supports the system attribute view interface; and 21969660e5cbSJanice Chang * we're looking at an extended attribute directory; and we care 21979660e5cbSJanice Chang * about normalization conflicts on this vfs; then we must check 21989660e5cbSJanice Chang * for normalization conflicts with the sysattr name space. 2199b38f0970Sck */ 22009660e5cbSJanice Chang check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2201b38f0970Sck (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2202b38f0970Sck (flags & V_RDDIR_ENTFLAGS); 2203b38f0970Sck 2204fa9e4066Sahrens /* 2205fa9e4066Sahrens * Transform to file-system independent format 2206fa9e4066Sahrens */ 2207fa9e4066Sahrens outcount = 0; 2208fa9e4066Sahrens while (outcount < bytes_wanted) { 2209b1b8ab34Slling ino64_t objnum; 2210b1b8ab34Slling ushort_t reclen; 221197f85387STim Haley off64_t *next = NULL; 2212b1b8ab34Slling 2213fa9e4066Sahrens /* 2214fa9e4066Sahrens * Special case `.', `..', and `.zfs'. 2215fa9e4066Sahrens */ 2216fa9e4066Sahrens if (offset == 0) { 2217fa9e4066Sahrens (void) strcpy(zap.za_name, "."); 2218da6c28aaSamw zap.za_normalization_conflict = 0; 2219b1b8ab34Slling objnum = zp->z_id; 2220fa9e4066Sahrens } else if (offset == 1) { 2221fa9e4066Sahrens (void) strcpy(zap.za_name, ".."); 2222da6c28aaSamw zap.za_normalization_conflict = 0; 22230a586ceaSMark Shellenbaum objnum = parent; 2224fa9e4066Sahrens } else if (offset == 2 && zfs_show_ctldir(zp)) { 2225fa9e4066Sahrens (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2226da6c28aaSamw zap.za_normalization_conflict = 0; 2227b1b8ab34Slling objnum = ZFSCTL_INO_ROOT; 2228fa9e4066Sahrens } else { 2229fa9e4066Sahrens /* 2230fa9e4066Sahrens * Grab next entry. 2231fa9e4066Sahrens */ 2232fa9e4066Sahrens if (error = zap_cursor_retrieve(&zc, &zap)) { 2233fa9e4066Sahrens if ((*eofp = (error == ENOENT)) != 0) 2234fa9e4066Sahrens break; 2235fa9e4066Sahrens else 2236fa9e4066Sahrens goto update; 2237fa9e4066Sahrens } 2238fa9e4066Sahrens 2239fa9e4066Sahrens if (zap.za_integer_length != 8 || 2240fa9e4066Sahrens zap.za_num_integers != 1) { 2241fa9e4066Sahrens cmn_err(CE_WARN, "zap_readdir: bad directory " 2242fa9e4066Sahrens "entry, obj = %lld, offset = %lld\n", 2243fa9e4066Sahrens (u_longlong_t)zp->z_id, 2244fa9e4066Sahrens (u_longlong_t)offset); 2245*be6fd75aSMatthew Ahrens error = SET_ERROR(ENXIO); 2246fa9e4066Sahrens goto update; 2247fa9e4066Sahrens } 2248b1b8ab34Slling 2249b1b8ab34Slling objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2250b1b8ab34Slling /* 2251b1b8ab34Slling * MacOS X can extract the object type here such as: 2252b1b8ab34Slling * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2253b1b8ab34Slling */ 2254b38f0970Sck 2255b38f0970Sck if (check_sysattrs && !zap.za_normalization_conflict) { 2256b38f0970Sck zap.za_normalization_conflict = 2257b38f0970Sck xattr_sysattr_casechk(zap.za_name); 2258b38f0970Sck } 2259fa9e4066Sahrens } 2260da6c28aaSamw 2261e802abbdSTim Haley if (flags & V_RDDIR_ACCFILTER) { 2262e802abbdSTim Haley /* 2263e802abbdSTim Haley * If we have no access at all, don't include 2264e802abbdSTim Haley * this entry in the returned information 2265e802abbdSTim Haley */ 2266e802abbdSTim Haley znode_t *ezp; 2267e802abbdSTim Haley if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2268e802abbdSTim Haley goto skip_entry; 2269e802abbdSTim Haley if (!zfs_has_access(ezp, cr)) { 2270e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2271e802abbdSTim Haley goto skip_entry; 2272e802abbdSTim Haley } 2273e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2274e802abbdSTim Haley } 2275e802abbdSTim Haley 2276da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) 2277da6c28aaSamw reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2278da6c28aaSamw else 2279da6c28aaSamw reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2280fa9e4066Sahrens 2281fa9e4066Sahrens /* 2282fa9e4066Sahrens * Will this entry fit in the buffer? 2283fa9e4066Sahrens */ 2284b1b8ab34Slling if (outcount + reclen > bufsize) { 2285fa9e4066Sahrens /* 2286fa9e4066Sahrens * Did we manage to fit anything in the buffer? 2287fa9e4066Sahrens */ 2288fa9e4066Sahrens if (!outcount) { 2289*be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 2290fa9e4066Sahrens goto update; 2291fa9e4066Sahrens } 2292fa9e4066Sahrens break; 2293fa9e4066Sahrens } 2294da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) { 2295da6c28aaSamw /* 2296da6c28aaSamw * Add extended flag entry: 2297da6c28aaSamw */ 2298da6c28aaSamw eodp->ed_ino = objnum; 2299da6c28aaSamw eodp->ed_reclen = reclen; 2300da6c28aaSamw /* NOTE: ed_off is the offset for the *next* entry */ 2301da6c28aaSamw next = &(eodp->ed_off); 2302da6c28aaSamw eodp->ed_eflags = zap.za_normalization_conflict ? 2303da6c28aaSamw ED_CASE_CONFLICT : 0; 2304da6c28aaSamw (void) strncpy(eodp->ed_name, zap.za_name, 2305da6c28aaSamw EDIRENT_NAMELEN(reclen)); 2306da6c28aaSamw eodp = (edirent_t *)((intptr_t)eodp + reclen); 2307da6c28aaSamw } else { 2308da6c28aaSamw /* 2309da6c28aaSamw * Add normal entry: 2310da6c28aaSamw */ 2311da6c28aaSamw odp->d_ino = objnum; 2312da6c28aaSamw odp->d_reclen = reclen; 2313da6c28aaSamw /* NOTE: d_off is the offset for the *next* entry */ 2314da6c28aaSamw next = &(odp->d_off); 2315da6c28aaSamw (void) strncpy(odp->d_name, zap.za_name, 2316da6c28aaSamw DIRENT64_NAMELEN(reclen)); 2317da6c28aaSamw odp = (dirent64_t *)((intptr_t)odp + reclen); 2318da6c28aaSamw } 2319b1b8ab34Slling outcount += reclen; 2320fa9e4066Sahrens 2321fa9e4066Sahrens ASSERT(outcount <= bufsize); 2322fa9e4066Sahrens 2323fa9e4066Sahrens /* Prefetch znode */ 23247f6e3e7dSperrin if (prefetch) 2325b1b8ab34Slling dmu_prefetch(os, objnum, 0, 0); 2326fa9e4066Sahrens 2327e802abbdSTim Haley skip_entry: 2328fa9e4066Sahrens /* 2329fa9e4066Sahrens * Move to the next entry, fill in the previous offset. 2330fa9e4066Sahrens */ 2331fa9e4066Sahrens if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2332fa9e4066Sahrens zap_cursor_advance(&zc); 2333fa9e4066Sahrens offset = zap_cursor_serialize(&zc); 2334fa9e4066Sahrens } else { 2335fa9e4066Sahrens offset += 1; 2336fa9e4066Sahrens } 233797f85387STim Haley if (next) 233897f85387STim Haley *next = offset; 2339fa9e4066Sahrens } 23407f6e3e7dSperrin zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2341fa9e4066Sahrens 2342fa9e4066Sahrens if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2343fa9e4066Sahrens iovp->iov_base += outcount; 2344fa9e4066Sahrens iovp->iov_len -= outcount; 2345fa9e4066Sahrens uio->uio_resid -= outcount; 2346fa9e4066Sahrens } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2347fa9e4066Sahrens /* 2348fa9e4066Sahrens * Reset the pointer. 2349fa9e4066Sahrens */ 2350fa9e4066Sahrens offset = uio->uio_loffset; 2351fa9e4066Sahrens } 2352fa9e4066Sahrens 2353fa9e4066Sahrens update: 235487e5029aSahrens zap_cursor_fini(&zc); 2355fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2356fa9e4066Sahrens kmem_free(outbuf, bufsize); 2357fa9e4066Sahrens 2358fa9e4066Sahrens if (error == ENOENT) 2359fa9e4066Sahrens error = 0; 2360fa9e4066Sahrens 2361fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2362fa9e4066Sahrens 2363fa9e4066Sahrens uio->uio_loffset = offset; 2364fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2365fa9e4066Sahrens return (error); 2366fa9e4066Sahrens } 2367fa9e4066Sahrens 2368ec533521Sfr ulong_t zfs_fsync_sync_cnt = 4; 2369ec533521Sfr 2370fa9e4066Sahrens static int 2371da6c28aaSamw zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2372fa9e4066Sahrens { 2373fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2374fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2375fa9e4066Sahrens 2376b468a217Seschrock /* 2377b468a217Seschrock * Regardless of whether this is required for standards conformance, 2378b468a217Seschrock * this is the logical behavior when fsync() is called on a file with 2379b468a217Seschrock * dirty pages. We use B_ASYNC since the ZIL transactions are already 2380b468a217Seschrock * going to be pushed out as part of the zil_commit(). 2381b468a217Seschrock */ 2382b468a217Seschrock if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) && 2383b468a217Seschrock (vp->v_type == VREG) && !(IS_SWAPVP(vp))) 2384da6c28aaSamw (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct); 2385b468a217Seschrock 2386ec533521Sfr (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2387ec533521Sfr 238855da60b9SMark J Musante if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 238955da60b9SMark J Musante ZFS_ENTER(zfsvfs); 239055da60b9SMark J Musante ZFS_VERIFY_ZP(zp); 23915002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 239255da60b9SMark J Musante ZFS_EXIT(zfsvfs); 239355da60b9SMark J Musante } 2394fa9e4066Sahrens return (0); 2395fa9e4066Sahrens } 2396fa9e4066Sahrens 2397da6c28aaSamw 2398fa9e4066Sahrens /* 2399fa9e4066Sahrens * Get the requested file attributes and place them in the provided 2400fa9e4066Sahrens * vattr structure. 2401fa9e4066Sahrens * 2402fa9e4066Sahrens * IN: vp - vnode of file. 2403fa9e4066Sahrens * vap - va_mask identifies requested attributes. 2404da6c28aaSamw * If AT_XVATTR set, then optional attrs are requested 2405da6c28aaSamw * flags - ATTR_NOACLCHECK (CIFS server context) 2406fa9e4066Sahrens * cr - credentials of caller. 2407da6c28aaSamw * ct - caller context 2408fa9e4066Sahrens * 2409fa9e4066Sahrens * OUT: vap - attribute values. 2410fa9e4066Sahrens * 2411fa9e4066Sahrens * RETURN: 0 (always succeeds) 2412fa9e4066Sahrens */ 2413fa9e4066Sahrens /* ARGSUSED */ 2414fa9e4066Sahrens static int 2415da6c28aaSamw zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2416da6c28aaSamw caller_context_t *ct) 2417fa9e4066Sahrens { 2418fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2419fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2420da6c28aaSamw int error = 0; 2421ecd6cf80Smarks uint64_t links; 24220a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 2423da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2424da6c28aaSamw xoptattr_t *xoap = NULL; 2425da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 24260a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[2]; 24270a586ceaSMark Shellenbaum int count = 0; 2428fa9e4066Sahrens 24293cb34c60Sahrens ZFS_ENTER(zfsvfs); 24303cb34c60Sahrens ZFS_VERIFY_ZP(zp); 24310a586ceaSMark Shellenbaum 2432f1696b23SMark Shellenbaum zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2433f1696b23SMark Shellenbaum 24340a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 24350a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 24360a586ceaSMark Shellenbaum 24370a586ceaSMark Shellenbaum if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 24380a586ceaSMark Shellenbaum ZFS_EXIT(zfsvfs); 24390a586ceaSMark Shellenbaum return (error); 24400a586ceaSMark Shellenbaum } 2441fa9e4066Sahrens 2442da6c28aaSamw /* 2443da6c28aaSamw * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2444da6c28aaSamw * Also, if we are the owner don't bother, since owner should 2445da6c28aaSamw * always be allowed to read basic attributes of file. 2446da6c28aaSamw */ 2447f1696b23SMark Shellenbaum if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2448f1696b23SMark Shellenbaum (vap->va_uid != crgetuid(cr))) { 2449da6c28aaSamw if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2450da6c28aaSamw skipaclchk, cr)) { 2451da6c28aaSamw ZFS_EXIT(zfsvfs); 2452da6c28aaSamw return (error); 2453da6c28aaSamw } 2454da6c28aaSamw } 2455da6c28aaSamw 2456fa9e4066Sahrens /* 2457fa9e4066Sahrens * Return all attributes. It's cheaper to provide the answer 2458fa9e4066Sahrens * than to determine whether we were asked the question. 2459fa9e4066Sahrens */ 2460fa9e4066Sahrens 246134f345efSRay Hassan mutex_enter(&zp->z_lock); 2462fa9e4066Sahrens vap->va_type = vp->v_type; 24630a586ceaSMark Shellenbaum vap->va_mode = zp->z_mode & MODEMASK; 2464fa9e4066Sahrens vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2465fa9e4066Sahrens vap->va_nodeid = zp->z_id; 2466ecd6cf80Smarks if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 24670a586ceaSMark Shellenbaum links = zp->z_links + 1; 2468ecd6cf80Smarks else 24690a586ceaSMark Shellenbaum links = zp->z_links; 2470ecd6cf80Smarks vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 24710a586ceaSMark Shellenbaum vap->va_size = zp->z_size; 247272fc53bcSmarks vap->va_rdev = vp->v_rdev; 2473fa9e4066Sahrens vap->va_seq = zp->z_seq; 2474fa9e4066Sahrens 2475fa9e4066Sahrens /* 2476da6c28aaSamw * Add in any requested optional attributes and the create time. 2477da6c28aaSamw * Also set the corresponding bits in the returned attribute bitmap. 2478fa9e4066Sahrens */ 2479da6c28aaSamw if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2480da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2481da6c28aaSamw xoap->xoa_archive = 24820a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2483da6c28aaSamw XVA_SET_RTN(xvap, XAT_ARCHIVE); 2484da6c28aaSamw } 2485da6c28aaSamw 2486da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2487da6c28aaSamw xoap->xoa_readonly = 24880a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_READONLY) != 0); 2489da6c28aaSamw XVA_SET_RTN(xvap, XAT_READONLY); 2490da6c28aaSamw } 2491da6c28aaSamw 2492da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2493da6c28aaSamw xoap->xoa_system = 24940a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_SYSTEM) != 0); 2495da6c28aaSamw XVA_SET_RTN(xvap, XAT_SYSTEM); 2496da6c28aaSamw } 2497da6c28aaSamw 2498da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2499da6c28aaSamw xoap->xoa_hidden = 25000a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_HIDDEN) != 0); 2501da6c28aaSamw XVA_SET_RTN(xvap, XAT_HIDDEN); 2502da6c28aaSamw } 2503da6c28aaSamw 2504da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2505da6c28aaSamw xoap->xoa_nounlink = 25060a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2507da6c28aaSamw XVA_SET_RTN(xvap, XAT_NOUNLINK); 2508da6c28aaSamw } 2509da6c28aaSamw 2510da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2511da6c28aaSamw xoap->xoa_immutable = 25120a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2513da6c28aaSamw XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2514da6c28aaSamw } 2515da6c28aaSamw 2516da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2517da6c28aaSamw xoap->xoa_appendonly = 25180a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2519da6c28aaSamw XVA_SET_RTN(xvap, XAT_APPENDONLY); 2520da6c28aaSamw } 2521da6c28aaSamw 2522da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2523da6c28aaSamw xoap->xoa_nodump = 25240a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NODUMP) != 0); 2525da6c28aaSamw XVA_SET_RTN(xvap, XAT_NODUMP); 2526da6c28aaSamw } 2527da6c28aaSamw 2528da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2529da6c28aaSamw xoap->xoa_opaque = 25300a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_OPAQUE) != 0); 2531da6c28aaSamw XVA_SET_RTN(xvap, XAT_OPAQUE); 2532da6c28aaSamw } 2533da6c28aaSamw 2534da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2535da6c28aaSamw xoap->xoa_av_quarantined = 25360a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2537da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2538da6c28aaSamw } 2539da6c28aaSamw 2540da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2541da6c28aaSamw xoap->xoa_av_modified = 25420a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2543da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2544da6c28aaSamw } 2545da6c28aaSamw 2546da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 25470a586ceaSMark Shellenbaum vp->v_type == VREG) { 25480a586ceaSMark Shellenbaum zfs_sa_get_scanstamp(zp, xvap); 2549da6c28aaSamw } 2550da6c28aaSamw 2551da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 25520a586ceaSMark Shellenbaum uint64_t times[2]; 25530a586ceaSMark Shellenbaum 25540a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 25550a586ceaSMark Shellenbaum times, sizeof (times)); 25560a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2557da6c28aaSamw XVA_SET_RTN(xvap, XAT_CREATETIME); 2558fa9e4066Sahrens } 25597a286c47SDai Ngo 25607a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 25610a586ceaSMark Shellenbaum xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 25627a286c47SDai Ngo XVA_SET_RTN(xvap, XAT_REPARSE); 25637a286c47SDai Ngo } 256499d5e173STim Haley if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 256599d5e173STim Haley xoap->xoa_generation = zp->z_gen; 256699d5e173STim Haley XVA_SET_RTN(xvap, XAT_GEN); 256799d5e173STim Haley } 2568fd9ee8b5Sjoyce mcintosh 2569fd9ee8b5Sjoyce mcintosh if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2570fd9ee8b5Sjoyce mcintosh xoap->xoa_offline = 2571fd9ee8b5Sjoyce mcintosh ((zp->z_pflags & ZFS_OFFLINE) != 0); 2572fd9ee8b5Sjoyce mcintosh XVA_SET_RTN(xvap, XAT_OFFLINE); 2573fd9ee8b5Sjoyce mcintosh } 2574fd9ee8b5Sjoyce mcintosh 2575fd9ee8b5Sjoyce mcintosh if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2576fd9ee8b5Sjoyce mcintosh xoap->xoa_sparse = 2577fd9ee8b5Sjoyce mcintosh ((zp->z_pflags & ZFS_SPARSE) != 0); 2578fd9ee8b5Sjoyce mcintosh XVA_SET_RTN(xvap, XAT_SPARSE); 2579fd9ee8b5Sjoyce mcintosh } 2580fa9e4066Sahrens } 2581fa9e4066Sahrens 25820a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 25830a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_mtime, mtime); 25840a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2585da6c28aaSamw 2586fa9e4066Sahrens mutex_exit(&zp->z_lock); 2587fa9e4066Sahrens 25880a586ceaSMark Shellenbaum sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks); 2589fa9e4066Sahrens 2590fa9e4066Sahrens if (zp->z_blksz == 0) { 2591fa9e4066Sahrens /* 2592fa9e4066Sahrens * Block size hasn't been set; suggest maximal I/O transfers. 2593fa9e4066Sahrens */ 2594fa9e4066Sahrens vap->va_blksize = zfsvfs->z_max_blksz; 2595fa9e4066Sahrens } 2596fa9e4066Sahrens 2597fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2598fa9e4066Sahrens return (0); 2599fa9e4066Sahrens } 2600fa9e4066Sahrens 2601fa9e4066Sahrens /* 2602fa9e4066Sahrens * Set the file attributes to the values contained in the 2603fa9e4066Sahrens * vattr structure. 2604fa9e4066Sahrens * 2605fa9e4066Sahrens * IN: vp - vnode of file to be modified. 2606fa9e4066Sahrens * vap - new attribute values. 2607da6c28aaSamw * If AT_XVATTR set, then optional attrs are being set 2608fa9e4066Sahrens * flags - ATTR_UTIME set if non-default time values provided. 2609da6c28aaSamw * - ATTR_NOACLCHECK (CIFS context only). 2610fa9e4066Sahrens * cr - credentials of caller. 2611da6c28aaSamw * ct - caller context 2612fa9e4066Sahrens * 2613fa9e4066Sahrens * RETURN: 0 if success 2614fa9e4066Sahrens * error code if failure 2615fa9e4066Sahrens * 2616fa9e4066Sahrens * Timestamps: 2617fa9e4066Sahrens * vp - ctime updated, mtime updated if size changed. 2618fa9e4066Sahrens */ 2619fa9e4066Sahrens /* ARGSUSED */ 2620fa9e4066Sahrens static int 2621fa9e4066Sahrens zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2622fa9e4066Sahrens caller_context_t *ct) 2623fa9e4066Sahrens { 2624f18faf3fSek znode_t *zp = VTOZ(vp); 2625fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2626f18faf3fSek zilog_t *zilog; 2627fa9e4066Sahrens dmu_tx_t *tx; 2628fa9e4066Sahrens vattr_t oldva; 2629ae4caef8SMark Shellenbaum xvattr_t tmpxvattr; 26305730cc9aSmaybee uint_t mask = vap->va_mask; 2631d5285caeSGeorge Wilson uint_t saved_mask = 0; 2632f92daba9Smarks int trim_mask = 0; 2633fa9e4066Sahrens uint64_t new_mode; 263489459e17SMark Shellenbaum uint64_t new_uid, new_gid; 26350b2a8171SMark Shellenbaum uint64_t xattr_obj; 26360a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 2637d2443e76Smarks znode_t *attrzp; 2638fa9e4066Sahrens int need_policy = FALSE; 26390a586ceaSMark Shellenbaum int err, err2; 2640da6c28aaSamw zfs_fuid_info_t *fuidp = NULL; 2641da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2642da6c28aaSamw xoptattr_t *xoap; 26430b2a8171SMark Shellenbaum zfs_acl_t *aclp; 2644da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 26450a586ceaSMark Shellenbaum boolean_t fuid_dirtied = B_FALSE; 26460a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[7], xattr_bulk[7]; 26470a586ceaSMark Shellenbaum int count = 0, xattr_count = 0; 2648fa9e4066Sahrens 2649fa9e4066Sahrens if (mask == 0) 2650fa9e4066Sahrens return (0); 2651fa9e4066Sahrens 2652fa9e4066Sahrens if (mask & AT_NOSET) 2653*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 2654fa9e4066Sahrens 26553cb34c60Sahrens ZFS_ENTER(zfsvfs); 26563cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2657da6c28aaSamw 2658da6c28aaSamw zilog = zfsvfs->z_log; 2659da6c28aaSamw 2660da6c28aaSamw /* 2661da6c28aaSamw * Make sure that if we have ephemeral uid/gid or xvattr specified 2662da6c28aaSamw * that file system is at proper version level 2663da6c28aaSamw */ 2664da6c28aaSamw 2665da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 2666da6c28aaSamw (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2667da6c28aaSamw ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 266802dcba3bStimh (mask & AT_XVATTR))) { 266902dcba3bStimh ZFS_EXIT(zfsvfs); 2670*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 267102dcba3bStimh } 2672da6c28aaSamw 267302dcba3bStimh if (mask & AT_SIZE && vp->v_type == VDIR) { 267402dcba3bStimh ZFS_EXIT(zfsvfs); 2675*be6fd75aSMatthew Ahrens return (SET_ERROR(EISDIR)); 267602dcba3bStimh } 2677fa9e4066Sahrens 267802dcba3bStimh if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 267902dcba3bStimh ZFS_EXIT(zfsvfs); 2680*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 268102dcba3bStimh } 268284c5a155Smarks 2683da6c28aaSamw /* 2684da6c28aaSamw * If this is an xvattr_t, then get a pointer to the structure of 2685da6c28aaSamw * optional attributes. If this is NULL, then we have a vattr_t. 2686da6c28aaSamw */ 2687da6c28aaSamw xoap = xva_getxoptattr(xvap); 2688da6c28aaSamw 2689ae4caef8SMark Shellenbaum xva_init(&tmpxvattr); 2690ae4caef8SMark Shellenbaum 2691da6c28aaSamw /* 2692da6c28aaSamw * Immutable files can only alter immutable bit and atime 2693da6c28aaSamw */ 26940a586ceaSMark Shellenbaum if ((zp->z_pflags & ZFS_IMMUTABLE) && 2695da6c28aaSamw ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 269602dcba3bStimh ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 269702dcba3bStimh ZFS_EXIT(zfsvfs); 2698*be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 269902dcba3bStimh } 2700da6c28aaSamw 27010a586ceaSMark Shellenbaum if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 270202dcba3bStimh ZFS_EXIT(zfsvfs); 2703*be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 270402dcba3bStimh } 2705fa9e4066Sahrens 270693129341Smarks /* 270793129341Smarks * Verify timestamps doesn't overflow 32 bits. 270893129341Smarks * ZFS can handle large timestamps, but 32bit syscalls can't 270993129341Smarks * handle times greater than 2039. This check should be removed 271093129341Smarks * once large timestamps are fully supported. 271193129341Smarks */ 271293129341Smarks if (mask & (AT_ATIME | AT_MTIME)) { 271393129341Smarks if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 271493129341Smarks ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 271593129341Smarks ZFS_EXIT(zfsvfs); 2716*be6fd75aSMatthew Ahrens return (SET_ERROR(EOVERFLOW)); 271793129341Smarks } 271893129341Smarks } 271993129341Smarks 2720fa9e4066Sahrens top: 2721d2443e76Smarks attrzp = NULL; 27220b2a8171SMark Shellenbaum aclp = NULL; 2723fa9e4066Sahrens 2724d47621a4STim Haley /* Can this be moved to before the top label? */ 2725fa9e4066Sahrens if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2726fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2727*be6fd75aSMatthew Ahrens return (SET_ERROR(EROFS)); 2728fa9e4066Sahrens } 2729fa9e4066Sahrens 2730fa9e4066Sahrens /* 2731fa9e4066Sahrens * First validate permissions 2732fa9e4066Sahrens */ 2733fa9e4066Sahrens 2734fa9e4066Sahrens if (mask & AT_SIZE) { 2735da6c28aaSamw err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); 2736fa9e4066Sahrens if (err) { 2737fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2738fa9e4066Sahrens return (err); 2739fa9e4066Sahrens } 27405730cc9aSmaybee /* 27415730cc9aSmaybee * XXX - Note, we are not providing any open 27425730cc9aSmaybee * mode flags here (like FNDELAY), so we may 27435730cc9aSmaybee * block if there are locks present... this 27445730cc9aSmaybee * should be addressed in openat(). 27455730cc9aSmaybee */ 2746cdb0ab79Smaybee /* XXX - would it be OK to generate a log record here? */ 2747cdb0ab79Smaybee err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 27485730cc9aSmaybee if (err) { 27495730cc9aSmaybee ZFS_EXIT(zfsvfs); 27505730cc9aSmaybee return (err); 27515730cc9aSmaybee } 2752fa9e4066Sahrens } 2753fa9e4066Sahrens 2754da6c28aaSamw if (mask & (AT_ATIME|AT_MTIME) || 2755da6c28aaSamw ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2756da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_READONLY) || 2757da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2758fd9ee8b5Sjoyce mcintosh XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 2759fd9ee8b5Sjoyce mcintosh XVA_ISSET_REQ(xvap, XAT_SPARSE) || 2760da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 27610a586ceaSMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2762da6c28aaSamw need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2763da6c28aaSamw skipaclchk, cr); 27640a586ceaSMark Shellenbaum } 2765fa9e4066Sahrens 2766fa9e4066Sahrens if (mask & (AT_UID|AT_GID)) { 2767fa9e4066Sahrens int idmask = (mask & (AT_UID|AT_GID)); 2768fa9e4066Sahrens int take_owner; 2769fa9e4066Sahrens int take_group; 2770fa9e4066Sahrens 2771a933bc41Smarks /* 2772a933bc41Smarks * NOTE: even if a new mode is being set, 2773a933bc41Smarks * we may clear S_ISUID/S_ISGID bits. 2774a933bc41Smarks */ 2775a933bc41Smarks 2776a933bc41Smarks if (!(mask & AT_MODE)) 27770a586ceaSMark Shellenbaum vap->va_mode = zp->z_mode; 2778a933bc41Smarks 2779fa9e4066Sahrens /* 2780fa9e4066Sahrens * Take ownership or chgrp to group we are a member of 2781fa9e4066Sahrens */ 2782fa9e4066Sahrens 2783fa9e4066Sahrens take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2784da6c28aaSamw take_group = (mask & AT_GID) && 2785da6c28aaSamw zfs_groupmember(zfsvfs, vap->va_gid, cr); 2786fa9e4066Sahrens 2787fa9e4066Sahrens /* 2788fa9e4066Sahrens * If both AT_UID and AT_GID are set then take_owner and 2789fa9e4066Sahrens * take_group must both be set in order to allow taking 2790fa9e4066Sahrens * ownership. 2791fa9e4066Sahrens * 2792fa9e4066Sahrens * Otherwise, send the check through secpolicy_vnode_setattr() 2793fa9e4066Sahrens * 2794fa9e4066Sahrens */ 2795fa9e4066Sahrens 2796fa9e4066Sahrens if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2797fa9e4066Sahrens ((idmask == AT_UID) && take_owner) || 2798fa9e4066Sahrens ((idmask == AT_GID) && take_group)) { 2799da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2800da6c28aaSamw skipaclchk, cr) == 0) { 2801fa9e4066Sahrens /* 2802fa9e4066Sahrens * Remove setuid/setgid for non-privileged users 2803fa9e4066Sahrens */ 280413f9f30eSmarks secpolicy_setid_clear(vap, cr); 2805f92daba9Smarks trim_mask = (mask & (AT_UID|AT_GID)); 2806fa9e4066Sahrens } else { 2807fa9e4066Sahrens need_policy = TRUE; 2808fa9e4066Sahrens } 2809fa9e4066Sahrens } else { 2810fa9e4066Sahrens need_policy = TRUE; 2811fa9e4066Sahrens } 2812fa9e4066Sahrens } 2813fa9e4066Sahrens 2814f92daba9Smarks mutex_enter(&zp->z_lock); 28150a586ceaSMark Shellenbaum oldva.va_mode = zp->z_mode; 2816f1696b23SMark Shellenbaum zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2817da6c28aaSamw if (mask & AT_XVATTR) { 2818ae4caef8SMark Shellenbaum /* 2819ae4caef8SMark Shellenbaum * Update xvattr mask to include only those attributes 2820ae4caef8SMark Shellenbaum * that are actually changing. 2821ae4caef8SMark Shellenbaum * 2822ae4caef8SMark Shellenbaum * the bits will be restored prior to actually setting 2823ae4caef8SMark Shellenbaum * the attributes so the caller thinks they were set. 2824ae4caef8SMark Shellenbaum */ 2825ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2826ae4caef8SMark Shellenbaum if (xoap->xoa_appendonly != 28270a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 2828ae4caef8SMark Shellenbaum need_policy = TRUE; 2829ae4caef8SMark Shellenbaum } else { 2830ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2831ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2832ae4caef8SMark Shellenbaum } 2833ae4caef8SMark Shellenbaum } 2834ae4caef8SMark Shellenbaum 2835ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2836ae4caef8SMark Shellenbaum if (xoap->xoa_nounlink != 28370a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 2838ae4caef8SMark Shellenbaum need_policy = TRUE; 2839ae4caef8SMark Shellenbaum } else { 2840ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2841ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2842ae4caef8SMark Shellenbaum } 2843ae4caef8SMark Shellenbaum } 2844ae4caef8SMark Shellenbaum 2845ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2846ae4caef8SMark Shellenbaum if (xoap->xoa_immutable != 28470a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 2848ae4caef8SMark Shellenbaum need_policy = TRUE; 2849ae4caef8SMark Shellenbaum } else { 2850ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2851ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 2852ae4caef8SMark Shellenbaum } 2853ae4caef8SMark Shellenbaum } 2854ae4caef8SMark Shellenbaum 2855ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2856ae4caef8SMark Shellenbaum if (xoap->xoa_nodump != 28570a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NODUMP) != 0)) { 2858ae4caef8SMark Shellenbaum need_policy = TRUE; 2859ae4caef8SMark Shellenbaum } else { 2860ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NODUMP); 2861ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 2862ae4caef8SMark Shellenbaum } 2863ae4caef8SMark Shellenbaum } 2864ae4caef8SMark Shellenbaum 2865ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2866ae4caef8SMark Shellenbaum if (xoap->xoa_av_modified != 28670a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 2868ae4caef8SMark Shellenbaum need_policy = TRUE; 2869ae4caef8SMark Shellenbaum } else { 2870ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2871ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2872ae4caef8SMark Shellenbaum } 2873ae4caef8SMark Shellenbaum } 2874ae4caef8SMark Shellenbaum 2875ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2876ae4caef8SMark Shellenbaum if ((vp->v_type != VREG && 2877ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined) || 2878ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined != 28790a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 2880ae4caef8SMark Shellenbaum need_policy = TRUE; 2881ae4caef8SMark Shellenbaum } else { 2882ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2883ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2884ae4caef8SMark Shellenbaum } 2885ae4caef8SMark Shellenbaum } 2886ae4caef8SMark Shellenbaum 28877a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 28887a286c47SDai Ngo mutex_exit(&zp->z_lock); 28897a286c47SDai Ngo ZFS_EXIT(zfsvfs); 2890*be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 28917a286c47SDai Ngo } 28927a286c47SDai Ngo 2893ae4caef8SMark Shellenbaum if (need_policy == FALSE && 2894ae4caef8SMark Shellenbaum (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 2895ae4caef8SMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2896da6c28aaSamw need_policy = TRUE; 2897da6c28aaSamw } 2898da6c28aaSamw } 2899da6c28aaSamw 2900f92daba9Smarks mutex_exit(&zp->z_lock); 2901fa9e4066Sahrens 2902f92daba9Smarks if (mask & AT_MODE) { 2903da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 2904f92daba9Smarks err = secpolicy_setid_setsticky_clear(vp, vap, 2905f92daba9Smarks &oldva, cr); 2906f92daba9Smarks if (err) { 2907f92daba9Smarks ZFS_EXIT(zfsvfs); 2908f92daba9Smarks return (err); 2909f92daba9Smarks } 2910f92daba9Smarks trim_mask |= AT_MODE; 2911f92daba9Smarks } else { 2912f92daba9Smarks need_policy = TRUE; 2913f92daba9Smarks } 2914f92daba9Smarks } 291513f9f30eSmarks 2916f92daba9Smarks if (need_policy) { 291713f9f30eSmarks /* 291813f9f30eSmarks * If trim_mask is set then take ownership 2919f92daba9Smarks * has been granted or write_acl is present and user 2920f92daba9Smarks * has the ability to modify mode. In that case remove 2921f92daba9Smarks * UID|GID and or MODE from mask so that 292213f9f30eSmarks * secpolicy_vnode_setattr() doesn't revoke it. 292313f9f30eSmarks */ 292413f9f30eSmarks 2925f92daba9Smarks if (trim_mask) { 2926f92daba9Smarks saved_mask = vap->va_mask; 2927f92daba9Smarks vap->va_mask &= ~trim_mask; 2928f92daba9Smarks } 2929fa9e4066Sahrens err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2930da6c28aaSamw (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2931fa9e4066Sahrens if (err) { 2932fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2933fa9e4066Sahrens return (err); 2934fa9e4066Sahrens } 293513f9f30eSmarks 293613f9f30eSmarks if (trim_mask) 2937f92daba9Smarks vap->va_mask |= saved_mask; 2938fa9e4066Sahrens } 2939fa9e4066Sahrens 2940fa9e4066Sahrens /* 2941fa9e4066Sahrens * secpolicy_vnode_setattr, or take ownership may have 2942fa9e4066Sahrens * changed va_mask 2943fa9e4066Sahrens */ 2944fa9e4066Sahrens mask = vap->va_mask; 2945fa9e4066Sahrens 29460a586ceaSMark Shellenbaum if ((mask & (AT_UID | AT_GID))) { 29470b2a8171SMark Shellenbaum err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 29480b2a8171SMark Shellenbaum &xattr_obj, sizeof (xattr_obj)); 29490a586ceaSMark Shellenbaum 29500b2a8171SMark Shellenbaum if (err == 0 && xattr_obj) { 29510a586ceaSMark Shellenbaum err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 29520a586ceaSMark Shellenbaum if (err) 29530a586ceaSMark Shellenbaum goto out2; 29540a586ceaSMark Shellenbaum } 29550a586ceaSMark Shellenbaum if (mask & AT_UID) { 29560a586ceaSMark Shellenbaum new_uid = zfs_fuid_create(zfsvfs, 29570a586ceaSMark Shellenbaum (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 2958f1696b23SMark Shellenbaum if (new_uid != zp->z_uid && 29590a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 29600b2a8171SMark Shellenbaum if (attrzp) 29610b2a8171SMark Shellenbaum VN_RELE(ZTOV(attrzp)); 2962*be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 29630a586ceaSMark Shellenbaum goto out2; 29640a586ceaSMark Shellenbaum } 29650a586ceaSMark Shellenbaum } 29660a586ceaSMark Shellenbaum 29670a586ceaSMark Shellenbaum if (mask & AT_GID) { 29680a586ceaSMark Shellenbaum new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 29690a586ceaSMark Shellenbaum cr, ZFS_GROUP, &fuidp); 29700a586ceaSMark Shellenbaum if (new_gid != zp->z_gid && 29710a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 29720b2a8171SMark Shellenbaum if (attrzp) 29730b2a8171SMark Shellenbaum VN_RELE(ZTOV(attrzp)); 2974*be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 29750a586ceaSMark Shellenbaum goto out2; 29760a586ceaSMark Shellenbaum } 29770a586ceaSMark Shellenbaum } 29780a586ceaSMark Shellenbaum } 2979fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 2980fa9e4066Sahrens 2981fa9e4066Sahrens if (mask & AT_MODE) { 29820a586ceaSMark Shellenbaum uint64_t pmode = zp->z_mode; 29831412a1a2SMark Shellenbaum uint64_t acl_obj; 2984169cdae2Smarks new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2985fa9e4066Sahrens 298671dbfc28SPaul B. Henson if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 298771dbfc28SPaul B. Henson !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 2988*be6fd75aSMatthew Ahrens err = SET_ERROR(EPERM); 298971dbfc28SPaul B. Henson goto out; 299071dbfc28SPaul B. Henson } 299171dbfc28SPaul B. Henson 2992a3c49ce1SAlbert Lee if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 2993a3c49ce1SAlbert Lee goto out; 29940a586ceaSMark Shellenbaum 29951412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 29961412a1a2SMark Shellenbaum if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 29970a586ceaSMark Shellenbaum /* 29980a586ceaSMark Shellenbaum * Are we upgrading ACL from old V0 format 29990a586ceaSMark Shellenbaum * to V1 format? 30000a586ceaSMark Shellenbaum */ 30012bd6c4deSMark Shellenbaum if (zfsvfs->z_version >= ZPL_VERSION_FUID && 30021412a1a2SMark Shellenbaum zfs_znode_acl_version(zp) == 3003da6c28aaSamw ZFS_ACL_VERSION_INITIAL) { 30041412a1a2SMark Shellenbaum dmu_tx_hold_free(tx, acl_obj, 0, 3005da6c28aaSamw DMU_OBJECT_END); 3006da6c28aaSamw dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 30074c841f60Smarks 0, aclp->z_acl_bytes); 3008da6c28aaSamw } else { 30091412a1a2SMark Shellenbaum dmu_tx_hold_write(tx, acl_obj, 0, 30104c841f60Smarks aclp->z_acl_bytes); 30114c841f60Smarks } 30120a586ceaSMark Shellenbaum } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 30136d38e247Smarks dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 30146d38e247Smarks 0, aclp->z_acl_bytes); 3015da6c28aaSamw } 30161412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 30170a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 30180a586ceaSMark Shellenbaum } else { 30190a586ceaSMark Shellenbaum if ((mask & AT_XVATTR) && 30200a586ceaSMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 30210a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 30220a586ceaSMark Shellenbaum else 30230a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 3024fa9e4066Sahrens } 3025fa9e4066Sahrens 30260a586ceaSMark Shellenbaum if (attrzp) { 30270a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 3028d2443e76Smarks } 3029d2443e76Smarks 30300a586ceaSMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 30310a586ceaSMark Shellenbaum if (fuid_dirtied) 30320a586ceaSMark Shellenbaum zfs_fuid_txhold(zfsvfs, tx); 30330a586ceaSMark Shellenbaum 30340a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 30350a586ceaSMark Shellenbaum 30361209a471SNeil Perrin err = dmu_tx_assign(tx, TXG_NOWAIT); 3037fa9e4066Sahrens if (err) { 303814843421SMatthew Ahrens if (err == ERESTART) 30398a2f1b91Sahrens dmu_tx_wait(tx); 304014843421SMatthew Ahrens goto out; 3041fa9e4066Sahrens } 3042fa9e4066Sahrens 30430a586ceaSMark Shellenbaum count = 0; 3044fa9e4066Sahrens /* 3045fa9e4066Sahrens * Set each attribute requested. 3046fa9e4066Sahrens * We group settings according to the locks they need to acquire. 3047fa9e4066Sahrens * 3048fa9e4066Sahrens * Note: you cannot set ctime directly, although it will be 3049fa9e4066Sahrens * updated as a side-effect of calling this function. 3050fa9e4066Sahrens */ 3051fa9e4066Sahrens 30521412a1a2SMark Shellenbaum 30531412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 30541412a1a2SMark Shellenbaum mutex_enter(&zp->z_acl_lock); 3055fa9e4066Sahrens mutex_enter(&zp->z_lock); 3056fa9e4066Sahrens 3057db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3058db9986c7SMark Shellenbaum &zp->z_pflags, sizeof (zp->z_pflags)); 3059db9986c7SMark Shellenbaum 3060db9986c7SMark Shellenbaum if (attrzp) { 30611412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 30621412a1a2SMark Shellenbaum mutex_enter(&attrzp->z_acl_lock); 30630a586ceaSMark Shellenbaum mutex_enter(&attrzp->z_lock); 3064db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3065db9986c7SMark Shellenbaum SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3066db9986c7SMark Shellenbaum sizeof (attrzp->z_pflags)); 3067db9986c7SMark Shellenbaum } 30680a586ceaSMark Shellenbaum 306927dd1e87SMark Shellenbaum if (mask & (AT_UID|AT_GID)) { 307027dd1e87SMark Shellenbaum 307127dd1e87SMark Shellenbaum if (mask & AT_UID) { 307227dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 307327dd1e87SMark Shellenbaum &new_uid, sizeof (new_uid)); 3074f1696b23SMark Shellenbaum zp->z_uid = new_uid; 307527dd1e87SMark Shellenbaum if (attrzp) { 307627dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 307727dd1e87SMark Shellenbaum SA_ZPL_UID(zfsvfs), NULL, &new_uid, 307827dd1e87SMark Shellenbaum sizeof (new_uid)); 3079f1696b23SMark Shellenbaum attrzp->z_uid = new_uid; 308027dd1e87SMark Shellenbaum } 30810a586ceaSMark Shellenbaum } 30820a586ceaSMark Shellenbaum 308327dd1e87SMark Shellenbaum if (mask & AT_GID) { 308427dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 308527dd1e87SMark Shellenbaum NULL, &new_gid, sizeof (new_gid)); 3086f1696b23SMark Shellenbaum zp->z_gid = new_gid; 308727dd1e87SMark Shellenbaum if (attrzp) { 308827dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 308927dd1e87SMark Shellenbaum SA_ZPL_GID(zfsvfs), NULL, &new_gid, 309027dd1e87SMark Shellenbaum sizeof (new_gid)); 3091f1696b23SMark Shellenbaum attrzp->z_gid = new_gid; 309227dd1e87SMark Shellenbaum } 309327dd1e87SMark Shellenbaum } 309427dd1e87SMark Shellenbaum if (!(mask & AT_MODE)) { 309527dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 309627dd1e87SMark Shellenbaum NULL, &new_mode, sizeof (new_mode)); 309727dd1e87SMark Shellenbaum new_mode = zp->z_mode; 309827dd1e87SMark Shellenbaum } 309927dd1e87SMark Shellenbaum err = zfs_acl_chown_setattr(zp); 310027dd1e87SMark Shellenbaum ASSERT(err == 0); 31010a586ceaSMark Shellenbaum if (attrzp) { 310227dd1e87SMark Shellenbaum err = zfs_acl_chown_setattr(attrzp); 310327dd1e87SMark Shellenbaum ASSERT(err == 0); 31040a586ceaSMark Shellenbaum } 31050a586ceaSMark Shellenbaum } 31060a586ceaSMark Shellenbaum 3107fa9e4066Sahrens if (mask & AT_MODE) { 31080a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 31090a586ceaSMark Shellenbaum &new_mode, sizeof (new_mode)); 31100a586ceaSMark Shellenbaum zp->z_mode = new_mode; 311127dd1e87SMark Shellenbaum ASSERT3U((uintptr_t)aclp, !=, NULL); 311289459e17SMark Shellenbaum err = zfs_aclset_common(zp, aclp, cr, tx); 3113fb09f5aaSMadhav Suresh ASSERT0(err); 31140b2a8171SMark Shellenbaum if (zp->z_acl_cached) 31150b2a8171SMark Shellenbaum zfs_acl_free(zp->z_acl_cached); 31164929fd5eSTim Haley zp->z_acl_cached = aclp; 31174929fd5eSTim Haley aclp = NULL; 3118fa9e4066Sahrens } 3119fa9e4066Sahrens 3120d2443e76Smarks 31210a586ceaSMark Shellenbaum if (mask & AT_ATIME) { 31220a586ceaSMark Shellenbaum ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 31230a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 31240a586ceaSMark Shellenbaum &zp->z_atime, sizeof (zp->z_atime)); 3125d2443e76Smarks } 3126fa9e4066Sahrens 31270a586ceaSMark Shellenbaum if (mask & AT_MTIME) { 31280a586ceaSMark Shellenbaum ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 31290a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 31300a586ceaSMark Shellenbaum mtime, sizeof (mtime)); 3131d2443e76Smarks } 3132d2443e76Smarks 3133cdb0ab79Smaybee /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ 31340a586ceaSMark Shellenbaum if (mask & AT_SIZE && !(mask & AT_MTIME)) { 3135db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 3136db9986c7SMark Shellenbaum NULL, mtime, sizeof (mtime)); 31370a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 31380a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 31390a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 31400a586ceaSMark Shellenbaum B_TRUE); 31410a586ceaSMark Shellenbaum } else if (mask != 0) { 31420a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 31430a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 31440a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 31450a586ceaSMark Shellenbaum B_TRUE); 31460a586ceaSMark Shellenbaum if (attrzp) { 31470a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 31480a586ceaSMark Shellenbaum SA_ZPL_CTIME(zfsvfs), NULL, 31490a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 31500a586ceaSMark Shellenbaum zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 31510a586ceaSMark Shellenbaum mtime, ctime, B_TRUE); 31520a586ceaSMark Shellenbaum } 31530a586ceaSMark Shellenbaum } 3154da6c28aaSamw /* 3155da6c28aaSamw * Do this after setting timestamps to prevent timestamp 3156da6c28aaSamw * update from toggling bit 3157da6c28aaSamw */ 3158da6c28aaSamw 3159da6c28aaSamw if (xoap && (mask & AT_XVATTR)) { 3160ae4caef8SMark Shellenbaum 3161ae4caef8SMark Shellenbaum /* 3162ae4caef8SMark Shellenbaum * restore trimmed off masks 3163ae4caef8SMark Shellenbaum * so that return masks can be set for caller. 3164ae4caef8SMark Shellenbaum */ 3165ae4caef8SMark Shellenbaum 3166ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3167ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_APPENDONLY); 3168ae4caef8SMark Shellenbaum } 3169ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3170ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NOUNLINK); 3171ae4caef8SMark Shellenbaum } 3172ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3173ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3174ae4caef8SMark Shellenbaum } 3175ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3176ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NODUMP); 3177ae4caef8SMark Shellenbaum } 3178ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3179ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3180ae4caef8SMark Shellenbaum } 3181ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3182ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3183ae4caef8SMark Shellenbaum } 3184ae4caef8SMark Shellenbaum 31850a586ceaSMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3186da6c28aaSamw ASSERT(vp->v_type == VREG); 3187da6c28aaSamw 31880a586ceaSMark Shellenbaum zfs_xvattr_set(zp, xvap, tx); 3189da6c28aaSamw } 3190fa9e4066Sahrens 319189459e17SMark Shellenbaum if (fuid_dirtied) 319289459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 319389459e17SMark Shellenbaum 31945730cc9aSmaybee if (mask != 0) 3195da6c28aaSamw zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3196fa9e4066Sahrens 3197fa9e4066Sahrens mutex_exit(&zp->z_lock); 31981412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 31991412a1a2SMark Shellenbaum mutex_exit(&zp->z_acl_lock); 3200fa9e4066Sahrens 32011412a1a2SMark Shellenbaum if (attrzp) { 32021412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 32031412a1a2SMark Shellenbaum mutex_exit(&attrzp->z_acl_lock); 32041412a1a2SMark Shellenbaum mutex_exit(&attrzp->z_lock); 32051412a1a2SMark Shellenbaum } 320614843421SMatthew Ahrens out: 32070a586ceaSMark Shellenbaum if (err == 0 && attrzp) { 32080a586ceaSMark Shellenbaum err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 32090a586ceaSMark Shellenbaum xattr_count, tx); 32100a586ceaSMark Shellenbaum ASSERT(err2 == 0); 32110a586ceaSMark Shellenbaum } 32120a586ceaSMark Shellenbaum 3213d2443e76Smarks if (attrzp) 3214d2443e76Smarks VN_RELE(ZTOV(attrzp)); 32154929fd5eSTim Haley if (aclp) 32164929fd5eSTim Haley zfs_acl_free(aclp); 32174929fd5eSTim Haley 321814843421SMatthew Ahrens if (fuidp) { 321914843421SMatthew Ahrens zfs_fuid_info_free(fuidp); 322014843421SMatthew Ahrens fuidp = NULL; 322114843421SMatthew Ahrens } 322214843421SMatthew Ahrens 32230a586ceaSMark Shellenbaum if (err) { 322414843421SMatthew Ahrens dmu_tx_abort(tx); 32250a586ceaSMark Shellenbaum if (err == ERESTART) 32260a586ceaSMark Shellenbaum goto top; 32270a586ceaSMark Shellenbaum } else { 32280a586ceaSMark Shellenbaum err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 322914843421SMatthew Ahrens dmu_tx_commit(tx); 32300a586ceaSMark Shellenbaum } 323114843421SMatthew Ahrens 32320a586ceaSMark Shellenbaum out2: 323355da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 32345002558fSNeil Perrin zil_commit(zilog, 0); 323555da60b9SMark J Musante 3236fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3237fa9e4066Sahrens return (err); 3238fa9e4066Sahrens } 3239fa9e4066Sahrens 3240fa9e4066Sahrens typedef struct zfs_zlock { 3241fa9e4066Sahrens krwlock_t *zl_rwlock; /* lock we acquired */ 3242fa9e4066Sahrens znode_t *zl_znode; /* znode we held */ 3243fa9e4066Sahrens struct zfs_zlock *zl_next; /* next in list */ 3244fa9e4066Sahrens } zfs_zlock_t; 3245fa9e4066Sahrens 3246ff008e00Smaybee /* 3247ff008e00Smaybee * Drop locks and release vnodes that were held by zfs_rename_lock(). 3248ff008e00Smaybee */ 3249ff008e00Smaybee static void 3250ff008e00Smaybee zfs_rename_unlock(zfs_zlock_t **zlpp) 3251ff008e00Smaybee { 3252ff008e00Smaybee zfs_zlock_t *zl; 3253ff008e00Smaybee 3254ff008e00Smaybee while ((zl = *zlpp) != NULL) { 3255ff008e00Smaybee if (zl->zl_znode != NULL) 3256ff008e00Smaybee VN_RELE(ZTOV(zl->zl_znode)); 3257ff008e00Smaybee rw_exit(zl->zl_rwlock); 3258ff008e00Smaybee *zlpp = zl->zl_next; 3259ff008e00Smaybee kmem_free(zl, sizeof (*zl)); 3260ff008e00Smaybee } 3261ff008e00Smaybee } 3262ff008e00Smaybee 3263ff008e00Smaybee /* 3264ff008e00Smaybee * Search back through the directory tree, using the ".." entries. 3265ff008e00Smaybee * Lock each directory in the chain to prevent concurrent renames. 3266ff008e00Smaybee * Fail any attempt to move a directory into one of its own descendants. 3267ff008e00Smaybee * XXX - z_parent_lock can overlap with map or grow locks 3268ff008e00Smaybee */ 3269fa9e4066Sahrens static int 3270fa9e4066Sahrens zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3271fa9e4066Sahrens { 3272fa9e4066Sahrens zfs_zlock_t *zl; 3273feb08c6bSbillm znode_t *zp = tdzp; 3274fa9e4066Sahrens uint64_t rootid = zp->z_zfsvfs->z_root; 32750a586ceaSMark Shellenbaum uint64_t oidp = zp->z_id; 3276fa9e4066Sahrens krwlock_t *rwlp = &szp->z_parent_lock; 3277fa9e4066Sahrens krw_t rw = RW_WRITER; 3278fa9e4066Sahrens 3279fa9e4066Sahrens /* 3280fa9e4066Sahrens * First pass write-locks szp and compares to zp->z_id. 3281fa9e4066Sahrens * Later passes read-lock zp and compare to zp->z_parent. 3282fa9e4066Sahrens */ 3283fa9e4066Sahrens do { 3284ff008e00Smaybee if (!rw_tryenter(rwlp, rw)) { 3285ff008e00Smaybee /* 3286ff008e00Smaybee * Another thread is renaming in this path. 3287ff008e00Smaybee * Note that if we are a WRITER, we don't have any 3288ff008e00Smaybee * parent_locks held yet. 3289ff008e00Smaybee */ 3290ff008e00Smaybee if (rw == RW_READER && zp->z_id > szp->z_id) { 3291ff008e00Smaybee /* 3292ff008e00Smaybee * Drop our locks and restart 3293ff008e00Smaybee */ 3294ff008e00Smaybee zfs_rename_unlock(&zl); 3295ff008e00Smaybee *zlpp = NULL; 3296ff008e00Smaybee zp = tdzp; 32970a586ceaSMark Shellenbaum oidp = zp->z_id; 3298ff008e00Smaybee rwlp = &szp->z_parent_lock; 3299ff008e00Smaybee rw = RW_WRITER; 3300ff008e00Smaybee continue; 3301ff008e00Smaybee } else { 3302ff008e00Smaybee /* 3303ff008e00Smaybee * Wait for other thread to drop its locks 3304ff008e00Smaybee */ 3305ff008e00Smaybee rw_enter(rwlp, rw); 3306ff008e00Smaybee } 3307ff008e00Smaybee } 3308ff008e00Smaybee 3309fa9e4066Sahrens zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3310fa9e4066Sahrens zl->zl_rwlock = rwlp; 3311fa9e4066Sahrens zl->zl_znode = NULL; 3312fa9e4066Sahrens zl->zl_next = *zlpp; 3313fa9e4066Sahrens *zlpp = zl; 3314fa9e4066Sahrens 33150a586ceaSMark Shellenbaum if (oidp == szp->z_id) /* We're a descendant of szp */ 3316*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 3317fa9e4066Sahrens 33180a586ceaSMark Shellenbaum if (oidp == rootid) /* We've hit the top */ 3319fa9e4066Sahrens return (0); 3320fa9e4066Sahrens 3321fa9e4066Sahrens if (rw == RW_READER) { /* i.e. not the first pass */ 33220a586ceaSMark Shellenbaum int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); 3323fa9e4066Sahrens if (error) 3324fa9e4066Sahrens return (error); 3325fa9e4066Sahrens zl->zl_znode = zp; 3326fa9e4066Sahrens } 33270a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), 33280a586ceaSMark Shellenbaum &oidp, sizeof (oidp)); 3329fa9e4066Sahrens rwlp = &zp->z_parent_lock; 3330fa9e4066Sahrens rw = RW_READER; 3331fa9e4066Sahrens 3332fa9e4066Sahrens } while (zp->z_id != sdzp->z_id); 3333fa9e4066Sahrens 3334fa9e4066Sahrens return (0); 3335fa9e4066Sahrens } 3336fa9e4066Sahrens 3337fa9e4066Sahrens /* 3338fa9e4066Sahrens * Move an entry from the provided source directory to the target 3339fa9e4066Sahrens * directory. Change the entry name as indicated. 3340fa9e4066Sahrens * 3341fa9e4066Sahrens * IN: sdvp - Source directory containing the "old entry". 3342fa9e4066Sahrens * snm - Old entry name. 3343fa9e4066Sahrens * tdvp - Target directory to contain the "new entry". 3344fa9e4066Sahrens * tnm - New entry name. 3345fa9e4066Sahrens * cr - credentials of caller. 3346da6c28aaSamw * ct - caller context 3347da6c28aaSamw * flags - case flags 3348fa9e4066Sahrens * 3349fa9e4066Sahrens * RETURN: 0 if success 3350fa9e4066Sahrens * error code if failure 3351fa9e4066Sahrens * 3352fa9e4066Sahrens * Timestamps: 3353fa9e4066Sahrens * sdvp,tdvp - ctime|mtime updated 3354fa9e4066Sahrens */ 3355da6c28aaSamw /*ARGSUSED*/ 3356fa9e4066Sahrens static int 3357da6c28aaSamw zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3358da6c28aaSamw caller_context_t *ct, int flags) 3359fa9e4066Sahrens { 3360fa9e4066Sahrens znode_t *tdzp, *szp, *tzp; 3361fa9e4066Sahrens znode_t *sdzp = VTOZ(sdvp); 3362fa9e4066Sahrens zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 3363f18faf3fSek zilog_t *zilog; 3364fa9e4066Sahrens vnode_t *realvp; 3365fa9e4066Sahrens zfs_dirlock_t *sdl, *tdl; 3366fa9e4066Sahrens dmu_tx_t *tx; 3367fa9e4066Sahrens zfs_zlock_t *zl; 3368da6c28aaSamw int cmp, serr, terr; 3369da6c28aaSamw int error = 0; 3370da6c28aaSamw int zflg = 0; 3371fa9e4066Sahrens 33723cb34c60Sahrens ZFS_ENTER(zfsvfs); 33733cb34c60Sahrens ZFS_VERIFY_ZP(sdzp); 3374f18faf3fSek zilog = zfsvfs->z_log; 3375fa9e4066Sahrens 3376fa9e4066Sahrens /* 3377fa9e4066Sahrens * Make sure we have the real vp for the target directory. 3378fa9e4066Sahrens */ 3379da6c28aaSamw if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3380fa9e4066Sahrens tdvp = realvp; 3381fa9e4066Sahrens 3382d39ee142SMark Shellenbaum if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) { 3383fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3384*be6fd75aSMatthew Ahrens return (SET_ERROR(EXDEV)); 3385fa9e4066Sahrens } 3386fa9e4066Sahrens 3387fa9e4066Sahrens tdzp = VTOZ(tdvp); 33883cb34c60Sahrens ZFS_VERIFY_ZP(tdzp); 3389de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(tnm, 3390da6c28aaSamw strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3391da6c28aaSamw ZFS_EXIT(zfsvfs); 3392*be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 3393da6c28aaSamw } 3394da6c28aaSamw 3395da6c28aaSamw if (flags & FIGNORECASE) 3396da6c28aaSamw zflg |= ZCILOOK; 3397da6c28aaSamw 3398fa9e4066Sahrens top: 3399fa9e4066Sahrens szp = NULL; 3400fa9e4066Sahrens tzp = NULL; 3401fa9e4066Sahrens zl = NULL; 3402fa9e4066Sahrens 3403fa9e4066Sahrens /* 3404fa9e4066Sahrens * This is to prevent the creation of links into attribute space 3405fa9e4066Sahrens * by renaming a linked file into/outof an attribute directory. 3406fa9e4066Sahrens * See the comment in zfs_link() for why this is considered bad. 3407fa9e4066Sahrens */ 34080a586ceaSMark Shellenbaum if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3409fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3410*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 3411fa9e4066Sahrens } 3412fa9e4066Sahrens 3413fa9e4066Sahrens /* 3414fa9e4066Sahrens * Lock source and target directory entries. To prevent deadlock, 3415fa9e4066Sahrens * a lock ordering must be defined. We lock the directory with 3416fa9e4066Sahrens * the smallest object id first, or if it's a tie, the one with 3417fa9e4066Sahrens * the lexically first name. 3418fa9e4066Sahrens */ 3419fa9e4066Sahrens if (sdzp->z_id < tdzp->z_id) { 3420fa9e4066Sahrens cmp = -1; 3421fa9e4066Sahrens } else if (sdzp->z_id > tdzp->z_id) { 3422fa9e4066Sahrens cmp = 1; 3423fa9e4066Sahrens } else { 3424da6c28aaSamw /* 3425da6c28aaSamw * First compare the two name arguments without 3426da6c28aaSamw * considering any case folding. 3427da6c28aaSamw */ 3428da6c28aaSamw int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3429da6c28aaSamw 3430da6c28aaSamw cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3431de8267e0Stimh ASSERT(error == 0 || !zfsvfs->z_utf8); 3432fa9e4066Sahrens if (cmp == 0) { 3433fa9e4066Sahrens /* 3434fa9e4066Sahrens * POSIX: "If the old argument and the new argument 3435fa9e4066Sahrens * both refer to links to the same existing file, 3436fa9e4066Sahrens * the rename() function shall return successfully 3437fa9e4066Sahrens * and perform no other action." 3438fa9e4066Sahrens */ 3439fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3440fa9e4066Sahrens return (0); 3441fa9e4066Sahrens } 3442da6c28aaSamw /* 3443da6c28aaSamw * If the file system is case-folding, then we may 3444da6c28aaSamw * have some more checking to do. A case-folding file 3445da6c28aaSamw * system is either supporting mixed case sensitivity 3446da6c28aaSamw * access or is completely case-insensitive. Note 3447da6c28aaSamw * that the file system is always case preserving. 3448da6c28aaSamw * 3449da6c28aaSamw * In mixed sensitivity mode case sensitive behavior 3450da6c28aaSamw * is the default. FIGNORECASE must be used to 3451da6c28aaSamw * explicitly request case insensitive behavior. 3452da6c28aaSamw * 3453da6c28aaSamw * If the source and target names provided differ only 3454da6c28aaSamw * by case (e.g., a request to rename 'tim' to 'Tim'), 3455da6c28aaSamw * we will treat this as a special case in the 3456da6c28aaSamw * case-insensitive mode: as long as the source name 3457da6c28aaSamw * is an exact match, we will allow this to proceed as 3458da6c28aaSamw * a name-change request. 3459da6c28aaSamw */ 3460de8267e0Stimh if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3461de8267e0Stimh (zfsvfs->z_case == ZFS_CASE_MIXED && 3462de8267e0Stimh flags & FIGNORECASE)) && 3463da6c28aaSamw u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3464da6c28aaSamw &error) == 0) { 3465da6c28aaSamw /* 3466da6c28aaSamw * case preserving rename request, require exact 3467da6c28aaSamw * name matches 3468da6c28aaSamw */ 3469da6c28aaSamw zflg |= ZCIEXACT; 3470da6c28aaSamw zflg &= ~ZCILOOK; 3471da6c28aaSamw } 3472fa9e4066Sahrens } 3473da6c28aaSamw 3474afefc7e4SSanjeev Bagewadi /* 3475afefc7e4SSanjeev Bagewadi * If the source and destination directories are the same, we should 3476afefc7e4SSanjeev Bagewadi * grab the z_name_lock of that directory only once. 3477afefc7e4SSanjeev Bagewadi */ 3478afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) { 3479afefc7e4SSanjeev Bagewadi zflg |= ZHAVELOCK; 3480afefc7e4SSanjeev Bagewadi rw_enter(&sdzp->z_name_lock, RW_READER); 3481afefc7e4SSanjeev Bagewadi } 3482afefc7e4SSanjeev Bagewadi 3483fa9e4066Sahrens if (cmp < 0) { 3484da6c28aaSamw serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3485da6c28aaSamw ZEXISTS | zflg, NULL, NULL); 3486da6c28aaSamw terr = zfs_dirent_lock(&tdl, 3487da6c28aaSamw tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3488fa9e4066Sahrens } else { 3489da6c28aaSamw terr = zfs_dirent_lock(&tdl, 3490da6c28aaSamw tdzp, tnm, &tzp, zflg, NULL, NULL); 3491da6c28aaSamw serr = zfs_dirent_lock(&sdl, 3492da6c28aaSamw sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3493da6c28aaSamw NULL, NULL); 3494fa9e4066Sahrens } 3495fa9e4066Sahrens 3496fa9e4066Sahrens if (serr) { 3497fa9e4066Sahrens /* 3498fa9e4066Sahrens * Source entry invalid or not there. 3499fa9e4066Sahrens */ 3500fa9e4066Sahrens if (!terr) { 3501fa9e4066Sahrens zfs_dirent_unlock(tdl); 3502fa9e4066Sahrens if (tzp) 3503fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 3504fa9e4066Sahrens } 3505afefc7e4SSanjeev Bagewadi 3506afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3507afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3508afefc7e4SSanjeev Bagewadi 3509fa9e4066Sahrens if (strcmp(snm, "..") == 0) 3510*be6fd75aSMatthew Ahrens serr = SET_ERROR(EINVAL); 3511fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3512fa9e4066Sahrens return (serr); 3513fa9e4066Sahrens } 3514fa9e4066Sahrens if (terr) { 3515fa9e4066Sahrens zfs_dirent_unlock(sdl); 3516fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3517afefc7e4SSanjeev Bagewadi 3518afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3519afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3520afefc7e4SSanjeev Bagewadi 3521fa9e4066Sahrens if (strcmp(tnm, "..") == 0) 3522*be6fd75aSMatthew Ahrens terr = SET_ERROR(EINVAL); 3523fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3524fa9e4066Sahrens return (terr); 3525fa9e4066Sahrens } 3526fa9e4066Sahrens 3527fa9e4066Sahrens /* 3528fa9e4066Sahrens * Must have write access at the source to remove the old entry 3529fa9e4066Sahrens * and write access at the target to create the new entry. 3530fa9e4066Sahrens * Note that if target and source are the same, this can be 3531fa9e4066Sahrens * done in a single check. 3532fa9e4066Sahrens */ 3533fa9e4066Sahrens 3534fa9e4066Sahrens if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3535fa9e4066Sahrens goto out; 3536fa9e4066Sahrens 3537fa9e4066Sahrens if (ZTOV(szp)->v_type == VDIR) { 3538fa9e4066Sahrens /* 3539fa9e4066Sahrens * Check to make sure rename is valid. 3540fa9e4066Sahrens * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3541fa9e4066Sahrens */ 3542fa9e4066Sahrens if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3543fa9e4066Sahrens goto out; 3544fa9e4066Sahrens } 3545fa9e4066Sahrens 3546fa9e4066Sahrens /* 3547fa9e4066Sahrens * Does target exist? 3548fa9e4066Sahrens */ 3549fa9e4066Sahrens if (tzp) { 3550fa9e4066Sahrens /* 3551fa9e4066Sahrens * Source and target must be the same type. 3552fa9e4066Sahrens */ 3553fa9e4066Sahrens if (ZTOV(szp)->v_type == VDIR) { 3554fa9e4066Sahrens if (ZTOV(tzp)->v_type != VDIR) { 3555*be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTDIR); 3556fa9e4066Sahrens goto out; 3557fa9e4066Sahrens } 3558fa9e4066Sahrens } else { 3559fa9e4066Sahrens if (ZTOV(tzp)->v_type == VDIR) { 3560*be6fd75aSMatthew Ahrens error = SET_ERROR(EISDIR); 3561fa9e4066Sahrens goto out; 3562fa9e4066Sahrens } 3563fa9e4066Sahrens } 3564fa9e4066Sahrens /* 3565fa9e4066Sahrens * POSIX dictates that when the source and target 3566fa9e4066Sahrens * entries refer to the same file object, rename 3567fa9e4066Sahrens * must do nothing and exit without error. 3568fa9e4066Sahrens */ 3569fa9e4066Sahrens if (szp->z_id == tzp->z_id) { 3570fa9e4066Sahrens error = 0; 3571fa9e4066Sahrens goto out; 3572fa9e4066Sahrens } 3573fa9e4066Sahrens } 3574fa9e4066Sahrens 3575da6c28aaSamw vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3576fa9e4066Sahrens if (tzp) 3577da6c28aaSamw vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3578df2381bfSpraks 3579df2381bfSpraks /* 3580df2381bfSpraks * notify the target directory if it is not the same 3581df2381bfSpraks * as source directory. 3582df2381bfSpraks */ 3583df2381bfSpraks if (tdvp != sdvp) { 3584da6c28aaSamw vnevent_rename_dest_dir(tdvp, ct); 3585df2381bfSpraks } 3586fa9e4066Sahrens 3587fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 35880a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 35890a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3590ea8dc4b6Seschrock dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3591ea8dc4b6Seschrock dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 35920a586ceaSMark Shellenbaum if (sdzp != tdzp) { 35930a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 35940a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, tdzp); 35950a586ceaSMark Shellenbaum } 35960a586ceaSMark Shellenbaum if (tzp) { 35970a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 35980a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, tzp); 35990a586ceaSMark Shellenbaum } 36000a586ceaSMark Shellenbaum 36010a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, szp); 3602893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 36031209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 3604fa9e4066Sahrens if (error) { 3605fa9e4066Sahrens if (zl != NULL) 3606fa9e4066Sahrens zfs_rename_unlock(&zl); 3607fa9e4066Sahrens zfs_dirent_unlock(sdl); 3608fa9e4066Sahrens zfs_dirent_unlock(tdl); 3609afefc7e4SSanjeev Bagewadi 3610afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3611afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3612afefc7e4SSanjeev Bagewadi 3613fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3614fa9e4066Sahrens if (tzp) 3615fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 36161209a471SNeil Perrin if (error == ERESTART) { 36178a2f1b91Sahrens dmu_tx_wait(tx); 36188a2f1b91Sahrens dmu_tx_abort(tx); 3619fa9e4066Sahrens goto top; 3620fa9e4066Sahrens } 36218a2f1b91Sahrens dmu_tx_abort(tx); 3622fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3623fa9e4066Sahrens return (error); 3624fa9e4066Sahrens } 3625fa9e4066Sahrens 3626fa9e4066Sahrens if (tzp) /* Attempt to remove the existing target */ 3627da6c28aaSamw error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3628fa9e4066Sahrens 3629fa9e4066Sahrens if (error == 0) { 3630fa9e4066Sahrens error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3631fa9e4066Sahrens if (error == 0) { 36320a586ceaSMark Shellenbaum szp->z_pflags |= ZFS_AV_MODIFIED; 36330a586ceaSMark Shellenbaum 36340a586ceaSMark Shellenbaum error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 36350a586ceaSMark Shellenbaum (void *)&szp->z_pflags, sizeof (uint64_t), tx); 3636fb09f5aaSMadhav Suresh ASSERT0(error); 3637da6c28aaSamw 3638fa9e4066Sahrens error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 36396ed5e6abSSam Falkner if (error == 0) { 36406ed5e6abSSam Falkner zfs_log_rename(zilog, tx, TX_RENAME | 364191de656bSNeil Perrin (flags & FIGNORECASE ? TX_CI : 0), sdzp, 364291de656bSNeil Perrin sdl->dl_name, tdzp, tdl->dl_name, szp); 364351ece835Seschrock 36446ed5e6abSSam Falkner /* 36456ed5e6abSSam Falkner * Update path information for the target vnode 36466ed5e6abSSam Falkner */ 36476ed5e6abSSam Falkner vn_renamepath(tdvp, ZTOV(szp), tnm, 36486ed5e6abSSam Falkner strlen(tnm)); 36496ed5e6abSSam Falkner } else { 36506ed5e6abSSam Falkner /* 36516ed5e6abSSam Falkner * At this point, we have successfully created 36526ed5e6abSSam Falkner * the target name, but have failed to remove 36536ed5e6abSSam Falkner * the source name. Since the create was done 36546ed5e6abSSam Falkner * with the ZRENAMING flag, there are 36556ed5e6abSSam Falkner * complications; for one, the link count is 36566ed5e6abSSam Falkner * wrong. The easiest way to deal with this 36576ed5e6abSSam Falkner * is to remove the newly created target, and 36586ed5e6abSSam Falkner * return the original error. This must 36596ed5e6abSSam Falkner * succeed; fortunately, it is very unlikely to 36606ed5e6abSSam Falkner * fail, since we just created it. 36616ed5e6abSSam Falkner */ 36626ed5e6abSSam Falkner VERIFY3U(zfs_link_destroy(tdl, szp, tx, 36636ed5e6abSSam Falkner ZRENAMING, NULL), ==, 0); 36646ed5e6abSSam Falkner } 3665fa9e4066Sahrens } 3666fa9e4066Sahrens } 3667fa9e4066Sahrens 3668fa9e4066Sahrens dmu_tx_commit(tx); 3669fa9e4066Sahrens out: 3670fa9e4066Sahrens if (zl != NULL) 3671fa9e4066Sahrens zfs_rename_unlock(&zl); 3672fa9e4066Sahrens 3673fa9e4066Sahrens zfs_dirent_unlock(sdl); 3674fa9e4066Sahrens zfs_dirent_unlock(tdl); 3675fa9e4066Sahrens 3676afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3677afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3678afefc7e4SSanjeev Bagewadi 3679afefc7e4SSanjeev Bagewadi 3680fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3681fa9e4066Sahrens if (tzp) 3682fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 3683fa9e4066Sahrens 368455da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 36855002558fSNeil Perrin zil_commit(zilog, 0); 368655da60b9SMark J Musante 3687fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3688fa9e4066Sahrens return (error); 3689fa9e4066Sahrens } 3690fa9e4066Sahrens 3691fa9e4066Sahrens /* 3692fa9e4066Sahrens * Insert the indicated symbolic reference entry into the directory. 3693fa9e4066Sahrens * 3694fa9e4066Sahrens * IN: dvp - Directory to contain new symbolic link. 3695fa9e4066Sahrens * link - Name for new symlink entry. 3696fa9e4066Sahrens * vap - Attributes of new entry. 3697fa9e4066Sahrens * target - Target path of new symlink. 3698fa9e4066Sahrens * cr - credentials of caller. 3699da6c28aaSamw * ct - caller context 3700da6c28aaSamw * flags - case flags 3701fa9e4066Sahrens * 3702fa9e4066Sahrens * RETURN: 0 if success 3703fa9e4066Sahrens * error code if failure 3704fa9e4066Sahrens * 3705fa9e4066Sahrens * Timestamps: 3706fa9e4066Sahrens * dvp - ctime|mtime updated 3707fa9e4066Sahrens */ 3708da6c28aaSamw /*ARGSUSED*/ 3709fa9e4066Sahrens static int 3710da6c28aaSamw zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr, 3711da6c28aaSamw caller_context_t *ct, int flags) 3712fa9e4066Sahrens { 3713fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 3714fa9e4066Sahrens zfs_dirlock_t *dl; 3715fa9e4066Sahrens dmu_tx_t *tx; 3716fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3717f18faf3fSek zilog_t *zilog; 37180a586ceaSMark Shellenbaum uint64_t len = strlen(link); 3719fa9e4066Sahrens int error; 3720da6c28aaSamw int zflg = ZNEW; 372189459e17SMark Shellenbaum zfs_acl_ids_t acl_ids; 372289459e17SMark Shellenbaum boolean_t fuid_dirtied; 37230a586ceaSMark Shellenbaum uint64_t txtype = TX_SYMLINK; 3724fa9e4066Sahrens 3725fa9e4066Sahrens ASSERT(vap->va_type == VLNK); 3726fa9e4066Sahrens 37273cb34c60Sahrens ZFS_ENTER(zfsvfs); 37283cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 3729f18faf3fSek zilog = zfsvfs->z_log; 3730da6c28aaSamw 3731de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 3732da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3733da6c28aaSamw ZFS_EXIT(zfsvfs); 3734*be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 3735da6c28aaSamw } 3736da6c28aaSamw if (flags & FIGNORECASE) 3737da6c28aaSamw zflg |= ZCILOOK; 3738fa9e4066Sahrens 3739fa9e4066Sahrens if (len > MAXPATHLEN) { 3740fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3741*be6fd75aSMatthew Ahrens return (SET_ERROR(ENAMETOOLONG)); 3742fa9e4066Sahrens } 3743fa9e4066Sahrens 3744c8c24165SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, 3745c8c24165SMark Shellenbaum vap, cr, NULL, &acl_ids)) != 0) { 3746c8c24165SMark Shellenbaum ZFS_EXIT(zfsvfs); 3747c8c24165SMark Shellenbaum return (error); 3748c8c24165SMark Shellenbaum } 3749c8c24165SMark Shellenbaum top: 3750fa9e4066Sahrens /* 3751fa9e4066Sahrens * Attempt to lock directory; fail if entry already exists. 3752fa9e4066Sahrens */ 3753da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 3754da6c28aaSamw if (error) { 3755c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 3756c8c24165SMark Shellenbaum ZFS_EXIT(zfsvfs); 3757c8c24165SMark Shellenbaum return (error); 3758c8c24165SMark Shellenbaum } 3759c8c24165SMark Shellenbaum 3760c8c24165SMark Shellenbaum if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3761c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 37628e303ae0SMark Shellenbaum zfs_dirent_unlock(dl); 3763fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3764fa9e4066Sahrens return (error); 3765fa9e4066Sahrens } 3766fa9e4066Sahrens 376714843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 376814843421SMatthew Ahrens zfs_acl_ids_free(&acl_ids); 376914843421SMatthew Ahrens zfs_dirent_unlock(dl); 377014843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 3771*be6fd75aSMatthew Ahrens return (SET_ERROR(EDQUOT)); 377214843421SMatthew Ahrens } 3773fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 377489459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 3775fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 3776ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 37770a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 37780a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE + len); 37790a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 37800a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 37810a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 37820a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes); 37830a586ceaSMark Shellenbaum } 378414843421SMatthew Ahrens if (fuid_dirtied) 378514843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 37861209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 3787fa9e4066Sahrens if (error) { 3788fa9e4066Sahrens zfs_dirent_unlock(dl); 37891209a471SNeil Perrin if (error == ERESTART) { 37908a2f1b91Sahrens dmu_tx_wait(tx); 37918a2f1b91Sahrens dmu_tx_abort(tx); 3792fa9e4066Sahrens goto top; 3793fa9e4066Sahrens } 3794c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 37958a2f1b91Sahrens dmu_tx_abort(tx); 3796fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3797fa9e4066Sahrens return (error); 3798fa9e4066Sahrens } 3799fa9e4066Sahrens 3800fa9e4066Sahrens /* 3801fa9e4066Sahrens * Create a new object for the symlink. 38020a586ceaSMark Shellenbaum * for version 4 ZPL datsets the symlink will be an SA attribute 3803fa9e4066Sahrens */ 38040a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 3805fa9e4066Sahrens 38060a586ceaSMark Shellenbaum if (fuid_dirtied) 38070a586ceaSMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 3808fa9e4066Sahrens 38091412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 38100a586ceaSMark Shellenbaum if (zp->z_is_sa) 38110a586ceaSMark Shellenbaum error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 38120a586ceaSMark Shellenbaum link, len, tx); 38130a586ceaSMark Shellenbaum else 38140a586ceaSMark Shellenbaum zfs_sa_symlink(zp, link, len, tx); 38151412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 3816fa9e4066Sahrens 38170a586ceaSMark Shellenbaum zp->z_size = len; 38180a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 38190a586ceaSMark Shellenbaum &zp->z_size, sizeof (zp->z_size), tx); 3820fa9e4066Sahrens /* 3821fa9e4066Sahrens * Insert the new object into the directory. 3822fa9e4066Sahrens */ 3823fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 38240a586ceaSMark Shellenbaum 38250a586ceaSMark Shellenbaum if (flags & FIGNORECASE) 38260a586ceaSMark Shellenbaum txtype |= TX_CI; 38270a586ceaSMark Shellenbaum zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 382889459e17SMark Shellenbaum 382989459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 3830fa9e4066Sahrens 3831fa9e4066Sahrens dmu_tx_commit(tx); 3832fa9e4066Sahrens 3833fa9e4066Sahrens zfs_dirent_unlock(dl); 3834fa9e4066Sahrens 3835fa9e4066Sahrens VN_RELE(ZTOV(zp)); 3836fa9e4066Sahrens 383755da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 38385002558fSNeil Perrin zil_commit(zilog, 0); 383955da60b9SMark J Musante 3840fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3841fa9e4066Sahrens return (error); 3842fa9e4066Sahrens } 3843fa9e4066Sahrens 3844fa9e4066Sahrens /* 3845fa9e4066Sahrens * Return, in the buffer contained in the provided uio structure, 3846fa9e4066Sahrens * the symbolic path referred to by vp. 3847fa9e4066Sahrens * 3848fa9e4066Sahrens * IN: vp - vnode of symbolic link. 3849fa9e4066Sahrens * uoip - structure to contain the link path. 3850fa9e4066Sahrens * cr - credentials of caller. 3851da6c28aaSamw * ct - caller context 3852fa9e4066Sahrens * 3853fa9e4066Sahrens * OUT: uio - structure to contain the link path. 3854fa9e4066Sahrens * 3855fa9e4066Sahrens * RETURN: 0 if success 3856fa9e4066Sahrens * error code if failure 3857fa9e4066Sahrens * 3858fa9e4066Sahrens * Timestamps: 3859fa9e4066Sahrens * vp - atime updated 3860fa9e4066Sahrens */ 3861fa9e4066Sahrens /* ARGSUSED */ 3862fa9e4066Sahrens static int 3863da6c28aaSamw zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 3864fa9e4066Sahrens { 3865fa9e4066Sahrens znode_t *zp = VTOZ(vp); 3866fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3867fa9e4066Sahrens int error; 3868fa9e4066Sahrens 38693cb34c60Sahrens ZFS_ENTER(zfsvfs); 38703cb34c60Sahrens ZFS_VERIFY_ZP(zp); 3871fa9e4066Sahrens 38721412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 38730a586ceaSMark Shellenbaum if (zp->z_is_sa) 38740a586ceaSMark Shellenbaum error = sa_lookup_uio(zp->z_sa_hdl, 38750a586ceaSMark Shellenbaum SA_ZPL_SYMLINK(zfsvfs), uio); 38760a586ceaSMark Shellenbaum else 38770a586ceaSMark Shellenbaum error = zfs_sa_readlink(zp, uio); 38781412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 3879fa9e4066Sahrens 3880fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 38810a586ceaSMark Shellenbaum 3882fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3883fa9e4066Sahrens return (error); 3884fa9e4066Sahrens } 3885fa9e4066Sahrens 3886fa9e4066Sahrens /* 3887fa9e4066Sahrens * Insert a new entry into directory tdvp referencing svp. 3888fa9e4066Sahrens * 3889fa9e4066Sahrens * IN: tdvp - Directory to contain new entry. 3890fa9e4066Sahrens * svp - vnode of new entry. 3891fa9e4066Sahrens * name - name of new entry. 3892fa9e4066Sahrens * cr - credentials of caller. 3893da6c28aaSamw * ct - caller context 3894fa9e4066Sahrens * 3895fa9e4066Sahrens * RETURN: 0 if success 3896fa9e4066Sahrens * error code if failure 3897fa9e4066Sahrens * 3898fa9e4066Sahrens * Timestamps: 3899fa9e4066Sahrens * tdvp - ctime|mtime updated 3900fa9e4066Sahrens * svp - ctime updated 3901fa9e4066Sahrens */ 3902fa9e4066Sahrens /* ARGSUSED */ 3903fa9e4066Sahrens static int 3904da6c28aaSamw zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 3905da6c28aaSamw caller_context_t *ct, int flags) 3906fa9e4066Sahrens { 3907fa9e4066Sahrens znode_t *dzp = VTOZ(tdvp); 3908fa9e4066Sahrens znode_t *tzp, *szp; 3909fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3910f18faf3fSek zilog_t *zilog; 3911fa9e4066Sahrens zfs_dirlock_t *dl; 3912fa9e4066Sahrens dmu_tx_t *tx; 3913fa9e4066Sahrens vnode_t *realvp; 3914fa9e4066Sahrens int error; 3915da6c28aaSamw int zf = ZNEW; 3916d39ee142SMark Shellenbaum uint64_t parent; 3917f1696b23SMark Shellenbaum uid_t owner; 3918fa9e4066Sahrens 3919fa9e4066Sahrens ASSERT(tdvp->v_type == VDIR); 3920fa9e4066Sahrens 39213cb34c60Sahrens ZFS_ENTER(zfsvfs); 39223cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 3923f18faf3fSek zilog = zfsvfs->z_log; 3924fa9e4066Sahrens 3925da6c28aaSamw if (VOP_REALVP(svp, &realvp, ct) == 0) 3926fa9e4066Sahrens svp = realvp; 3927fa9e4066Sahrens 3928d39ee142SMark Shellenbaum /* 3929d39ee142SMark Shellenbaum * POSIX dictates that we return EPERM here. 3930d39ee142SMark Shellenbaum * Better choices include ENOTSUP or EISDIR. 3931d39ee142SMark Shellenbaum */ 3932d39ee142SMark Shellenbaum if (svp->v_type == VDIR) { 3933d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 3934*be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 3935d39ee142SMark Shellenbaum } 3936d39ee142SMark Shellenbaum 3937d39ee142SMark Shellenbaum if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) { 3938fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3939*be6fd75aSMatthew Ahrens return (SET_ERROR(EXDEV)); 3940fa9e4066Sahrens } 3941d39ee142SMark Shellenbaum 39423cb34c60Sahrens szp = VTOZ(svp); 39433cb34c60Sahrens ZFS_VERIFY_ZP(szp); 3944fa9e4066Sahrens 3945d39ee142SMark Shellenbaum /* Prevent links to .zfs/shares files */ 3946d39ee142SMark Shellenbaum 3947d39ee142SMark Shellenbaum if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 3948d39ee142SMark Shellenbaum &parent, sizeof (uint64_t))) != 0) { 3949d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 3950d39ee142SMark Shellenbaum return (error); 3951d39ee142SMark Shellenbaum } 3952d39ee142SMark Shellenbaum if (parent == zfsvfs->z_shares_dir) { 3953d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 3954*be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 3955d39ee142SMark Shellenbaum } 3956d39ee142SMark Shellenbaum 3957de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, 3958da6c28aaSamw strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3959da6c28aaSamw ZFS_EXIT(zfsvfs); 3960*be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 3961da6c28aaSamw } 3962da6c28aaSamw if (flags & FIGNORECASE) 3963da6c28aaSamw zf |= ZCILOOK; 3964da6c28aaSamw 3965fa9e4066Sahrens /* 3966fa9e4066Sahrens * We do not support links between attributes and non-attributes 3967fa9e4066Sahrens * because of the potential security risk of creating links 3968fa9e4066Sahrens * into "normal" file space in order to circumvent restrictions 3969fa9e4066Sahrens * imposed in attribute space. 3970fa9e4066Sahrens */ 39710a586ceaSMark Shellenbaum if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 3972fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3973*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 3974fa9e4066Sahrens } 3975fa9e4066Sahrens 3976fa9e4066Sahrens 3977f1696b23SMark Shellenbaum owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); 3978f1696b23SMark Shellenbaum if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) { 3979fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3980*be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 3981fa9e4066Sahrens } 3982fa9e4066Sahrens 3983da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3984fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3985fa9e4066Sahrens return (error); 3986fa9e4066Sahrens } 3987fa9e4066Sahrens 3988d39ee142SMark Shellenbaum top: 3989fa9e4066Sahrens /* 3990fa9e4066Sahrens * Attempt to lock directory; fail if entry already exists. 3991fa9e4066Sahrens */ 3992da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 3993da6c28aaSamw if (error) { 3994fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3995fa9e4066Sahrens return (error); 3996fa9e4066Sahrens } 3997fa9e4066Sahrens 3998fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 39990a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 4000ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 40010a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, szp); 40020a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 40031209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 4004fa9e4066Sahrens if (error) { 4005fa9e4066Sahrens zfs_dirent_unlock(dl); 40061209a471SNeil Perrin if (error == ERESTART) { 40078a2f1b91Sahrens dmu_tx_wait(tx); 40088a2f1b91Sahrens dmu_tx_abort(tx); 4009fa9e4066Sahrens goto top; 4010fa9e4066Sahrens } 40118a2f1b91Sahrens dmu_tx_abort(tx); 4012fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4013fa9e4066Sahrens return (error); 4014fa9e4066Sahrens } 4015fa9e4066Sahrens 4016fa9e4066Sahrens error = zfs_link_create(dl, szp, tx, 0); 4017fa9e4066Sahrens 4018da6c28aaSamw if (error == 0) { 4019da6c28aaSamw uint64_t txtype = TX_LINK; 4020da6c28aaSamw if (flags & FIGNORECASE) 4021da6c28aaSamw txtype |= TX_CI; 4022da6c28aaSamw zfs_log_link(zilog, tx, txtype, dzp, szp, name); 4023da6c28aaSamw } 4024fa9e4066Sahrens 4025fa9e4066Sahrens dmu_tx_commit(tx); 4026fa9e4066Sahrens 4027fa9e4066Sahrens zfs_dirent_unlock(dl); 4028fa9e4066Sahrens 4029df2381bfSpraks if (error == 0) { 4030da6c28aaSamw vnevent_link(svp, ct); 4031df2381bfSpraks } 4032df2381bfSpraks 403355da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 40345002558fSNeil Perrin zil_commit(zilog, 0); 403555da60b9SMark J Musante 4036fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4037fa9e4066Sahrens return (error); 4038fa9e4066Sahrens } 4039fa9e4066Sahrens 4040fa9e4066Sahrens /* 4041fa9e4066Sahrens * zfs_null_putapage() is used when the file system has been force 4042fa9e4066Sahrens * unmounted. It just drops the pages. 4043fa9e4066Sahrens */ 4044fa9e4066Sahrens /* ARGSUSED */ 4045fa9e4066Sahrens static int 4046fa9e4066Sahrens zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4047fa9e4066Sahrens size_t *lenp, int flags, cred_t *cr) 4048fa9e4066Sahrens { 4049fa9e4066Sahrens pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 4050fa9e4066Sahrens return (0); 4051fa9e4066Sahrens } 4052fa9e4066Sahrens 405344eda4d7Smaybee /* 405444eda4d7Smaybee * Push a page out to disk, klustering if possible. 405544eda4d7Smaybee * 405644eda4d7Smaybee * IN: vp - file to push page to. 405744eda4d7Smaybee * pp - page to push. 405844eda4d7Smaybee * flags - additional flags. 405944eda4d7Smaybee * cr - credentials of caller. 406044eda4d7Smaybee * 406144eda4d7Smaybee * OUT: offp - start of range pushed. 406244eda4d7Smaybee * lenp - len of range pushed. 406344eda4d7Smaybee * 406444eda4d7Smaybee * RETURN: 0 if success 406544eda4d7Smaybee * error code if failure 406644eda4d7Smaybee * 406744eda4d7Smaybee * NOTE: callers must have locked the page to be pushed. On 406844eda4d7Smaybee * exit, the page (and all other pages in the kluster) must be 406944eda4d7Smaybee * unlocked. 407044eda4d7Smaybee */ 4071fa9e4066Sahrens /* ARGSUSED */ 4072fa9e4066Sahrens static int 4073fa9e4066Sahrens zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4074fa9e4066Sahrens size_t *lenp, int flags, cred_t *cr) 4075fa9e4066Sahrens { 4076fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4077fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4078fa9e4066Sahrens dmu_tx_t *tx; 407944eda4d7Smaybee u_offset_t off, koff; 408044eda4d7Smaybee size_t len, klen; 4081fa9e4066Sahrens int err; 4082fa9e4066Sahrens 4083fa9e4066Sahrens off = pp->p_offset; 408444eda4d7Smaybee len = PAGESIZE; 408544eda4d7Smaybee /* 408644eda4d7Smaybee * If our blocksize is bigger than the page size, try to kluster 40871209a471SNeil Perrin * multiple pages so that we write a full block (thus avoiding 408844eda4d7Smaybee * a read-modify-write). 408944eda4d7Smaybee */ 40900a586ceaSMark Shellenbaum if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 4091ac05c741SMark Maybee klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4092ac05c741SMark Maybee koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; 40930a586ceaSMark Shellenbaum ASSERT(koff <= zp->z_size); 40940a586ceaSMark Shellenbaum if (koff + klen > zp->z_size) 40950a586ceaSMark Shellenbaum klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 409644eda4d7Smaybee pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 409744eda4d7Smaybee } 409844eda4d7Smaybee ASSERT3U(btop(len), ==, btopr(len)); 4099ac05c741SMark Maybee 4100dd6ef538Smaybee /* 4101dd6ef538Smaybee * Can't push pages past end-of-file. 4102dd6ef538Smaybee */ 41030a586ceaSMark Shellenbaum if (off >= zp->z_size) { 4104f4d2e9e6Smaybee /* ignore all pages */ 410544eda4d7Smaybee err = 0; 410644eda4d7Smaybee goto out; 41070a586ceaSMark Shellenbaum } else if (off + len > zp->z_size) { 41080a586ceaSMark Shellenbaum int npages = btopr(zp->z_size - off); 410944eda4d7Smaybee page_t *trunc; 411044eda4d7Smaybee 411144eda4d7Smaybee page_list_break(&pp, &trunc, npages); 4112f4d2e9e6Smaybee /* ignore pages past end of file */ 411344eda4d7Smaybee if (trunc) 4114f4d2e9e6Smaybee pvn_write_done(trunc, flags); 41150a586ceaSMark Shellenbaum len = zp->z_size - off; 4116dd6ef538Smaybee } 411714843421SMatthew Ahrens 41180a586ceaSMark Shellenbaum if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 41190a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4120*be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 412114843421SMatthew Ahrens goto out; 412214843421SMatthew Ahrens } 4123ac05c741SMark Maybee top: 4124fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 4125fa9e4066Sahrens dmu_tx_hold_write(tx, zp->z_id, off, len); 41260a586ceaSMark Shellenbaum 41270a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 41280a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 41291209a471SNeil Perrin err = dmu_tx_assign(tx, TXG_NOWAIT); 4130fa9e4066Sahrens if (err != 0) { 41311209a471SNeil Perrin if (err == ERESTART) { 41328a2f1b91Sahrens dmu_tx_wait(tx); 41338a2f1b91Sahrens dmu_tx_abort(tx); 4134fa9e4066Sahrens goto top; 4135fa9e4066Sahrens } 41368a2f1b91Sahrens dmu_tx_abort(tx); 4137fa9e4066Sahrens goto out; 4138fa9e4066Sahrens } 4139fa9e4066Sahrens 414044eda4d7Smaybee if (zp->z_blksz <= PAGESIZE) { 41410fab61baSJonathan W Adams caddr_t va = zfs_map_page(pp, S_READ); 414244eda4d7Smaybee ASSERT3U(len, <=, PAGESIZE); 414344eda4d7Smaybee dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 41440fab61baSJonathan W Adams zfs_unmap_page(pp, va); 414544eda4d7Smaybee } else { 414644eda4d7Smaybee err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 414744eda4d7Smaybee } 4148fa9e4066Sahrens 414944eda4d7Smaybee if (err == 0) { 41500a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 4151db9986c7SMark Shellenbaum sa_bulk_attr_t bulk[3]; 41520a586ceaSMark Shellenbaum int count = 0; 41530a586ceaSMark Shellenbaum 41540a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 41550a586ceaSMark Shellenbaum &mtime, 16); 41560a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 41570a586ceaSMark Shellenbaum &ctime, 16); 4158db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4159db9986c7SMark Shellenbaum &zp->z_pflags, 8); 41600a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 41610a586ceaSMark Shellenbaum B_TRUE); 4162ac05c741SMark Maybee zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 416344eda4d7Smaybee } 416468857716SLin Ling dmu_tx_commit(tx); 4165fa9e4066Sahrens 416644eda4d7Smaybee out: 4167f4d2e9e6Smaybee pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4168fa9e4066Sahrens if (offp) 4169fa9e4066Sahrens *offp = off; 4170fa9e4066Sahrens if (lenp) 4171fa9e4066Sahrens *lenp = len; 4172fa9e4066Sahrens 4173fa9e4066Sahrens return (err); 4174fa9e4066Sahrens } 4175fa9e4066Sahrens 4176fa9e4066Sahrens /* 4177fa9e4066Sahrens * Copy the portion of the file indicated from pages into the file. 4178fa9e4066Sahrens * The pages are stored in a page list attached to the files vnode. 4179fa9e4066Sahrens * 4180fa9e4066Sahrens * IN: vp - vnode of file to push page data to. 4181fa9e4066Sahrens * off - position in file to put data. 4182fa9e4066Sahrens * len - amount of data to write. 4183fa9e4066Sahrens * flags - flags to control the operation. 4184fa9e4066Sahrens * cr - credentials of caller. 4185da6c28aaSamw * ct - caller context. 4186fa9e4066Sahrens * 4187fa9e4066Sahrens * RETURN: 0 if success 4188fa9e4066Sahrens * error code if failure 4189fa9e4066Sahrens * 4190fa9e4066Sahrens * Timestamps: 4191fa9e4066Sahrens * vp - ctime|mtime updated 4192fa9e4066Sahrens */ 4193da6c28aaSamw /*ARGSUSED*/ 4194fa9e4066Sahrens static int 4195da6c28aaSamw zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4196da6c28aaSamw caller_context_t *ct) 4197fa9e4066Sahrens { 4198fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4199fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4200fa9e4066Sahrens page_t *pp; 4201fa9e4066Sahrens size_t io_len; 4202fa9e4066Sahrens u_offset_t io_off; 4203ac05c741SMark Maybee uint_t blksz; 4204ac05c741SMark Maybee rl_t *rl; 4205fa9e4066Sahrens int error = 0; 4206fa9e4066Sahrens 42073cb34c60Sahrens ZFS_ENTER(zfsvfs); 42083cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4209fa9e4066Sahrens 4210c4fc6b21SGarrett D'Amore /* 4211c4fc6b21SGarrett D'Amore * There's nothing to do if no data is cached. 4212c4fc6b21SGarrett D'Amore */ 4213c4fc6b21SGarrett D'Amore if (!vn_has_cached_data(vp)) { 4214c4fc6b21SGarrett D'Amore ZFS_EXIT(zfsvfs); 4215c4fc6b21SGarrett D'Amore return (0); 4216c4fc6b21SGarrett D'Amore } 4217c4fc6b21SGarrett D'Amore 4218ac05c741SMark Maybee /* 4219ac05c741SMark Maybee * Align this request to the file block size in case we kluster. 4220ac05c741SMark Maybee * XXX - this can result in pretty aggresive locking, which can 4221ac05c741SMark Maybee * impact simultanious read/write access. One option might be 4222ac05c741SMark Maybee * to break up long requests (len == 0) into block-by-block 4223ac05c741SMark Maybee * operations to get narrower locking. 4224ac05c741SMark Maybee */ 4225ac05c741SMark Maybee blksz = zp->z_blksz; 4226ac05c741SMark Maybee if (ISP2(blksz)) 4227ac05c741SMark Maybee io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4228ac05c741SMark Maybee else 4229ac05c741SMark Maybee io_off = 0; 4230ac05c741SMark Maybee if (len > 0 && ISP2(blksz)) 42315a6f5619SMark Maybee io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4232ac05c741SMark Maybee else 4233ac05c741SMark Maybee io_len = 0; 4234ac05c741SMark Maybee 4235ac05c741SMark Maybee if (io_len == 0) { 4236fa9e4066Sahrens /* 4237ac05c741SMark Maybee * Search the entire vp list for pages >= io_off. 4238fa9e4066Sahrens */ 4239ac05c741SMark Maybee rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4240ac05c741SMark Maybee error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4241fe9cf88cSperrin goto out; 4242fa9e4066Sahrens } 4243ac05c741SMark Maybee rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4244fa9e4066Sahrens 42450a586ceaSMark Shellenbaum if (off > zp->z_size) { 4246fa9e4066Sahrens /* past end of file */ 4247ac05c741SMark Maybee zfs_range_unlock(rl); 4248fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4249fa9e4066Sahrens return (0); 4250fa9e4066Sahrens } 4251fa9e4066Sahrens 42520a586ceaSMark Shellenbaum len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4253fa9e4066Sahrens 4254ac05c741SMark Maybee for (off = io_off; io_off < off + len; io_off += io_len) { 4255fa9e4066Sahrens if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4256104e2ed7Sperrin pp = page_lookup(vp, io_off, 4257ecb72030Sperrin (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4258fa9e4066Sahrens } else { 4259fa9e4066Sahrens pp = page_lookup_nowait(vp, io_off, 4260ecb72030Sperrin (flags & B_FREE) ? SE_EXCL : SE_SHARED); 4261fa9e4066Sahrens } 4262fa9e4066Sahrens 4263fa9e4066Sahrens if (pp != NULL && pvn_getdirty(pp, flags)) { 4264fa9e4066Sahrens int err; 4265fa9e4066Sahrens 4266fa9e4066Sahrens /* 4267fa9e4066Sahrens * Found a dirty page to push 4268fa9e4066Sahrens */ 4269104e2ed7Sperrin err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4270104e2ed7Sperrin if (err) 4271fa9e4066Sahrens error = err; 4272fa9e4066Sahrens } else { 4273fa9e4066Sahrens io_len = PAGESIZE; 4274fa9e4066Sahrens } 4275fa9e4066Sahrens } 4276fe9cf88cSperrin out: 4277ac05c741SMark Maybee zfs_range_unlock(rl); 427855da60b9SMark J Musante if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 42795002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 4280fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4281fa9e4066Sahrens return (error); 4282fa9e4066Sahrens } 4283fa9e4066Sahrens 4284da6c28aaSamw /*ARGSUSED*/ 4285fa9e4066Sahrens void 4286da6c28aaSamw zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4287fa9e4066Sahrens { 4288fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4289fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4290fa9e4066Sahrens int error; 4291fa9e4066Sahrens 4292f18faf3fSek rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 42930a586ceaSMark Shellenbaum if (zp->z_sa_hdl == NULL) { 42944ccbb6e7Sahrens /* 4295874395d5Smaybee * The fs has been unmounted, or we did a 4296874395d5Smaybee * suspend/resume and this file no longer exists. 42974ccbb6e7Sahrens */ 4298fa9e4066Sahrens if (vn_has_cached_data(vp)) { 4299fa9e4066Sahrens (void) pvn_vplist_dirty(vp, 0, zfs_null_putapage, 4300fa9e4066Sahrens B_INVAL, cr); 4301fa9e4066Sahrens } 4302fa9e4066Sahrens 4303ea8dc4b6Seschrock mutex_enter(&zp->z_lock); 4304cd2adeceSChris Kirby mutex_enter(&vp->v_lock); 4305cd2adeceSChris Kirby ASSERT(vp->v_count == 1); 4306cd2adeceSChris Kirby vp->v_count = 0; 4307cd2adeceSChris Kirby mutex_exit(&vp->v_lock); 43084ccbb6e7Sahrens mutex_exit(&zp->z_lock); 4309f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 4310874395d5Smaybee zfs_znode_free(zp); 4311fa9e4066Sahrens return; 4312fa9e4066Sahrens } 4313fa9e4066Sahrens 4314fa9e4066Sahrens /* 4315fa9e4066Sahrens * Attempt to push any data in the page cache. If this fails 4316fa9e4066Sahrens * we will get kicked out later in zfs_zinactive(). 4317fa9e4066Sahrens */ 43188afd4dd6Sperrin if (vn_has_cached_data(vp)) { 43198afd4dd6Sperrin (void) pvn_vplist_dirty(vp, 0, zfs_putapage, B_INVAL|B_ASYNC, 43208afd4dd6Sperrin cr); 43218afd4dd6Sperrin } 4322fa9e4066Sahrens 4323893a6d32Sahrens if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4324fa9e4066Sahrens dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4325fa9e4066Sahrens 43260a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 43270a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 4328fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 4329fa9e4066Sahrens if (error) { 4330fa9e4066Sahrens dmu_tx_abort(tx); 4331fa9e4066Sahrens } else { 4332fa9e4066Sahrens mutex_enter(&zp->z_lock); 43330a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 43340a586ceaSMark Shellenbaum (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4335fa9e4066Sahrens zp->z_atime_dirty = 0; 4336fa9e4066Sahrens mutex_exit(&zp->z_lock); 4337fa9e4066Sahrens dmu_tx_commit(tx); 4338fa9e4066Sahrens } 4339fa9e4066Sahrens } 4340fa9e4066Sahrens 4341fa9e4066Sahrens zfs_zinactive(zp); 4342f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 4343fa9e4066Sahrens } 4344fa9e4066Sahrens 4345fa9e4066Sahrens /* 4346fa9e4066Sahrens * Bounds-check the seek operation. 4347fa9e4066Sahrens * 4348fa9e4066Sahrens * IN: vp - vnode seeking within 4349fa9e4066Sahrens * ooff - old file offset 4350fa9e4066Sahrens * noffp - pointer to new file offset 4351da6c28aaSamw * ct - caller context 4352fa9e4066Sahrens * 4353fa9e4066Sahrens * RETURN: 0 if success 4354fa9e4066Sahrens * EINVAL if new offset invalid 4355fa9e4066Sahrens */ 4356fa9e4066Sahrens /* ARGSUSED */ 4357fa9e4066Sahrens static int 4358da6c28aaSamw zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4359da6c28aaSamw caller_context_t *ct) 4360fa9e4066Sahrens { 4361fa9e4066Sahrens if (vp->v_type == VDIR) 4362fa9e4066Sahrens return (0); 4363fa9e4066Sahrens return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4364fa9e4066Sahrens } 4365fa9e4066Sahrens 4366fa9e4066Sahrens /* 4367fa9e4066Sahrens * Pre-filter the generic locking function to trap attempts to place 4368fa9e4066Sahrens * a mandatory lock on a memory mapped file. 4369fa9e4066Sahrens */ 4370fa9e4066Sahrens static int 4371fa9e4066Sahrens zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4372da6c28aaSamw flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4373fa9e4066Sahrens { 4374fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4375fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4376fa9e4066Sahrens 43773cb34c60Sahrens ZFS_ENTER(zfsvfs); 43783cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4379fa9e4066Sahrens 4380fa9e4066Sahrens /* 4381ea8dc4b6Seschrock * We are following the UFS semantics with respect to mapcnt 4382ea8dc4b6Seschrock * here: If we see that the file is mapped already, then we will 4383ea8dc4b6Seschrock * return an error, but we don't worry about races between this 4384ea8dc4b6Seschrock * function and zfs_map(). 4385fa9e4066Sahrens */ 43860a586ceaSMark Shellenbaum if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4387fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4388*be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 4389fa9e4066Sahrens } 4390fa9e4066Sahrens ZFS_EXIT(zfsvfs); 439104ce3d0bSMark Shellenbaum return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4392fa9e4066Sahrens } 4393fa9e4066Sahrens 4394fa9e4066Sahrens /* 4395fa9e4066Sahrens * If we can't find a page in the cache, we will create a new page 4396fa9e4066Sahrens * and fill it with file data. For efficiency, we may try to fill 4397ac05c741SMark Maybee * multiple pages at once (klustering) to fill up the supplied page 4398ed886187SMark Maybee * list. Note that the pages to be filled are held with an exclusive 4399ed886187SMark Maybee * lock to prevent access by other threads while they are being filled. 4400fa9e4066Sahrens */ 4401fa9e4066Sahrens static int 4402fa9e4066Sahrens zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4403fa9e4066Sahrens caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4404fa9e4066Sahrens { 4405fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4406fa9e4066Sahrens page_t *pp, *cur_pp; 4407fa9e4066Sahrens objset_t *os = zp->z_zfsvfs->z_os; 4408fa9e4066Sahrens u_offset_t io_off, total; 4409fa9e4066Sahrens size_t io_len; 4410fa9e4066Sahrens int err; 4411fa9e4066Sahrens 441244eda4d7Smaybee if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4413ac05c741SMark Maybee /* 4414ac05c741SMark Maybee * We only have a single page, don't bother klustering 4415ac05c741SMark Maybee */ 4416fa9e4066Sahrens io_off = off; 4417fa9e4066Sahrens io_len = PAGESIZE; 4418ed886187SMark Maybee pp = page_create_va(vp, io_off, io_len, 4419ed886187SMark Maybee PG_EXCL | PG_WAIT, seg, addr); 4420fa9e4066Sahrens } else { 4421fa9e4066Sahrens /* 4422ac05c741SMark Maybee * Try to find enough pages to fill the page list 4423fa9e4066Sahrens */ 4424fa9e4066Sahrens pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 4425ac05c741SMark Maybee &io_len, off, plsz, 0); 4426fa9e4066Sahrens } 4427fa9e4066Sahrens if (pp == NULL) { 4428fa9e4066Sahrens /* 4429ac05c741SMark Maybee * The page already exists, nothing to do here. 4430fa9e4066Sahrens */ 4431fa9e4066Sahrens *pl = NULL; 4432fa9e4066Sahrens return (0); 4433fa9e4066Sahrens } 4434fa9e4066Sahrens 4435fa9e4066Sahrens /* 4436fa9e4066Sahrens * Fill the pages in the kluster. 4437fa9e4066Sahrens */ 4438fa9e4066Sahrens cur_pp = pp; 4439fa9e4066Sahrens for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4440ac05c741SMark Maybee caddr_t va; 4441ac05c741SMark Maybee 444244eda4d7Smaybee ASSERT3U(io_off, ==, cur_pp->p_offset); 44430fab61baSJonathan W Adams va = zfs_map_page(cur_pp, S_WRITE); 44447bfdf011SNeil Perrin err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 44457bfdf011SNeil Perrin DMU_READ_PREFETCH); 44460fab61baSJonathan W Adams zfs_unmap_page(cur_pp, va); 4447fa9e4066Sahrens if (err) { 4448fa9e4066Sahrens /* On error, toss the entire kluster */ 4449fa9e4066Sahrens pvn_read_done(pp, B_ERROR); 4450b87f3af3Sperrin /* convert checksum errors into IO errors */ 4451b87f3af3Sperrin if (err == ECKSUM) 4452*be6fd75aSMatthew Ahrens err = SET_ERROR(EIO); 4453fa9e4066Sahrens return (err); 4454fa9e4066Sahrens } 4455fa9e4066Sahrens cur_pp = cur_pp->p_next; 4456fa9e4066Sahrens } 4457ac05c741SMark Maybee 4458fa9e4066Sahrens /* 4459ac05c741SMark Maybee * Fill in the page list array from the kluster starting 4460ac05c741SMark Maybee * from the desired offset `off'. 4461fa9e4066Sahrens * NOTE: the page list will always be null terminated. 4462fa9e4066Sahrens */ 4463fa9e4066Sahrens pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4464ac05c741SMark Maybee ASSERT(pl == NULL || (*pl)->p_offset == off); 4465fa9e4066Sahrens 4466fa9e4066Sahrens return (0); 4467fa9e4066Sahrens } 4468fa9e4066Sahrens 4469fa9e4066Sahrens /* 4470fa9e4066Sahrens * Return pointers to the pages for the file region [off, off + len] 4471fa9e4066Sahrens * in the pl array. If plsz is greater than len, this function may 4472ac05c741SMark Maybee * also return page pointers from after the specified region 4473ac05c741SMark Maybee * (i.e. the region [off, off + plsz]). These additional pages are 4474ac05c741SMark Maybee * only returned if they are already in the cache, or were created as 4475ac05c741SMark Maybee * part of a klustered read. 4476fa9e4066Sahrens * 4477fa9e4066Sahrens * IN: vp - vnode of file to get data from. 4478fa9e4066Sahrens * off - position in file to get data from. 4479fa9e4066Sahrens * len - amount of data to retrieve. 4480fa9e4066Sahrens * plsz - length of provided page list. 4481fa9e4066Sahrens * seg - segment to obtain pages for. 4482fa9e4066Sahrens * addr - virtual address of fault. 4483fa9e4066Sahrens * rw - mode of created pages. 4484fa9e4066Sahrens * cr - credentials of caller. 4485da6c28aaSamw * ct - caller context. 4486fa9e4066Sahrens * 4487fa9e4066Sahrens * OUT: protp - protection mode of created pages. 4488fa9e4066Sahrens * pl - list of pages created. 4489fa9e4066Sahrens * 4490fa9e4066Sahrens * RETURN: 0 if success 4491fa9e4066Sahrens * error code if failure 4492fa9e4066Sahrens * 4493fa9e4066Sahrens * Timestamps: 4494fa9e4066Sahrens * vp - atime updated 4495fa9e4066Sahrens */ 4496fa9e4066Sahrens /* ARGSUSED */ 4497fa9e4066Sahrens static int 4498fa9e4066Sahrens zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4499fa9e4066Sahrens page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4500da6c28aaSamw enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4501fa9e4066Sahrens { 4502fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4503fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4504ac05c741SMark Maybee page_t **pl0 = pl; 4505ac05c741SMark Maybee int err = 0; 4506ac05c741SMark Maybee 4507ac05c741SMark Maybee /* we do our own caching, faultahead is unnecessary */ 4508ac05c741SMark Maybee if (pl == NULL) 4509ac05c741SMark Maybee return (0); 4510ac05c741SMark Maybee else if (len > plsz) 4511ac05c741SMark Maybee len = plsz; 451227bd165aSMark Maybee else 451327bd165aSMark Maybee len = P2ROUNDUP(len, PAGESIZE); 4514ac05c741SMark Maybee ASSERT(plsz >= len); 4515fa9e4066Sahrens 45163cb34c60Sahrens ZFS_ENTER(zfsvfs); 45173cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4518fa9e4066Sahrens 4519fa9e4066Sahrens if (protp) 4520fa9e4066Sahrens *protp = PROT_ALL; 4521fa9e4066Sahrens 4522fa9e4066Sahrens /* 4523ed886187SMark Maybee * Loop through the requested range [off, off + len) looking 4524fa9e4066Sahrens * for pages. If we don't find a page, we will need to create 4525fa9e4066Sahrens * a new page and fill it with data from the file. 4526fa9e4066Sahrens */ 4527fa9e4066Sahrens while (len > 0) { 4528ac05c741SMark Maybee if (*pl = page_lookup(vp, off, SE_SHARED)) 4529ac05c741SMark Maybee *(pl+1) = NULL; 4530ac05c741SMark Maybee else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4531ac05c741SMark Maybee goto out; 4532ac05c741SMark Maybee while (*pl) { 4533ac05c741SMark Maybee ASSERT3U((*pl)->p_offset, ==, off); 4534fa9e4066Sahrens off += PAGESIZE; 4535fa9e4066Sahrens addr += PAGESIZE; 453627bd165aSMark Maybee if (len > 0) { 453727bd165aSMark Maybee ASSERT3U(len, >=, PAGESIZE); 4538ac05c741SMark Maybee len -= PAGESIZE; 453927bd165aSMark Maybee } 4540ac05c741SMark Maybee ASSERT3U(plsz, >=, PAGESIZE); 4541fa9e4066Sahrens plsz -= PAGESIZE; 4542ac05c741SMark Maybee pl++; 4543fa9e4066Sahrens } 4544fa9e4066Sahrens } 4545fa9e4066Sahrens 4546fa9e4066Sahrens /* 4547fa9e4066Sahrens * Fill out the page array with any pages already in the cache. 4548fa9e4066Sahrens */ 4549ac05c741SMark Maybee while (plsz > 0 && 4550ac05c741SMark Maybee (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4551ac05c741SMark Maybee off += PAGESIZE; 4552ac05c741SMark Maybee plsz -= PAGESIZE; 4553fa9e4066Sahrens } 4554fa9e4066Sahrens out: 4555fe2f476aSperrin if (err) { 4556fe2f476aSperrin /* 4557fe2f476aSperrin * Release any pages we have previously locked. 4558fe2f476aSperrin */ 4559fe2f476aSperrin while (pl > pl0) 4560fe2f476aSperrin page_unlock(*--pl); 4561ac05c741SMark Maybee } else { 4562ac05c741SMark Maybee ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4563fe2f476aSperrin } 4564fe2f476aSperrin 4565fa9e4066Sahrens *pl = NULL; 4566fa9e4066Sahrens 4567fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4568fa9e4066Sahrens return (err); 4569fa9e4066Sahrens } 4570fa9e4066Sahrens 4571ea8dc4b6Seschrock /* 4572ea8dc4b6Seschrock * Request a memory map for a section of a file. This code interacts 4573ea8dc4b6Seschrock * with common code and the VM system as follows: 4574ea8dc4b6Seschrock * 4575ea8dc4b6Seschrock * common code calls mmap(), which ends up in smmap_common() 4576ea8dc4b6Seschrock * 4577ea8dc4b6Seschrock * this calls VOP_MAP(), which takes you into (say) zfs 4578ea8dc4b6Seschrock * 4579ea8dc4b6Seschrock * zfs_map() calls as_map(), passing segvn_create() as the callback 4580ea8dc4b6Seschrock * 4581ea8dc4b6Seschrock * segvn_create() creates the new segment and calls VOP_ADDMAP() 4582ea8dc4b6Seschrock * 4583ea8dc4b6Seschrock * zfs_addmap() updates z_mapcnt 4584ea8dc4b6Seschrock */ 4585da6c28aaSamw /*ARGSUSED*/ 4586fa9e4066Sahrens static int 4587fa9e4066Sahrens zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4588da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4589da6c28aaSamw caller_context_t *ct) 4590fa9e4066Sahrens { 4591fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4592fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4593fa9e4066Sahrens segvn_crargs_t vn_a; 4594fa9e4066Sahrens int error; 4595fa9e4066Sahrens 45960616c50eSmarks ZFS_ENTER(zfsvfs); 45970616c50eSmarks ZFS_VERIFY_ZP(zp); 45980616c50eSmarks 45990a586ceaSMark Shellenbaum if ((prot & PROT_WRITE) && (zp->z_pflags & 46000a586ceaSMark Shellenbaum (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 46010616c50eSmarks ZFS_EXIT(zfsvfs); 4602*be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 46030616c50eSmarks } 4604da6c28aaSamw 46050616c50eSmarks if ((prot & (PROT_READ | PROT_EXEC)) && 46060a586ceaSMark Shellenbaum (zp->z_pflags & ZFS_AV_QUARANTINED)) { 46070616c50eSmarks ZFS_EXIT(zfsvfs); 4608*be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 46090616c50eSmarks } 4610fa9e4066Sahrens 4611fa9e4066Sahrens if (vp->v_flag & VNOMAP) { 4612fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4613*be6fd75aSMatthew Ahrens return (SET_ERROR(ENOSYS)); 4614fa9e4066Sahrens } 4615fa9e4066Sahrens 4616fa9e4066Sahrens if (off < 0 || len > MAXOFFSET_T - off) { 4617fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4618*be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 4619fa9e4066Sahrens } 4620fa9e4066Sahrens 4621fa9e4066Sahrens if (vp->v_type != VREG) { 4622fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4623*be6fd75aSMatthew Ahrens return (SET_ERROR(ENODEV)); 4624fa9e4066Sahrens } 4625fa9e4066Sahrens 4626fa9e4066Sahrens /* 4627fa9e4066Sahrens * If file is locked, disallow mapping. 4628fa9e4066Sahrens */ 46290a586ceaSMark Shellenbaum if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 4630ea8dc4b6Seschrock ZFS_EXIT(zfsvfs); 4631*be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 4632fa9e4066Sahrens } 4633fa9e4066Sahrens 4634fa9e4066Sahrens as_rangelock(as); 463560946fe0Smec error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 463660946fe0Smec if (error != 0) { 463760946fe0Smec as_rangeunlock(as); 463860946fe0Smec ZFS_EXIT(zfsvfs); 463960946fe0Smec return (error); 4640fa9e4066Sahrens } 4641fa9e4066Sahrens 4642fa9e4066Sahrens vn_a.vp = vp; 4643fa9e4066Sahrens vn_a.offset = (u_offset_t)off; 4644fa9e4066Sahrens vn_a.type = flags & MAP_TYPE; 4645fa9e4066Sahrens vn_a.prot = prot; 4646fa9e4066Sahrens vn_a.maxprot = maxprot; 4647fa9e4066Sahrens vn_a.cred = cr; 4648fa9e4066Sahrens vn_a.amp = NULL; 4649fa9e4066Sahrens vn_a.flags = flags & ~MAP_TYPE; 46504944b02eSkchow vn_a.szc = 0; 46514944b02eSkchow vn_a.lgrp_mem_policy_flags = 0; 4652fa9e4066Sahrens 4653fa9e4066Sahrens error = as_map(as, *addrp, len, segvn_create, &vn_a); 4654fa9e4066Sahrens 4655fa9e4066Sahrens as_rangeunlock(as); 4656fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4657fa9e4066Sahrens return (error); 4658fa9e4066Sahrens } 4659fa9e4066Sahrens 4660fa9e4066Sahrens /* ARGSUSED */ 4661fa9e4066Sahrens static int 4662fa9e4066Sahrens zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4663da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4664da6c28aaSamw caller_context_t *ct) 4665fa9e4066Sahrens { 4666ea8dc4b6Seschrock uint64_t pages = btopr(len); 4667ea8dc4b6Seschrock 4668ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 4669fa9e4066Sahrens return (0); 4670fa9e4066Sahrens } 4671fa9e4066Sahrens 4672b468a217Seschrock /* 4673b468a217Seschrock * The reason we push dirty pages as part of zfs_delmap() is so that we get a 4674b468a217Seschrock * more accurate mtime for the associated file. Since we don't have a way of 4675b468a217Seschrock * detecting when the data was actually modified, we have to resort to 4676b468a217Seschrock * heuristics. If an explicit msync() is done, then we mark the mtime when the 4677b468a217Seschrock * last page is pushed. The problem occurs when the msync() call is omitted, 4678b468a217Seschrock * which by far the most common case: 4679b468a217Seschrock * 4680b468a217Seschrock * open() 4681b468a217Seschrock * mmap() 4682b468a217Seschrock * <modify memory> 4683b468a217Seschrock * munmap() 4684b468a217Seschrock * close() 4685b468a217Seschrock * <time lapse> 4686b468a217Seschrock * putpage() via fsflush 4687b468a217Seschrock * 4688b468a217Seschrock * If we wait until fsflush to come along, we can have a modification time that 4689b468a217Seschrock * is some arbitrary point in the future. In order to prevent this in the 4690b468a217Seschrock * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 4691b468a217Seschrock * torn down. 4692b468a217Seschrock */ 4693fa9e4066Sahrens /* ARGSUSED */ 4694fa9e4066Sahrens static int 4695fa9e4066Sahrens zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4696da6c28aaSamw size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 4697da6c28aaSamw caller_context_t *ct) 4698fa9e4066Sahrens { 4699ea8dc4b6Seschrock uint64_t pages = btopr(len); 4700ea8dc4b6Seschrock 4701ea8dc4b6Seschrock ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 4702ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 4703b468a217Seschrock 4704b468a217Seschrock if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 4705b468a217Seschrock vn_has_cached_data(vp)) 4706da6c28aaSamw (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 4707b468a217Seschrock 4708fa9e4066Sahrens return (0); 4709fa9e4066Sahrens } 4710fa9e4066Sahrens 4711fa9e4066Sahrens /* 4712fa9e4066Sahrens * Free or allocate space in a file. Currently, this function only 4713fa9e4066Sahrens * supports the `F_FREESP' command. However, this command is somewhat 4714fa9e4066Sahrens * misnamed, as its functionality includes the ability to allocate as 4715fa9e4066Sahrens * well as free space. 4716fa9e4066Sahrens * 4717fa9e4066Sahrens * IN: vp - vnode of file to free data in. 4718fa9e4066Sahrens * cmd - action to take (only F_FREESP supported). 4719fa9e4066Sahrens * bfp - section of file to free/alloc. 4720fa9e4066Sahrens * flag - current file open mode flags. 4721fa9e4066Sahrens * offset - current file offset. 4722fa9e4066Sahrens * cr - credentials of caller [UNUSED]. 4723da6c28aaSamw * ct - caller context. 4724fa9e4066Sahrens * 4725fa9e4066Sahrens * RETURN: 0 if success 4726fa9e4066Sahrens * error code if failure 4727fa9e4066Sahrens * 4728fa9e4066Sahrens * Timestamps: 4729fa9e4066Sahrens * vp - ctime|mtime updated 4730fa9e4066Sahrens */ 4731fa9e4066Sahrens /* ARGSUSED */ 4732fa9e4066Sahrens static int 4733fa9e4066Sahrens zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 4734fa9e4066Sahrens offset_t offset, cred_t *cr, caller_context_t *ct) 4735fa9e4066Sahrens { 4736fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4737fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4738fa9e4066Sahrens uint64_t off, len; 4739fa9e4066Sahrens int error; 4740fa9e4066Sahrens 47413cb34c60Sahrens ZFS_ENTER(zfsvfs); 47423cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4743fa9e4066Sahrens 4744fa9e4066Sahrens if (cmd != F_FREESP) { 4745fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4746*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4747fa9e4066Sahrens } 4748fa9e4066Sahrens 4749fa9e4066Sahrens if (error = convoff(vp, bfp, 0, offset)) { 4750fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4751fa9e4066Sahrens return (error); 4752fa9e4066Sahrens } 4753fa9e4066Sahrens 4754fa9e4066Sahrens if (bfp->l_len < 0) { 4755fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4756*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4757fa9e4066Sahrens } 4758fa9e4066Sahrens 4759fa9e4066Sahrens off = bfp->l_start; 4760104e2ed7Sperrin len = bfp->l_len; /* 0 means from off to end of file */ 4761104e2ed7Sperrin 4762cdb0ab79Smaybee error = zfs_freesp(zp, off, len, flag, TRUE); 4763fa9e4066Sahrens 4764fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4765fa9e4066Sahrens return (error); 4766fa9e4066Sahrens } 4767fa9e4066Sahrens 4768da6c28aaSamw /*ARGSUSED*/ 4769fa9e4066Sahrens static int 4770da6c28aaSamw zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4771fa9e4066Sahrens { 4772fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4773fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4774f18faf3fSek uint32_t gen; 47750a586ceaSMark Shellenbaum uint64_t gen64; 4776fa9e4066Sahrens uint64_t object = zp->z_id; 4777fa9e4066Sahrens zfid_short_t *zfid; 47780a586ceaSMark Shellenbaum int size, i, error; 4779fa9e4066Sahrens 47803cb34c60Sahrens ZFS_ENTER(zfsvfs); 47813cb34c60Sahrens ZFS_VERIFY_ZP(zp); 47820a586ceaSMark Shellenbaum 47830a586ceaSMark Shellenbaum if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4784f3e6fb2fSMark Shellenbaum &gen64, sizeof (uint64_t))) != 0) { 4785f3e6fb2fSMark Shellenbaum ZFS_EXIT(zfsvfs); 47860a586ceaSMark Shellenbaum return (error); 4787f3e6fb2fSMark Shellenbaum } 47880a586ceaSMark Shellenbaum 47890a586ceaSMark Shellenbaum gen = (uint32_t)gen64; 4790fa9e4066Sahrens 4791fa9e4066Sahrens size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4792fa9e4066Sahrens if (fidp->fid_len < size) { 4793fa9e4066Sahrens fidp->fid_len = size; 47940f2dc02eSek ZFS_EXIT(zfsvfs); 4795*be6fd75aSMatthew Ahrens return (SET_ERROR(ENOSPC)); 4796fa9e4066Sahrens } 4797fa9e4066Sahrens 4798fa9e4066Sahrens zfid = (zfid_short_t *)fidp; 4799fa9e4066Sahrens 4800fa9e4066Sahrens zfid->zf_len = size; 4801fa9e4066Sahrens 4802fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 4803fa9e4066Sahrens zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4804fa9e4066Sahrens 4805fa9e4066Sahrens /* Must have a non-zero generation number to distinguish from .zfs */ 4806fa9e4066Sahrens if (gen == 0) 4807fa9e4066Sahrens gen = 1; 4808fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 4809fa9e4066Sahrens zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4810fa9e4066Sahrens 4811fa9e4066Sahrens if (size == LONG_FID_LEN) { 4812fa9e4066Sahrens uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4813fa9e4066Sahrens zfid_long_t *zlfid; 4814fa9e4066Sahrens 4815fa9e4066Sahrens zlfid = (zfid_long_t *)fidp; 4816fa9e4066Sahrens 4817fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4818fa9e4066Sahrens zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4819fa9e4066Sahrens 4820fa9e4066Sahrens /* XXX - this should be the generation number for the objset */ 4821fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4822fa9e4066Sahrens zlfid->zf_setgen[i] = 0; 4823fa9e4066Sahrens } 4824fa9e4066Sahrens 4825fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4826fa9e4066Sahrens return (0); 4827fa9e4066Sahrens } 4828fa9e4066Sahrens 4829fa9e4066Sahrens static int 4830da6c28aaSamw zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4831da6c28aaSamw caller_context_t *ct) 4832fa9e4066Sahrens { 4833fa9e4066Sahrens znode_t *zp, *xzp; 4834fa9e4066Sahrens zfsvfs_t *zfsvfs; 4835fa9e4066Sahrens zfs_dirlock_t *dl; 4836fa9e4066Sahrens int error; 4837fa9e4066Sahrens 4838fa9e4066Sahrens switch (cmd) { 4839fa9e4066Sahrens case _PC_LINK_MAX: 4840fa9e4066Sahrens *valp = ULONG_MAX; 4841fa9e4066Sahrens return (0); 4842fa9e4066Sahrens 4843fa9e4066Sahrens case _PC_FILESIZEBITS: 4844fa9e4066Sahrens *valp = 64; 4845fa9e4066Sahrens return (0); 4846fa9e4066Sahrens 4847fa9e4066Sahrens case _PC_XATTR_EXISTS: 4848fa9e4066Sahrens zp = VTOZ(vp); 4849fa9e4066Sahrens zfsvfs = zp->z_zfsvfs; 48503cb34c60Sahrens ZFS_ENTER(zfsvfs); 48513cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4852fa9e4066Sahrens *valp = 0; 4853fa9e4066Sahrens error = zfs_dirent_lock(&dl, zp, "", &xzp, 4854da6c28aaSamw ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 4855fa9e4066Sahrens if (error == 0) { 4856fa9e4066Sahrens zfs_dirent_unlock(dl); 4857fa9e4066Sahrens if (!zfs_dirempty(xzp)) 4858fa9e4066Sahrens *valp = 1; 4859fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 4860fa9e4066Sahrens } else if (error == ENOENT) { 4861fa9e4066Sahrens /* 4862fa9e4066Sahrens * If there aren't extended attributes, it's the 4863fa9e4066Sahrens * same as having zero of them. 4864fa9e4066Sahrens */ 4865fa9e4066Sahrens error = 0; 4866fa9e4066Sahrens } 4867fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4868fa9e4066Sahrens return (error); 4869fa9e4066Sahrens 4870da6c28aaSamw case _PC_SATTR_ENABLED: 4871da6c28aaSamw case _PC_SATTR_EXISTS: 48729660e5cbSJanice Chang *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4873da6c28aaSamw (vp->v_type == VREG || vp->v_type == VDIR); 4874da6c28aaSamw return (0); 4875da6c28aaSamw 4876e802abbdSTim Haley case _PC_ACCESS_FILTERING: 4877e802abbdSTim Haley *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4878e802abbdSTim Haley vp->v_type == VDIR; 4879e802abbdSTim Haley return (0); 4880e802abbdSTim Haley 4881fa9e4066Sahrens case _PC_ACL_ENABLED: 4882fa9e4066Sahrens *valp = _ACL_ACE_ENABLED; 4883fa9e4066Sahrens return (0); 4884fa9e4066Sahrens 4885fa9e4066Sahrens case _PC_MIN_HOLE_SIZE: 4886fa9e4066Sahrens *valp = (ulong_t)SPA_MINBLOCKSIZE; 4887fa9e4066Sahrens return (0); 4888fa9e4066Sahrens 48893b862e9aSRoger A. Faulkner case _PC_TIMESTAMP_RESOLUTION: 48903b862e9aSRoger A. Faulkner /* nanosecond timestamp resolution */ 48913b862e9aSRoger A. Faulkner *valp = 1L; 48923b862e9aSRoger A. Faulkner return (0); 48933b862e9aSRoger A. Faulkner 4894fa9e4066Sahrens default: 4895da6c28aaSamw return (fs_pathconf(vp, cmd, valp, cr, ct)); 4896fa9e4066Sahrens } 4897fa9e4066Sahrens } 4898fa9e4066Sahrens 4899fa9e4066Sahrens /*ARGSUSED*/ 4900fa9e4066Sahrens static int 4901da6c28aaSamw zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4902da6c28aaSamw caller_context_t *ct) 4903fa9e4066Sahrens { 4904fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4905fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4906fa9e4066Sahrens int error; 4907da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4908fa9e4066Sahrens 49093cb34c60Sahrens ZFS_ENTER(zfsvfs); 49103cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4911da6c28aaSamw error = zfs_getacl(zp, vsecp, skipaclchk, cr); 4912fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4913fa9e4066Sahrens 4914fa9e4066Sahrens return (error); 4915fa9e4066Sahrens } 4916fa9e4066Sahrens 4917fa9e4066Sahrens /*ARGSUSED*/ 4918fa9e4066Sahrens static int 4919da6c28aaSamw zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4920da6c28aaSamw caller_context_t *ct) 4921fa9e4066Sahrens { 4922fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4923fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4924fa9e4066Sahrens int error; 4925da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 492655da60b9SMark J Musante zilog_t *zilog = zfsvfs->z_log; 4927fa9e4066Sahrens 49283cb34c60Sahrens ZFS_ENTER(zfsvfs); 49293cb34c60Sahrens ZFS_VERIFY_ZP(zp); 493055da60b9SMark J Musante 4931da6c28aaSamw error = zfs_setacl(zp, vsecp, skipaclchk, cr); 493255da60b9SMark J Musante 493355da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 49345002558fSNeil Perrin zil_commit(zilog, 0); 493555da60b9SMark J Musante 4936fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4937fa9e4066Sahrens return (error); 4938fa9e4066Sahrens } 4939fa9e4066Sahrens 4940c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 4941c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Tunable, both must be a power of 2. 4942c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * 4943c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf 4944c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * zcr_blksz_max: if set to less than the file block size, allow loaning out of 4945c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * an arcbuf for a partial block read 4946c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 4947c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int zcr_blksz_min = (1 << 10); /* 1K */ 4948c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int zcr_blksz_max = (1 << 17); /* 128K */ 4949c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4950c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /*ARGSUSED*/ 4951c242f9a0Schunli zhang - Sun Microsystems - Irvine United States static int 4952c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 4953c242f9a0Schunli zhang - Sun Microsystems - Irvine United States caller_context_t *ct) 4954c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 4955c242f9a0Schunli zhang - Sun Microsystems - Irvine United States znode_t *zp = VTOZ(vp); 4956c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4957c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int max_blksz = zfsvfs->z_max_blksz; 4958c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio_t *uio = &xuio->xu_uio; 4959c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ssize_t size = uio->uio_resid; 4960c242f9a0Schunli zhang - Sun Microsystems - Irvine United States offset_t offset = uio->uio_loffset; 4961c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz; 4962c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int fullblk, i; 4963c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 4964c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ssize_t maxsize; 4965c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int preamble, postamble; 4966c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4967c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (xuio->xu_type != UIOTYPE_ZEROCOPY) 4968*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4969c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4970c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_ENTER(zfsvfs); 4971c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_VERIFY_ZP(zp); 4972c242f9a0Schunli zhang - Sun Microsystems - Irvine United States switch (ioflag) { 4973c242f9a0Schunli zhang - Sun Microsystems - Irvine United States case UIO_WRITE: 4974c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 4975c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for write if write size is bigger than 4976c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * max_blksz, and the file's block size is also max_blksz. 4977c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 4978c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = max_blksz; 4979c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size < blksz || zp->z_blksz != blksz) { 4980c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 4981*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4982c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4983c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 4984c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Caller requests buffers for write before knowing where the 4985c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * write offset might be (e.g. NFS TCP write). 4986c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 4987c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (offset == -1) { 4988c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = 0; 4989c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 4990c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = P2PHASE(offset, blksz); 4991c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (preamble) { 4992c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = blksz - preamble; 4993c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size -= preamble; 4994c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4995c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4996c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4997c242f9a0Schunli zhang - Sun Microsystems - Irvine United States postamble = P2PHASE(size, blksz); 4998c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size -= postamble; 4999c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5000c242f9a0Schunli zhang - Sun Microsystems - Irvine United States fullblk = size / blksz; 5001570de38fSSurya Prakki (void) dmu_xuio_init(xuio, 5002c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (preamble != 0) + fullblk + (postamble != 0)); 5003c242f9a0Schunli zhang - Sun Microsystems - Irvine United States DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5004c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int, postamble, int, 5005c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (preamble != 0) + fullblk + (postamble != 0)); 5006c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5007c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5008c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Have to fix iov base/len for partial buffers. They 5009c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * currently represent full arc_buf's. 5010c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5011c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (preamble) { 5012c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* data begins in the middle of the arc_buf */ 50130a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 50140a586ceaSMark Shellenbaum blksz); 5015c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5016570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 5017570de38fSSurya Prakki blksz - preamble, preamble); 5018c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5019c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5020c242f9a0Schunli zhang - Sun Microsystems - Irvine United States for (i = 0; i < fullblk; i++) { 50210a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 50220a586ceaSMark Shellenbaum blksz); 5023c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5024570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5025c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5026c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5027c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (postamble) { 5028c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* data ends in the middle of the arc_buf */ 50290a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 50300a586ceaSMark Shellenbaum blksz); 5031c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5032570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5033c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5034c242f9a0Schunli zhang - Sun Microsystems - Irvine United States break; 5035c242f9a0Schunli zhang - Sun Microsystems - Irvine United States case UIO_READ: 5036c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5037c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for read if the read size is larger than 5038c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * the current file block size. Block alignment is not 5039c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * considered. Partial arc_buf will be loaned out for read. 5040c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5041c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zp->z_blksz; 5042c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz < zcr_blksz_min) 5043c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zcr_blksz_min; 5044c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz > zcr_blksz_max) 5045c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zcr_blksz_max; 5046c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* avoid potential complexity of dealing with it */ 5047c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz > max_blksz) { 5048c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5049*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5050c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5051c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 50520a586ceaSMark Shellenbaum maxsize = zp->z_size - uio->uio_loffset; 5053c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size > maxsize) 5054c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size = maxsize; 5055c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5056c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size < blksz || vn_has_cached_data(vp)) { 5057c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5058*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5059c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5060c242f9a0Schunli zhang - Sun Microsystems - Irvine United States break; 5061c242f9a0Schunli zhang - Sun Microsystems - Irvine United States default: 5062c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5063*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5064c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5065c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5066c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio->uio_extflg = UIO_XUIO; 5067c242f9a0Schunli zhang - Sun Microsystems - Irvine United States XUIO_XUZC_RW(xuio) = ioflag; 5068c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5069c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (0); 5070c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5071c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5072c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /*ARGSUSED*/ 5073c242f9a0Schunli zhang - Sun Microsystems - Irvine United States static int 5074c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5075c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 5076c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int i; 5077c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 5078c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int ioflag = XUIO_XUZC_RW(xuio); 5079c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5080c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5081c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5082c242f9a0Schunli zhang - Sun Microsystems - Irvine United States i = dmu_xuio_cnt(xuio); 5083c242f9a0Schunli zhang - Sun Microsystems - Irvine United States while (i-- > 0) { 5084c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = dmu_xuio_arcbuf(xuio, i); 5085c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5086c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * if abuf == NULL, it must be a write buffer 5087c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * that has been returned in zfs_write(). 5088c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5089c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (abuf) 5090c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_return_arcbuf(abuf); 5091c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf || ioflag == UIO_WRITE); 5092c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5093c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5094c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_xuio_fini(xuio); 5095c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (0); 5096c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5097c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5098fa9e4066Sahrens /* 5099fa9e4066Sahrens * Predeclare these here so that the compiler assumes that 5100fa9e4066Sahrens * this is an "old style" function declaration that does 5101fa9e4066Sahrens * not include arguments => we won't get type mismatch errors 5102fa9e4066Sahrens * in the initializations that follow. 5103fa9e4066Sahrens */ 5104fa9e4066Sahrens static int zfs_inval(); 5105fa9e4066Sahrens static int zfs_isdir(); 5106fa9e4066Sahrens 5107fa9e4066Sahrens static int 5108fa9e4066Sahrens zfs_inval() 5109fa9e4066Sahrens { 5110*be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5111fa9e4066Sahrens } 5112fa9e4066Sahrens 5113fa9e4066Sahrens static int 5114fa9e4066Sahrens zfs_isdir() 5115fa9e4066Sahrens { 5116*be6fd75aSMatthew Ahrens return (SET_ERROR(EISDIR)); 5117fa9e4066Sahrens } 5118fa9e4066Sahrens /* 5119fa9e4066Sahrens * Directory vnode operations template 5120fa9e4066Sahrens */ 5121fa9e4066Sahrens vnodeops_t *zfs_dvnodeops; 5122fa9e4066Sahrens const fs_operation_def_t zfs_dvnodeops_template[] = { 5123aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5124aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5125aa59c4cbSrsb VOPNAME_READ, { .error = zfs_isdir }, 5126aa59c4cbSrsb VOPNAME_WRITE, { .error = zfs_isdir }, 5127aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5128aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5129aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5130aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5131aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5132aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 5133aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5134aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 5135aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5136aa59c4cbSrsb VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5137aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5138aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5139aa59c4cbSrsb VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5140aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5141aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5142aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5143aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5144aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5145aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5146aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5147df2381bfSpraks VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5148aa59c4cbSrsb NULL, NULL 5149fa9e4066Sahrens }; 5150fa9e4066Sahrens 5151fa9e4066Sahrens /* 5152fa9e4066Sahrens * Regular file vnode operations template 5153fa9e4066Sahrens */ 5154fa9e4066Sahrens vnodeops_t *zfs_fvnodeops; 5155fa9e4066Sahrens const fs_operation_def_t zfs_fvnodeops_template[] = { 5156aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5157aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5158aa59c4cbSrsb VOPNAME_READ, { .vop_read = zfs_read }, 5159aa59c4cbSrsb VOPNAME_WRITE, { .vop_write = zfs_write }, 5160aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5161aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5162aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5163aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5164aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5165aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5166aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5167aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5168aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5169aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5170aa59c4cbSrsb VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5171aa59c4cbSrsb VOPNAME_SPACE, { .vop_space = zfs_space }, 5172aa59c4cbSrsb VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5173aa59c4cbSrsb VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 5174aa59c4cbSrsb VOPNAME_MAP, { .vop_map = zfs_map }, 5175aa59c4cbSrsb VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5176aa59c4cbSrsb VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5177aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5178aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5179aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5180aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5181c242f9a0Schunli zhang - Sun Microsystems - Irvine United States VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 5182c242f9a0Schunli zhang - Sun Microsystems - Irvine United States VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5183aa59c4cbSrsb NULL, NULL 5184fa9e4066Sahrens }; 5185fa9e4066Sahrens 5186fa9e4066Sahrens /* 5187fa9e4066Sahrens * Symbolic link vnode operations template 5188fa9e4066Sahrens */ 5189fa9e4066Sahrens vnodeops_t *zfs_symvnodeops; 5190fa9e4066Sahrens const fs_operation_def_t zfs_symvnodeops_template[] = { 5191aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5192aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5193aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5194aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5195aa59c4cbSrsb VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5196aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5197aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5198aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5199aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5200aa59c4cbSrsb NULL, NULL 5201fa9e4066Sahrens }; 5202fa9e4066Sahrens 5203743a77edSAlan Wright /* 5204743a77edSAlan Wright * special share hidden files vnode operations template 5205743a77edSAlan Wright */ 5206743a77edSAlan Wright vnodeops_t *zfs_sharevnodeops; 5207743a77edSAlan Wright const fs_operation_def_t zfs_sharevnodeops_template[] = { 5208743a77edSAlan Wright VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5209743a77edSAlan Wright VOPNAME_ACCESS, { .vop_access = zfs_access }, 5210743a77edSAlan Wright VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5211743a77edSAlan Wright VOPNAME_FID, { .vop_fid = zfs_fid }, 5212743a77edSAlan Wright VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5213743a77edSAlan Wright VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5214743a77edSAlan Wright VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5215743a77edSAlan Wright VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5216743a77edSAlan Wright NULL, NULL 5217743a77edSAlan Wright }; 5218743a77edSAlan Wright 5219fa9e4066Sahrens /* 5220fa9e4066Sahrens * Extended attribute directory vnode operations template 5221fa9e4066Sahrens * This template is identical to the directory vnodes 5222fa9e4066Sahrens * operation template except for restricted operations: 5223fa9e4066Sahrens * VOP_MKDIR() 5224fa9e4066Sahrens * VOP_SYMLINK() 5225fa9e4066Sahrens * Note that there are other restrictions embedded in: 5226fa9e4066Sahrens * zfs_create() - restrict type to VREG 5227fa9e4066Sahrens * zfs_link() - no links into/out of attribute space 5228fa9e4066Sahrens * zfs_rename() - no moves into/out of attribute space 5229fa9e4066Sahrens */ 5230fa9e4066Sahrens vnodeops_t *zfs_xdvnodeops; 5231fa9e4066Sahrens const fs_operation_def_t zfs_xdvnodeops_template[] = { 5232aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5233aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5234aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5235aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5236aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5237aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5238aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5239aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 5240aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5241aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 5242aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5243aa59c4cbSrsb VOPNAME_MKDIR, { .error = zfs_inval }, 5244aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5245aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5246aa59c4cbSrsb VOPNAME_SYMLINK, { .error = zfs_inval }, 5247aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5248aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5249aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5250aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5251aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5252aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5253aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5254aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5255aa59c4cbSrsb NULL, NULL 5256fa9e4066Sahrens }; 5257fa9e4066Sahrens 5258fa9e4066Sahrens /* 5259fa9e4066Sahrens * Error vnode operations template 5260fa9e4066Sahrens */ 5261fa9e4066Sahrens vnodeops_t *zfs_evnodeops; 5262fa9e4066Sahrens const fs_operation_def_t zfs_evnodeops_template[] = { 5263aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5264aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5265aa59c4cbSrsb NULL, NULL 5266fa9e4066Sahrens }; 5267