1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5736b9155Smarks * Common Development and Distribution License (the "License"). 6736b9155Smarks * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22d39ee142SMark Shellenbaum * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 
23fa9e4066Sahrens */ 24fa9e4066Sahrens 2575c76197Speteh /* Portions Copyright 2007 Jeremy Teo */ 26*55da60b9SMark J Musante /* Portions Copyright 2010 Robert Milkowski */ 2775c76197Speteh 28fa9e4066Sahrens #include <sys/types.h> 29fa9e4066Sahrens #include <sys/param.h> 30fa9e4066Sahrens #include <sys/time.h> 31fa9e4066Sahrens #include <sys/systm.h> 32fa9e4066Sahrens #include <sys/sysmacros.h> 33fa9e4066Sahrens #include <sys/resource.h> 34fa9e4066Sahrens #include <sys/vfs.h> 35aa59c4cbSrsb #include <sys/vfs_opreg.h> 36fa9e4066Sahrens #include <sys/vnode.h> 37fa9e4066Sahrens #include <sys/file.h> 38fa9e4066Sahrens #include <sys/stat.h> 39fa9e4066Sahrens #include <sys/kmem.h> 40fa9e4066Sahrens #include <sys/taskq.h> 41fa9e4066Sahrens #include <sys/uio.h> 42fa9e4066Sahrens #include <sys/vmsystm.h> 43fa9e4066Sahrens #include <sys/atomic.h> 4444eda4d7Smaybee #include <sys/vm.h> 45fa9e4066Sahrens #include <vm/seg_vn.h> 46fa9e4066Sahrens #include <vm/pvn.h> 47fa9e4066Sahrens #include <vm/as.h> 480fab61baSJonathan W Adams #include <vm/kpm.h> 490fab61baSJonathan W Adams #include <vm/seg_kpm.h> 50fa9e4066Sahrens #include <sys/mman.h> 51fa9e4066Sahrens #include <sys/pathname.h> 52fa9e4066Sahrens #include <sys/cmn_err.h> 53fa9e4066Sahrens #include <sys/errno.h> 54fa9e4066Sahrens #include <sys/unistd.h> 55fa9e4066Sahrens #include <sys/zfs_dir.h> 56fa9e4066Sahrens #include <sys/zfs_acl.h> 57fa9e4066Sahrens #include <sys/zfs_ioctl.h> 58fa9e4066Sahrens #include <sys/fs/zfs.h> 59fa9e4066Sahrens #include <sys/dmu.h> 60*55da60b9SMark J Musante #include <sys/dmu_objset.h> 61fa9e4066Sahrens #include <sys/spa.h> 62fa9e4066Sahrens #include <sys/txg.h> 63fa9e4066Sahrens #include <sys/dbuf.h> 64fa9e4066Sahrens #include <sys/zap.h> 650a586ceaSMark Shellenbaum #include <sys/sa.h> 66fa9e4066Sahrens #include <sys/dirent.h> 67fa9e4066Sahrens #include <sys/policy.h> 68fa9e4066Sahrens #include <sys/sunddi.h> 69fa9e4066Sahrens #include <sys/filio.h> 70c1ce5987SMark Shellenbaum #include <sys/sid.h> 
71fa9e4066Sahrens #include "fs/fs_subr.h" 72fa9e4066Sahrens #include <sys/zfs_ctldir.h> 73da6c28aaSamw #include <sys/zfs_fuid.h> 740a586ceaSMark Shellenbaum #include <sys/zfs_sa.h> 75033f9833Sek #include <sys/dnlc.h> 76104e2ed7Sperrin #include <sys/zfs_rlock.h> 77da6c28aaSamw #include <sys/extdirent.h> 78da6c28aaSamw #include <sys/kidmap.h> 7967dbe2beSCasper H.S. Dik #include <sys/cred.h> 80b38f0970Sck #include <sys/attr.h> 81fa9e4066Sahrens 82fa9e4066Sahrens /* 83fa9e4066Sahrens * Programming rules. 84fa9e4066Sahrens * 85fa9e4066Sahrens * Each vnode op performs some logical unit of work. To do this, the ZPL must 86fa9e4066Sahrens * properly lock its in-core state, create a DMU transaction, do the work, 87fa9e4066Sahrens * record this work in the intent log (ZIL), commit the DMU transaction, 88da6c28aaSamw * and wait for the intent log to commit if it is a synchronous operation. 89da6c28aaSamw * Moreover, the vnode ops must work in both normal and log replay context. 90fa9e4066Sahrens * The ordering of events is important to avoid deadlocks and references 91fa9e4066Sahrens * to freed memory. The example below illustrates the following Big Rules: 92fa9e4066Sahrens * 93fa9e4066Sahrens * (1) A check must be made in each zfs thread for a mounted file system. 943cb34c60Sahrens * This is done avoiding races using ZFS_ENTER(zfsvfs). 953cb34c60Sahrens * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 963cb34c60Sahrens * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 973cb34c60Sahrens * can return EIO from the calling function. 98fa9e4066Sahrens * 99fa9e4066Sahrens * (2) VN_RELE() should always be the last thing except for zil_commit() 100b19a79ecSperrin * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 101fa9e4066Sahrens * First, if it's the last reference, the vnode/znode 102fa9e4066Sahrens * can be freed, so the zp may point to freed memory. 
 *	Second, the last reference will call zfs_zinactive(), which may
 *	induce a lot of work -- pushing cached pages (which acquires range
 *	locks) and syncing out cached atime changes.  Third,
 *	zfs_zinactive() may require a new tx, which could deadlock the
 *	system if you were already holding one.
 *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 *
 *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
 *	as they can span dmu_tx_assign() calls.
 *
 *  (4)	Always pass TXG_NOWAIT as the second argument to dmu_tx_assign().
 *	This is critical because we don't want to block while holding locks.
 *	Note, in particular, that if a lock is sometimes acquired before
 *	the tx assigns, and sometimes after (e.g. z_lock), then failing to
 *	use a non-blocking assign can deadlock the system.  The scenario:
 *
 *	Thread A has grabbed a lock before calling dmu_tx_assign().
 *	Thread B is in an already-assigned tx, and blocks for this lock.
 *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If dmu_tx_assign(tx, TXG_NOWAIT) returns ERESTART, then drop all
 *	locks, call dmu_tx_wait(), abort the tx, and try again (see the
 *	example below).
 *
 *  (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks.  This ensures that the ordering of events
 *	in the intent log matches the order in which they actually occurred.
1291209a471SNeil Perrin * During ZIL replay the zfs_log_* functions will update the sequence 1301209a471SNeil Perrin * number to indicate the zil transaction has replayed. 131fa9e4066Sahrens * 1327885c754Sperrin * (6) At the end of each vnode op, the DMU tx must always commit, 133fa9e4066Sahrens * regardless of whether there were any errors. 134fa9e4066Sahrens * 135b19a79ecSperrin * (7) After dropping all locks, invoke zil_commit(zilog, seq, foid) 136fa9e4066Sahrens * to ensure that synchronous semantics are provided when necessary. 137fa9e4066Sahrens * 138fa9e4066Sahrens * In general, this is how things should be ordered in each vnode op: 139fa9e4066Sahrens * 140fa9e4066Sahrens * ZFS_ENTER(zfsvfs); // exit if unmounted 141fa9e4066Sahrens * top: 142fa9e4066Sahrens * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD()) 143fa9e4066Sahrens * rw_enter(...); // grab any other locks you need 144fa9e4066Sahrens * tx = dmu_tx_create(...); // get DMU tx 145fa9e4066Sahrens * dmu_tx_hold_*(); // hold each object you might modify 1461209a471SNeil Perrin * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 147fa9e4066Sahrens * if (error) { 148fa9e4066Sahrens * rw_exit(...); // drop locks 149fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 150fa9e4066Sahrens * VN_RELE(...); // release held vnodes 1511209a471SNeil Perrin * if (error == ERESTART) { 1528a2f1b91Sahrens * dmu_tx_wait(tx); 1538a2f1b91Sahrens * dmu_tx_abort(tx); 154fa9e4066Sahrens * goto top; 155fa9e4066Sahrens * } 1568a2f1b91Sahrens * dmu_tx_abort(tx); // abort DMU tx 157fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 158fa9e4066Sahrens * return (error); // really out of space 159fa9e4066Sahrens * } 160fa9e4066Sahrens * error = do_real_work(); // do whatever this VOP does 161fa9e4066Sahrens * if (error == 0) 162b19a79ecSperrin * zfs_log_*(...); // on success, make ZIL entry 163fa9e4066Sahrens * dmu_tx_commit(tx); // commit DMU tx -- error or not 164fa9e4066Sahrens * 
rw_exit(...); // drop locks 165fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 166fa9e4066Sahrens * VN_RELE(...); // release held vnodes 167b19a79ecSperrin * zil_commit(zilog, seq, foid); // synchronous when necessary 168fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 169fa9e4066Sahrens * return (error); // done, report error 170fa9e4066Sahrens */ 1713cb34c60Sahrens 172fa9e4066Sahrens /* ARGSUSED */ 173fa9e4066Sahrens static int 174da6c28aaSamw zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 175fa9e4066Sahrens { 17667bd71c6Sperrin znode_t *zp = VTOZ(*vpp); 177b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 178b614fdaaSMark Shellenbaum 179b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 180b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 18167bd71c6Sperrin 1820a586ceaSMark Shellenbaum if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 183da6c28aaSamw ((flag & FAPPEND) == 0)) { 184b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 185da6c28aaSamw return (EPERM); 186da6c28aaSamw } 187da6c28aaSamw 188da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 189da6c28aaSamw ZTOV(zp)->v_type == VREG && 1900a586ceaSMark Shellenbaum !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 191b614fdaaSMark Shellenbaum if (fs_vscan(*vpp, cr, 0) != 0) { 192b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 193da6c28aaSamw return (EACCES); 194b614fdaaSMark Shellenbaum } 195b614fdaaSMark Shellenbaum } 196da6c28aaSamw 19767bd71c6Sperrin /* Keep a count of the synchronous opens in the znode */ 19867bd71c6Sperrin if (flag & (FSYNC | FDSYNC)) 19967bd71c6Sperrin atomic_inc_32(&zp->z_sync_cnt); 200da6c28aaSamw 201b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 202fa9e4066Sahrens return (0); 203fa9e4066Sahrens } 204fa9e4066Sahrens 205fa9e4066Sahrens /* ARGSUSED */ 206fa9e4066Sahrens static int 207da6c28aaSamw zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 208da6c28aaSamw caller_context_t *ct) 209fa9e4066Sahrens { 
21067bd71c6Sperrin znode_t *zp = VTOZ(vp); 211b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 212b614fdaaSMark Shellenbaum 213ee8143cbSChris Kirby /* 214ee8143cbSChris Kirby * Clean up any locks held by this process on the vp. 215ee8143cbSChris Kirby */ 216ee8143cbSChris Kirby cleanlocks(vp, ddi_get_pid(), 0); 217ee8143cbSChris Kirby cleanshares(vp, ddi_get_pid()); 218ee8143cbSChris Kirby 219b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 220b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 22167bd71c6Sperrin 22267bd71c6Sperrin /* Decrement the synchronous opens in the znode */ 223ecb72030Sperrin if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 22467bd71c6Sperrin atomic_dec_32(&zp->z_sync_cnt); 22567bd71c6Sperrin 226da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 227da6c28aaSamw ZTOV(zp)->v_type == VREG && 2280a586ceaSMark Shellenbaum !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 229da6c28aaSamw VERIFY(fs_vscan(vp, cr, 1) == 0); 230da6c28aaSamw 231b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 232fa9e4066Sahrens return (0); 233fa9e4066Sahrens } 234fa9e4066Sahrens 235fa9e4066Sahrens /* 236fa9e4066Sahrens * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 237fa9e4066Sahrens * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
238fa9e4066Sahrens */ 239fa9e4066Sahrens static int 240fa9e4066Sahrens zfs_holey(vnode_t *vp, int cmd, offset_t *off) 241fa9e4066Sahrens { 242fa9e4066Sahrens znode_t *zp = VTOZ(vp); 243fa9e4066Sahrens uint64_t noff = (uint64_t)*off; /* new offset */ 244fa9e4066Sahrens uint64_t file_sz; 245fa9e4066Sahrens int error; 246fa9e4066Sahrens boolean_t hole; 247fa9e4066Sahrens 2480a586ceaSMark Shellenbaum file_sz = zp->z_size; 249fa9e4066Sahrens if (noff >= file_sz) { 250fa9e4066Sahrens return (ENXIO); 251fa9e4066Sahrens } 252fa9e4066Sahrens 253fa9e4066Sahrens if (cmd == _FIO_SEEK_HOLE) 254fa9e4066Sahrens hole = B_TRUE; 255fa9e4066Sahrens else 256fa9e4066Sahrens hole = B_FALSE; 257fa9e4066Sahrens 258fa9e4066Sahrens error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 259fa9e4066Sahrens 260fa9e4066Sahrens /* end of file? */ 261fa9e4066Sahrens if ((error == ESRCH) || (noff > file_sz)) { 262fa9e4066Sahrens /* 263fa9e4066Sahrens * Handle the virtual hole at the end of file. 264fa9e4066Sahrens */ 265fa9e4066Sahrens if (hole) { 266fa9e4066Sahrens *off = file_sz; 267fa9e4066Sahrens return (0); 268fa9e4066Sahrens } 269fa9e4066Sahrens return (ENXIO); 270fa9e4066Sahrens } 271fa9e4066Sahrens 272fa9e4066Sahrens if (noff < *off) 273fa9e4066Sahrens return (error); 274fa9e4066Sahrens *off = noff; 275fa9e4066Sahrens return (error); 276fa9e4066Sahrens } 277fa9e4066Sahrens 278fa9e4066Sahrens /* ARGSUSED */ 279fa9e4066Sahrens static int 280fa9e4066Sahrens zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, 281da6c28aaSamw int *rvalp, caller_context_t *ct) 282fa9e4066Sahrens { 283fa9e4066Sahrens offset_t off; 284fa9e4066Sahrens int error; 285fa9e4066Sahrens zfsvfs_t *zfsvfs; 286f18faf3fSek znode_t *zp; 287fa9e4066Sahrens 288fa9e4066Sahrens switch (com) { 289ecb72030Sperrin case _FIOFFS: 290fa9e4066Sahrens return (zfs_sync(vp->v_vfsp, 0, cred)); 291fa9e4066Sahrens 292ea8dc4b6Seschrock /* 293ea8dc4b6Seschrock * The following two ioctls are used by bfu. 
Faking out, 294ea8dc4b6Seschrock * necessary to avoid bfu errors. 295ea8dc4b6Seschrock */ 296ecb72030Sperrin case _FIOGDIO: 297ecb72030Sperrin case _FIOSDIO: 298ea8dc4b6Seschrock return (0); 299ea8dc4b6Seschrock 300ecb72030Sperrin case _FIO_SEEK_DATA: 301ecb72030Sperrin case _FIO_SEEK_HOLE: 302fa9e4066Sahrens if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 303fa9e4066Sahrens return (EFAULT); 304fa9e4066Sahrens 305f18faf3fSek zp = VTOZ(vp); 306f18faf3fSek zfsvfs = zp->z_zfsvfs; 3073cb34c60Sahrens ZFS_ENTER(zfsvfs); 3083cb34c60Sahrens ZFS_VERIFY_ZP(zp); 309fa9e4066Sahrens 310fa9e4066Sahrens /* offset parameter is in/out */ 311fa9e4066Sahrens error = zfs_holey(vp, com, &off); 312fa9e4066Sahrens ZFS_EXIT(zfsvfs); 313fa9e4066Sahrens if (error) 314fa9e4066Sahrens return (error); 315fa9e4066Sahrens if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 316fa9e4066Sahrens return (EFAULT); 317fa9e4066Sahrens return (0); 318fa9e4066Sahrens } 319fa9e4066Sahrens return (ENOTTY); 320fa9e4066Sahrens } 321fa9e4066Sahrens 3220fab61baSJonathan W Adams /* 3230fab61baSJonathan W Adams * Utility functions to map and unmap a single physical page. These 3240fab61baSJonathan W Adams * are used to manage the mappable copies of ZFS file data, and therefore 3250fab61baSJonathan W Adams * do not update ref/mod bits. 3260fab61baSJonathan W Adams */ 3270fab61baSJonathan W Adams caddr_t 3280fab61baSJonathan W Adams zfs_map_page(page_t *pp, enum seg_rw rw) 3290fab61baSJonathan W Adams { 3300fab61baSJonathan W Adams if (kpm_enable) 3310fab61baSJonathan W Adams return (hat_kpm_mapin(pp, 0)); 3320fab61baSJonathan W Adams ASSERT(rw == S_READ || rw == S_WRITE); 3330fab61baSJonathan W Adams return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? 
PROT_WRITE : 0), 3340fab61baSJonathan W Adams (caddr_t)-1)); 3350fab61baSJonathan W Adams } 3360fab61baSJonathan W Adams 3370fab61baSJonathan W Adams void 3380fab61baSJonathan W Adams zfs_unmap_page(page_t *pp, caddr_t addr) 3390fab61baSJonathan W Adams { 3400fab61baSJonathan W Adams if (kpm_enable) { 3410fab61baSJonathan W Adams hat_kpm_mapout(pp, 0, addr); 3420fab61baSJonathan W Adams } else { 3430fab61baSJonathan W Adams ppmapout(addr); 3440fab61baSJonathan W Adams } 3450fab61baSJonathan W Adams } 3460fab61baSJonathan W Adams 347fa9e4066Sahrens /* 348fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data synchronized 349fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 350fa9e4066Sahrens * 351fa9e4066Sahrens * On Write: If we find a memory mapped page, we write to *both* 352fa9e4066Sahrens * the page and the dmu buffer. 353fa9e4066Sahrens */ 354ac05c741SMark Maybee static void 355ac05c741SMark Maybee update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) 356fa9e4066Sahrens { 357ac05c741SMark Maybee int64_t off; 358fa9e4066Sahrens 359fa9e4066Sahrens off = start & PAGEOFFSET; 360fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 361fa9e4066Sahrens page_t *pp; 362ac05c741SMark Maybee uint64_t nbytes = MIN(PAGESIZE - off, len); 363fa9e4066Sahrens 364fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 365fa9e4066Sahrens caddr_t va; 366fa9e4066Sahrens 3670fab61baSJonathan W Adams va = zfs_map_page(pp, S_WRITE); 3687bfdf011SNeil Perrin (void) dmu_read(os, oid, start+off, nbytes, va+off, 3697bfdf011SNeil Perrin DMU_READ_PREFETCH); 3700fab61baSJonathan W Adams zfs_unmap_page(pp, va); 371fa9e4066Sahrens page_unlock(pp); 372fa9e4066Sahrens } 373ac05c741SMark Maybee len -= nbytes; 374fa9e4066Sahrens off = 0; 375fa9e4066Sahrens } 376fa9e4066Sahrens } 377fa9e4066Sahrens 378fa9e4066Sahrens /* 379fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data 
synchronized 380fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 381fa9e4066Sahrens * 382fa9e4066Sahrens * On Read: We "read" preferentially from memory mapped pages, 383fa9e4066Sahrens * else we default from the dmu buffer. 384fa9e4066Sahrens * 385fa9e4066Sahrens * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 386fa9e4066Sahrens * the file is memory mapped. 387fa9e4066Sahrens */ 388fa9e4066Sahrens static int 389feb08c6bSbillm mappedread(vnode_t *vp, int nbytes, uio_t *uio) 390fa9e4066Sahrens { 391feb08c6bSbillm znode_t *zp = VTOZ(vp); 392feb08c6bSbillm objset_t *os = zp->z_zfsvfs->z_os; 393feb08c6bSbillm int64_t start, off; 394fa9e4066Sahrens int len = nbytes; 395fa9e4066Sahrens int error = 0; 396fa9e4066Sahrens 397fa9e4066Sahrens start = uio->uio_loffset; 398fa9e4066Sahrens off = start & PAGEOFFSET; 399fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 400fa9e4066Sahrens page_t *pp; 401feb08c6bSbillm uint64_t bytes = MIN(PAGESIZE - off, len); 402fa9e4066Sahrens 403fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 404fa9e4066Sahrens caddr_t va; 405fa9e4066Sahrens 4060fab61baSJonathan W Adams va = zfs_map_page(pp, S_READ); 407fa9e4066Sahrens error = uiomove(va + off, bytes, UIO_READ, uio); 4080fab61baSJonathan W Adams zfs_unmap_page(pp, va); 409fa9e4066Sahrens page_unlock(pp); 410fa9e4066Sahrens } else { 411feb08c6bSbillm error = dmu_read_uio(os, zp->z_id, uio, bytes); 412fa9e4066Sahrens } 413fa9e4066Sahrens len -= bytes; 414fa9e4066Sahrens off = 0; 415fa9e4066Sahrens if (error) 416fa9e4066Sahrens break; 417fa9e4066Sahrens } 418fa9e4066Sahrens return (error); 419fa9e4066Sahrens } 420fa9e4066Sahrens 421feb08c6bSbillm offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 422fa9e4066Sahrens 423fa9e4066Sahrens /* 424fa9e4066Sahrens * Read bytes from specified file into supplied buffer. 425fa9e4066Sahrens * 426fa9e4066Sahrens * IN: vp - vnode of file to be read from. 
427fa9e4066Sahrens * uio - structure supplying read location, range info, 428fa9e4066Sahrens * and return buffer. 429fa9e4066Sahrens * ioflag - SYNC flags; used to provide FRSYNC semantics. 430fa9e4066Sahrens * cr - credentials of caller. 431da6c28aaSamw * ct - caller context 432fa9e4066Sahrens * 433fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 434fa9e4066Sahrens * 435fa9e4066Sahrens * RETURN: 0 if success 436fa9e4066Sahrens * error code if failure 437fa9e4066Sahrens * 438fa9e4066Sahrens * Side Effects: 439fa9e4066Sahrens * vp - atime updated if byte count > 0 440fa9e4066Sahrens */ 441fa9e4066Sahrens /* ARGSUSED */ 442fa9e4066Sahrens static int 443fa9e4066Sahrens zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 444fa9e4066Sahrens { 445fa9e4066Sahrens znode_t *zp = VTOZ(vp); 446fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 447f18faf3fSek objset_t *os; 448feb08c6bSbillm ssize_t n, nbytes; 449feb08c6bSbillm int error; 450104e2ed7Sperrin rl_t *rl; 451c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_t *xuio = NULL; 452fa9e4066Sahrens 4533cb34c60Sahrens ZFS_ENTER(zfsvfs); 4543cb34c60Sahrens ZFS_VERIFY_ZP(zp); 455f18faf3fSek os = zfsvfs->z_os; 456fa9e4066Sahrens 4570a586ceaSMark Shellenbaum if (zp->z_pflags & ZFS_AV_QUARANTINED) { 4580616c50eSmarks ZFS_EXIT(zfsvfs); 4590616c50eSmarks return (EACCES); 4600616c50eSmarks } 4610616c50eSmarks 462fa9e4066Sahrens /* 463fa9e4066Sahrens * Validate file offset 464fa9e4066Sahrens */ 465fa9e4066Sahrens if (uio->uio_loffset < (offset_t)0) { 466fa9e4066Sahrens ZFS_EXIT(zfsvfs); 467fa9e4066Sahrens return (EINVAL); 468fa9e4066Sahrens } 469fa9e4066Sahrens 470fa9e4066Sahrens /* 471fa9e4066Sahrens * Fasttrack empty reads 472fa9e4066Sahrens */ 473fa9e4066Sahrens if (uio->uio_resid == 0) { 474fa9e4066Sahrens ZFS_EXIT(zfsvfs); 475fa9e4066Sahrens return (0); 476fa9e4066Sahrens } 477fa9e4066Sahrens 478fa9e4066Sahrens /* 479104e2ed7Sperrin * Check for mandatory locks 
480fa9e4066Sahrens */ 4810a586ceaSMark Shellenbaum if (MANDMODE(zp->z_mode)) { 482fa9e4066Sahrens if (error = chklock(vp, FREAD, 483fa9e4066Sahrens uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 484fa9e4066Sahrens ZFS_EXIT(zfsvfs); 485fa9e4066Sahrens return (error); 486fa9e4066Sahrens } 487fa9e4066Sahrens } 488fa9e4066Sahrens 489fa9e4066Sahrens /* 490fa9e4066Sahrens * If we're in FRSYNC mode, sync out this znode before reading it. 491fa9e4066Sahrens */ 492*55da60b9SMark J Musante if (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 493b19a79ecSperrin zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 494fa9e4066Sahrens 495fa9e4066Sahrens /* 496104e2ed7Sperrin * Lock the range against changes. 497fa9e4066Sahrens */ 498104e2ed7Sperrin rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 499104e2ed7Sperrin 500fa9e4066Sahrens /* 501fa9e4066Sahrens * If we are reading past end-of-file we can skip 502fa9e4066Sahrens * to the end; but we might still need to set atime. 
503fa9e4066Sahrens */ 5040a586ceaSMark Shellenbaum if (uio->uio_loffset >= zp->z_size) { 505fa9e4066Sahrens error = 0; 506fa9e4066Sahrens goto out; 507fa9e4066Sahrens } 508fa9e4066Sahrens 5090a586ceaSMark Shellenbaum ASSERT(uio->uio_loffset < zp->z_size); 5100a586ceaSMark Shellenbaum n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 511feb08c6bSbillm 512c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((uio->uio_extflg == UIO_XUIO) && 513c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 514c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int nblk; 515c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz = zp->z_blksz; 516c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uint64_t offset = uio->uio_loffset; 517c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 518c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio = (xuio_t *)uio; 519c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((ISP2(blksz))) { 520c242f9a0Schunli zhang - Sun Microsystems - Irvine United States nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 521c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz)) / blksz; 522c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 523c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(offset + n <= blksz); 524c242f9a0Schunli zhang - Sun Microsystems - Irvine United States nblk = 1; 525c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 526570de38fSSurya Prakki (void) dmu_xuio_init(xuio, nblk); 527c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 528c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (vn_has_cached_data(vp)) { 529c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 530c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * For simplicity, 
we always allocate a full buffer 531c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * even if we only expect to read a portion of a block. 532c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 533c242f9a0Schunli zhang - Sun Microsystems - Irvine United States while (--nblk >= 0) { 534570de38fSSurya Prakki (void) dmu_xuio_add(xuio, 5350a586ceaSMark Shellenbaum dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 5360a586ceaSMark Shellenbaum blksz), 0, blksz); 537c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 538c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 539c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 540c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 541feb08c6bSbillm while (n > 0) { 542feb08c6bSbillm nbytes = MIN(n, zfs_read_chunk_size - 543feb08c6bSbillm P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 544fa9e4066Sahrens 545feb08c6bSbillm if (vn_has_cached_data(vp)) 546feb08c6bSbillm error = mappedread(vp, nbytes, uio); 547feb08c6bSbillm else 548feb08c6bSbillm error = dmu_read_uio(os, zp->z_id, uio, nbytes); 549b87f3af3Sperrin if (error) { 550b87f3af3Sperrin /* convert checksum errors into IO errors */ 551b87f3af3Sperrin if (error == ECKSUM) 552b87f3af3Sperrin error = EIO; 553feb08c6bSbillm break; 554b87f3af3Sperrin } 555fa9e4066Sahrens 556feb08c6bSbillm n -= nbytes; 557fa9e4066Sahrens } 558fa9e4066Sahrens out: 559c5c6ffa0Smaybee zfs_range_unlock(rl); 560fa9e4066Sahrens 561fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 562fa9e4066Sahrens ZFS_EXIT(zfsvfs); 563fa9e4066Sahrens return (error); 564fa9e4066Sahrens } 565fa9e4066Sahrens 566fa9e4066Sahrens /* 567fa9e4066Sahrens * Write the bytes to a file. 568fa9e4066Sahrens * 569fa9e4066Sahrens * IN: vp - vnode of file to be written to. 570fa9e4066Sahrens * uio - structure supplying write location, range info, 571fa9e4066Sahrens * and data buffer. 572fa9e4066Sahrens * ioflag - FAPPEND flag set if in append mode. 
573fa9e4066Sahrens * cr - credentials of caller. 574da6c28aaSamw * ct - caller context (NFS/CIFS fem monitor only) 575fa9e4066Sahrens * 576fa9e4066Sahrens * OUT: uio - updated offset and range. 577fa9e4066Sahrens * 578fa9e4066Sahrens * RETURN: 0 if success 579fa9e4066Sahrens * error code if failure 580fa9e4066Sahrens * 581fa9e4066Sahrens * Timestamps: 582fa9e4066Sahrens * vp - ctime|mtime updated if byte count > 0 583fa9e4066Sahrens */ 5840a586ceaSMark Shellenbaum 585fa9e4066Sahrens /* ARGSUSED */ 586fa9e4066Sahrens static int 587fa9e4066Sahrens zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 588fa9e4066Sahrens { 589fa9e4066Sahrens znode_t *zp = VTOZ(vp); 590fa9e4066Sahrens rlim64_t limit = uio->uio_llimit; 591fa9e4066Sahrens ssize_t start_resid = uio->uio_resid; 592fa9e4066Sahrens ssize_t tx_bytes; 593fa9e4066Sahrens uint64_t end_size; 594fa9e4066Sahrens dmu_tx_t *tx; 595fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 596f18faf3fSek zilog_t *zilog; 597fa9e4066Sahrens offset_t woff; 598fa9e4066Sahrens ssize_t n, nbytes; 599104e2ed7Sperrin rl_t *rl; 600fa9e4066Sahrens int max_blksz = zfsvfs->z_max_blksz; 601104e2ed7Sperrin int error; 6022fdbea25SAleksandr Guzovskiy arc_buf_t *abuf; 603c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *aiov; 604c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_t *xuio = NULL; 605c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int i_iov = 0; 606c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int iovcnt = uio->uio_iovcnt; 607c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *iovp = uio->uio_iov; 608c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int write_eof; 6090a586ceaSMark Shellenbaum int count = 0; 6100a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[4]; 6110a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 612fa9e4066Sahrens 613fa9e4066Sahrens /* 614fa9e4066Sahrens * Fasttrack empty 
write 615fa9e4066Sahrens */ 616104e2ed7Sperrin n = start_resid; 617fa9e4066Sahrens if (n == 0) 618fa9e4066Sahrens return (0); 619fa9e4066Sahrens 620104e2ed7Sperrin if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 621104e2ed7Sperrin limit = MAXOFFSET_T; 622104e2ed7Sperrin 6233cb34c60Sahrens ZFS_ENTER(zfsvfs); 6243cb34c60Sahrens ZFS_VERIFY_ZP(zp); 625c09193bfSmarks 6260a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 6270a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 6280a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 6290a586ceaSMark Shellenbaum &zp->z_size, 8); 6300a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 6310a586ceaSMark Shellenbaum &zp->z_pflags, 8); 6320a586ceaSMark Shellenbaum 633c09193bfSmarks /* 634c09193bfSmarks * If immutable or not appending then return EPERM 635c09193bfSmarks */ 6360a586ceaSMark Shellenbaum if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 6370a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 6380a586ceaSMark Shellenbaum (uio->uio_loffset < zp->z_size))) { 639c09193bfSmarks ZFS_EXIT(zfsvfs); 640c09193bfSmarks return (EPERM); 641c09193bfSmarks } 642c09193bfSmarks 643f18faf3fSek zilog = zfsvfs->z_log; 644fa9e4066Sahrens 64541865f27SWilliam Gorrell /* 64641865f27SWilliam Gorrell * Validate file offset 64741865f27SWilliam Gorrell */ 6480a586ceaSMark Shellenbaum woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 64941865f27SWilliam Gorrell if (woff < 0) { 65041865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 65141865f27SWilliam Gorrell return (EINVAL); 65241865f27SWilliam Gorrell } 65341865f27SWilliam Gorrell 65441865f27SWilliam Gorrell /* 65541865f27SWilliam Gorrell * Check for mandatory locks before calling zfs_range_lock() 65641865f27SWilliam Gorrell * in order to prevent a deadlock with locks set via fcntl(). 
65741865f27SWilliam Gorrell */ 6580a586ceaSMark Shellenbaum if (MANDMODE((mode_t)zp->z_mode) && 65941865f27SWilliam Gorrell (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 66041865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 66141865f27SWilliam Gorrell return (error); 66241865f27SWilliam Gorrell } 66341865f27SWilliam Gorrell 664fa9e4066Sahrens /* 665c5c6ffa0Smaybee * Pre-fault the pages to ensure slow (eg NFS) pages 666104e2ed7Sperrin * don't hold up txg. 667c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Skip this if uio contains loaned arc_buf. 668fa9e4066Sahrens */ 669c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((uio->uio_extflg == UIO_XUIO) && 670c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 671c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio = (xuio_t *)uio; 672c242f9a0Schunli zhang - Sun Microsystems - Irvine United States else 673c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio_prefaultpages(n, uio); 674fa9e4066Sahrens 675fa9e4066Sahrens /* 676fa9e4066Sahrens * If in append mode, set the io offset pointer to eof. 677fa9e4066Sahrens */ 678104e2ed7Sperrin if (ioflag & FAPPEND) { 679104e2ed7Sperrin /* 68041865f27SWilliam Gorrell * Obtain an appending range lock to guarantee file append 68141865f27SWilliam Gorrell * semantics. We reset the write offset once we have the lock. 682104e2ed7Sperrin */ 683104e2ed7Sperrin rl = zfs_range_lock(zp, 0, n, RL_APPEND); 68441865f27SWilliam Gorrell woff = rl->r_off; 685104e2ed7Sperrin if (rl->r_len == UINT64_MAX) { 68641865f27SWilliam Gorrell /* 68741865f27SWilliam Gorrell * We overlocked the file because this write will cause 68841865f27SWilliam Gorrell * the file block size to increase. 68941865f27SWilliam Gorrell * Note that zp_size cannot change with this lock held. 
69041865f27SWilliam Gorrell */ 6910a586ceaSMark Shellenbaum woff = zp->z_size; 692104e2ed7Sperrin } 69341865f27SWilliam Gorrell uio->uio_loffset = woff; 694fa9e4066Sahrens } else { 695fa9e4066Sahrens /* 69641865f27SWilliam Gorrell * Note that if the file block size will change as a result of 69741865f27SWilliam Gorrell * this write, then this range lock will lock the entire file 69841865f27SWilliam Gorrell * so that we can re-write the block safely. 699fa9e4066Sahrens */ 700104e2ed7Sperrin rl = zfs_range_lock(zp, woff, n, RL_WRITER); 701fa9e4066Sahrens } 702fa9e4066Sahrens 703fa9e4066Sahrens if (woff >= limit) { 704feb08c6bSbillm zfs_range_unlock(rl); 705feb08c6bSbillm ZFS_EXIT(zfsvfs); 706feb08c6bSbillm return (EFBIG); 707fa9e4066Sahrens } 708fa9e4066Sahrens 709fa9e4066Sahrens if ((woff + n) > limit || woff > (limit - n)) 710fa9e4066Sahrens n = limit - woff; 711fa9e4066Sahrens 712c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* Will this write extend the file length? */ 7130a586ceaSMark Shellenbaum write_eof = (woff + n > zp->z_size); 714c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 7150a586ceaSMark Shellenbaum end_size = MAX(zp->z_size, woff + n); 716fa9e4066Sahrens 717104e2ed7Sperrin /* 718feb08c6bSbillm * Write the file in reasonable size chunks. Each chunk is written 719feb08c6bSbillm * in a separate transaction; this keeps the intent log records small 720feb08c6bSbillm * and allows us to do more fine-grained space accounting. 
721104e2ed7Sperrin */ 722feb08c6bSbillm while (n > 0) { 7232fdbea25SAleksandr Guzovskiy abuf = NULL; 7242fdbea25SAleksandr Guzovskiy woff = uio->uio_loffset; 7252fdbea25SAleksandr Guzovskiy again: 7260a586ceaSMark Shellenbaum if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 7270a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 7282fdbea25SAleksandr Guzovskiy if (abuf != NULL) 7292fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 73014843421SMatthew Ahrens error = EDQUOT; 73114843421SMatthew Ahrens break; 73214843421SMatthew Ahrens } 7332fdbea25SAleksandr Guzovskiy 734c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (xuio && abuf == NULL) { 735c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(i_iov < iovcnt); 736c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov = &iovp[i_iov]; 737c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = dmu_xuio_arcbuf(xuio, i_iov); 738c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_xuio_clear(xuio, i_iov); 739c242f9a0Schunli zhang - Sun Microsystems - Irvine United States DTRACE_PROBE3(zfs_cp_write, int, i_iov, 740c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *, aiov, arc_buf_t *, abuf); 741c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT((aiov->iov_base == abuf->b_data) || 742c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ((char *)aiov->iov_base - (char *)abuf->b_data + 743c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_len == arc_buf_size(abuf))); 744c242f9a0Schunli zhang - Sun Microsystems - Irvine United States i_iov++; 745c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else if (abuf == NULL && n >= max_blksz && 7460a586ceaSMark Shellenbaum woff >= zp->z_size && 7472fdbea25SAleksandr Guzovskiy P2PHASE(woff, max_blksz) == 0 && 7482fdbea25SAleksandr Guzovskiy zp->z_blksz == max_blksz) { 749c242f9a0Schunli zhang - 
Sun Microsystems - Irvine United States /* 750c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * This write covers a full block. "Borrow" a buffer 751c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * from the dmu so that we can fill it before we enter 752c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * a transaction. This avoids the possibility of 753c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * holding up the transaction if the data copy hangs 754c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * up on a pagefault (e.g., from an NFS server mapping). 755c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 7562fdbea25SAleksandr Guzovskiy size_t cbytes; 7572fdbea25SAleksandr Guzovskiy 7580a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 7590a586ceaSMark Shellenbaum max_blksz); 7602fdbea25SAleksandr Guzovskiy ASSERT(abuf != NULL); 7612fdbea25SAleksandr Guzovskiy ASSERT(arc_buf_size(abuf) == max_blksz); 7622fdbea25SAleksandr Guzovskiy if (error = uiocopy(abuf->b_data, max_blksz, 7632fdbea25SAleksandr Guzovskiy UIO_WRITE, uio, &cbytes)) { 7642fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 7652fdbea25SAleksandr Guzovskiy break; 7662fdbea25SAleksandr Guzovskiy } 7672fdbea25SAleksandr Guzovskiy ASSERT(cbytes == max_blksz); 7682fdbea25SAleksandr Guzovskiy } 7692fdbea25SAleksandr Guzovskiy 7702fdbea25SAleksandr Guzovskiy /* 7712fdbea25SAleksandr Guzovskiy * Start a transaction. 
7722fdbea25SAleksandr Guzovskiy */ 773feb08c6bSbillm tx = dmu_tx_create(zfsvfs->z_os); 7740a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 775feb08c6bSbillm dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 7760a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 7771209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 778feb08c6bSbillm if (error) { 7791209a471SNeil Perrin if (error == ERESTART) { 780feb08c6bSbillm dmu_tx_wait(tx); 781feb08c6bSbillm dmu_tx_abort(tx); 7822fdbea25SAleksandr Guzovskiy goto again; 783feb08c6bSbillm } 784feb08c6bSbillm dmu_tx_abort(tx); 7852fdbea25SAleksandr Guzovskiy if (abuf != NULL) 7862fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 787feb08c6bSbillm break; 788feb08c6bSbillm } 789104e2ed7Sperrin 790feb08c6bSbillm /* 791feb08c6bSbillm * If zfs_range_lock() over-locked we grow the blocksize 792feb08c6bSbillm * and then reduce the lock range. This will only happen 793feb08c6bSbillm * on the first iteration since zfs_range_reduce() will 794feb08c6bSbillm * shrink down r_len to the appropriate size. 795feb08c6bSbillm */ 796feb08c6bSbillm if (rl->r_len == UINT64_MAX) { 797feb08c6bSbillm uint64_t new_blksz; 798feb08c6bSbillm 799feb08c6bSbillm if (zp->z_blksz > max_blksz) { 800feb08c6bSbillm ASSERT(!ISP2(zp->z_blksz)); 801feb08c6bSbillm new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 802feb08c6bSbillm } else { 803feb08c6bSbillm new_blksz = MIN(end_size, max_blksz); 804feb08c6bSbillm } 805feb08c6bSbillm zfs_grow_blocksize(zp, new_blksz, tx); 806feb08c6bSbillm zfs_range_reduce(rl, woff, n); 807fa9e4066Sahrens } 808fa9e4066Sahrens 809fa9e4066Sahrens /* 810fa9e4066Sahrens * XXX - should we really limit each write to z_max_blksz? 811fa9e4066Sahrens * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
812fa9e4066Sahrens */ 813fa9e4066Sahrens nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 814fa9e4066Sahrens 8152fdbea25SAleksandr Guzovskiy if (abuf == NULL) { 8162fdbea25SAleksandr Guzovskiy tx_bytes = uio->uio_resid; 81794d1a210STim Haley error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 81894d1a210STim Haley uio, nbytes, tx); 8192fdbea25SAleksandr Guzovskiy tx_bytes -= uio->uio_resid; 8202fdbea25SAleksandr Guzovskiy } else { 8212fdbea25SAleksandr Guzovskiy tx_bytes = nbytes; 822c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 823c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 824c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * If this is not a full block write, but we are 825c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * extending the file past EOF and this data starts 826c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * block-aligned, use assign_arcbuf(). Otherwise, 827c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * write via dmu_write(). 
828c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 829c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (tx_bytes < max_blksz && (!write_eof || 830c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_base != abuf->b_data)) { 831c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio); 832c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_write(zfsvfs->z_os, zp->z_id, woff, 833c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_len, aiov->iov_base, tx); 834c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_return_arcbuf(abuf); 835c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_stat_wbuf_copied(); 836c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 837c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio || tx_bytes == max_blksz); 8380a586ceaSMark Shellenbaum dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 8390a586ceaSMark Shellenbaum woff, abuf, tx); 840c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 8412fdbea25SAleksandr Guzovskiy ASSERT(tx_bytes <= uio->uio_resid); 8422fdbea25SAleksandr Guzovskiy uioskip(uio, tx_bytes); 8432fdbea25SAleksandr Guzovskiy } 8442fdbea25SAleksandr Guzovskiy if (tx_bytes && vn_has_cached_data(vp)) { 845ac05c741SMark Maybee update_pages(vp, woff, 846ac05c741SMark Maybee tx_bytes, zfsvfs->z_os, zp->z_id); 8472fdbea25SAleksandr Guzovskiy } 848fa9e4066Sahrens 849feb08c6bSbillm /* 850feb08c6bSbillm * If we made no progress, we're done. If we made even 851feb08c6bSbillm * partial progress, update the znode and ZIL accordingly. 
852feb08c6bSbillm */ 853feb08c6bSbillm if (tx_bytes == 0) { 8540a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 8550a586ceaSMark Shellenbaum (void *)&zp->z_size, sizeof (uint64_t), tx); 856af2c4821Smaybee dmu_tx_commit(tx); 857feb08c6bSbillm ASSERT(error != 0); 858fa9e4066Sahrens break; 859fa9e4066Sahrens } 860fa9e4066Sahrens 861169cdae2Smarks /* 862169cdae2Smarks * Clear Set-UID/Set-GID bits on successful write if not 863169cdae2Smarks * privileged and at least one of the excute bits is set. 864169cdae2Smarks * 865169cdae2Smarks * It would be nice to to this after all writes have 866169cdae2Smarks * been done, but that would still expose the ISUID/ISGID 867169cdae2Smarks * to another app after the partial write is committed. 868da6c28aaSamw * 869169cdae2Smarks */ 870169cdae2Smarks mutex_enter(&zp->z_acl_lock); 8710a586ceaSMark Shellenbaum if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 872169cdae2Smarks (S_IXUSR >> 6))) != 0 && 8730a586ceaSMark Shellenbaum (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 874169cdae2Smarks secpolicy_vnode_setid_retain(cr, 8750a586ceaSMark Shellenbaum (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 8760a586ceaSMark Shellenbaum uint64_t newmode; 8770a586ceaSMark Shellenbaum zp->z_mode &= ~(S_ISUID | S_ISGID); 8780a586ceaSMark Shellenbaum newmode = zp->z_mode; 8790a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 8800a586ceaSMark Shellenbaum (void *)&newmode, sizeof (uint64_t), tx); 881169cdae2Smarks } 882169cdae2Smarks mutex_exit(&zp->z_acl_lock); 883169cdae2Smarks 8840a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 8850a586ceaSMark Shellenbaum B_TRUE); 886fa9e4066Sahrens 887fa9e4066Sahrens /* 888feb08c6bSbillm * Update the file size (zp_size) if it has changed; 889feb08c6bSbillm * account for possible concurrent updates. 
890fa9e4066Sahrens */ 8910a586ceaSMark Shellenbaum while ((end_size = zp->z_size) < uio->uio_loffset) { 8920a586ceaSMark Shellenbaum (void) atomic_cas_64(&zp->z_size, end_size, 893fa9e4066Sahrens uio->uio_loffset); 8940a586ceaSMark Shellenbaum ASSERT(error == 0); 8950a586ceaSMark Shellenbaum } 8960a586ceaSMark Shellenbaum error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 8970a586ceaSMark Shellenbaum 898feb08c6bSbillm zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 899feb08c6bSbillm dmu_tx_commit(tx); 900fa9e4066Sahrens 901feb08c6bSbillm if (error != 0) 902feb08c6bSbillm break; 903feb08c6bSbillm ASSERT(tx_bytes == nbytes); 904feb08c6bSbillm n -= nbytes; 905feb08c6bSbillm } 906fa9e4066Sahrens 907c5c6ffa0Smaybee zfs_range_unlock(rl); 908fa9e4066Sahrens 909fa9e4066Sahrens /* 910fa9e4066Sahrens * If we're in replay mode, or we made no progress, return error. 911fa9e4066Sahrens * Otherwise, it's at least a partial write, so it's successful. 912fa9e4066Sahrens */ 9131209a471SNeil Perrin if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 914fa9e4066Sahrens ZFS_EXIT(zfsvfs); 915fa9e4066Sahrens return (error); 916fa9e4066Sahrens } 917fa9e4066Sahrens 918*55da60b9SMark J Musante if (ioflag & (FSYNC | FDSYNC) || 919*55da60b9SMark J Musante zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 920b19a79ecSperrin zil_commit(zilog, zp->z_last_itx, zp->z_id); 921fa9e4066Sahrens 922fa9e4066Sahrens ZFS_EXIT(zfsvfs); 923fa9e4066Sahrens return (0); 924fa9e4066Sahrens } 925fa9e4066Sahrens 926c5c6ffa0Smaybee void 927b24ab676SJeff Bonwick zfs_get_done(zgd_t *zgd, int error) 928c5c6ffa0Smaybee { 929b24ab676SJeff Bonwick znode_t *zp = zgd->zgd_private; 930b24ab676SJeff Bonwick objset_t *os = zp->z_zfsvfs->z_os; 931b24ab676SJeff Bonwick 932b24ab676SJeff Bonwick if (zgd->zgd_db) 933b24ab676SJeff Bonwick dmu_buf_rele(zgd->zgd_db, zgd); 934b24ab676SJeff Bonwick 935b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 936c5c6ffa0Smaybee 9379d3574bfSNeil Perrin /* 9389d3574bfSNeil 
Perrin * Release the vnode asynchronously as we currently have the 9399d3574bfSNeil Perrin * txg stopped from syncing. 9409d3574bfSNeil Perrin */ 941b24ab676SJeff Bonwick VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 942b24ab676SJeff Bonwick 943b24ab676SJeff Bonwick if (error == 0 && zgd->zgd_bp) 944b24ab676SJeff Bonwick zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 945b24ab676SJeff Bonwick 94667bd71c6Sperrin kmem_free(zgd, sizeof (zgd_t)); 947c5c6ffa0Smaybee } 948c5c6ffa0Smaybee 949c87b8fc5SMark J Musante #ifdef DEBUG 950c87b8fc5SMark J Musante static int zil_fault_io = 0; 951c87b8fc5SMark J Musante #endif 952c87b8fc5SMark J Musante 953fa9e4066Sahrens /* 954fa9e4066Sahrens * Get data to generate a TX_WRITE intent log record. 955fa9e4066Sahrens */ 956fa9e4066Sahrens int 957c5c6ffa0Smaybee zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 958fa9e4066Sahrens { 959fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 960fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 961fa9e4066Sahrens znode_t *zp; 962b24ab676SJeff Bonwick uint64_t object = lr->lr_foid; 963b24ab676SJeff Bonwick uint64_t offset = lr->lr_offset; 964b24ab676SJeff Bonwick uint64_t size = lr->lr_length; 965b24ab676SJeff Bonwick blkptr_t *bp = &lr->lr_blkptr; 966c5c6ffa0Smaybee dmu_buf_t *db; 96767bd71c6Sperrin zgd_t *zgd; 968fa9e4066Sahrens int error = 0; 969fa9e4066Sahrens 970b24ab676SJeff Bonwick ASSERT(zio != NULL); 971b24ab676SJeff Bonwick ASSERT(size != 0); 972fa9e4066Sahrens 973fa9e4066Sahrens /* 974104e2ed7Sperrin * Nothing to do if the file has been removed 975fa9e4066Sahrens */ 976b24ab676SJeff Bonwick if (zfs_zget(zfsvfs, object, &zp) != 0) 977fa9e4066Sahrens return (ENOENT); 978893a6d32Sahrens if (zp->z_unlinked) { 9799d3574bfSNeil Perrin /* 9809d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 9819d3574bfSNeil Perrin * txg stopped from syncing. 
9829d3574bfSNeil Perrin */ 9839d3574bfSNeil Perrin VN_RELE_ASYNC(ZTOV(zp), 9849d3574bfSNeil Perrin dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 985fa9e4066Sahrens return (ENOENT); 986fa9e4066Sahrens } 987fa9e4066Sahrens 988b24ab676SJeff Bonwick zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 989b24ab676SJeff Bonwick zgd->zgd_zilog = zfsvfs->z_log; 990b24ab676SJeff Bonwick zgd->zgd_private = zp; 991b24ab676SJeff Bonwick 992fa9e4066Sahrens /* 993fa9e4066Sahrens * Write records come in two flavors: immediate and indirect. 994fa9e4066Sahrens * For small writes it's cheaper to store the data with the 995fa9e4066Sahrens * log record (immediate); for large writes it's cheaper to 996fa9e4066Sahrens * sync the data and get a pointer to it (indirect) so that 997fa9e4066Sahrens * we don't have to write the data twice. 998fa9e4066Sahrens */ 999104e2ed7Sperrin if (buf != NULL) { /* immediate write */ 1000b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1001104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 10020a586ceaSMark Shellenbaum if (offset >= zp->z_size) { 1003104e2ed7Sperrin error = ENOENT; 1004b24ab676SJeff Bonwick } else { 1005b24ab676SJeff Bonwick error = dmu_read(os, object, offset, size, buf, 1006b24ab676SJeff Bonwick DMU_READ_NO_PREFETCH); 1007104e2ed7Sperrin } 1008b24ab676SJeff Bonwick ASSERT(error == 0 || error == ENOENT); 1009104e2ed7Sperrin } else { /* indirect write */ 1010fa9e4066Sahrens /* 1011104e2ed7Sperrin * Have to lock the whole block to ensure when it's 1012104e2ed7Sperrin * written out and it's checksum is being calculated 1013104e2ed7Sperrin * that no one can change the data. We need to re-check 1014104e2ed7Sperrin * blocksize after we get the lock in case it's changed! 1015fa9e4066Sahrens */ 1016104e2ed7Sperrin for (;;) { 1017b24ab676SJeff Bonwick uint64_t blkoff; 1018b24ab676SJeff Bonwick size = zp->z_blksz; 1019dfe73b3dSJeff Bonwick blkoff = ISP2(size) ? 
P2PHASE(offset, size) : offset; 1020b24ab676SJeff Bonwick offset -= blkoff; 1021b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1022b24ab676SJeff Bonwick RL_READER); 1023b24ab676SJeff Bonwick if (zp->z_blksz == size) 1024104e2ed7Sperrin break; 1025b24ab676SJeff Bonwick offset += blkoff; 1026b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 1027104e2ed7Sperrin } 1028104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 10290a586ceaSMark Shellenbaum if (lr->lr_offset >= zp->z_size) 1030104e2ed7Sperrin error = ENOENT; 1031c87b8fc5SMark J Musante #ifdef DEBUG 1032c87b8fc5SMark J Musante if (zil_fault_io) { 1033c87b8fc5SMark J Musante error = EIO; 1034c87b8fc5SMark J Musante zil_fault_io = 0; 1035c87b8fc5SMark J Musante } 1036c87b8fc5SMark J Musante #endif 1037b24ab676SJeff Bonwick if (error == 0) 103847cb52daSJeff Bonwick error = dmu_buf_hold(os, object, offset, zgd, &db, 103947cb52daSJeff Bonwick DMU_READ_NO_PREFETCH); 1040c87b8fc5SMark J Musante 1041975c32a0SNeil Perrin if (error == 0) { 1042b24ab676SJeff Bonwick zgd->zgd_db = db; 1043b24ab676SJeff Bonwick zgd->zgd_bp = bp; 1044b24ab676SJeff Bonwick 1045b24ab676SJeff Bonwick ASSERT(db->db_offset == offset); 1046b24ab676SJeff Bonwick ASSERT(db->db_size == size); 1047b24ab676SJeff Bonwick 1048b24ab676SJeff Bonwick error = dmu_sync(zio, lr->lr_common.lrc_txg, 1049b24ab676SJeff Bonwick zfs_get_done, zgd); 1050b24ab676SJeff Bonwick ASSERT(error || lr->lr_length <= zp->z_blksz); 1051b24ab676SJeff Bonwick 1052975c32a0SNeil Perrin /* 1053b24ab676SJeff Bonwick * On success, we need to wait for the write I/O 1054b24ab676SJeff Bonwick * initiated by dmu_sync() to complete before we can 1055b24ab676SJeff Bonwick * release this dbuf. We will finish everything up 1056b24ab676SJeff Bonwick * in the zfs_get_done() callback. 
1057975c32a0SNeil Perrin */ 1058b24ab676SJeff Bonwick if (error == 0) 1059b24ab676SJeff Bonwick return (0); 1060975c32a0SNeil Perrin 1061b24ab676SJeff Bonwick if (error == EALREADY) { 1062b24ab676SJeff Bonwick lr->lr_common.lrc_txtype = TX_WRITE2; 1063b24ab676SJeff Bonwick error = 0; 1064b24ab676SJeff Bonwick } 1065975c32a0SNeil Perrin } 1066fa9e4066Sahrens } 1067b24ab676SJeff Bonwick 1068b24ab676SJeff Bonwick zfs_get_done(zgd, error); 1069b24ab676SJeff Bonwick 1070fa9e4066Sahrens return (error); 1071fa9e4066Sahrens } 1072fa9e4066Sahrens 1073fa9e4066Sahrens /*ARGSUSED*/ 1074fa9e4066Sahrens static int 1075da6c28aaSamw zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1076da6c28aaSamw caller_context_t *ct) 1077fa9e4066Sahrens { 1078fa9e4066Sahrens znode_t *zp = VTOZ(vp); 1079fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1080fa9e4066Sahrens int error; 1081fa9e4066Sahrens 10823cb34c60Sahrens ZFS_ENTER(zfsvfs); 10833cb34c60Sahrens ZFS_VERIFY_ZP(zp); 1084da6c28aaSamw 1085da6c28aaSamw if (flag & V_ACE_MASK) 1086da6c28aaSamw error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1087da6c28aaSamw else 1088da6c28aaSamw error = zfs_zaccess_rwx(zp, mode, flag, cr); 1089da6c28aaSamw 1090fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1091fa9e4066Sahrens return (error); 1092fa9e4066Sahrens } 1093fa9e4066Sahrens 1094d47621a4STim Haley /* 1095d47621a4STim Haley * If vnode is for a device return a specfs vnode instead. 
1096d47621a4STim Haley */ 1097d47621a4STim Haley static int 1098d47621a4STim Haley specvp_check(vnode_t **vpp, cred_t *cr) 1099d47621a4STim Haley { 1100d47621a4STim Haley int error = 0; 1101d47621a4STim Haley 1102d47621a4STim Haley if (IS_DEVVP(*vpp)) { 1103d47621a4STim Haley struct vnode *svp; 1104d47621a4STim Haley 1105d47621a4STim Haley svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1106d47621a4STim Haley VN_RELE(*vpp); 1107d47621a4STim Haley if (svp == NULL) 1108d47621a4STim Haley error = ENOSYS; 1109d47621a4STim Haley *vpp = svp; 1110d47621a4STim Haley } 1111d47621a4STim Haley return (error); 1112d47621a4STim Haley } 1113d47621a4STim Haley 1114d47621a4STim Haley 1115fa9e4066Sahrens /* 1116fa9e4066Sahrens * Lookup an entry in a directory, or an extended attribute directory. 1117fa9e4066Sahrens * If it exists, return a held vnode reference for it. 1118fa9e4066Sahrens * 1119fa9e4066Sahrens * IN: dvp - vnode of directory to search. 1120fa9e4066Sahrens * nm - name of entry to lookup. 1121fa9e4066Sahrens * pnp - full pathname to lookup [UNUSED]. 1122fa9e4066Sahrens * flags - LOOKUP_XATTR set if looking for an attribute. 1123fa9e4066Sahrens * rdir - root directory vnode [UNUSED]. 1124fa9e4066Sahrens * cr - credentials of caller. 1125da6c28aaSamw * ct - caller context 1126da6c28aaSamw * direntflags - directory lookup flags 1127da6c28aaSamw * realpnp - returned pathname. 1128fa9e4066Sahrens * 1129fa9e4066Sahrens * OUT: vpp - vnode of located entry, NULL if not found. 
1130fa9e4066Sahrens * 1131fa9e4066Sahrens * RETURN: 0 if success 1132fa9e4066Sahrens * error code if failure 1133fa9e4066Sahrens * 1134fa9e4066Sahrens * Timestamps: 1135fa9e4066Sahrens * NA 1136fa9e4066Sahrens */ 1137fa9e4066Sahrens /* ARGSUSED */ 1138fa9e4066Sahrens static int 1139fa9e4066Sahrens zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 1140da6c28aaSamw int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 1141da6c28aaSamw int *direntflags, pathname_t *realpnp) 1142fa9e4066Sahrens { 1143fa9e4066Sahrens znode_t *zdp = VTOZ(dvp); 1144fa9e4066Sahrens zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1145d47621a4STim Haley int error = 0; 1146d47621a4STim Haley 1147d47621a4STim Haley /* fast path */ 1148d47621a4STim Haley if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1149d47621a4STim Haley 1150d47621a4STim Haley if (dvp->v_type != VDIR) { 1151d47621a4STim Haley return (ENOTDIR); 11520a586ceaSMark Shellenbaum } else if (zdp->z_sa_hdl == NULL) { 1153d47621a4STim Haley return (EIO); 1154d47621a4STim Haley } 1155d47621a4STim Haley 1156d47621a4STim Haley if (nm[0] == 0 || (nm[0] == '.' 
&& nm[1] == '\0')) { 1157d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1158d47621a4STim Haley if (!error) { 1159d47621a4STim Haley *vpp = dvp; 1160d47621a4STim Haley VN_HOLD(*vpp); 1161d47621a4STim Haley return (0); 1162d47621a4STim Haley } 1163d47621a4STim Haley return (error); 1164d47621a4STim Haley } else { 1165d47621a4STim Haley vnode_t *tvp = dnlc_lookup(dvp, nm); 1166d47621a4STim Haley 1167d47621a4STim Haley if (tvp) { 1168d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1169d47621a4STim Haley if (error) { 1170d47621a4STim Haley VN_RELE(tvp); 1171d47621a4STim Haley return (error); 1172d47621a4STim Haley } 1173d47621a4STim Haley if (tvp == DNLC_NO_VNODE) { 1174d47621a4STim Haley VN_RELE(tvp); 1175d47621a4STim Haley return (ENOENT); 1176d47621a4STim Haley } else { 1177d47621a4STim Haley *vpp = tvp; 1178d47621a4STim Haley return (specvp_check(vpp, cr)); 1179d47621a4STim Haley } 1180d47621a4STim Haley } 1181d47621a4STim Haley } 1182d47621a4STim Haley } 1183d47621a4STim Haley 1184d47621a4STim Haley DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1185fa9e4066Sahrens 11863cb34c60Sahrens ZFS_ENTER(zfsvfs); 11873cb34c60Sahrens ZFS_VERIFY_ZP(zdp); 1188fa9e4066Sahrens 1189fa9e4066Sahrens *vpp = NULL; 1190fa9e4066Sahrens 1191fa9e4066Sahrens if (flags & LOOKUP_XATTR) { 11927b55fa8eSck /* 11937b55fa8eSck * If the xattr property is off, refuse the lookup request. 11947b55fa8eSck */ 11957b55fa8eSck if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 11967b55fa8eSck ZFS_EXIT(zfsvfs); 11977b55fa8eSck return (EINVAL); 11987b55fa8eSck } 11997b55fa8eSck 1200fa9e4066Sahrens /* 1201fa9e4066Sahrens * We don't allow recursive attributes.. 1202fa9e4066Sahrens * Maybe someday we will. 
1203fa9e4066Sahrens */ 12040a586ceaSMark Shellenbaum if (zdp->z_pflags & ZFS_XATTR) { 1205fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1206fa9e4066Sahrens return (EINVAL); 1207fa9e4066Sahrens } 1208fa9e4066Sahrens 12093f063a9dSck if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1210fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1211fa9e4066Sahrens return (error); 1212fa9e4066Sahrens } 1213fa9e4066Sahrens 1214fa9e4066Sahrens /* 1215fa9e4066Sahrens * Do we have permission to get into attribute directory? 1216fa9e4066Sahrens */ 1217fa9e4066Sahrens 1218da6c28aaSamw if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1219da6c28aaSamw B_FALSE, cr)) { 1220fa9e4066Sahrens VN_RELE(*vpp); 1221da6c28aaSamw *vpp = NULL; 1222fa9e4066Sahrens } 1223fa9e4066Sahrens 1224fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1225fa9e4066Sahrens return (error); 1226fa9e4066Sahrens } 1227fa9e4066Sahrens 12280f2dc02eSek if (dvp->v_type != VDIR) { 12290f2dc02eSek ZFS_EXIT(zfsvfs); 1230736b9155Smarks return (ENOTDIR); 12310f2dc02eSek } 1232736b9155Smarks 1233fa9e4066Sahrens /* 1234fa9e4066Sahrens * Check accessibility of directory. 1235fa9e4066Sahrens */ 1236fa9e4066Sahrens 1237da6c28aaSamw if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1238fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1239fa9e4066Sahrens return (error); 1240fa9e4066Sahrens } 1241fa9e4066Sahrens 1242de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1243da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1244da6c28aaSamw ZFS_EXIT(zfsvfs); 1245da6c28aaSamw return (EILSEQ); 1246da6c28aaSamw } 1247fa9e4066Sahrens 1248da6c28aaSamw error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1249d47621a4STim Haley if (error == 0) 1250d47621a4STim Haley error = specvp_check(vpp, cr); 1251fa9e4066Sahrens 1252fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1253fa9e4066Sahrens return (error); 1254fa9e4066Sahrens } 1255fa9e4066Sahrens 1256fa9e4066Sahrens /* 1257fa9e4066Sahrens * Attempt to create a new entry in a directory. 
If the entry 1258fa9e4066Sahrens * already exists, truncate the file if permissible, else return 1259fa9e4066Sahrens * an error. Return the vp of the created or trunc'd file. 1260fa9e4066Sahrens * 1261fa9e4066Sahrens * IN: dvp - vnode of directory to put new file entry in. 1262fa9e4066Sahrens * name - name of new file entry. 1263fa9e4066Sahrens * vap - attributes of new file. 1264fa9e4066Sahrens * excl - flag indicating exclusive or non-exclusive mode. 1265fa9e4066Sahrens * mode - mode to open file with. 1266fa9e4066Sahrens * cr - credentials of caller. 1267fa9e4066Sahrens * flag - large file flag [UNUSED]. 1268da6c28aaSamw * ct - caller context 1269da6c28aaSamw * vsecp - ACL to be set 1270fa9e4066Sahrens * 1271fa9e4066Sahrens * OUT: vpp - vnode of created or trunc'd entry. 1272fa9e4066Sahrens * 1273fa9e4066Sahrens * RETURN: 0 if success 1274fa9e4066Sahrens * error code if failure 1275fa9e4066Sahrens * 1276fa9e4066Sahrens * Timestamps: 1277fa9e4066Sahrens * dvp - ctime|mtime updated if new entry created 1278fa9e4066Sahrens * vp - ctime|mtime always, atime if new 1279fa9e4066Sahrens */ 1280da6c28aaSamw 1281fa9e4066Sahrens /* ARGSUSED */ 1282fa9e4066Sahrens static int 1283fa9e4066Sahrens zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl, 1284da6c28aaSamw int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct, 1285da6c28aaSamw vsecattr_t *vsecp) 1286fa9e4066Sahrens { 1287fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1288fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1289f18faf3fSek zilog_t *zilog; 1290f18faf3fSek objset_t *os; 1291fa9e4066Sahrens zfs_dirlock_t *dl; 1292fa9e4066Sahrens dmu_tx_t *tx; 1293fa9e4066Sahrens int error; 1294c1ce5987SMark Shellenbaum ksid_t *ksid; 1295c1ce5987SMark Shellenbaum uid_t uid; 1296c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 12970a586ceaSMark Shellenbaum zfs_acl_ids_t acl_ids; 129889459e17SMark Shellenbaum boolean_t fuid_dirtied; 1299da6c28aaSamw 1300da6c28aaSamw /* 1301da6c28aaSamw * If we 
have an ephemeral id, ACL, or XVATTR then 1302da6c28aaSamw * make sure file system is at proper version 1303da6c28aaSamw */ 1304da6c28aaSamw 1305c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1306c1ce5987SMark Shellenbaum if (ksid) 1307c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1308c1ce5987SMark Shellenbaum else 1309c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1310c1ce5987SMark Shellenbaum 1311da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1312da6c28aaSamw (vsecp || (vap->va_mask & AT_XVATTR) || 1313c1ce5987SMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1314da6c28aaSamw return (EINVAL); 1315fa9e4066Sahrens 13163cb34c60Sahrens ZFS_ENTER(zfsvfs); 13173cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1318f18faf3fSek os = zfsvfs->z_os; 1319f18faf3fSek zilog = zfsvfs->z_log; 1320fa9e4066Sahrens 1321de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1322da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1323da6c28aaSamw ZFS_EXIT(zfsvfs); 1324da6c28aaSamw return (EILSEQ); 1325da6c28aaSamw } 1326da6c28aaSamw 1327da6c28aaSamw if (vap->va_mask & AT_XVATTR) { 1328da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1329da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1330da6c28aaSamw ZFS_EXIT(zfsvfs); 1331da6c28aaSamw return (error); 1332da6c28aaSamw } 1333da6c28aaSamw } 1334fa9e4066Sahrens top: 1335fa9e4066Sahrens *vpp = NULL; 1336fa9e4066Sahrens 1337fa9e4066Sahrens if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr)) 1338fa9e4066Sahrens vap->va_mode &= ~VSVTX; 1339fa9e4066Sahrens 1340fa9e4066Sahrens if (*name == '\0') { 1341fa9e4066Sahrens /* 1342fa9e4066Sahrens * Null component name refers to the directory itself. 
1343fa9e4066Sahrens */ 1344fa9e4066Sahrens VN_HOLD(dvp); 1345fa9e4066Sahrens zp = dzp; 1346fa9e4066Sahrens dl = NULL; 1347fa9e4066Sahrens error = 0; 1348fa9e4066Sahrens } else { 1349fa9e4066Sahrens /* possible VN_HOLD(zp) */ 1350da6c28aaSamw int zflg = 0; 1351da6c28aaSamw 1352da6c28aaSamw if (flag & FIGNORECASE) 1353da6c28aaSamw zflg |= ZCILOOK; 1354da6c28aaSamw 1355da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1356da6c28aaSamw NULL, NULL); 1357da6c28aaSamw if (error) { 1358fa9e4066Sahrens if (strcmp(name, "..") == 0) 1359fa9e4066Sahrens error = EISDIR; 1360fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1361fa9e4066Sahrens return (error); 1362fa9e4066Sahrens } 1363fa9e4066Sahrens } 13640a586ceaSMark Shellenbaum 1365fa9e4066Sahrens if (zp == NULL) { 1366da6c28aaSamw uint64_t txtype; 1367da6c28aaSamw 1368fa9e4066Sahrens /* 1369fa9e4066Sahrens * Create a new file object and update the directory 1370fa9e4066Sahrens * to reference it. 1371fa9e4066Sahrens */ 1372da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1373fa9e4066Sahrens goto out; 1374fa9e4066Sahrens } 1375fa9e4066Sahrens 1376fa9e4066Sahrens /* 1377fa9e4066Sahrens * We only support the creation of regular files in 1378fa9e4066Sahrens * extended attribute directories. 
1379fa9e4066Sahrens */ 13800a586ceaSMark Shellenbaum 13810a586ceaSMark Shellenbaum if ((dzp->z_pflags & ZFS_XATTR) && 1382fa9e4066Sahrens (vap->va_type != VREG)) { 1383fa9e4066Sahrens error = EINVAL; 1384fa9e4066Sahrens goto out; 1385fa9e4066Sahrens } 1386fa9e4066Sahrens 138789459e17SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, 138889459e17SMark Shellenbaum &acl_ids)) != 0) 138989459e17SMark Shellenbaum goto out; 139014843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 13914929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 139214843421SMatthew Ahrens error = EDQUOT; 139314843421SMatthew Ahrens goto out; 139414843421SMatthew Ahrens } 139589459e17SMark Shellenbaum 1396fa9e4066Sahrens tx = dmu_tx_create(os); 13970a586ceaSMark Shellenbaum 13980a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 13990a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE); 14000a586ceaSMark Shellenbaum 140189459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 140214843421SMatthew Ahrens if (fuid_dirtied) 140314843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 1404ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 14050a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 14060a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && 14070a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1408fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 14090a586ceaSMark Shellenbaum 0, acl_ids.z_aclp->z_acl_bytes); 1410da6c28aaSamw } 14111209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 1412fa9e4066Sahrens if (error) { 141389459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1414fa9e4066Sahrens zfs_dirent_unlock(dl); 14151209a471SNeil Perrin if (error == ERESTART) { 14168a2f1b91Sahrens dmu_tx_wait(tx); 14178a2f1b91Sahrens dmu_tx_abort(tx); 1418fa9e4066Sahrens goto top; 1419fa9e4066Sahrens } 14208a2f1b91Sahrens dmu_tx_abort(tx); 1421fa9e4066Sahrens ZFS_EXIT(zfsvfs); 
1422fa9e4066Sahrens return (error); 1423fa9e4066Sahrens } 14240a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 142589459e17SMark Shellenbaum 142689459e17SMark Shellenbaum if (fuid_dirtied) 142789459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 142889459e17SMark Shellenbaum 1429fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 1430da6c28aaSamw txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1431da6c28aaSamw if (flag & FIGNORECASE) 1432da6c28aaSamw txtype |= TX_CI; 1433da6c28aaSamw zfs_log_create(zilog, tx, txtype, dzp, zp, name, 143489459e17SMark Shellenbaum vsecp, acl_ids.z_fuidp, vap); 143589459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1436fa9e4066Sahrens dmu_tx_commit(tx); 1437fa9e4066Sahrens } else { 1438da6c28aaSamw int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1439da6c28aaSamw 1440fa9e4066Sahrens /* 1441fa9e4066Sahrens * A directory entry already exists for this name. 1442fa9e4066Sahrens */ 1443fa9e4066Sahrens /* 1444fa9e4066Sahrens * Can't truncate an existing file if in exclusive mode. 1445fa9e4066Sahrens */ 1446fa9e4066Sahrens if (excl == EXCL) { 1447fa9e4066Sahrens error = EEXIST; 1448fa9e4066Sahrens goto out; 1449fa9e4066Sahrens } 1450fa9e4066Sahrens /* 1451fa9e4066Sahrens * Can't open a directory for writing. 1452fa9e4066Sahrens */ 1453fa9e4066Sahrens if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1454fa9e4066Sahrens error = EISDIR; 1455fa9e4066Sahrens goto out; 1456fa9e4066Sahrens } 1457fa9e4066Sahrens /* 1458fa9e4066Sahrens * Verify requested access to file. 1459fa9e4066Sahrens */ 1460da6c28aaSamw if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1461fa9e4066Sahrens goto out; 1462fa9e4066Sahrens } 1463fa9e4066Sahrens 1464fa9e4066Sahrens mutex_enter(&dzp->z_lock); 1465fa9e4066Sahrens dzp->z_seq++; 1466fa9e4066Sahrens mutex_exit(&dzp->z_lock); 1467fa9e4066Sahrens 14685730cc9aSmaybee /* 14695730cc9aSmaybee * Truncate regular files if requested. 
14705730cc9aSmaybee */ 14715730cc9aSmaybee if ((ZTOV(zp)->v_type == VREG) && 1472fa9e4066Sahrens (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1473cdb0ab79Smaybee /* we can't hold any locks when calling zfs_freesp() */ 1474cdb0ab79Smaybee zfs_dirent_unlock(dl); 1475cdb0ab79Smaybee dl = NULL; 14765730cc9aSmaybee error = zfs_freesp(zp, 0, 0, mode, TRUE); 1477df2381bfSpraks if (error == 0) { 1478da6c28aaSamw vnevent_create(ZTOV(zp), ct); 1479df2381bfSpraks } 1480fa9e4066Sahrens } 1481fa9e4066Sahrens } 1482fa9e4066Sahrens out: 1483fa9e4066Sahrens 1484fa9e4066Sahrens if (dl) 1485fa9e4066Sahrens zfs_dirent_unlock(dl); 1486fa9e4066Sahrens 1487fa9e4066Sahrens if (error) { 1488fa9e4066Sahrens if (zp) 1489fa9e4066Sahrens VN_RELE(ZTOV(zp)); 1490fa9e4066Sahrens } else { 1491fa9e4066Sahrens *vpp = ZTOV(zp); 1492d47621a4STim Haley error = specvp_check(vpp, cr); 1493fa9e4066Sahrens } 1494fa9e4066Sahrens 1495*55da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1496*55da60b9SMark J Musante zil_commit(zilog, UINT64_MAX, 0); 1497*55da60b9SMark J Musante 1498fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1499fa9e4066Sahrens return (error); 1500fa9e4066Sahrens } 1501fa9e4066Sahrens 1502fa9e4066Sahrens /* 1503fa9e4066Sahrens * Remove an entry from a directory. 1504fa9e4066Sahrens * 1505fa9e4066Sahrens * IN: dvp - vnode of directory to remove entry from. 1506fa9e4066Sahrens * name - name of entry to remove. 1507fa9e4066Sahrens * cr - credentials of caller. 
1508da6c28aaSamw * ct - caller context 1509da6c28aaSamw * flags - case flags 1510fa9e4066Sahrens * 1511fa9e4066Sahrens * RETURN: 0 if success 1512fa9e4066Sahrens * error code if failure 1513fa9e4066Sahrens * 1514fa9e4066Sahrens * Timestamps: 1515fa9e4066Sahrens * dvp - ctime|mtime 1516fa9e4066Sahrens * vp - ctime (if nlink > 0) 1517fa9e4066Sahrens */ 15180a586ceaSMark Shellenbaum 15190a586ceaSMark Shellenbaum uint64_t null_xattr = 0; 15200a586ceaSMark Shellenbaum 1521da6c28aaSamw /*ARGSUSED*/ 1522fa9e4066Sahrens static int 1523da6c28aaSamw zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1524da6c28aaSamw int flags) 1525fa9e4066Sahrens { 1526fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1527fa9e4066Sahrens znode_t *xzp = NULL; 1528fa9e4066Sahrens vnode_t *vp; 1529fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1530f18faf3fSek zilog_t *zilog; 15310a586ceaSMark Shellenbaum uint64_t acl_obj, xattr_obj = 0; 15320a586ceaSMark Shellenbaum uint64_t xattr_obj_unlinked = 0; 1533fa9e4066Sahrens zfs_dirlock_t *dl; 1534fa9e4066Sahrens dmu_tx_t *tx; 1535893a6d32Sahrens boolean_t may_delete_now, delete_now = FALSE; 1536cdb0ab79Smaybee boolean_t unlinked, toobig = FALSE; 1537da6c28aaSamw uint64_t txtype; 1538da6c28aaSamw pathname_t *realnmp = NULL; 1539da6c28aaSamw pathname_t realnm; 1540fa9e4066Sahrens int error; 1541da6c28aaSamw int zflg = ZEXISTS; 1542fa9e4066Sahrens 15433cb34c60Sahrens ZFS_ENTER(zfsvfs); 15443cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1545f18faf3fSek zilog = zfsvfs->z_log; 1546fa9e4066Sahrens 1547da6c28aaSamw if (flags & FIGNORECASE) { 1548da6c28aaSamw zflg |= ZCILOOK; 1549da6c28aaSamw pn_alloc(&realnm); 1550da6c28aaSamw realnmp = &realnm; 1551da6c28aaSamw } 1552da6c28aaSamw 1553fa9e4066Sahrens top: 1554fa9e4066Sahrens /* 1555fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 
1556fa9e4066Sahrens */ 1557da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1558da6c28aaSamw NULL, realnmp)) { 1559da6c28aaSamw if (realnmp) 1560da6c28aaSamw pn_free(realnmp); 1561fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1562fa9e4066Sahrens return (error); 1563fa9e4066Sahrens } 1564fa9e4066Sahrens 1565fa9e4066Sahrens vp = ZTOV(zp); 1566fa9e4066Sahrens 1567fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1568fa9e4066Sahrens goto out; 1569fa9e4066Sahrens } 1570fa9e4066Sahrens 1571fa9e4066Sahrens /* 1572fa9e4066Sahrens * Need to use rmdir for removing directories. 1573fa9e4066Sahrens */ 1574fa9e4066Sahrens if (vp->v_type == VDIR) { 1575fa9e4066Sahrens error = EPERM; 1576fa9e4066Sahrens goto out; 1577fa9e4066Sahrens } 1578fa9e4066Sahrens 1579da6c28aaSamw vnevent_remove(vp, dvp, name, ct); 1580fa9e4066Sahrens 1581da6c28aaSamw if (realnmp) 1582ab04eb8eStimh dnlc_remove(dvp, realnmp->pn_buf); 1583da6c28aaSamw else 1584da6c28aaSamw dnlc_remove(dvp, name); 1585033f9833Sek 1586fa9e4066Sahrens mutex_enter(&vp->v_lock); 1587fa9e4066Sahrens may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1588fa9e4066Sahrens mutex_exit(&vp->v_lock); 1589fa9e4066Sahrens 1590fa9e4066Sahrens /* 1591893a6d32Sahrens * We may delete the znode now, or we may put it in the unlinked set; 1592fa9e4066Sahrens * it depends on whether we're the last link, and on whether there are 1593fa9e4066Sahrens * other holds on the vnode. So we dmu_tx_hold() the right things to 1594fa9e4066Sahrens * allow for either case. 
1595fa9e4066Sahrens */ 1596fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1597ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 15980a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 15990a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 16000a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 1601cdb0ab79Smaybee if (may_delete_now) { 1602cdb0ab79Smaybee toobig = 16030a586ceaSMark Shellenbaum zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1604cdb0ab79Smaybee /* if the file is too big, only hold_free a token amount */ 1605cdb0ab79Smaybee dmu_tx_hold_free(tx, zp->z_id, 0, 1606cdb0ab79Smaybee (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); 1607cdb0ab79Smaybee } 1608fa9e4066Sahrens 1609fa9e4066Sahrens /* are there any extended attributes? */ 16100a586ceaSMark Shellenbaum error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 16110a586ceaSMark Shellenbaum &xattr_obj, sizeof (xattr_obj)); 16120a586ceaSMark Shellenbaum if (xattr_obj) { 16130a586ceaSMark Shellenbaum error = zfs_zget(zfsvfs, xattr_obj, &xzp); 16140a586ceaSMark Shellenbaum ASSERT3U(error, ==, 0); 16150a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 16160a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1617fa9e4066Sahrens } 1618fa9e4066Sahrens 1619fa9e4066Sahrens /* are there any additional acls */ 16200a586ceaSMark Shellenbaum if ((acl_obj = ZFS_EXTERNAL_ACL(zp)) != 0 && may_delete_now) 1621fa9e4066Sahrens dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1622fa9e4066Sahrens 1623fa9e4066Sahrens /* charge as an update -- would be nice not to charge at all */ 1624893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1625fa9e4066Sahrens 16261209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 1627fa9e4066Sahrens if (error) { 1628fa9e4066Sahrens zfs_dirent_unlock(dl); 1629fa9e4066Sahrens VN_RELE(vp); 16301209a471SNeil Perrin if (error == ERESTART) { 16318a2f1b91Sahrens dmu_tx_wait(tx); 
16328a2f1b91Sahrens dmu_tx_abort(tx); 1633fa9e4066Sahrens goto top; 1634fa9e4066Sahrens } 1635da6c28aaSamw if (realnmp) 1636da6c28aaSamw pn_free(realnmp); 16378a2f1b91Sahrens dmu_tx_abort(tx); 1638fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1639fa9e4066Sahrens return (error); 1640fa9e4066Sahrens } 1641fa9e4066Sahrens 1642fa9e4066Sahrens /* 1643fa9e4066Sahrens * Remove the directory entry. 1644fa9e4066Sahrens */ 1645da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1646fa9e4066Sahrens 1647fa9e4066Sahrens if (error) { 1648fa9e4066Sahrens dmu_tx_commit(tx); 1649fa9e4066Sahrens goto out; 1650fa9e4066Sahrens } 1651fa9e4066Sahrens 1652893a6d32Sahrens if (unlinked) { 16530a586ceaSMark Shellenbaum 1654fa9e4066Sahrens mutex_enter(&vp->v_lock); 16550a586ceaSMark Shellenbaum 16560a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 16570a586ceaSMark Shellenbaum &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 1658cdb0ab79Smaybee delete_now = may_delete_now && !toobig && 1659fa9e4066Sahrens vp->v_count == 1 && !vn_has_cached_data(vp) && 16600a586ceaSMark Shellenbaum xattr_obj == xattr_obj_unlinked && ZFS_EXTERNAL_ACL(zp) == 16610a586ceaSMark Shellenbaum acl_obj; 1662fa9e4066Sahrens mutex_exit(&vp->v_lock); 1663fa9e4066Sahrens } 1664fa9e4066Sahrens 1665fa9e4066Sahrens if (delete_now) { 16660a586ceaSMark Shellenbaum if (xattr_obj_unlinked) { 16670a586ceaSMark Shellenbaum ASSERT3U(xzp->z_links, ==, 2); 1668fa9e4066Sahrens mutex_enter(&xzp->z_lock); 1669893a6d32Sahrens xzp->z_unlinked = 1; 16700a586ceaSMark Shellenbaum xzp->z_links = 0; 16710a586ceaSMark Shellenbaum error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 16720a586ceaSMark Shellenbaum &xzp->z_links, sizeof (xzp->z_links), tx); 16730a586ceaSMark Shellenbaum ASSERT3U(error, ==, 0); 1674fa9e4066Sahrens mutex_exit(&xzp->z_lock); 1675893a6d32Sahrens zfs_unlinked_add(xzp, tx); 16760a586ceaSMark Shellenbaum if (zp->z_is_sa) 16770a586ceaSMark Shellenbaum error = sa_remove(zp->z_sa_hdl, 
16780a586ceaSMark Shellenbaum SA_ZPL_XATTR(zfsvfs), tx); 16790a586ceaSMark Shellenbaum else 16800a586ceaSMark Shellenbaum error = sa_update(zp->z_sa_hdl, 16810a586ceaSMark Shellenbaum SA_ZPL_XATTR(zfsvfs), &null_xattr, 16820a586ceaSMark Shellenbaum sizeof (uint64_t), tx); 16830a586ceaSMark Shellenbaum ASSERT3U(error, ==, 0); 1684fa9e4066Sahrens } 1685fa9e4066Sahrens mutex_enter(&zp->z_lock); 1686fa9e4066Sahrens mutex_enter(&vp->v_lock); 1687fa9e4066Sahrens vp->v_count--; 1688fa9e4066Sahrens ASSERT3U(vp->v_count, ==, 0); 1689fa9e4066Sahrens mutex_exit(&vp->v_lock); 1690fa9e4066Sahrens mutex_exit(&zp->z_lock); 1691fa9e4066Sahrens zfs_znode_delete(zp, tx); 1692893a6d32Sahrens } else if (unlinked) { 1693893a6d32Sahrens zfs_unlinked_add(zp, tx); 1694fa9e4066Sahrens } 1695fa9e4066Sahrens 1696da6c28aaSamw txtype = TX_REMOVE; 1697da6c28aaSamw if (flags & FIGNORECASE) 1698da6c28aaSamw txtype |= TX_CI; 1699da6c28aaSamw zfs_log_remove(zilog, tx, txtype, dzp, name); 1700fa9e4066Sahrens 1701fa9e4066Sahrens dmu_tx_commit(tx); 1702fa9e4066Sahrens out: 1703da6c28aaSamw if (realnmp) 1704da6c28aaSamw pn_free(realnmp); 1705da6c28aaSamw 1706fa9e4066Sahrens zfs_dirent_unlock(dl); 1707fa9e4066Sahrens 170806e0070dSMark Shellenbaum if (!delete_now) 1709fa9e4066Sahrens VN_RELE(vp); 171006e0070dSMark Shellenbaum if (xzp) 1711fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 1712fa9e4066Sahrens 1713*55da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1714*55da60b9SMark J Musante zil_commit(zilog, UINT64_MAX, 0); 1715*55da60b9SMark J Musante 1716fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1717fa9e4066Sahrens return (error); 1718fa9e4066Sahrens } 1719fa9e4066Sahrens 1720fa9e4066Sahrens /* 1721fa9e4066Sahrens * Create a new directory and insert it into dvp using the name 1722fa9e4066Sahrens * provided. Return a pointer to the inserted directory. 1723fa9e4066Sahrens * 1724fa9e4066Sahrens * IN: dvp - vnode of directory to add subdir to. 1725fa9e4066Sahrens * dirname - name of new directory. 
1726fa9e4066Sahrens * vap - attributes of new directory. 1727fa9e4066Sahrens * cr - credentials of caller. 1728da6c28aaSamw * ct - caller context 1729da6c28aaSamw * vsecp - ACL to be set 1730fa9e4066Sahrens * 1731fa9e4066Sahrens * OUT: vpp - vnode of created directory. 1732fa9e4066Sahrens * 1733fa9e4066Sahrens * RETURN: 0 if success 1734fa9e4066Sahrens * error code if failure 1735fa9e4066Sahrens * 1736fa9e4066Sahrens * Timestamps: 1737fa9e4066Sahrens * dvp - ctime|mtime updated 1738fa9e4066Sahrens * vp - ctime|mtime|atime updated 1739fa9e4066Sahrens */ 1740da6c28aaSamw /*ARGSUSED*/ 1741fa9e4066Sahrens static int 1742da6c28aaSamw zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 1743da6c28aaSamw caller_context_t *ct, int flags, vsecattr_t *vsecp) 1744fa9e4066Sahrens { 1745fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1746fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1747f18faf3fSek zilog_t *zilog; 1748fa9e4066Sahrens zfs_dirlock_t *dl; 1749da6c28aaSamw uint64_t txtype; 1750fa9e4066Sahrens dmu_tx_t *tx; 1751fa9e4066Sahrens int error; 1752da6c28aaSamw int zf = ZNEW; 1753c1ce5987SMark Shellenbaum ksid_t *ksid; 1754c1ce5987SMark Shellenbaum uid_t uid; 1755c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 17560a586ceaSMark Shellenbaum zfs_acl_ids_t acl_ids; 175789459e17SMark Shellenbaum boolean_t fuid_dirtied; 1758fa9e4066Sahrens 1759fa9e4066Sahrens ASSERT(vap->va_type == VDIR); 1760fa9e4066Sahrens 1761da6c28aaSamw /* 1762da6c28aaSamw * If we have an ephemeral id, ACL, or XVATTR then 1763da6c28aaSamw * make sure file system is at proper version 1764da6c28aaSamw */ 1765da6c28aaSamw 1766c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1767c1ce5987SMark Shellenbaum if (ksid) 1768c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1769c1ce5987SMark Shellenbaum else 1770c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1771da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1772c1ce5987SMark Shellenbaum (vsecp || (vap->va_mask & 
AT_XVATTR) || 1773756962ecSMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1774da6c28aaSamw return (EINVAL); 1775da6c28aaSamw 17763cb34c60Sahrens ZFS_ENTER(zfsvfs); 17773cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1778f18faf3fSek zilog = zfsvfs->z_log; 1779fa9e4066Sahrens 17800a586ceaSMark Shellenbaum if (dzp->z_pflags & ZFS_XATTR) { 1781fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1782fa9e4066Sahrens return (EINVAL); 1783fa9e4066Sahrens } 1784da6c28aaSamw 1785de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(dirname, 1786da6c28aaSamw strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1787da6c28aaSamw ZFS_EXIT(zfsvfs); 1788da6c28aaSamw return (EILSEQ); 1789da6c28aaSamw } 1790da6c28aaSamw if (flags & FIGNORECASE) 1791da6c28aaSamw zf |= ZCILOOK; 1792da6c28aaSamw 1793da6c28aaSamw if (vap->va_mask & AT_XVATTR) 1794da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1795da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1796da6c28aaSamw ZFS_EXIT(zfsvfs); 1797da6c28aaSamw return (error); 1798da6c28aaSamw } 1799fa9e4066Sahrens 1800fa9e4066Sahrens /* 1801fa9e4066Sahrens * First make sure the new directory doesn't exist. 
1802fa9e4066Sahrens */ 1803da6c28aaSamw top: 1804da6c28aaSamw *vpp = NULL; 1805da6c28aaSamw 1806da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 1807da6c28aaSamw NULL, NULL)) { 1808fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1809fa9e4066Sahrens return (error); 1810fa9e4066Sahrens } 1811fa9e4066Sahrens 1812da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 1813d2443e76Smarks zfs_dirent_unlock(dl); 1814d2443e76Smarks ZFS_EXIT(zfsvfs); 1815d2443e76Smarks return (error); 1816d2443e76Smarks } 1817d2443e76Smarks 181889459e17SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, 181989459e17SMark Shellenbaum &acl_ids)) != 0) { 182089459e17SMark Shellenbaum zfs_dirent_unlock(dl); 182189459e17SMark Shellenbaum ZFS_EXIT(zfsvfs); 182289459e17SMark Shellenbaum return (error); 1823da6c28aaSamw } 182414843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 18254929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 182614843421SMatthew Ahrens zfs_dirent_unlock(dl); 182714843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 182814843421SMatthew Ahrens return (EDQUOT); 182914843421SMatthew Ahrens } 183089459e17SMark Shellenbaum 1831fa9e4066Sahrens /* 1832fa9e4066Sahrens * Add a new entry to the directory. 
1833fa9e4066Sahrens */ 1834fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1835ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 1836ea8dc4b6Seschrock dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 183789459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 183814843421SMatthew Ahrens if (fuid_dirtied) 183914843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 18400a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 18410a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 18420a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes); 18430a586ceaSMark Shellenbaum } 18440a586ceaSMark Shellenbaum 18450a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 18460a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE); 18470a586ceaSMark Shellenbaum 18481209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 1849fa9e4066Sahrens if (error) { 185089459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1851fa9e4066Sahrens zfs_dirent_unlock(dl); 18521209a471SNeil Perrin if (error == ERESTART) { 18538a2f1b91Sahrens dmu_tx_wait(tx); 18548a2f1b91Sahrens dmu_tx_abort(tx); 1855fa9e4066Sahrens goto top; 1856fa9e4066Sahrens } 18578a2f1b91Sahrens dmu_tx_abort(tx); 1858fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1859fa9e4066Sahrens return (error); 1860fa9e4066Sahrens } 1861fa9e4066Sahrens 1862fa9e4066Sahrens /* 1863fa9e4066Sahrens * Create new node. 1864fa9e4066Sahrens */ 18650a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 1866fa9e4066Sahrens 186789459e17SMark Shellenbaum if (fuid_dirtied) 186889459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 18690a586ceaSMark Shellenbaum 1870fa9e4066Sahrens /* 1871fa9e4066Sahrens * Now put new name in parent dir. 
1872fa9e4066Sahrens */ 1873fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 1874fa9e4066Sahrens 1875fa9e4066Sahrens *vpp = ZTOV(zp); 1876fa9e4066Sahrens 1877da6c28aaSamw txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 1878da6c28aaSamw if (flags & FIGNORECASE) 1879da6c28aaSamw txtype |= TX_CI; 188089459e17SMark Shellenbaum zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 188189459e17SMark Shellenbaum acl_ids.z_fuidp, vap); 1882da6c28aaSamw 188389459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 18840a586ceaSMark Shellenbaum 1885fa9e4066Sahrens dmu_tx_commit(tx); 1886fa9e4066Sahrens 1887fa9e4066Sahrens zfs_dirent_unlock(dl); 1888fa9e4066Sahrens 1889*55da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 1890*55da60b9SMark J Musante zil_commit(zilog, UINT64_MAX, 0); 1891*55da60b9SMark J Musante 1892fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1893fa9e4066Sahrens return (0); 1894fa9e4066Sahrens } 1895fa9e4066Sahrens 1896fa9e4066Sahrens /* 1897fa9e4066Sahrens * Remove a directory subdir entry. If the current working 1898fa9e4066Sahrens * directory is the same as the subdir to be removed, the 1899fa9e4066Sahrens * remove will fail. 1900fa9e4066Sahrens * 1901fa9e4066Sahrens * IN: dvp - vnode of directory to remove from. 1902fa9e4066Sahrens * name - name of directory to be removed. 1903fa9e4066Sahrens * cwd - vnode of current working directory. 1904fa9e4066Sahrens * cr - credentials of caller. 
1905da6c28aaSamw * ct - caller context 1906da6c28aaSamw * flags - case flags 1907fa9e4066Sahrens * 1908fa9e4066Sahrens * RETURN: 0 if success 1909fa9e4066Sahrens * error code if failure 1910fa9e4066Sahrens * 1911fa9e4066Sahrens * Timestamps: 1912fa9e4066Sahrens * dvp - ctime|mtime updated 1913fa9e4066Sahrens */ 1914da6c28aaSamw /*ARGSUSED*/ 1915fa9e4066Sahrens static int 1916da6c28aaSamw zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 1917da6c28aaSamw caller_context_t *ct, int flags) 1918fa9e4066Sahrens { 1919fa9e4066Sahrens znode_t *dzp = VTOZ(dvp); 1920fa9e4066Sahrens znode_t *zp; 1921fa9e4066Sahrens vnode_t *vp; 1922fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1923f18faf3fSek zilog_t *zilog; 1924fa9e4066Sahrens zfs_dirlock_t *dl; 1925fa9e4066Sahrens dmu_tx_t *tx; 1926fa9e4066Sahrens int error; 1927da6c28aaSamw int zflg = ZEXISTS; 1928fa9e4066Sahrens 19293cb34c60Sahrens ZFS_ENTER(zfsvfs); 19303cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1931f18faf3fSek zilog = zfsvfs->z_log; 1932fa9e4066Sahrens 1933da6c28aaSamw if (flags & FIGNORECASE) 1934da6c28aaSamw zflg |= ZCILOOK; 1935fa9e4066Sahrens top: 1936fa9e4066Sahrens zp = NULL; 1937fa9e4066Sahrens 1938fa9e4066Sahrens /* 1939fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 
1940fa9e4066Sahrens */ 1941da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1942da6c28aaSamw NULL, NULL)) { 1943fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1944fa9e4066Sahrens return (error); 1945fa9e4066Sahrens } 1946fa9e4066Sahrens 1947fa9e4066Sahrens vp = ZTOV(zp); 1948fa9e4066Sahrens 1949fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1950fa9e4066Sahrens goto out; 1951fa9e4066Sahrens } 1952fa9e4066Sahrens 1953fa9e4066Sahrens if (vp->v_type != VDIR) { 1954fa9e4066Sahrens error = ENOTDIR; 1955fa9e4066Sahrens goto out; 1956fa9e4066Sahrens } 1957fa9e4066Sahrens 1958fa9e4066Sahrens if (vp == cwd) { 1959fa9e4066Sahrens error = EINVAL; 1960fa9e4066Sahrens goto out; 1961fa9e4066Sahrens } 1962fa9e4066Sahrens 1963da6c28aaSamw vnevent_rmdir(vp, dvp, name, ct); 1964fa9e4066Sahrens 1965fa9e4066Sahrens /* 1966af2c4821Smaybee * Grab a lock on the directory to make sure that noone is 1967af2c4821Smaybee * trying to add (or lookup) entries while we are removing it. 1968af2c4821Smaybee */ 1969af2c4821Smaybee rw_enter(&zp->z_name_lock, RW_WRITER); 1970af2c4821Smaybee 1971af2c4821Smaybee /* 1972af2c4821Smaybee * Grab a lock on the parent pointer to make sure we play well 1973fa9e4066Sahrens * with the treewalk and directory rename code. 
1974fa9e4066Sahrens */ 1975fa9e4066Sahrens rw_enter(&zp->z_parent_lock, RW_WRITER); 1976fa9e4066Sahrens 1977fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1978ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 19790a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1980893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 19810a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 19820a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 19831209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 1984fa9e4066Sahrens if (error) { 1985fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 1986af2c4821Smaybee rw_exit(&zp->z_name_lock); 1987fa9e4066Sahrens zfs_dirent_unlock(dl); 1988fa9e4066Sahrens VN_RELE(vp); 19891209a471SNeil Perrin if (error == ERESTART) { 19908a2f1b91Sahrens dmu_tx_wait(tx); 19918a2f1b91Sahrens dmu_tx_abort(tx); 1992fa9e4066Sahrens goto top; 1993fa9e4066Sahrens } 19948a2f1b91Sahrens dmu_tx_abort(tx); 1995fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1996fa9e4066Sahrens return (error); 1997fa9e4066Sahrens } 1998fa9e4066Sahrens 1999da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2000fa9e4066Sahrens 2001da6c28aaSamw if (error == 0) { 2002da6c28aaSamw uint64_t txtype = TX_RMDIR; 2003da6c28aaSamw if (flags & FIGNORECASE) 2004da6c28aaSamw txtype |= TX_CI; 2005da6c28aaSamw zfs_log_remove(zilog, tx, txtype, dzp, name); 2006da6c28aaSamw } 2007fa9e4066Sahrens 2008fa9e4066Sahrens dmu_tx_commit(tx); 2009fa9e4066Sahrens 2010fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 2011af2c4821Smaybee rw_exit(&zp->z_name_lock); 2012fa9e4066Sahrens out: 2013fa9e4066Sahrens zfs_dirent_unlock(dl); 2014fa9e4066Sahrens 2015fa9e4066Sahrens VN_RELE(vp); 2016fa9e4066Sahrens 2017*55da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 2018*55da60b9SMark J Musante zil_commit(zilog, UINT64_MAX, 0); 2019*55da60b9SMark J Musante 2020fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2021fa9e4066Sahrens return (error); 
2022fa9e4066Sahrens } 2023fa9e4066Sahrens 2024fa9e4066Sahrens /* 2025fa9e4066Sahrens * Read as many directory entries as will fit into the provided 2026fa9e4066Sahrens * buffer from the given directory cursor position (specified in 2027fa9e4066Sahrens * the uio structure. 2028fa9e4066Sahrens * 2029fa9e4066Sahrens * IN: vp - vnode of directory to read. 2030fa9e4066Sahrens * uio - structure supplying read location, range info, 2031fa9e4066Sahrens * and return buffer. 2032fa9e4066Sahrens * cr - credentials of caller. 2033da6c28aaSamw * ct - caller context 2034da6c28aaSamw * flags - case flags 2035fa9e4066Sahrens * 2036fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 2037fa9e4066Sahrens * eofp - set to true if end-of-file detected. 2038fa9e4066Sahrens * 2039fa9e4066Sahrens * RETURN: 0 if success 2040fa9e4066Sahrens * error code if failure 2041fa9e4066Sahrens * 2042fa9e4066Sahrens * Timestamps: 2043fa9e4066Sahrens * vp - atime updated 2044fa9e4066Sahrens * 2045fa9e4066Sahrens * Note that the low 4 bits of the cookie returned by zap is always zero. 2046fa9e4066Sahrens * This allows us to use the low range for "special" directory entries: 2047fa9e4066Sahrens * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2048fa9e4066Sahrens * we use the offset 2 for the '.zfs' directory. 
2049fa9e4066Sahrens */ 2050fa9e4066Sahrens /* ARGSUSED */ 2051fa9e4066Sahrens static int 2052da6c28aaSamw zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, 2053da6c28aaSamw caller_context_t *ct, int flags) 2054fa9e4066Sahrens { 2055fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2056fa9e4066Sahrens iovec_t *iovp; 2057da6c28aaSamw edirent_t *eodp; 2058fa9e4066Sahrens dirent64_t *odp; 2059fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 20607f6e3e7dSperrin objset_t *os; 2061fa9e4066Sahrens caddr_t outbuf; 2062fa9e4066Sahrens size_t bufsize; 2063fa9e4066Sahrens zap_cursor_t zc; 2064fa9e4066Sahrens zap_attribute_t zap; 2065fa9e4066Sahrens uint_t bytes_wanted; 2066fa9e4066Sahrens uint64_t offset; /* must be unsigned; checks for < 1 */ 20670a586ceaSMark Shellenbaum uint64_t parent; 2068fa9e4066Sahrens int local_eof; 20697f6e3e7dSperrin int outcount; 20707f6e3e7dSperrin int error; 20717f6e3e7dSperrin uint8_t prefetch; 2072b38f0970Sck boolean_t check_sysattrs; 2073fa9e4066Sahrens 20743cb34c60Sahrens ZFS_ENTER(zfsvfs); 20753cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2076fa9e4066Sahrens 20770a586ceaSMark Shellenbaum if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 20780a586ceaSMark Shellenbaum &parent, sizeof (parent))) != 0) { 20790a586ceaSMark Shellenbaum ZFS_EXIT(zfsvfs); 20800a586ceaSMark Shellenbaum return (error); 20810a586ceaSMark Shellenbaum } 20820a586ceaSMark Shellenbaum 2083fa9e4066Sahrens /* 2084fa9e4066Sahrens * If we are not given an eof variable, 2085fa9e4066Sahrens * use a local one. 2086fa9e4066Sahrens */ 2087fa9e4066Sahrens if (eofp == NULL) 2088fa9e4066Sahrens eofp = &local_eof; 2089fa9e4066Sahrens 2090fa9e4066Sahrens /* 2091fa9e4066Sahrens * Check for valid iov_len. 
2092fa9e4066Sahrens */ 2093fa9e4066Sahrens if (uio->uio_iov->iov_len <= 0) { 2094fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2095fa9e4066Sahrens return (EINVAL); 2096fa9e4066Sahrens } 2097fa9e4066Sahrens 2098fa9e4066Sahrens /* 2099fa9e4066Sahrens * Quit if directory has been removed (posix) 2100fa9e4066Sahrens */ 2101893a6d32Sahrens if ((*eofp = zp->z_unlinked) != 0) { 2102fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2103fa9e4066Sahrens return (0); 2104fa9e4066Sahrens } 2105fa9e4066Sahrens 21067f6e3e7dSperrin error = 0; 21077f6e3e7dSperrin os = zfsvfs->z_os; 21087f6e3e7dSperrin offset = uio->uio_loffset; 21097f6e3e7dSperrin prefetch = zp->z_zn_prefetch; 21107f6e3e7dSperrin 2111fa9e4066Sahrens /* 2112fa9e4066Sahrens * Initialize the iterator cursor. 2113fa9e4066Sahrens */ 2114fa9e4066Sahrens if (offset <= 3) { 2115fa9e4066Sahrens /* 2116fa9e4066Sahrens * Start iteration from the beginning of the directory. 2117fa9e4066Sahrens */ 21187f6e3e7dSperrin zap_cursor_init(&zc, os, zp->z_id); 2119fa9e4066Sahrens } else { 2120fa9e4066Sahrens /* 2121fa9e4066Sahrens * The offset is a serialized cursor. 2122fa9e4066Sahrens */ 21237f6e3e7dSperrin zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2124fa9e4066Sahrens } 2125fa9e4066Sahrens 2126fa9e4066Sahrens /* 2127fa9e4066Sahrens * Get space to change directory entries into fs independent format. 
2128fa9e4066Sahrens */ 2129fa9e4066Sahrens iovp = uio->uio_iov; 2130fa9e4066Sahrens bytes_wanted = iovp->iov_len; 2131fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2132fa9e4066Sahrens bufsize = bytes_wanted; 2133fa9e4066Sahrens outbuf = kmem_alloc(bufsize, KM_SLEEP); 2134fa9e4066Sahrens odp = (struct dirent64 *)outbuf; 2135fa9e4066Sahrens } else { 2136fa9e4066Sahrens bufsize = bytes_wanted; 2137fa9e4066Sahrens odp = (struct dirent64 *)iovp->iov_base; 2138fa9e4066Sahrens } 2139da6c28aaSamw eodp = (struct edirent *)odp; 2140fa9e4066Sahrens 2141b38f0970Sck /* 21429660e5cbSJanice Chang * If this VFS supports the system attribute view interface; and 21439660e5cbSJanice Chang * we're looking at an extended attribute directory; and we care 21449660e5cbSJanice Chang * about normalization conflicts on this vfs; then we must check 21459660e5cbSJanice Chang * for normalization conflicts with the sysattr name space. 2146b38f0970Sck */ 21479660e5cbSJanice Chang check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2148b38f0970Sck (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2149b38f0970Sck (flags & V_RDDIR_ENTFLAGS); 2150b38f0970Sck 2151fa9e4066Sahrens /* 2152fa9e4066Sahrens * Transform to file-system independent format 2153fa9e4066Sahrens */ 2154fa9e4066Sahrens outcount = 0; 2155fa9e4066Sahrens while (outcount < bytes_wanted) { 2156b1b8ab34Slling ino64_t objnum; 2157b1b8ab34Slling ushort_t reclen; 2158b1b8ab34Slling off64_t *next; 2159b1b8ab34Slling 2160fa9e4066Sahrens /* 2161fa9e4066Sahrens * Special case `.', `..', and `.zfs'. 
2162fa9e4066Sahrens */ 2163fa9e4066Sahrens if (offset == 0) { 2164fa9e4066Sahrens (void) strcpy(zap.za_name, "."); 2165da6c28aaSamw zap.za_normalization_conflict = 0; 2166b1b8ab34Slling objnum = zp->z_id; 2167fa9e4066Sahrens } else if (offset == 1) { 2168fa9e4066Sahrens (void) strcpy(zap.za_name, ".."); 2169da6c28aaSamw zap.za_normalization_conflict = 0; 21700a586ceaSMark Shellenbaum objnum = parent; 2171fa9e4066Sahrens } else if (offset == 2 && zfs_show_ctldir(zp)) { 2172fa9e4066Sahrens (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2173da6c28aaSamw zap.za_normalization_conflict = 0; 2174b1b8ab34Slling objnum = ZFSCTL_INO_ROOT; 2175fa9e4066Sahrens } else { 2176fa9e4066Sahrens /* 2177fa9e4066Sahrens * Grab next entry. 2178fa9e4066Sahrens */ 2179fa9e4066Sahrens if (error = zap_cursor_retrieve(&zc, &zap)) { 2180fa9e4066Sahrens if ((*eofp = (error == ENOENT)) != 0) 2181fa9e4066Sahrens break; 2182fa9e4066Sahrens else 2183fa9e4066Sahrens goto update; 2184fa9e4066Sahrens } 2185fa9e4066Sahrens 2186fa9e4066Sahrens if (zap.za_integer_length != 8 || 2187fa9e4066Sahrens zap.za_num_integers != 1) { 2188fa9e4066Sahrens cmn_err(CE_WARN, "zap_readdir: bad directory " 2189fa9e4066Sahrens "entry, obj = %lld, offset = %lld\n", 2190fa9e4066Sahrens (u_longlong_t)zp->z_id, 2191fa9e4066Sahrens (u_longlong_t)offset); 2192fa9e4066Sahrens error = ENXIO; 2193fa9e4066Sahrens goto update; 2194fa9e4066Sahrens } 2195b1b8ab34Slling 2196b1b8ab34Slling objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2197b1b8ab34Slling /* 2198b1b8ab34Slling * MacOS X can extract the object type here such as: 2199b1b8ab34Slling * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2200b1b8ab34Slling */ 2201b38f0970Sck 2202b38f0970Sck if (check_sysattrs && !zap.za_normalization_conflict) { 2203b38f0970Sck zap.za_normalization_conflict = 2204b38f0970Sck xattr_sysattr_casechk(zap.za_name); 2205b38f0970Sck } 2206fa9e4066Sahrens } 2207da6c28aaSamw 2208e802abbdSTim Haley if (flags & V_RDDIR_ACCFILTER) { 2209e802abbdSTim 
Haley /* 2210e802abbdSTim Haley * If we have no access at all, don't include 2211e802abbdSTim Haley * this entry in the returned information 2212e802abbdSTim Haley */ 2213e802abbdSTim Haley znode_t *ezp; 2214e802abbdSTim Haley if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2215e802abbdSTim Haley goto skip_entry; 2216e802abbdSTim Haley if (!zfs_has_access(ezp, cr)) { 2217e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2218e802abbdSTim Haley goto skip_entry; 2219e802abbdSTim Haley } 2220e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2221e802abbdSTim Haley } 2222e802abbdSTim Haley 2223da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) 2224da6c28aaSamw reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2225da6c28aaSamw else 2226da6c28aaSamw reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2227fa9e4066Sahrens 2228fa9e4066Sahrens /* 2229fa9e4066Sahrens * Will this entry fit in the buffer? 2230fa9e4066Sahrens */ 2231b1b8ab34Slling if (outcount + reclen > bufsize) { 2232fa9e4066Sahrens /* 2233fa9e4066Sahrens * Did we manage to fit anything in the buffer? 2234fa9e4066Sahrens */ 2235fa9e4066Sahrens if (!outcount) { 2236fa9e4066Sahrens error = EINVAL; 2237fa9e4066Sahrens goto update; 2238fa9e4066Sahrens } 2239fa9e4066Sahrens break; 2240fa9e4066Sahrens } 2241da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) { 2242da6c28aaSamw /* 2243da6c28aaSamw * Add extended flag entry: 2244da6c28aaSamw */ 2245da6c28aaSamw eodp->ed_ino = objnum; 2246da6c28aaSamw eodp->ed_reclen = reclen; 2247da6c28aaSamw /* NOTE: ed_off is the offset for the *next* entry */ 2248da6c28aaSamw next = &(eodp->ed_off); 2249da6c28aaSamw eodp->ed_eflags = zap.za_normalization_conflict ? 
2250da6c28aaSamw ED_CASE_CONFLICT : 0; 2251da6c28aaSamw (void) strncpy(eodp->ed_name, zap.za_name, 2252da6c28aaSamw EDIRENT_NAMELEN(reclen)); 2253da6c28aaSamw eodp = (edirent_t *)((intptr_t)eodp + reclen); 2254da6c28aaSamw } else { 2255da6c28aaSamw /* 2256da6c28aaSamw * Add normal entry: 2257da6c28aaSamw */ 2258da6c28aaSamw odp->d_ino = objnum; 2259da6c28aaSamw odp->d_reclen = reclen; 2260da6c28aaSamw /* NOTE: d_off is the offset for the *next* entry */ 2261da6c28aaSamw next = &(odp->d_off); 2262da6c28aaSamw (void) strncpy(odp->d_name, zap.za_name, 2263da6c28aaSamw DIRENT64_NAMELEN(reclen)); 2264da6c28aaSamw odp = (dirent64_t *)((intptr_t)odp + reclen); 2265da6c28aaSamw } 2266b1b8ab34Slling outcount += reclen; 2267fa9e4066Sahrens 2268fa9e4066Sahrens ASSERT(outcount <= bufsize); 2269fa9e4066Sahrens 2270fa9e4066Sahrens /* Prefetch znode */ 22717f6e3e7dSperrin if (prefetch) 2272b1b8ab34Slling dmu_prefetch(os, objnum, 0, 0); 2273fa9e4066Sahrens 2274e802abbdSTim Haley skip_entry: 2275fa9e4066Sahrens /* 2276fa9e4066Sahrens * Move to the next entry, fill in the previous offset. 2277fa9e4066Sahrens */ 2278fa9e4066Sahrens if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2279fa9e4066Sahrens zap_cursor_advance(&zc); 2280fa9e4066Sahrens offset = zap_cursor_serialize(&zc); 2281fa9e4066Sahrens } else { 2282fa9e4066Sahrens offset += 1; 2283fa9e4066Sahrens } 2284fa9e4066Sahrens *next = offset; 2285fa9e4066Sahrens } 22867f6e3e7dSperrin zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2287fa9e4066Sahrens 2288fa9e4066Sahrens if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2289fa9e4066Sahrens iovp->iov_base += outcount; 2290fa9e4066Sahrens iovp->iov_len -= outcount; 2291fa9e4066Sahrens uio->uio_resid -= outcount; 2292fa9e4066Sahrens } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2293fa9e4066Sahrens /* 2294fa9e4066Sahrens * Reset the pointer. 
2295fa9e4066Sahrens */ 2296fa9e4066Sahrens offset = uio->uio_loffset; 2297fa9e4066Sahrens } 2298fa9e4066Sahrens 2299fa9e4066Sahrens update: 230087e5029aSahrens zap_cursor_fini(&zc); 2301fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2302fa9e4066Sahrens kmem_free(outbuf, bufsize); 2303fa9e4066Sahrens 2304fa9e4066Sahrens if (error == ENOENT) 2305fa9e4066Sahrens error = 0; 2306fa9e4066Sahrens 2307fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2308fa9e4066Sahrens 2309fa9e4066Sahrens uio->uio_loffset = offset; 2310fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2311fa9e4066Sahrens return (error); 2312fa9e4066Sahrens } 2313fa9e4066Sahrens 2314ec533521Sfr ulong_t zfs_fsync_sync_cnt = 4; 2315ec533521Sfr 2316fa9e4066Sahrens static int 2317da6c28aaSamw zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2318fa9e4066Sahrens { 2319fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2320fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2321fa9e4066Sahrens 2322b468a217Seschrock /* 2323b468a217Seschrock * Regardless of whether this is required for standards conformance, 2324b468a217Seschrock * this is the logical behavior when fsync() is called on a file with 2325b468a217Seschrock * dirty pages. We use B_ASYNC since the ZIL transactions are already 2326b468a217Seschrock * going to be pushed out as part of the zil_commit(). 
2327b468a217Seschrock */ 2328b468a217Seschrock if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) && 2329b468a217Seschrock (vp->v_type == VREG) && !(IS_SWAPVP(vp))) 2330da6c28aaSamw (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct); 2331b468a217Seschrock 2332ec533521Sfr (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2333ec533521Sfr 2334*55da60b9SMark J Musante if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 2335*55da60b9SMark J Musante ZFS_ENTER(zfsvfs); 2336*55da60b9SMark J Musante ZFS_VERIFY_ZP(zp); 2337*55da60b9SMark J Musante zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 2338*55da60b9SMark J Musante ZFS_EXIT(zfsvfs); 2339*55da60b9SMark J Musante } 2340fa9e4066Sahrens return (0); 2341fa9e4066Sahrens } 2342fa9e4066Sahrens 2343da6c28aaSamw 2344fa9e4066Sahrens /* 2345fa9e4066Sahrens * Get the requested file attributes and place them in the provided 2346fa9e4066Sahrens * vattr structure. 2347fa9e4066Sahrens * 2348fa9e4066Sahrens * IN: vp - vnode of file. 2349fa9e4066Sahrens * vap - va_mask identifies requested attributes. 2350da6c28aaSamw * If AT_XVATTR set, then optional attrs are requested 2351da6c28aaSamw * flags - ATTR_NOACLCHECK (CIFS server context) 2352fa9e4066Sahrens * cr - credentials of caller. 2353da6c28aaSamw * ct - caller context 2354fa9e4066Sahrens * 2355fa9e4066Sahrens * OUT: vap - attribute values. 
2356fa9e4066Sahrens * 2357fa9e4066Sahrens * RETURN: 0 (always succeeds) 2358fa9e4066Sahrens */ 2359fa9e4066Sahrens /* ARGSUSED */ 2360fa9e4066Sahrens static int 2361da6c28aaSamw zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2362da6c28aaSamw caller_context_t *ct) 2363fa9e4066Sahrens { 2364fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2365fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2366da6c28aaSamw int error = 0; 2367ecd6cf80Smarks uint64_t links; 23680a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 2369da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2370da6c28aaSamw xoptattr_t *xoap = NULL; 2371da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 23720a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[2]; 23730a586ceaSMark Shellenbaum int count = 0; 2374fa9e4066Sahrens 23753cb34c60Sahrens ZFS_ENTER(zfsvfs); 23763cb34c60Sahrens ZFS_VERIFY_ZP(zp); 23770a586ceaSMark Shellenbaum 23780a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 23790a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 23800a586ceaSMark Shellenbaum 23810a586ceaSMark Shellenbaum if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 23820a586ceaSMark Shellenbaum ZFS_EXIT(zfsvfs); 23830a586ceaSMark Shellenbaum return (error); 23840a586ceaSMark Shellenbaum } 2385fa9e4066Sahrens 2386da6c28aaSamw /* 2387da6c28aaSamw * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2388da6c28aaSamw * Also, if we are the owner don't bother, since owner should 2389da6c28aaSamw * always be allowed to read basic attributes of file. 
2390da6c28aaSamw */ 23910a586ceaSMark Shellenbaum if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && (zp->z_uid != crgetuid(cr))) { 2392da6c28aaSamw if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2393da6c28aaSamw skipaclchk, cr)) { 2394da6c28aaSamw ZFS_EXIT(zfsvfs); 2395da6c28aaSamw return (error); 2396da6c28aaSamw } 2397da6c28aaSamw } 2398da6c28aaSamw 2399fa9e4066Sahrens /* 2400fa9e4066Sahrens * Return all attributes. It's cheaper to provide the answer 2401fa9e4066Sahrens * than to determine whether we were asked the question. 2402fa9e4066Sahrens */ 2403fa9e4066Sahrens 240434f345efSRay Hassan mutex_enter(&zp->z_lock); 2405fa9e4066Sahrens vap->va_type = vp->v_type; 24060a586ceaSMark Shellenbaum vap->va_mode = zp->z_mode & MODEMASK; 24070a586ceaSMark Shellenbaum vap->va_uid = zp->z_uid; 24080a586ceaSMark Shellenbaum vap->va_gid = zp->z_gid; 2409fa9e4066Sahrens vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2410fa9e4066Sahrens vap->va_nodeid = zp->z_id; 2411ecd6cf80Smarks if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 24120a586ceaSMark Shellenbaum links = zp->z_links + 1; 2413ecd6cf80Smarks else 24140a586ceaSMark Shellenbaum links = zp->z_links; 2415ecd6cf80Smarks vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 24160a586ceaSMark Shellenbaum vap->va_size = zp->z_size; 241772fc53bcSmarks vap->va_rdev = vp->v_rdev; 2418fa9e4066Sahrens vap->va_seq = zp->z_seq; 2419fa9e4066Sahrens 2420fa9e4066Sahrens /* 2421da6c28aaSamw * Add in any requested optional attributes and the create time. 2422da6c28aaSamw * Also set the corresponding bits in the returned attribute bitmap. 
2423fa9e4066Sahrens */ 2424da6c28aaSamw if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2425da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2426da6c28aaSamw xoap->xoa_archive = 24270a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2428da6c28aaSamw XVA_SET_RTN(xvap, XAT_ARCHIVE); 2429da6c28aaSamw } 2430da6c28aaSamw 2431da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2432da6c28aaSamw xoap->xoa_readonly = 24330a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_READONLY) != 0); 2434da6c28aaSamw XVA_SET_RTN(xvap, XAT_READONLY); 2435da6c28aaSamw } 2436da6c28aaSamw 2437da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2438da6c28aaSamw xoap->xoa_system = 24390a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_SYSTEM) != 0); 2440da6c28aaSamw XVA_SET_RTN(xvap, XAT_SYSTEM); 2441da6c28aaSamw } 2442da6c28aaSamw 2443da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2444da6c28aaSamw xoap->xoa_hidden = 24450a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_HIDDEN) != 0); 2446da6c28aaSamw XVA_SET_RTN(xvap, XAT_HIDDEN); 2447da6c28aaSamw } 2448da6c28aaSamw 2449da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2450da6c28aaSamw xoap->xoa_nounlink = 24510a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2452da6c28aaSamw XVA_SET_RTN(xvap, XAT_NOUNLINK); 2453da6c28aaSamw } 2454da6c28aaSamw 2455da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2456da6c28aaSamw xoap->xoa_immutable = 24570a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2458da6c28aaSamw XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2459da6c28aaSamw } 2460da6c28aaSamw 2461da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2462da6c28aaSamw xoap->xoa_appendonly = 24630a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2464da6c28aaSamw XVA_SET_RTN(xvap, XAT_APPENDONLY); 2465da6c28aaSamw } 2466da6c28aaSamw 2467da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2468da6c28aaSamw xoap->xoa_nodump = 24690a586ceaSMark Shellenbaum ((zp->z_pflags & 
ZFS_NODUMP) != 0); 2470da6c28aaSamw XVA_SET_RTN(xvap, XAT_NODUMP); 2471da6c28aaSamw } 2472da6c28aaSamw 2473da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2474da6c28aaSamw xoap->xoa_opaque = 24750a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_OPAQUE) != 0); 2476da6c28aaSamw XVA_SET_RTN(xvap, XAT_OPAQUE); 2477da6c28aaSamw } 2478da6c28aaSamw 2479da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2480da6c28aaSamw xoap->xoa_av_quarantined = 24810a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2482da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2483da6c28aaSamw } 2484da6c28aaSamw 2485da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2486da6c28aaSamw xoap->xoa_av_modified = 24870a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2488da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2489da6c28aaSamw } 2490da6c28aaSamw 2491da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 24920a586ceaSMark Shellenbaum vp->v_type == VREG) { 24930a586ceaSMark Shellenbaum zfs_sa_get_scanstamp(zp, xvap); 2494da6c28aaSamw } 2495da6c28aaSamw 2496da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 24970a586ceaSMark Shellenbaum uint64_t times[2]; 24980a586ceaSMark Shellenbaum 24990a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 25000a586ceaSMark Shellenbaum times, sizeof (times)); 25010a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2502da6c28aaSamw XVA_SET_RTN(xvap, XAT_CREATETIME); 2503fa9e4066Sahrens } 25047a286c47SDai Ngo 25057a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 25060a586ceaSMark Shellenbaum xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 25077a286c47SDai Ngo XVA_SET_RTN(xvap, XAT_REPARSE); 25087a286c47SDai Ngo } 2509fa9e4066Sahrens } 2510fa9e4066Sahrens 25110a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 25120a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_mtime, mtime); 25130a586ceaSMark Shellenbaum 
ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2514da6c28aaSamw 2515fa9e4066Sahrens mutex_exit(&zp->z_lock); 2516fa9e4066Sahrens 25170a586ceaSMark Shellenbaum sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks); 2518fa9e4066Sahrens 2519fa9e4066Sahrens if (zp->z_blksz == 0) { 2520fa9e4066Sahrens /* 2521fa9e4066Sahrens * Block size hasn't been set; suggest maximal I/O transfers. 2522fa9e4066Sahrens */ 2523fa9e4066Sahrens vap->va_blksize = zfsvfs->z_max_blksz; 2524fa9e4066Sahrens } 2525fa9e4066Sahrens 2526fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2527fa9e4066Sahrens return (0); 2528fa9e4066Sahrens } 2529fa9e4066Sahrens 2530fa9e4066Sahrens /* 2531fa9e4066Sahrens * Set the file attributes to the values contained in the 2532fa9e4066Sahrens * vattr structure. 2533fa9e4066Sahrens * 2534fa9e4066Sahrens * IN: vp - vnode of file to be modified. 2535fa9e4066Sahrens * vap - new attribute values. 2536da6c28aaSamw * If AT_XVATTR set, then optional attrs are being set 2537fa9e4066Sahrens * flags - ATTR_UTIME set if non-default time values provided. 2538da6c28aaSamw * - ATTR_NOACLCHECK (CIFS context only). 2539fa9e4066Sahrens * cr - credentials of caller. 2540da6c28aaSamw * ct - caller context 2541fa9e4066Sahrens * 2542fa9e4066Sahrens * RETURN: 0 if success 2543fa9e4066Sahrens * error code if failure 2544fa9e4066Sahrens * 2545fa9e4066Sahrens * Timestamps: 2546fa9e4066Sahrens * vp - ctime updated, mtime updated if size changed. 
2547fa9e4066Sahrens */ 2548fa9e4066Sahrens /* ARGSUSED */ 2549fa9e4066Sahrens static int 2550fa9e4066Sahrens zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2551fa9e4066Sahrens caller_context_t *ct) 2552fa9e4066Sahrens { 2553f18faf3fSek znode_t *zp = VTOZ(vp); 2554fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2555f18faf3fSek zilog_t *zilog; 2556fa9e4066Sahrens dmu_tx_t *tx; 2557fa9e4066Sahrens vattr_t oldva; 2558ae4caef8SMark Shellenbaum xvattr_t tmpxvattr; 25595730cc9aSmaybee uint_t mask = vap->va_mask; 25605730cc9aSmaybee uint_t saved_mask; 2561f92daba9Smarks int trim_mask = 0; 2562fa9e4066Sahrens uint64_t new_mode; 256389459e17SMark Shellenbaum uint64_t new_uid, new_gid; 25640a586ceaSMark Shellenbaum uint64_t xattr_obj = 0; 25650a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 2566d2443e76Smarks znode_t *attrzp; 2567fa9e4066Sahrens int need_policy = FALSE; 25680a586ceaSMark Shellenbaum int err, err2; 2569da6c28aaSamw zfs_fuid_info_t *fuidp = NULL; 2570da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2571da6c28aaSamw xoptattr_t *xoap; 25724c841f60Smarks zfs_acl_t *aclp = NULL; 2573da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 25740a586ceaSMark Shellenbaum boolean_t fuid_dirtied = B_FALSE; 25750a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[7], xattr_bulk[7]; 25760a586ceaSMark Shellenbaum int count = 0, xattr_count = 0; 2577fa9e4066Sahrens 2578fa9e4066Sahrens if (mask == 0) 2579fa9e4066Sahrens return (0); 2580fa9e4066Sahrens 2581fa9e4066Sahrens if (mask & AT_NOSET) 2582fa9e4066Sahrens return (EINVAL); 2583fa9e4066Sahrens 25843cb34c60Sahrens ZFS_ENTER(zfsvfs); 25853cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2586da6c28aaSamw 2587da6c28aaSamw zilog = zfsvfs->z_log; 2588da6c28aaSamw 2589da6c28aaSamw /* 2590da6c28aaSamw * Make sure that if we have ephemeral uid/gid or xvattr specified 2591da6c28aaSamw * that file system is at proper version level 2592da6c28aaSamw */ 2593da6c28aaSamw 2594da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 2595da6c28aaSamw (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2596da6c28aaSamw ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 259702dcba3bStimh (mask & AT_XVATTR))) { 259802dcba3bStimh ZFS_EXIT(zfsvfs); 2599da6c28aaSamw return (EINVAL); 260002dcba3bStimh } 2601da6c28aaSamw 260202dcba3bStimh if (mask & AT_SIZE && vp->v_type == VDIR) { 260302dcba3bStimh ZFS_EXIT(zfsvfs); 2604fa9e4066Sahrens return (EISDIR); 260502dcba3bStimh } 2606fa9e4066Sahrens 260702dcba3bStimh if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 260802dcba3bStimh ZFS_EXIT(zfsvfs); 260984c5a155Smarks return (EINVAL); 261002dcba3bStimh } 261184c5a155Smarks 2612da6c28aaSamw /* 2613da6c28aaSamw * If this is an xvattr_t, then get a pointer to the structure of 2614da6c28aaSamw * optional attributes. If this is NULL, then we have a vattr_t. 
2615da6c28aaSamw */ 2616da6c28aaSamw xoap = xva_getxoptattr(xvap); 2617da6c28aaSamw 2618ae4caef8SMark Shellenbaum xva_init(&tmpxvattr); 2619ae4caef8SMark Shellenbaum 2620da6c28aaSamw /* 2621da6c28aaSamw * Immutable files can only alter immutable bit and atime 2622da6c28aaSamw */ 26230a586ceaSMark Shellenbaum if ((zp->z_pflags & ZFS_IMMUTABLE) && 2624da6c28aaSamw ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 262502dcba3bStimh ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 262602dcba3bStimh ZFS_EXIT(zfsvfs); 2627da6c28aaSamw return (EPERM); 262802dcba3bStimh } 2629da6c28aaSamw 26300a586ceaSMark Shellenbaum if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 263102dcba3bStimh ZFS_EXIT(zfsvfs); 2632da6c28aaSamw return (EPERM); 263302dcba3bStimh } 2634fa9e4066Sahrens 263593129341Smarks /* 263693129341Smarks * Verify timestamps doesn't overflow 32 bits. 263793129341Smarks * ZFS can handle large timestamps, but 32bit syscalls can't 263893129341Smarks * handle times greater than 2039. This check should be removed 263993129341Smarks * once large timestamps are fully supported. 264093129341Smarks */ 264193129341Smarks if (mask & (AT_ATIME | AT_MTIME)) { 264293129341Smarks if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 264393129341Smarks ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 264493129341Smarks ZFS_EXIT(zfsvfs); 264593129341Smarks return (EOVERFLOW); 264693129341Smarks } 264793129341Smarks } 264893129341Smarks 2649fa9e4066Sahrens top: 2650d2443e76Smarks attrzp = NULL; 2651fa9e4066Sahrens 2652d47621a4STim Haley /* Can this be moved to before the top label? 
*/ 2653fa9e4066Sahrens if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2654fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2655fa9e4066Sahrens return (EROFS); 2656fa9e4066Sahrens } 2657fa9e4066Sahrens 2658fa9e4066Sahrens /* 2659fa9e4066Sahrens * First validate permissions 2660fa9e4066Sahrens */ 2661fa9e4066Sahrens 2662fa9e4066Sahrens if (mask & AT_SIZE) { 2663da6c28aaSamw err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); 2664fa9e4066Sahrens if (err) { 2665fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2666fa9e4066Sahrens return (err); 2667fa9e4066Sahrens } 26685730cc9aSmaybee /* 26695730cc9aSmaybee * XXX - Note, we are not providing any open 26705730cc9aSmaybee * mode flags here (like FNDELAY), so we may 26715730cc9aSmaybee * block if there are locks present... this 26725730cc9aSmaybee * should be addressed in openat(). 26735730cc9aSmaybee */ 2674cdb0ab79Smaybee /* XXX - would it be OK to generate a log record here? */ 2675cdb0ab79Smaybee err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 26765730cc9aSmaybee if (err) { 26775730cc9aSmaybee ZFS_EXIT(zfsvfs); 26785730cc9aSmaybee return (err); 26795730cc9aSmaybee } 2680fa9e4066Sahrens } 2681fa9e4066Sahrens 2682da6c28aaSamw if (mask & (AT_ATIME|AT_MTIME) || 2683da6c28aaSamw ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2684da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_READONLY) || 2685da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2686da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 26870a586ceaSMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2688da6c28aaSamw need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2689da6c28aaSamw skipaclchk, cr); 26900a586ceaSMark Shellenbaum } 2691fa9e4066Sahrens 2692fa9e4066Sahrens if (mask & (AT_UID|AT_GID)) { 2693fa9e4066Sahrens int idmask = (mask & (AT_UID|AT_GID)); 2694fa9e4066Sahrens int take_owner; 2695fa9e4066Sahrens int take_group; 2696fa9e4066Sahrens 2697a933bc41Smarks /* 2698a933bc41Smarks * NOTE: even if a new mode is being set, 2699a933bc41Smarks * we may clear S_ISUID/S_ISGID bits. 
2700a933bc41Smarks */ 2701a933bc41Smarks 2702a933bc41Smarks if (!(mask & AT_MODE)) 27030a586ceaSMark Shellenbaum vap->va_mode = zp->z_mode; 2704a933bc41Smarks 2705fa9e4066Sahrens /* 2706fa9e4066Sahrens * Take ownership or chgrp to group we are a member of 2707fa9e4066Sahrens */ 2708fa9e4066Sahrens 2709fa9e4066Sahrens take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2710da6c28aaSamw take_group = (mask & AT_GID) && 2711da6c28aaSamw zfs_groupmember(zfsvfs, vap->va_gid, cr); 2712fa9e4066Sahrens 2713fa9e4066Sahrens /* 2714fa9e4066Sahrens * If both AT_UID and AT_GID are set then take_owner and 2715fa9e4066Sahrens * take_group must both be set in order to allow taking 2716fa9e4066Sahrens * ownership. 2717fa9e4066Sahrens * 2718fa9e4066Sahrens * Otherwise, send the check through secpolicy_vnode_setattr() 2719fa9e4066Sahrens * 2720fa9e4066Sahrens */ 2721fa9e4066Sahrens 2722fa9e4066Sahrens if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2723fa9e4066Sahrens ((idmask == AT_UID) && take_owner) || 2724fa9e4066Sahrens ((idmask == AT_GID) && take_group)) { 2725da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2726da6c28aaSamw skipaclchk, cr) == 0) { 2727fa9e4066Sahrens /* 2728fa9e4066Sahrens * Remove setuid/setgid for non-privileged users 2729fa9e4066Sahrens */ 273013f9f30eSmarks secpolicy_setid_clear(vap, cr); 2731f92daba9Smarks trim_mask = (mask & (AT_UID|AT_GID)); 2732fa9e4066Sahrens } else { 2733fa9e4066Sahrens need_policy = TRUE; 2734fa9e4066Sahrens } 2735fa9e4066Sahrens } else { 2736fa9e4066Sahrens need_policy = TRUE; 2737fa9e4066Sahrens } 2738fa9e4066Sahrens } 2739fa9e4066Sahrens 2740f92daba9Smarks mutex_enter(&zp->z_lock); 27410a586ceaSMark Shellenbaum oldva.va_mode = zp->z_mode; 27420a586ceaSMark Shellenbaum oldva.va_uid = zp->z_uid; 27430a586ceaSMark Shellenbaum oldva.va_gid = zp->z_gid; 2744da6c28aaSamw if (mask & AT_XVATTR) { 2745ae4caef8SMark Shellenbaum /* 2746ae4caef8SMark Shellenbaum * Update xvattr mask to include only those 
attributes 2747ae4caef8SMark Shellenbaum * that are actually changing. 2748ae4caef8SMark Shellenbaum * 2749ae4caef8SMark Shellenbaum * the bits will be restored prior to actually setting 2750ae4caef8SMark Shellenbaum * the attributes so the caller thinks they were set. 2751ae4caef8SMark Shellenbaum */ 2752ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2753ae4caef8SMark Shellenbaum if (xoap->xoa_appendonly != 27540a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 2755ae4caef8SMark Shellenbaum need_policy = TRUE; 2756ae4caef8SMark Shellenbaum } else { 2757ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2758ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2759ae4caef8SMark Shellenbaum } 2760ae4caef8SMark Shellenbaum } 2761ae4caef8SMark Shellenbaum 2762ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2763ae4caef8SMark Shellenbaum if (xoap->xoa_nounlink != 27640a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 2765ae4caef8SMark Shellenbaum need_policy = TRUE; 2766ae4caef8SMark Shellenbaum } else { 2767ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2768ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2769ae4caef8SMark Shellenbaum } 2770ae4caef8SMark Shellenbaum } 2771ae4caef8SMark Shellenbaum 2772ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2773ae4caef8SMark Shellenbaum if (xoap->xoa_immutable != 27740a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 2775ae4caef8SMark Shellenbaum need_policy = TRUE; 2776ae4caef8SMark Shellenbaum } else { 2777ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2778ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 2779ae4caef8SMark Shellenbaum } 2780ae4caef8SMark Shellenbaum } 2781ae4caef8SMark Shellenbaum 2782ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2783ae4caef8SMark Shellenbaum if (xoap->xoa_nodump != 27840a586ceaSMark 
Shellenbaum ((zp->z_pflags & ZFS_NODUMP) != 0)) { 2785ae4caef8SMark Shellenbaum need_policy = TRUE; 2786ae4caef8SMark Shellenbaum } else { 2787ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NODUMP); 2788ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 2789ae4caef8SMark Shellenbaum } 2790ae4caef8SMark Shellenbaum } 2791ae4caef8SMark Shellenbaum 2792ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2793ae4caef8SMark Shellenbaum if (xoap->xoa_av_modified != 27940a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 2795ae4caef8SMark Shellenbaum need_policy = TRUE; 2796ae4caef8SMark Shellenbaum } else { 2797ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2798ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2799ae4caef8SMark Shellenbaum } 2800ae4caef8SMark Shellenbaum } 2801ae4caef8SMark Shellenbaum 2802ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2803ae4caef8SMark Shellenbaum if ((vp->v_type != VREG && 2804ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined) || 2805ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined != 28060a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 2807ae4caef8SMark Shellenbaum need_policy = TRUE; 2808ae4caef8SMark Shellenbaum } else { 2809ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2810ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2811ae4caef8SMark Shellenbaum } 2812ae4caef8SMark Shellenbaum } 2813ae4caef8SMark Shellenbaum 28147a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 28157a286c47SDai Ngo mutex_exit(&zp->z_lock); 28167a286c47SDai Ngo ZFS_EXIT(zfsvfs); 28177a286c47SDai Ngo return (EPERM); 28187a286c47SDai Ngo } 28197a286c47SDai Ngo 2820ae4caef8SMark Shellenbaum if (need_policy == FALSE && 2821ae4caef8SMark Shellenbaum (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 2822ae4caef8SMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2823da6c28aaSamw need_policy = 
TRUE; 2824da6c28aaSamw } 2825da6c28aaSamw } 2826da6c28aaSamw 2827f92daba9Smarks mutex_exit(&zp->z_lock); 2828fa9e4066Sahrens 2829f92daba9Smarks if (mask & AT_MODE) { 2830da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 2831f92daba9Smarks err = secpolicy_setid_setsticky_clear(vp, vap, 2832f92daba9Smarks &oldva, cr); 2833f92daba9Smarks if (err) { 2834f92daba9Smarks ZFS_EXIT(zfsvfs); 2835f92daba9Smarks return (err); 2836f92daba9Smarks } 2837f92daba9Smarks trim_mask |= AT_MODE; 2838f92daba9Smarks } else { 2839f92daba9Smarks need_policy = TRUE; 2840f92daba9Smarks } 2841f92daba9Smarks } 284213f9f30eSmarks 2843f92daba9Smarks if (need_policy) { 284413f9f30eSmarks /* 284513f9f30eSmarks * If trim_mask is set then take ownership 2846f92daba9Smarks * has been granted or write_acl is present and user 2847f92daba9Smarks * has the ability to modify mode. In that case remove 2848f92daba9Smarks * UID|GID and or MODE from mask so that 284913f9f30eSmarks * secpolicy_vnode_setattr() doesn't revoke it. 
285013f9f30eSmarks */ 285113f9f30eSmarks 2852f92daba9Smarks if (trim_mask) { 2853f92daba9Smarks saved_mask = vap->va_mask; 2854f92daba9Smarks vap->va_mask &= ~trim_mask; 2855f92daba9Smarks } 2856fa9e4066Sahrens err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2857da6c28aaSamw (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2858fa9e4066Sahrens if (err) { 2859fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2860fa9e4066Sahrens return (err); 2861fa9e4066Sahrens } 286213f9f30eSmarks 286313f9f30eSmarks if (trim_mask) 2864f92daba9Smarks vap->va_mask |= saved_mask; 2865fa9e4066Sahrens } 2866fa9e4066Sahrens 2867fa9e4066Sahrens /* 2868fa9e4066Sahrens * secpolicy_vnode_setattr, or take ownership may have 2869fa9e4066Sahrens * changed va_mask 2870fa9e4066Sahrens */ 2871fa9e4066Sahrens mask = vap->va_mask; 2872fa9e4066Sahrens 28730a586ceaSMark Shellenbaum if ((mask & (AT_UID | AT_GID))) { 28740a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr_obj, 28750a586ceaSMark Shellenbaum sizeof (xattr_obj)); 28760a586ceaSMark Shellenbaum 28770a586ceaSMark Shellenbaum if (xattr_obj) { 28780a586ceaSMark Shellenbaum err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 28790a586ceaSMark Shellenbaum if (err) 28800a586ceaSMark Shellenbaum goto out2; 28810a586ceaSMark Shellenbaum } 28820a586ceaSMark Shellenbaum if (mask & AT_UID) { 28830a586ceaSMark Shellenbaum new_uid = zfs_fuid_create(zfsvfs, 28840a586ceaSMark Shellenbaum (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 28850a586ceaSMark Shellenbaum if (vap->va_uid != zp->z_uid && 28860a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 28870a586ceaSMark Shellenbaum err = EDQUOT; 28880a586ceaSMark Shellenbaum goto out2; 28890a586ceaSMark Shellenbaum } 28900a586ceaSMark Shellenbaum } 28910a586ceaSMark Shellenbaum 28920a586ceaSMark Shellenbaum if (mask & AT_GID) { 28930a586ceaSMark Shellenbaum new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 28940a586ceaSMark Shellenbaum cr, ZFS_GROUP, 
&fuidp); 28950a586ceaSMark Shellenbaum if (new_gid != zp->z_gid && 28960a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 28970a586ceaSMark Shellenbaum err = EDQUOT; 28980a586ceaSMark Shellenbaum goto out2; 28990a586ceaSMark Shellenbaum } 29000a586ceaSMark Shellenbaum } 29010a586ceaSMark Shellenbaum } 2902fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 2903fa9e4066Sahrens 2904fa9e4066Sahrens if (mask & AT_MODE) { 29050a586ceaSMark Shellenbaum uint64_t pmode = zp->z_mode; 2906169cdae2Smarks new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2907fa9e4066Sahrens 290814843421SMatthew Ahrens if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 290914843421SMatthew Ahrens goto out; 29100a586ceaSMark Shellenbaum 29110a586ceaSMark Shellenbaum if (!zp->z_is_sa && ZFS_EXTERNAL_ACL(zp)) { 29120a586ceaSMark Shellenbaum /* 29130a586ceaSMark Shellenbaum * Are we upgrading ACL from old V0 format 29140a586ceaSMark Shellenbaum * to V1 format? 29150a586ceaSMark Shellenbaum */ 2916da6c28aaSamw if (zfsvfs->z_version <= ZPL_VERSION_FUID && 29170a586ceaSMark Shellenbaum ZNODE_ACL_VERSION(zp) == 2918da6c28aaSamw ZFS_ACL_VERSION_INITIAL) { 2919da6c28aaSamw dmu_tx_hold_free(tx, 29200a586ceaSMark Shellenbaum ZFS_EXTERNAL_ACL(zp), 0, 2921da6c28aaSamw DMU_OBJECT_END); 2922da6c28aaSamw dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 29234c841f60Smarks 0, aclp->z_acl_bytes); 2924da6c28aaSamw } else { 29250a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, ZFS_EXTERNAL_ACL(zp), 0, 29264c841f60Smarks aclp->z_acl_bytes); 29274c841f60Smarks } 29280a586ceaSMark Shellenbaum } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 29296d38e247Smarks dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 29306d38e247Smarks 0, aclp->z_acl_bytes); 2931da6c28aaSamw } 29320a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 29330a586ceaSMark Shellenbaum } else { 29340a586ceaSMark Shellenbaum if ((mask & AT_XVATTR) && 29350a586ceaSMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 
29360a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 29370a586ceaSMark Shellenbaum else 29380a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2939fa9e4066Sahrens } 2940fa9e4066Sahrens 29410a586ceaSMark Shellenbaum if (attrzp) { 29420a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 2943d2443e76Smarks } 2944d2443e76Smarks 29450a586ceaSMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 29460a586ceaSMark Shellenbaum if (fuid_dirtied) 29470a586ceaSMark Shellenbaum zfs_fuid_txhold(zfsvfs, tx); 29480a586ceaSMark Shellenbaum 29490a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 29500a586ceaSMark Shellenbaum 29511209a471SNeil Perrin err = dmu_tx_assign(tx, TXG_NOWAIT); 2952fa9e4066Sahrens if (err) { 295314843421SMatthew Ahrens if (err == ERESTART) 29548a2f1b91Sahrens dmu_tx_wait(tx); 295514843421SMatthew Ahrens goto out; 2956fa9e4066Sahrens } 2957fa9e4066Sahrens 29580a586ceaSMark Shellenbaum count = 0; 2959fa9e4066Sahrens /* 2960fa9e4066Sahrens * Set each attribute requested. 2961fa9e4066Sahrens * We group settings according to the locks they need to acquire. 2962fa9e4066Sahrens * 2963fa9e4066Sahrens * Note: you cannot set ctime directly, although it will be 2964fa9e4066Sahrens * updated as a side-effect of calling this function. 
2965fa9e4066Sahrens */ 2966fa9e4066Sahrens 2967fa9e4066Sahrens mutex_enter(&zp->z_lock); 2968fa9e4066Sahrens 29690a586ceaSMark Shellenbaum if (attrzp) 29700a586ceaSMark Shellenbaum mutex_enter(&attrzp->z_lock); 29710a586ceaSMark Shellenbaum 297227dd1e87SMark Shellenbaum if (mask & (AT_UID|AT_GID)) { 297327dd1e87SMark Shellenbaum 297427dd1e87SMark Shellenbaum if (mask & AT_UID) { 297527dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 297627dd1e87SMark Shellenbaum &new_uid, sizeof (new_uid)); 297727dd1e87SMark Shellenbaum zp->z_uid = zfs_fuid_map_id(zfsvfs, new_uid, 297827dd1e87SMark Shellenbaum cr, ZFS_OWNER); 297927dd1e87SMark Shellenbaum if (attrzp) { 298027dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 298127dd1e87SMark Shellenbaum SA_ZPL_UID(zfsvfs), NULL, &new_uid, 298227dd1e87SMark Shellenbaum sizeof (new_uid)); 298327dd1e87SMark Shellenbaum attrzp->z_gid = zp->z_uid; 298427dd1e87SMark Shellenbaum } 29850a586ceaSMark Shellenbaum } 29860a586ceaSMark Shellenbaum 298727dd1e87SMark Shellenbaum if (mask & AT_GID) { 298827dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 298927dd1e87SMark Shellenbaum NULL, &new_gid, sizeof (new_gid)); 299027dd1e87SMark Shellenbaum zp->z_gid = zfs_fuid_map_id(zfsvfs, new_gid, cr, 299127dd1e87SMark Shellenbaum ZFS_GROUP); 299227dd1e87SMark Shellenbaum if (attrzp) { 299327dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 299427dd1e87SMark Shellenbaum SA_ZPL_GID(zfsvfs), NULL, &new_gid, 299527dd1e87SMark Shellenbaum sizeof (new_gid)); 299627dd1e87SMark Shellenbaum attrzp->z_gid = zp->z_gid; 299727dd1e87SMark Shellenbaum } 299827dd1e87SMark Shellenbaum } 299927dd1e87SMark Shellenbaum if (!(mask & AT_MODE)) { 300027dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 300127dd1e87SMark Shellenbaum NULL, &new_mode, sizeof (new_mode)); 300227dd1e87SMark Shellenbaum new_mode = zp->z_mode; 300327dd1e87SMark Shellenbaum } 
300427dd1e87SMark Shellenbaum err = zfs_acl_chown_setattr(zp); 300527dd1e87SMark Shellenbaum ASSERT(err == 0); 30060a586ceaSMark Shellenbaum if (attrzp) { 300727dd1e87SMark Shellenbaum err = zfs_acl_chown_setattr(attrzp); 300827dd1e87SMark Shellenbaum ASSERT(err == 0); 30090a586ceaSMark Shellenbaum } 30100a586ceaSMark Shellenbaum } 30110a586ceaSMark Shellenbaum 3012fa9e4066Sahrens if (mask & AT_MODE) { 30134c841f60Smarks mutex_enter(&zp->z_acl_lock); 30140a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 30150a586ceaSMark Shellenbaum &new_mode, sizeof (new_mode)); 30160a586ceaSMark Shellenbaum zp->z_mode = new_mode; 301727dd1e87SMark Shellenbaum ASSERT3U((uintptr_t)aclp, !=, NULL); 301889459e17SMark Shellenbaum err = zfs_aclset_common(zp, aclp, cr, tx); 3019fa9e4066Sahrens ASSERT3U(err, ==, 0); 30204929fd5eSTim Haley zp->z_acl_cached = aclp; 30214929fd5eSTim Haley aclp = NULL; 30224c841f60Smarks mutex_exit(&zp->z_acl_lock); 3023fa9e4066Sahrens } 3024fa9e4066Sahrens 3025d2443e76Smarks if (attrzp) 30260a586ceaSMark Shellenbaum mutex_exit(&attrzp->z_lock); 3027d2443e76Smarks 30280a586ceaSMark Shellenbaum if (mask & AT_ATIME) { 30290a586ceaSMark Shellenbaum ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 30300a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 30310a586ceaSMark Shellenbaum &zp->z_atime, sizeof (zp->z_atime)); 3032d2443e76Smarks } 3033fa9e4066Sahrens 30340a586ceaSMark Shellenbaum if (mask & AT_MTIME) { 30350a586ceaSMark Shellenbaum ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 30360a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 30370a586ceaSMark Shellenbaum mtime, sizeof (mtime)); 3038d2443e76Smarks } 3039d2443e76Smarks 3040cdb0ab79Smaybee /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 30410a586ceaSMark Shellenbaum if (mask & AT_SIZE && !(mask & AT_MTIME)) { 30420a586ceaSMark Shellenbaum if (!(mask & AT_MTIME)) 30430a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 30440a586ceaSMark Shellenbaum NULL, mtime, sizeof (mtime)); 30450a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 30460a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 30470a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 30480a586ceaSMark Shellenbaum B_TRUE); 30490a586ceaSMark Shellenbaum } else if (mask != 0) { 30500a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 30510a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 30520a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 30530a586ceaSMark Shellenbaum B_TRUE); 30540a586ceaSMark Shellenbaum if (attrzp) { 30550a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 30560a586ceaSMark Shellenbaum SA_ZPL_CTIME(zfsvfs), NULL, 30570a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 30580a586ceaSMark Shellenbaum zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 30590a586ceaSMark Shellenbaum mtime, ctime, B_TRUE); 30600a586ceaSMark Shellenbaum } 30610a586ceaSMark Shellenbaum } 3062da6c28aaSamw /* 3063da6c28aaSamw * Do this after setting timestamps to prevent timestamp 3064da6c28aaSamw * update from toggling bit 3065da6c28aaSamw */ 3066da6c28aaSamw 3067da6c28aaSamw if (xoap && (mask & AT_XVATTR)) { 3068ae4caef8SMark Shellenbaum 3069ae4caef8SMark Shellenbaum /* 3070ae4caef8SMark Shellenbaum * restore trimmed off masks 3071ae4caef8SMark Shellenbaum * so that return masks can be set for caller. 
3072ae4caef8SMark Shellenbaum */ 3073ae4caef8SMark Shellenbaum 3074ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3075ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_APPENDONLY); 3076ae4caef8SMark Shellenbaum } 3077ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3078ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NOUNLINK); 3079ae4caef8SMark Shellenbaum } 3080ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3081ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3082ae4caef8SMark Shellenbaum } 3083ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3084ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NODUMP); 3085ae4caef8SMark Shellenbaum } 3086ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3087ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3088ae4caef8SMark Shellenbaum } 3089ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3090ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3091ae4caef8SMark Shellenbaum } 3092ae4caef8SMark Shellenbaum 30930a586ceaSMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3094da6c28aaSamw ASSERT(vp->v_type == VREG); 3095da6c28aaSamw 30960a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 30970a586ceaSMark Shellenbaum &zp->z_pflags, sizeof (zp->z_pflags)); 30980a586ceaSMark Shellenbaum zfs_xvattr_set(zp, xvap, tx); 3099da6c28aaSamw } 3100fa9e4066Sahrens 310189459e17SMark Shellenbaum if (fuid_dirtied) 310289459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 310389459e17SMark Shellenbaum 31045730cc9aSmaybee if (mask != 0) 3105da6c28aaSamw zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3106fa9e4066Sahrens 3107fa9e4066Sahrens mutex_exit(&zp->z_lock); 3108fa9e4066Sahrens 310914843421SMatthew Ahrens out: 31100a586ceaSMark Shellenbaum if (err == 0 && attrzp) { 31110a586ceaSMark Shellenbaum err2 = 
sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 31120a586ceaSMark Shellenbaum xattr_count, tx); 31130a586ceaSMark Shellenbaum ASSERT(err2 == 0); 31140a586ceaSMark Shellenbaum } 31150a586ceaSMark Shellenbaum 3116d2443e76Smarks if (attrzp) 3117d2443e76Smarks VN_RELE(ZTOV(attrzp)); 31184929fd5eSTim Haley if (aclp) 31194929fd5eSTim Haley zfs_acl_free(aclp); 31204929fd5eSTim Haley 312114843421SMatthew Ahrens if (fuidp) { 312214843421SMatthew Ahrens zfs_fuid_info_free(fuidp); 312314843421SMatthew Ahrens fuidp = NULL; 312414843421SMatthew Ahrens } 312514843421SMatthew Ahrens 31260a586ceaSMark Shellenbaum if (err) { 312714843421SMatthew Ahrens dmu_tx_abort(tx); 31280a586ceaSMark Shellenbaum if (err == ERESTART) 31290a586ceaSMark Shellenbaum goto top; 31300a586ceaSMark Shellenbaum } else { 31310a586ceaSMark Shellenbaum err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 313214843421SMatthew Ahrens dmu_tx_commit(tx); 31330a586ceaSMark Shellenbaum } 313414843421SMatthew Ahrens 3135fa9e4066Sahrens 31360a586ceaSMark Shellenbaum out2: 3137*55da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3138*55da60b9SMark J Musante zil_commit(zilog, UINT64_MAX, 0); 3139*55da60b9SMark J Musante 3140fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3141fa9e4066Sahrens return (err); 3142fa9e4066Sahrens } 3143fa9e4066Sahrens 3144fa9e4066Sahrens typedef struct zfs_zlock { 3145fa9e4066Sahrens krwlock_t *zl_rwlock; /* lock we acquired */ 3146fa9e4066Sahrens znode_t *zl_znode; /* znode we held */ 3147fa9e4066Sahrens struct zfs_zlock *zl_next; /* next in list */ 3148fa9e4066Sahrens } zfs_zlock_t; 3149fa9e4066Sahrens 3150ff008e00Smaybee /* 3151ff008e00Smaybee * Drop locks and release vnodes that were held by zfs_rename_lock(). 
3152ff008e00Smaybee */ 3153ff008e00Smaybee static void 3154ff008e00Smaybee zfs_rename_unlock(zfs_zlock_t **zlpp) 3155ff008e00Smaybee { 3156ff008e00Smaybee zfs_zlock_t *zl; 3157ff008e00Smaybee 3158ff008e00Smaybee while ((zl = *zlpp) != NULL) { 3159ff008e00Smaybee if (zl->zl_znode != NULL) 3160ff008e00Smaybee VN_RELE(ZTOV(zl->zl_znode)); 3161ff008e00Smaybee rw_exit(zl->zl_rwlock); 3162ff008e00Smaybee *zlpp = zl->zl_next; 3163ff008e00Smaybee kmem_free(zl, sizeof (*zl)); 3164ff008e00Smaybee } 3165ff008e00Smaybee } 3166ff008e00Smaybee 3167ff008e00Smaybee /* 3168ff008e00Smaybee * Search back through the directory tree, using the ".." entries. 3169ff008e00Smaybee * Lock each directory in the chain to prevent concurrent renames. 3170ff008e00Smaybee * Fail any attempt to move a directory into one of its own descendants. 3171ff008e00Smaybee * XXX - z_parent_lock can overlap with map or grow locks 3172ff008e00Smaybee */ 3173fa9e4066Sahrens static int 3174fa9e4066Sahrens zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3175fa9e4066Sahrens { 3176fa9e4066Sahrens zfs_zlock_t *zl; 3177feb08c6bSbillm znode_t *zp = tdzp; 3178fa9e4066Sahrens uint64_t rootid = zp->z_zfsvfs->z_root; 31790a586ceaSMark Shellenbaum uint64_t oidp = zp->z_id; 3180fa9e4066Sahrens krwlock_t *rwlp = &szp->z_parent_lock; 3181fa9e4066Sahrens krw_t rw = RW_WRITER; 3182fa9e4066Sahrens 3183fa9e4066Sahrens /* 3184fa9e4066Sahrens * First pass write-locks szp and compares to zp->z_id. 3185fa9e4066Sahrens * Later passes read-lock zp and compare to zp->z_parent. 3186fa9e4066Sahrens */ 3187fa9e4066Sahrens do { 3188ff008e00Smaybee if (!rw_tryenter(rwlp, rw)) { 3189ff008e00Smaybee /* 3190ff008e00Smaybee * Another thread is renaming in this path. 3191ff008e00Smaybee * Note that if we are a WRITER, we don't have any 3192ff008e00Smaybee * parent_locks held yet. 
3193ff008e00Smaybee */ 3194ff008e00Smaybee if (rw == RW_READER && zp->z_id > szp->z_id) { 3195ff008e00Smaybee /* 3196ff008e00Smaybee * Drop our locks and restart 3197ff008e00Smaybee */ 3198ff008e00Smaybee zfs_rename_unlock(&zl); 3199ff008e00Smaybee *zlpp = NULL; 3200ff008e00Smaybee zp = tdzp; 32010a586ceaSMark Shellenbaum oidp = zp->z_id; 3202ff008e00Smaybee rwlp = &szp->z_parent_lock; 3203ff008e00Smaybee rw = RW_WRITER; 3204ff008e00Smaybee continue; 3205ff008e00Smaybee } else { 3206ff008e00Smaybee /* 3207ff008e00Smaybee * Wait for other thread to drop its locks 3208ff008e00Smaybee */ 3209ff008e00Smaybee rw_enter(rwlp, rw); 3210ff008e00Smaybee } 3211ff008e00Smaybee } 3212ff008e00Smaybee 3213fa9e4066Sahrens zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3214fa9e4066Sahrens zl->zl_rwlock = rwlp; 3215fa9e4066Sahrens zl->zl_znode = NULL; 3216fa9e4066Sahrens zl->zl_next = *zlpp; 3217fa9e4066Sahrens *zlpp = zl; 3218fa9e4066Sahrens 32190a586ceaSMark Shellenbaum if (oidp == szp->z_id) /* We're a descendant of szp */ 3220fa9e4066Sahrens return (EINVAL); 3221fa9e4066Sahrens 32220a586ceaSMark Shellenbaum if (oidp == rootid) /* We've hit the top */ 3223fa9e4066Sahrens return (0); 3224fa9e4066Sahrens 3225fa9e4066Sahrens if (rw == RW_READER) { /* i.e. 
not the first pass */ 32260a586ceaSMark Shellenbaum int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); 3227fa9e4066Sahrens if (error) 3228fa9e4066Sahrens return (error); 3229fa9e4066Sahrens zl->zl_znode = zp; 3230fa9e4066Sahrens } 32310a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), 32320a586ceaSMark Shellenbaum &oidp, sizeof (oidp)); 3233fa9e4066Sahrens rwlp = &zp->z_parent_lock; 3234fa9e4066Sahrens rw = RW_READER; 3235fa9e4066Sahrens 3236fa9e4066Sahrens } while (zp->z_id != sdzp->z_id); 3237fa9e4066Sahrens 3238fa9e4066Sahrens return (0); 3239fa9e4066Sahrens } 3240fa9e4066Sahrens 3241fa9e4066Sahrens /* 3242fa9e4066Sahrens * Move an entry from the provided source directory to the target 3243fa9e4066Sahrens * directory. Change the entry name as indicated. 3244fa9e4066Sahrens * 3245fa9e4066Sahrens * IN: sdvp - Source directory containing the "old entry". 3246fa9e4066Sahrens * snm - Old entry name. 3247fa9e4066Sahrens * tdvp - Target directory to contain the "new entry". 3248fa9e4066Sahrens * tnm - New entry name. 3249fa9e4066Sahrens * cr - credentials of caller. 
3250da6c28aaSamw * ct - caller context 3251da6c28aaSamw * flags - case flags 3252fa9e4066Sahrens * 3253fa9e4066Sahrens * RETURN: 0 if success 3254fa9e4066Sahrens * error code if failure 3255fa9e4066Sahrens * 3256fa9e4066Sahrens * Timestamps: 3257fa9e4066Sahrens * sdvp,tdvp - ctime|mtime updated 3258fa9e4066Sahrens */ 3259da6c28aaSamw /*ARGSUSED*/ 3260fa9e4066Sahrens static int 3261da6c28aaSamw zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, 3262da6c28aaSamw caller_context_t *ct, int flags) 3263fa9e4066Sahrens { 3264fa9e4066Sahrens znode_t *tdzp, *szp, *tzp; 3265fa9e4066Sahrens znode_t *sdzp = VTOZ(sdvp); 3266fa9e4066Sahrens zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; 3267f18faf3fSek zilog_t *zilog; 3268fa9e4066Sahrens vnode_t *realvp; 3269fa9e4066Sahrens zfs_dirlock_t *sdl, *tdl; 3270fa9e4066Sahrens dmu_tx_t *tx; 3271fa9e4066Sahrens zfs_zlock_t *zl; 3272da6c28aaSamw int cmp, serr, terr; 3273da6c28aaSamw int error = 0; 3274da6c28aaSamw int zflg = 0; 3275fa9e4066Sahrens 32763cb34c60Sahrens ZFS_ENTER(zfsvfs); 32773cb34c60Sahrens ZFS_VERIFY_ZP(sdzp); 3278f18faf3fSek zilog = zfsvfs->z_log; 3279fa9e4066Sahrens 3280fa9e4066Sahrens /* 3281fa9e4066Sahrens * Make sure we have the real vp for the target directory. 
3282fa9e4066Sahrens */ 3283da6c28aaSamw if (VOP_REALVP(tdvp, &realvp, ct) == 0) 3284fa9e4066Sahrens tdvp = realvp; 3285fa9e4066Sahrens 3286d39ee142SMark Shellenbaum if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) { 3287fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3288fa9e4066Sahrens return (EXDEV); 3289fa9e4066Sahrens } 3290fa9e4066Sahrens 3291fa9e4066Sahrens tdzp = VTOZ(tdvp); 32923cb34c60Sahrens ZFS_VERIFY_ZP(tdzp); 3293de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(tnm, 3294da6c28aaSamw strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3295da6c28aaSamw ZFS_EXIT(zfsvfs); 3296da6c28aaSamw return (EILSEQ); 3297da6c28aaSamw } 3298da6c28aaSamw 3299da6c28aaSamw if (flags & FIGNORECASE) 3300da6c28aaSamw zflg |= ZCILOOK; 3301da6c28aaSamw 3302fa9e4066Sahrens top: 3303fa9e4066Sahrens szp = NULL; 3304fa9e4066Sahrens tzp = NULL; 3305fa9e4066Sahrens zl = NULL; 3306fa9e4066Sahrens 3307fa9e4066Sahrens /* 3308fa9e4066Sahrens * This is to prevent the creation of links into attribute space 3309fa9e4066Sahrens * by renaming a linked file into/outof an attribute directory. 3310fa9e4066Sahrens * See the comment in zfs_link() for why this is considered bad. 3311fa9e4066Sahrens */ 33120a586ceaSMark Shellenbaum if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { 3313fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3314fa9e4066Sahrens return (EINVAL); 3315fa9e4066Sahrens } 3316fa9e4066Sahrens 3317fa9e4066Sahrens /* 3318fa9e4066Sahrens * Lock source and target directory entries. To prevent deadlock, 3319fa9e4066Sahrens * a lock ordering must be defined. We lock the directory with 3320fa9e4066Sahrens * the smallest object id first, or if it's a tie, the one with 3321fa9e4066Sahrens * the lexically first name. 
3322fa9e4066Sahrens */ 3323fa9e4066Sahrens if (sdzp->z_id < tdzp->z_id) { 3324fa9e4066Sahrens cmp = -1; 3325fa9e4066Sahrens } else if (sdzp->z_id > tdzp->z_id) { 3326fa9e4066Sahrens cmp = 1; 3327fa9e4066Sahrens } else { 3328da6c28aaSamw /* 3329da6c28aaSamw * First compare the two name arguments without 3330da6c28aaSamw * considering any case folding. 3331da6c28aaSamw */ 3332da6c28aaSamw int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); 3333da6c28aaSamw 3334da6c28aaSamw cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); 3335de8267e0Stimh ASSERT(error == 0 || !zfsvfs->z_utf8); 3336fa9e4066Sahrens if (cmp == 0) { 3337fa9e4066Sahrens /* 3338fa9e4066Sahrens * POSIX: "If the old argument and the new argument 3339fa9e4066Sahrens * both refer to links to the same existing file, 3340fa9e4066Sahrens * the rename() function shall return successfully 3341fa9e4066Sahrens * and perform no other action." 3342fa9e4066Sahrens */ 3343fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3344fa9e4066Sahrens return (0); 3345fa9e4066Sahrens } 3346da6c28aaSamw /* 3347da6c28aaSamw * If the file system is case-folding, then we may 3348da6c28aaSamw * have some more checking to do. A case-folding file 3349da6c28aaSamw * system is either supporting mixed case sensitivity 3350da6c28aaSamw * access or is completely case-insensitive. Note 3351da6c28aaSamw * that the file system is always case preserving. 3352da6c28aaSamw * 3353da6c28aaSamw * In mixed sensitivity mode case sensitive behavior 3354da6c28aaSamw * is the default. FIGNORECASE must be used to 3355da6c28aaSamw * explicitly request case insensitive behavior. 
3356da6c28aaSamw * 3357da6c28aaSamw * If the source and target names provided differ only 3358da6c28aaSamw * by case (e.g., a request to rename 'tim' to 'Tim'), 3359da6c28aaSamw * we will treat this as a special case in the 3360da6c28aaSamw * case-insensitive mode: as long as the source name 3361da6c28aaSamw * is an exact match, we will allow this to proceed as 3362da6c28aaSamw * a name-change request. 3363da6c28aaSamw */ 3364de8267e0Stimh if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 3365de8267e0Stimh (zfsvfs->z_case == ZFS_CASE_MIXED && 3366de8267e0Stimh flags & FIGNORECASE)) && 3367da6c28aaSamw u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, 3368da6c28aaSamw &error) == 0) { 3369da6c28aaSamw /* 3370da6c28aaSamw * case preserving rename request, require exact 3371da6c28aaSamw * name matches 3372da6c28aaSamw */ 3373da6c28aaSamw zflg |= ZCIEXACT; 3374da6c28aaSamw zflg &= ~ZCILOOK; 3375da6c28aaSamw } 3376fa9e4066Sahrens } 3377da6c28aaSamw 3378afefc7e4SSanjeev Bagewadi /* 3379afefc7e4SSanjeev Bagewadi * If the source and destination directories are the same, we should 3380afefc7e4SSanjeev Bagewadi * grab the z_name_lock of that directory only once. 
3381afefc7e4SSanjeev Bagewadi */ 3382afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) { 3383afefc7e4SSanjeev Bagewadi zflg |= ZHAVELOCK; 3384afefc7e4SSanjeev Bagewadi rw_enter(&sdzp->z_name_lock, RW_READER); 3385afefc7e4SSanjeev Bagewadi } 3386afefc7e4SSanjeev Bagewadi 3387fa9e4066Sahrens if (cmp < 0) { 3388da6c28aaSamw serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, 3389da6c28aaSamw ZEXISTS | zflg, NULL, NULL); 3390da6c28aaSamw terr = zfs_dirent_lock(&tdl, 3391da6c28aaSamw tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); 3392fa9e4066Sahrens } else { 3393da6c28aaSamw terr = zfs_dirent_lock(&tdl, 3394da6c28aaSamw tdzp, tnm, &tzp, zflg, NULL, NULL); 3395da6c28aaSamw serr = zfs_dirent_lock(&sdl, 3396da6c28aaSamw sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, 3397da6c28aaSamw NULL, NULL); 3398fa9e4066Sahrens } 3399fa9e4066Sahrens 3400fa9e4066Sahrens if (serr) { 3401fa9e4066Sahrens /* 3402fa9e4066Sahrens * Source entry invalid or not there. 3403fa9e4066Sahrens */ 3404fa9e4066Sahrens if (!terr) { 3405fa9e4066Sahrens zfs_dirent_unlock(tdl); 3406fa9e4066Sahrens if (tzp) 3407fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 3408fa9e4066Sahrens } 3409afefc7e4SSanjeev Bagewadi 3410afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3411afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3412afefc7e4SSanjeev Bagewadi 3413fa9e4066Sahrens if (strcmp(snm, "..") == 0) 3414fa9e4066Sahrens serr = EINVAL; 3415fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3416fa9e4066Sahrens return (serr); 3417fa9e4066Sahrens } 3418fa9e4066Sahrens if (terr) { 3419fa9e4066Sahrens zfs_dirent_unlock(sdl); 3420fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3421afefc7e4SSanjeev Bagewadi 3422afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3423afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3424afefc7e4SSanjeev Bagewadi 3425fa9e4066Sahrens if (strcmp(tnm, "..") == 0) 3426fa9e4066Sahrens terr = EINVAL; 3427fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3428fa9e4066Sahrens return (terr); 3429fa9e4066Sahrens } 3430fa9e4066Sahrens 3431fa9e4066Sahrens /* 3432fa9e4066Sahrens 
* Must have write access at the source to remove the old entry 3433fa9e4066Sahrens * and write access at the target to create the new entry. 3434fa9e4066Sahrens * Note that if target and source are the same, this can be 3435fa9e4066Sahrens * done in a single check. 3436fa9e4066Sahrens */ 3437fa9e4066Sahrens 3438fa9e4066Sahrens if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) 3439fa9e4066Sahrens goto out; 3440fa9e4066Sahrens 3441fa9e4066Sahrens if (ZTOV(szp)->v_type == VDIR) { 3442fa9e4066Sahrens /* 3443fa9e4066Sahrens * Check to make sure rename is valid. 3444fa9e4066Sahrens * Can't do a move like this: /usr/a/b to /usr/a/b/c/d 3445fa9e4066Sahrens */ 3446fa9e4066Sahrens if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) 3447fa9e4066Sahrens goto out; 3448fa9e4066Sahrens } 3449fa9e4066Sahrens 3450fa9e4066Sahrens /* 3451fa9e4066Sahrens * Does target exist? 3452fa9e4066Sahrens */ 3453fa9e4066Sahrens if (tzp) { 3454fa9e4066Sahrens /* 3455fa9e4066Sahrens * Source and target must be the same type. 3456fa9e4066Sahrens */ 3457fa9e4066Sahrens if (ZTOV(szp)->v_type == VDIR) { 3458fa9e4066Sahrens if (ZTOV(tzp)->v_type != VDIR) { 3459fa9e4066Sahrens error = ENOTDIR; 3460fa9e4066Sahrens goto out; 3461fa9e4066Sahrens } 3462fa9e4066Sahrens } else { 3463fa9e4066Sahrens if (ZTOV(tzp)->v_type == VDIR) { 3464fa9e4066Sahrens error = EISDIR; 3465fa9e4066Sahrens goto out; 3466fa9e4066Sahrens } 3467fa9e4066Sahrens } 3468fa9e4066Sahrens /* 3469fa9e4066Sahrens * POSIX dictates that when the source and target 3470fa9e4066Sahrens * entries refer to the same file object, rename 3471fa9e4066Sahrens * must do nothing and exit without error. 
3472fa9e4066Sahrens */ 3473fa9e4066Sahrens if (szp->z_id == tzp->z_id) { 3474fa9e4066Sahrens error = 0; 3475fa9e4066Sahrens goto out; 3476fa9e4066Sahrens } 3477fa9e4066Sahrens } 3478fa9e4066Sahrens 3479da6c28aaSamw vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); 3480fa9e4066Sahrens if (tzp) 3481da6c28aaSamw vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); 3482df2381bfSpraks 3483df2381bfSpraks /* 3484df2381bfSpraks * notify the target directory if it is not the same 3485df2381bfSpraks * as source directory. 3486df2381bfSpraks */ 3487df2381bfSpraks if (tdvp != sdvp) { 3488da6c28aaSamw vnevent_rename_dest_dir(tdvp, ct); 3489df2381bfSpraks } 3490fa9e4066Sahrens 3491fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 34920a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 34930a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); 3494ea8dc4b6Seschrock dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); 3495ea8dc4b6Seschrock dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); 34960a586ceaSMark Shellenbaum if (sdzp != tdzp) { 34970a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); 34980a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, tdzp); 34990a586ceaSMark Shellenbaum } 35000a586ceaSMark Shellenbaum if (tzp) { 35010a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); 35020a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, tzp); 35030a586ceaSMark Shellenbaum } 35040a586ceaSMark Shellenbaum 35050a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, szp); 3506893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 35071209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 3508fa9e4066Sahrens if (error) { 3509fa9e4066Sahrens if (zl != NULL) 3510fa9e4066Sahrens zfs_rename_unlock(&zl); 3511fa9e4066Sahrens zfs_dirent_unlock(sdl); 3512fa9e4066Sahrens zfs_dirent_unlock(tdl); 3513afefc7e4SSanjeev Bagewadi 3514afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3515afefc7e4SSanjeev Bagewadi 
rw_exit(&sdzp->z_name_lock); 3516afefc7e4SSanjeev Bagewadi 3517fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3518fa9e4066Sahrens if (tzp) 3519fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 35201209a471SNeil Perrin if (error == ERESTART) { 35218a2f1b91Sahrens dmu_tx_wait(tx); 35228a2f1b91Sahrens dmu_tx_abort(tx); 3523fa9e4066Sahrens goto top; 3524fa9e4066Sahrens } 35258a2f1b91Sahrens dmu_tx_abort(tx); 3526fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3527fa9e4066Sahrens return (error); 3528fa9e4066Sahrens } 3529fa9e4066Sahrens 3530fa9e4066Sahrens if (tzp) /* Attempt to remove the existing target */ 3531da6c28aaSamw error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); 3532fa9e4066Sahrens 3533fa9e4066Sahrens if (error == 0) { 3534fa9e4066Sahrens error = zfs_link_create(tdl, szp, tx, ZRENAMING); 3535fa9e4066Sahrens if (error == 0) { 35360a586ceaSMark Shellenbaum szp->z_pflags |= ZFS_AV_MODIFIED; 35370a586ceaSMark Shellenbaum 35380a586ceaSMark Shellenbaum error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), 35390a586ceaSMark Shellenbaum (void *)&szp->z_pflags, sizeof (uint64_t), tx); 35400a586ceaSMark Shellenbaum ASSERT3U(error, ==, 0); 3541da6c28aaSamw 3542fa9e4066Sahrens error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); 35430a586ceaSMark Shellenbaum ASSERT3U(error, ==, 0); 3544da6c28aaSamw 3545da6c28aaSamw zfs_log_rename(zilog, tx, 3546da6c28aaSamw TX_RENAME | (flags & FIGNORECASE ? 
TX_CI : 0), 3547da6c28aaSamw sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp); 354851ece835Seschrock 354951ece835Seschrock /* Update path information for the target vnode */ 355051ece835Seschrock vn_renamepath(tdvp, ZTOV(szp), tnm, strlen(tnm)); 3551fa9e4066Sahrens } 3552fa9e4066Sahrens } 3553fa9e4066Sahrens 3554fa9e4066Sahrens dmu_tx_commit(tx); 3555fa9e4066Sahrens out: 3556fa9e4066Sahrens if (zl != NULL) 3557fa9e4066Sahrens zfs_rename_unlock(&zl); 3558fa9e4066Sahrens 3559fa9e4066Sahrens zfs_dirent_unlock(sdl); 3560fa9e4066Sahrens zfs_dirent_unlock(tdl); 3561fa9e4066Sahrens 3562afefc7e4SSanjeev Bagewadi if (sdzp == tdzp) 3563afefc7e4SSanjeev Bagewadi rw_exit(&sdzp->z_name_lock); 3564afefc7e4SSanjeev Bagewadi 3565afefc7e4SSanjeev Bagewadi 3566fa9e4066Sahrens VN_RELE(ZTOV(szp)); 3567fa9e4066Sahrens if (tzp) 3568fa9e4066Sahrens VN_RELE(ZTOV(tzp)); 3569fa9e4066Sahrens 3570*55da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3571*55da60b9SMark J Musante zil_commit(zilog, UINT64_MAX, 0); 3572*55da60b9SMark J Musante 3573fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3574fa9e4066Sahrens return (error); 3575fa9e4066Sahrens } 3576fa9e4066Sahrens 3577fa9e4066Sahrens /* 3578fa9e4066Sahrens * Insert the indicated symbolic reference entry into the directory. 3579fa9e4066Sahrens * 3580fa9e4066Sahrens * IN: dvp - Directory to contain new symbolic link. 3581fa9e4066Sahrens * link - Name for new symlink entry. 3582fa9e4066Sahrens * vap - Attributes of new entry. 3583fa9e4066Sahrens * target - Target path of new symlink. 3584fa9e4066Sahrens * cr - credentials of caller. 
3585da6c28aaSamw * ct - caller context 3586da6c28aaSamw * flags - case flags 3587fa9e4066Sahrens * 3588fa9e4066Sahrens * RETURN: 0 if success 3589fa9e4066Sahrens * error code if failure 3590fa9e4066Sahrens * 3591fa9e4066Sahrens * Timestamps: 3592fa9e4066Sahrens * dvp - ctime|mtime updated 3593fa9e4066Sahrens */ 3594da6c28aaSamw /*ARGSUSED*/ 3595fa9e4066Sahrens static int 3596da6c28aaSamw zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr, 3597da6c28aaSamw caller_context_t *ct, int flags) 3598fa9e4066Sahrens { 3599fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 3600fa9e4066Sahrens zfs_dirlock_t *dl; 3601fa9e4066Sahrens dmu_tx_t *tx; 3602fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3603f18faf3fSek zilog_t *zilog; 36040a586ceaSMark Shellenbaum uint64_t len = strlen(link); 3605fa9e4066Sahrens int error; 3606da6c28aaSamw int zflg = ZNEW; 360789459e17SMark Shellenbaum zfs_acl_ids_t acl_ids; 360889459e17SMark Shellenbaum boolean_t fuid_dirtied; 36090a586ceaSMark Shellenbaum uint64_t txtype = TX_SYMLINK; 3610fa9e4066Sahrens 3611fa9e4066Sahrens ASSERT(vap->va_type == VLNK); 3612fa9e4066Sahrens 36133cb34c60Sahrens ZFS_ENTER(zfsvfs); 36143cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 3615f18faf3fSek zilog = zfsvfs->z_log; 3616da6c28aaSamw 3617de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 3618da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3619da6c28aaSamw ZFS_EXIT(zfsvfs); 3620da6c28aaSamw return (EILSEQ); 3621da6c28aaSamw } 3622da6c28aaSamw if (flags & FIGNORECASE) 3623da6c28aaSamw zflg |= ZCILOOK; 3624fa9e4066Sahrens top: 3625da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3626fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3627fa9e4066Sahrens return (error); 3628fa9e4066Sahrens } 3629fa9e4066Sahrens 3630fa9e4066Sahrens if (len > MAXPATHLEN) { 3631fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3632fa9e4066Sahrens return (ENAMETOOLONG); 3633fa9e4066Sahrens } 3634fa9e4066Sahrens 3635fa9e4066Sahrens /* 3636fa9e4066Sahrens * Attempt 
to lock directory; fail if entry already exists. 3637fa9e4066Sahrens */ 3638da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 3639da6c28aaSamw if (error) { 3640fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3641fa9e4066Sahrens return (error); 3642fa9e4066Sahrens } 3643fa9e4066Sahrens 364489459e17SMark Shellenbaum VERIFY(0 == zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids)); 364514843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 364614843421SMatthew Ahrens zfs_acl_ids_free(&acl_ids); 364714843421SMatthew Ahrens zfs_dirent_unlock(dl); 364814843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 364914843421SMatthew Ahrens return (EDQUOT); 365014843421SMatthew Ahrens } 3651fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 365289459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 3653fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 3654ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 36550a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 36560a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE + len); 36570a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 36580a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 36590a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 36600a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes); 36610a586ceaSMark Shellenbaum } 366214843421SMatthew Ahrens if (fuid_dirtied) 366314843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 36641209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 3665fa9e4066Sahrens if (error) { 366689459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 3667fa9e4066Sahrens zfs_dirent_unlock(dl); 36681209a471SNeil Perrin if (error == ERESTART) { 36698a2f1b91Sahrens dmu_tx_wait(tx); 36708a2f1b91Sahrens dmu_tx_abort(tx); 3671fa9e4066Sahrens goto top; 3672fa9e4066Sahrens } 36738a2f1b91Sahrens dmu_tx_abort(tx); 3674fa9e4066Sahrens ZFS_EXIT(zfsvfs); 
3675fa9e4066Sahrens return (error); 3676fa9e4066Sahrens } 3677fa9e4066Sahrens 3678fa9e4066Sahrens /* 3679fa9e4066Sahrens * Create a new object for the symlink. 36800a586ceaSMark Shellenbaum * for version 4 ZPL datsets the symlink will be an SA attribute 3681fa9e4066Sahrens */ 36820a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 3683fa9e4066Sahrens 36840a586ceaSMark Shellenbaum if (fuid_dirtied) 36850a586ceaSMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 3686fa9e4066Sahrens 36870a586ceaSMark Shellenbaum if (zp->z_is_sa) 36880a586ceaSMark Shellenbaum error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), 36890a586ceaSMark Shellenbaum link, len, tx); 36900a586ceaSMark Shellenbaum else 36910a586ceaSMark Shellenbaum zfs_sa_symlink(zp, link, len, tx); 3692fa9e4066Sahrens 36930a586ceaSMark Shellenbaum zp->z_size = len; 36940a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 36950a586ceaSMark Shellenbaum &zp->z_size, sizeof (zp->z_size), tx); 3696fa9e4066Sahrens /* 3697fa9e4066Sahrens * Insert the new object into the directory. 
3698fa9e4066Sahrens */ 3699fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 37000a586ceaSMark Shellenbaum 37010a586ceaSMark Shellenbaum if (flags & FIGNORECASE) 37020a586ceaSMark Shellenbaum txtype |= TX_CI; 37030a586ceaSMark Shellenbaum zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 370489459e17SMark Shellenbaum 370589459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 3706fa9e4066Sahrens 3707fa9e4066Sahrens dmu_tx_commit(tx); 3708fa9e4066Sahrens 3709fa9e4066Sahrens zfs_dirent_unlock(dl); 3710fa9e4066Sahrens 3711fa9e4066Sahrens VN_RELE(ZTOV(zp)); 3712fa9e4066Sahrens 3713*55da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3714*55da60b9SMark J Musante zil_commit(zilog, UINT64_MAX, 0); 3715*55da60b9SMark J Musante 3716fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3717fa9e4066Sahrens return (error); 3718fa9e4066Sahrens } 3719fa9e4066Sahrens 3720fa9e4066Sahrens /* 3721fa9e4066Sahrens * Return, in the buffer contained in the provided uio structure, 3722fa9e4066Sahrens * the symbolic path referred to by vp. 3723fa9e4066Sahrens * 3724fa9e4066Sahrens * IN: vp - vnode of symbolic link. 3725fa9e4066Sahrens * uoip - structure to contain the link path. 3726fa9e4066Sahrens * cr - credentials of caller. 3727da6c28aaSamw * ct - caller context 3728fa9e4066Sahrens * 3729fa9e4066Sahrens * OUT: uio - structure to contain the link path. 
3730fa9e4066Sahrens * 3731fa9e4066Sahrens * RETURN: 0 if success 3732fa9e4066Sahrens * error code if failure 3733fa9e4066Sahrens * 3734fa9e4066Sahrens * Timestamps: 3735fa9e4066Sahrens * vp - atime updated 3736fa9e4066Sahrens */ 3737fa9e4066Sahrens /* ARGSUSED */ 3738fa9e4066Sahrens static int 3739da6c28aaSamw zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 3740fa9e4066Sahrens { 3741fa9e4066Sahrens znode_t *zp = VTOZ(vp); 3742fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3743fa9e4066Sahrens int error; 3744fa9e4066Sahrens 37453cb34c60Sahrens ZFS_ENTER(zfsvfs); 37463cb34c60Sahrens ZFS_VERIFY_ZP(zp); 3747fa9e4066Sahrens 37480a586ceaSMark Shellenbaum if (zp->z_is_sa) 37490a586ceaSMark Shellenbaum error = sa_lookup_uio(zp->z_sa_hdl, 37500a586ceaSMark Shellenbaum SA_ZPL_SYMLINK(zfsvfs), uio); 37510a586ceaSMark Shellenbaum else 37520a586ceaSMark Shellenbaum error = zfs_sa_readlink(zp, uio); 3753fa9e4066Sahrens 3754fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 37550a586ceaSMark Shellenbaum 3756fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3757fa9e4066Sahrens return (error); 3758fa9e4066Sahrens } 3759fa9e4066Sahrens 3760fa9e4066Sahrens /* 3761fa9e4066Sahrens * Insert a new entry into directory tdvp referencing svp. 3762fa9e4066Sahrens * 3763fa9e4066Sahrens * IN: tdvp - Directory to contain new entry. 3764fa9e4066Sahrens * svp - vnode of new entry. 3765fa9e4066Sahrens * name - name of new entry. 3766fa9e4066Sahrens * cr - credentials of caller. 
3767da6c28aaSamw * ct - caller context 3768fa9e4066Sahrens * 3769fa9e4066Sahrens * RETURN: 0 if success 3770fa9e4066Sahrens * error code if failure 3771fa9e4066Sahrens * 3772fa9e4066Sahrens * Timestamps: 3773fa9e4066Sahrens * tdvp - ctime|mtime updated 3774fa9e4066Sahrens * svp - ctime updated 3775fa9e4066Sahrens */ 3776fa9e4066Sahrens /* ARGSUSED */ 3777fa9e4066Sahrens static int 3778da6c28aaSamw zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 3779da6c28aaSamw caller_context_t *ct, int flags) 3780fa9e4066Sahrens { 3781fa9e4066Sahrens znode_t *dzp = VTOZ(tdvp); 3782fa9e4066Sahrens znode_t *tzp, *szp; 3783fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3784f18faf3fSek zilog_t *zilog; 3785fa9e4066Sahrens zfs_dirlock_t *dl; 3786fa9e4066Sahrens dmu_tx_t *tx; 3787fa9e4066Sahrens vnode_t *realvp; 3788fa9e4066Sahrens int error; 3789da6c28aaSamw int zf = ZNEW; 3790d39ee142SMark Shellenbaum uint64_t parent; 3791fa9e4066Sahrens 3792fa9e4066Sahrens ASSERT(tdvp->v_type == VDIR); 3793fa9e4066Sahrens 37943cb34c60Sahrens ZFS_ENTER(zfsvfs); 37953cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 3796f18faf3fSek zilog = zfsvfs->z_log; 3797fa9e4066Sahrens 3798da6c28aaSamw if (VOP_REALVP(svp, &realvp, ct) == 0) 3799fa9e4066Sahrens svp = realvp; 3800fa9e4066Sahrens 3801d39ee142SMark Shellenbaum /* 3802d39ee142SMark Shellenbaum * POSIX dictates that we return EPERM here. 3803d39ee142SMark Shellenbaum * Better choices include ENOTSUP or EISDIR. 
3804d39ee142SMark Shellenbaum */ 3805d39ee142SMark Shellenbaum if (svp->v_type == VDIR) { 3806d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 3807d39ee142SMark Shellenbaum return (EPERM); 3808d39ee142SMark Shellenbaum } 3809d39ee142SMark Shellenbaum 3810d39ee142SMark Shellenbaum if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) { 3811fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3812fa9e4066Sahrens return (EXDEV); 3813fa9e4066Sahrens } 3814d39ee142SMark Shellenbaum 38153cb34c60Sahrens szp = VTOZ(svp); 38163cb34c60Sahrens ZFS_VERIFY_ZP(szp); 3817fa9e4066Sahrens 3818d39ee142SMark Shellenbaum /* Prevent links to .zfs/shares files */ 3819d39ee142SMark Shellenbaum 3820d39ee142SMark Shellenbaum if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 3821d39ee142SMark Shellenbaum &parent, sizeof (uint64_t))) != 0) { 3822d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 3823d39ee142SMark Shellenbaum return (error); 3824d39ee142SMark Shellenbaum } 3825d39ee142SMark Shellenbaum if (parent == zfsvfs->z_shares_dir) { 3826d39ee142SMark Shellenbaum ZFS_EXIT(zfsvfs); 3827d39ee142SMark Shellenbaum return (EPERM); 3828d39ee142SMark Shellenbaum } 3829d39ee142SMark Shellenbaum 3830de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, 3831da6c28aaSamw strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3832da6c28aaSamw ZFS_EXIT(zfsvfs); 3833da6c28aaSamw return (EILSEQ); 3834da6c28aaSamw } 3835da6c28aaSamw if (flags & FIGNORECASE) 3836da6c28aaSamw zf |= ZCILOOK; 3837da6c28aaSamw 3838fa9e4066Sahrens /* 3839fa9e4066Sahrens * We do not support links between attributes and non-attributes 3840fa9e4066Sahrens * because of the potential security risk of creating links 3841fa9e4066Sahrens * into "normal" file space in order to circumvent restrictions 3842fa9e4066Sahrens * imposed in attribute space. 
3843fa9e4066Sahrens */ 38440a586ceaSMark Shellenbaum if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { 3845fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3846fa9e4066Sahrens return (EINVAL); 3847fa9e4066Sahrens } 3848fa9e4066Sahrens 3849fa9e4066Sahrens 38500a586ceaSMark Shellenbaum if (szp->z_uid != crgetuid(cr) && 3851fa9e4066Sahrens secpolicy_basic_link(cr) != 0) { 3852fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3853fa9e4066Sahrens return (EPERM); 3854fa9e4066Sahrens } 3855fa9e4066Sahrens 3856da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3857fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3858fa9e4066Sahrens return (error); 3859fa9e4066Sahrens } 3860fa9e4066Sahrens 3861d39ee142SMark Shellenbaum top: 3862fa9e4066Sahrens /* 3863fa9e4066Sahrens * Attempt to lock directory; fail if entry already exists. 3864fa9e4066Sahrens */ 3865da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 3866da6c28aaSamw if (error) { 3867fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3868fa9e4066Sahrens return (error); 3869fa9e4066Sahrens } 3870fa9e4066Sahrens 3871fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 38720a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); 3873ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 38740a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, szp); 38750a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 38761209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 3877fa9e4066Sahrens if (error) { 3878fa9e4066Sahrens zfs_dirent_unlock(dl); 38791209a471SNeil Perrin if (error == ERESTART) { 38808a2f1b91Sahrens dmu_tx_wait(tx); 38818a2f1b91Sahrens dmu_tx_abort(tx); 3882fa9e4066Sahrens goto top; 3883fa9e4066Sahrens } 38848a2f1b91Sahrens dmu_tx_abort(tx); 3885fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3886fa9e4066Sahrens return (error); 3887fa9e4066Sahrens } 3888fa9e4066Sahrens 3889fa9e4066Sahrens error = zfs_link_create(dl, szp, tx, 0); 3890fa9e4066Sahrens 3891da6c28aaSamw if (error == 0) { 3892da6c28aaSamw 
uint64_t txtype = TX_LINK; 3893da6c28aaSamw if (flags & FIGNORECASE) 3894da6c28aaSamw txtype |= TX_CI; 3895da6c28aaSamw zfs_log_link(zilog, tx, txtype, dzp, szp, name); 3896da6c28aaSamw } 3897fa9e4066Sahrens 3898fa9e4066Sahrens dmu_tx_commit(tx); 3899fa9e4066Sahrens 3900fa9e4066Sahrens zfs_dirent_unlock(dl); 3901fa9e4066Sahrens 3902df2381bfSpraks if (error == 0) { 3903da6c28aaSamw vnevent_link(svp, ct); 3904df2381bfSpraks } 3905df2381bfSpraks 3906*55da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 3907*55da60b9SMark J Musante zil_commit(zilog, UINT64_MAX, 0); 3908*55da60b9SMark J Musante 3909fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3910fa9e4066Sahrens return (error); 3911fa9e4066Sahrens } 3912fa9e4066Sahrens 3913fa9e4066Sahrens /* 3914fa9e4066Sahrens * zfs_null_putapage() is used when the file system has been force 3915fa9e4066Sahrens * unmounted. It just drops the pages. 3916fa9e4066Sahrens */ 3917fa9e4066Sahrens /* ARGSUSED */ 3918fa9e4066Sahrens static int 3919fa9e4066Sahrens zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 3920fa9e4066Sahrens size_t *lenp, int flags, cred_t *cr) 3921fa9e4066Sahrens { 3922fa9e4066Sahrens pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 3923fa9e4066Sahrens return (0); 3924fa9e4066Sahrens } 3925fa9e4066Sahrens 392644eda4d7Smaybee /* 392744eda4d7Smaybee * Push a page out to disk, klustering if possible. 392844eda4d7Smaybee * 392944eda4d7Smaybee * IN: vp - file to push page to. 393044eda4d7Smaybee * pp - page to push. 393144eda4d7Smaybee * flags - additional flags. 393244eda4d7Smaybee * cr - credentials of caller. 393344eda4d7Smaybee * 393444eda4d7Smaybee * OUT: offp - start of range pushed. 393544eda4d7Smaybee * lenp - len of range pushed. 393644eda4d7Smaybee * 393744eda4d7Smaybee * RETURN: 0 if success 393844eda4d7Smaybee * error code if failure 393944eda4d7Smaybee * 394044eda4d7Smaybee * NOTE: callers must have locked the page to be pushed. 
On 394144eda4d7Smaybee * exit, the page (and all other pages in the kluster) must be 394244eda4d7Smaybee * unlocked. 394344eda4d7Smaybee */ 3944fa9e4066Sahrens /* ARGSUSED */ 3945fa9e4066Sahrens static int 3946fa9e4066Sahrens zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 3947fa9e4066Sahrens size_t *lenp, int flags, cred_t *cr) 3948fa9e4066Sahrens { 3949fa9e4066Sahrens znode_t *zp = VTOZ(vp); 3950fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3951fa9e4066Sahrens dmu_tx_t *tx; 395244eda4d7Smaybee u_offset_t off, koff; 395344eda4d7Smaybee size_t len, klen; 3954fa9e4066Sahrens int err; 3955fa9e4066Sahrens 3956fa9e4066Sahrens off = pp->p_offset; 395744eda4d7Smaybee len = PAGESIZE; 395844eda4d7Smaybee /* 395944eda4d7Smaybee * If our blocksize is bigger than the page size, try to kluster 39601209a471SNeil Perrin * multiple pages so that we write a full block (thus avoiding 396144eda4d7Smaybee * a read-modify-write). 396244eda4d7Smaybee */ 39630a586ceaSMark Shellenbaum if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 3964ac05c741SMark Maybee klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 3965ac05c741SMark Maybee koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; 39660a586ceaSMark Shellenbaum ASSERT(koff <= zp->z_size); 39670a586ceaSMark Shellenbaum if (koff + klen > zp->z_size) 39680a586ceaSMark Shellenbaum klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 396944eda4d7Smaybee pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 397044eda4d7Smaybee } 397144eda4d7Smaybee ASSERT3U(btop(len), ==, btopr(len)); 3972ac05c741SMark Maybee 3973dd6ef538Smaybee /* 3974dd6ef538Smaybee * Can't push pages past end-of-file. 
3975dd6ef538Smaybee */ 39760a586ceaSMark Shellenbaum if (off >= zp->z_size) { 3977f4d2e9e6Smaybee /* ignore all pages */ 397844eda4d7Smaybee err = 0; 397944eda4d7Smaybee goto out; 39800a586ceaSMark Shellenbaum } else if (off + len > zp->z_size) { 39810a586ceaSMark Shellenbaum int npages = btopr(zp->z_size - off); 398244eda4d7Smaybee page_t *trunc; 398344eda4d7Smaybee 398444eda4d7Smaybee page_list_break(&pp, &trunc, npages); 3985f4d2e9e6Smaybee /* ignore pages past end of file */ 398644eda4d7Smaybee if (trunc) 3987f4d2e9e6Smaybee pvn_write_done(trunc, flags); 39880a586ceaSMark Shellenbaum len = zp->z_size - off; 3989dd6ef538Smaybee } 399014843421SMatthew Ahrens 39910a586ceaSMark Shellenbaum if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 39920a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 399314843421SMatthew Ahrens err = EDQUOT; 399414843421SMatthew Ahrens goto out; 399514843421SMatthew Ahrens } 3996ac05c741SMark Maybee top: 3997fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 3998fa9e4066Sahrens dmu_tx_hold_write(tx, zp->z_id, off, len); 39990a586ceaSMark Shellenbaum 40000a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 40010a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 40021209a471SNeil Perrin err = dmu_tx_assign(tx, TXG_NOWAIT); 4003fa9e4066Sahrens if (err != 0) { 40041209a471SNeil Perrin if (err == ERESTART) { 40058a2f1b91Sahrens dmu_tx_wait(tx); 40068a2f1b91Sahrens dmu_tx_abort(tx); 4007fa9e4066Sahrens goto top; 4008fa9e4066Sahrens } 40098a2f1b91Sahrens dmu_tx_abort(tx); 4010fa9e4066Sahrens goto out; 4011fa9e4066Sahrens } 4012fa9e4066Sahrens 401344eda4d7Smaybee if (zp->z_blksz <= PAGESIZE) { 40140fab61baSJonathan W Adams caddr_t va = zfs_map_page(pp, S_READ); 401544eda4d7Smaybee ASSERT3U(len, <=, PAGESIZE); 401644eda4d7Smaybee dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 40170fab61baSJonathan W Adams zfs_unmap_page(pp, va); 401844eda4d7Smaybee } else { 401944eda4d7Smaybee err = 
dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 402044eda4d7Smaybee } 4021fa9e4066Sahrens 402244eda4d7Smaybee if (err == 0) { 40230a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 40240a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[2]; 40250a586ceaSMark Shellenbaum int count = 0; 40260a586ceaSMark Shellenbaum 40270a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 40280a586ceaSMark Shellenbaum &mtime, 16); 40290a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 40300a586ceaSMark Shellenbaum &ctime, 16); 40310a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 40320a586ceaSMark Shellenbaum B_TRUE); 4033ac05c741SMark Maybee zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 403444eda4d7Smaybee } 403568857716SLin Ling dmu_tx_commit(tx); 4036fa9e4066Sahrens 403744eda4d7Smaybee out: 4038f4d2e9e6Smaybee pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4039fa9e4066Sahrens if (offp) 4040fa9e4066Sahrens *offp = off; 4041fa9e4066Sahrens if (lenp) 4042fa9e4066Sahrens *lenp = len; 4043fa9e4066Sahrens 4044fa9e4066Sahrens return (err); 4045fa9e4066Sahrens } 4046fa9e4066Sahrens 4047fa9e4066Sahrens /* 4048fa9e4066Sahrens * Copy the portion of the file indicated from pages into the file. 4049fa9e4066Sahrens * The pages are stored in a page list attached to the files vnode. 4050fa9e4066Sahrens * 4051fa9e4066Sahrens * IN: vp - vnode of file to push page data to. 4052fa9e4066Sahrens * off - position in file to put data. 4053fa9e4066Sahrens * len - amount of data to write. 4054fa9e4066Sahrens * flags - flags to control the operation. 4055fa9e4066Sahrens * cr - credentials of caller. 4056da6c28aaSamw * ct - caller context. 
4057fa9e4066Sahrens * 4058fa9e4066Sahrens * RETURN: 0 if success 4059fa9e4066Sahrens * error code if failure 4060fa9e4066Sahrens * 4061fa9e4066Sahrens * Timestamps: 4062fa9e4066Sahrens * vp - ctime|mtime updated 4063fa9e4066Sahrens */ 4064da6c28aaSamw /*ARGSUSED*/ 4065fa9e4066Sahrens static int 4066da6c28aaSamw zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4067da6c28aaSamw caller_context_t *ct) 4068fa9e4066Sahrens { 4069fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4070fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4071fa9e4066Sahrens page_t *pp; 4072fa9e4066Sahrens size_t io_len; 4073fa9e4066Sahrens u_offset_t io_off; 4074ac05c741SMark Maybee uint_t blksz; 4075ac05c741SMark Maybee rl_t *rl; 4076fa9e4066Sahrens int error = 0; 4077fa9e4066Sahrens 40783cb34c60Sahrens ZFS_ENTER(zfsvfs); 40793cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4080fa9e4066Sahrens 4081ac05c741SMark Maybee /* 4082ac05c741SMark Maybee * Align this request to the file block size in case we kluster. 4083ac05c741SMark Maybee * XXX - this can result in pretty aggresive locking, which can 4084ac05c741SMark Maybee * impact simultanious read/write access. One option might be 4085ac05c741SMark Maybee * to break up long requests (len == 0) into block-by-block 4086ac05c741SMark Maybee * operations to get narrower locking. 4087ac05c741SMark Maybee */ 4088ac05c741SMark Maybee blksz = zp->z_blksz; 4089ac05c741SMark Maybee if (ISP2(blksz)) 4090ac05c741SMark Maybee io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4091ac05c741SMark Maybee else 4092ac05c741SMark Maybee io_off = 0; 4093ac05c741SMark Maybee if (len > 0 && ISP2(blksz)) 40945a6f5619SMark Maybee io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4095ac05c741SMark Maybee else 4096ac05c741SMark Maybee io_len = 0; 4097ac05c741SMark Maybee 4098ac05c741SMark Maybee if (io_len == 0) { 4099fa9e4066Sahrens /* 4100ac05c741SMark Maybee * Search the entire vp list for pages >= io_off. 
4101fa9e4066Sahrens */ 4102ac05c741SMark Maybee rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4103ac05c741SMark Maybee error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4104fe9cf88cSperrin goto out; 4105fa9e4066Sahrens } 4106ac05c741SMark Maybee rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4107fa9e4066Sahrens 41080a586ceaSMark Shellenbaum if (off > zp->z_size) { 4109fa9e4066Sahrens /* past end of file */ 4110ac05c741SMark Maybee zfs_range_unlock(rl); 4111fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4112fa9e4066Sahrens return (0); 4113fa9e4066Sahrens } 4114fa9e4066Sahrens 41150a586ceaSMark Shellenbaum len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4116fa9e4066Sahrens 4117ac05c741SMark Maybee for (off = io_off; io_off < off + len; io_off += io_len) { 4118fa9e4066Sahrens if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4119104e2ed7Sperrin pp = page_lookup(vp, io_off, 4120ecb72030Sperrin (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4121fa9e4066Sahrens } else { 4122fa9e4066Sahrens pp = page_lookup_nowait(vp, io_off, 4123ecb72030Sperrin (flags & B_FREE) ? 
SE_EXCL : SE_SHARED); 4124fa9e4066Sahrens } 4125fa9e4066Sahrens 4126fa9e4066Sahrens if (pp != NULL && pvn_getdirty(pp, flags)) { 4127fa9e4066Sahrens int err; 4128fa9e4066Sahrens 4129fa9e4066Sahrens /* 4130fa9e4066Sahrens * Found a dirty page to push 4131fa9e4066Sahrens */ 4132104e2ed7Sperrin err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4133104e2ed7Sperrin if (err) 4134fa9e4066Sahrens error = err; 4135fa9e4066Sahrens } else { 4136fa9e4066Sahrens io_len = PAGESIZE; 4137fa9e4066Sahrens } 4138fa9e4066Sahrens } 4139fe9cf88cSperrin out: 4140ac05c741SMark Maybee zfs_range_unlock(rl); 4141*55da60b9SMark J Musante if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4142b19a79ecSperrin zil_commit(zfsvfs->z_log, UINT64_MAX, zp->z_id); 4143fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4144fa9e4066Sahrens return (error); 4145fa9e4066Sahrens } 4146fa9e4066Sahrens 4147da6c28aaSamw /*ARGSUSED*/ 4148fa9e4066Sahrens void 4149da6c28aaSamw zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4150fa9e4066Sahrens { 4151fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4152fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4153fa9e4066Sahrens int error; 4154fa9e4066Sahrens 4155f18faf3fSek rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 41560a586ceaSMark Shellenbaum if (zp->z_sa_hdl == NULL) { 41574ccbb6e7Sahrens /* 4158874395d5Smaybee * The fs has been unmounted, or we did a 4159874395d5Smaybee * suspend/resume and this file no longer exists. 
41604ccbb6e7Sahrens */ 4161fa9e4066Sahrens if (vn_has_cached_data(vp)) { 4162fa9e4066Sahrens (void) pvn_vplist_dirty(vp, 0, zfs_null_putapage, 4163fa9e4066Sahrens B_INVAL, cr); 4164fa9e4066Sahrens } 4165fa9e4066Sahrens 4166ea8dc4b6Seschrock mutex_enter(&zp->z_lock); 4167cd2adeceSChris Kirby mutex_enter(&vp->v_lock); 4168cd2adeceSChris Kirby ASSERT(vp->v_count == 1); 4169cd2adeceSChris Kirby vp->v_count = 0; 4170cd2adeceSChris Kirby mutex_exit(&vp->v_lock); 41714ccbb6e7Sahrens mutex_exit(&zp->z_lock); 4172f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 4173874395d5Smaybee zfs_znode_free(zp); 4174fa9e4066Sahrens return; 4175fa9e4066Sahrens } 4176fa9e4066Sahrens 4177fa9e4066Sahrens /* 4178fa9e4066Sahrens * Attempt to push any data in the page cache. If this fails 4179fa9e4066Sahrens * we will get kicked out later in zfs_zinactive(). 4180fa9e4066Sahrens */ 41818afd4dd6Sperrin if (vn_has_cached_data(vp)) { 41828afd4dd6Sperrin (void) pvn_vplist_dirty(vp, 0, zfs_putapage, B_INVAL|B_ASYNC, 41838afd4dd6Sperrin cr); 41848afd4dd6Sperrin } 4185fa9e4066Sahrens 4186893a6d32Sahrens if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4187fa9e4066Sahrens dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4188fa9e4066Sahrens 41890a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 41900a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 4191fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 4192fa9e4066Sahrens if (error) { 4193fa9e4066Sahrens dmu_tx_abort(tx); 4194fa9e4066Sahrens } else { 4195fa9e4066Sahrens mutex_enter(&zp->z_lock); 41960a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 41970a586ceaSMark Shellenbaum (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4198fa9e4066Sahrens zp->z_atime_dirty = 0; 4199fa9e4066Sahrens mutex_exit(&zp->z_lock); 4200fa9e4066Sahrens dmu_tx_commit(tx); 4201fa9e4066Sahrens } 4202fa9e4066Sahrens } 4203fa9e4066Sahrens 4204fa9e4066Sahrens zfs_zinactive(zp); 4205f18faf3fSek 
rw_exit(&zfsvfs->z_teardown_inactive_lock); 4206fa9e4066Sahrens } 4207fa9e4066Sahrens 4208fa9e4066Sahrens /* 4209fa9e4066Sahrens * Bounds-check the seek operation. 4210fa9e4066Sahrens * 4211fa9e4066Sahrens * IN: vp - vnode seeking within 4212fa9e4066Sahrens * ooff - old file offset 4213fa9e4066Sahrens * noffp - pointer to new file offset 4214da6c28aaSamw * ct - caller context 4215fa9e4066Sahrens * 4216fa9e4066Sahrens * RETURN: 0 if success 4217fa9e4066Sahrens * EINVAL if new offset invalid 4218fa9e4066Sahrens */ 4219fa9e4066Sahrens /* ARGSUSED */ 4220fa9e4066Sahrens static int 4221da6c28aaSamw zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4222da6c28aaSamw caller_context_t *ct) 4223fa9e4066Sahrens { 4224fa9e4066Sahrens if (vp->v_type == VDIR) 4225fa9e4066Sahrens return (0); 4226fa9e4066Sahrens return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4227fa9e4066Sahrens } 4228fa9e4066Sahrens 4229fa9e4066Sahrens /* 4230fa9e4066Sahrens * Pre-filter the generic locking function to trap attempts to place 4231fa9e4066Sahrens * a mandatory lock on a memory mapped file. 4232fa9e4066Sahrens */ 4233fa9e4066Sahrens static int 4234fa9e4066Sahrens zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4235da6c28aaSamw flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4236fa9e4066Sahrens { 4237fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4238fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4239fa9e4066Sahrens 42403cb34c60Sahrens ZFS_ENTER(zfsvfs); 42413cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4242fa9e4066Sahrens 4243fa9e4066Sahrens /* 4244ea8dc4b6Seschrock * We are following the UFS semantics with respect to mapcnt 4245ea8dc4b6Seschrock * here: If we see that the file is mapped already, then we will 4246ea8dc4b6Seschrock * return an error, but we don't worry about races between this 4247ea8dc4b6Seschrock * function and zfs_map(). 
4248fa9e4066Sahrens */ 42490a586ceaSMark Shellenbaum if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4250fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4251fa9e4066Sahrens return (EAGAIN); 4252fa9e4066Sahrens } 4253fa9e4066Sahrens ZFS_EXIT(zfsvfs); 425404ce3d0bSMark Shellenbaum return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4255fa9e4066Sahrens } 4256fa9e4066Sahrens 4257fa9e4066Sahrens /* 4258fa9e4066Sahrens * If we can't find a page in the cache, we will create a new page 4259fa9e4066Sahrens * and fill it with file data. For efficiency, we may try to fill 4260ac05c741SMark Maybee * multiple pages at once (klustering) to fill up the supplied page 4261ed886187SMark Maybee * list. Note that the pages to be filled are held with an exclusive 4262ed886187SMark Maybee * lock to prevent access by other threads while they are being filled. 4263fa9e4066Sahrens */ 4264fa9e4066Sahrens static int 4265fa9e4066Sahrens zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4266fa9e4066Sahrens caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4267fa9e4066Sahrens { 4268fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4269fa9e4066Sahrens page_t *pp, *cur_pp; 4270fa9e4066Sahrens objset_t *os = zp->z_zfsvfs->z_os; 4271fa9e4066Sahrens u_offset_t io_off, total; 4272fa9e4066Sahrens size_t io_len; 4273fa9e4066Sahrens int err; 4274fa9e4066Sahrens 427544eda4d7Smaybee if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4276ac05c741SMark Maybee /* 4277ac05c741SMark Maybee * We only have a single page, don't bother klustering 4278ac05c741SMark Maybee */ 4279fa9e4066Sahrens io_off = off; 4280fa9e4066Sahrens io_len = PAGESIZE; 4281ed886187SMark Maybee pp = page_create_va(vp, io_off, io_len, 4282ed886187SMark Maybee PG_EXCL | PG_WAIT, seg, addr); 4283fa9e4066Sahrens } else { 4284fa9e4066Sahrens /* 4285ac05c741SMark Maybee * Try to find enough pages to fill the page list 4286fa9e4066Sahrens */ 4287fa9e4066Sahrens pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 4288ac05c741SMark Maybee 
&io_len, off, plsz, 0); 4289fa9e4066Sahrens } 4290fa9e4066Sahrens if (pp == NULL) { 4291fa9e4066Sahrens /* 4292ac05c741SMark Maybee * The page already exists, nothing to do here. 4293fa9e4066Sahrens */ 4294fa9e4066Sahrens *pl = NULL; 4295fa9e4066Sahrens return (0); 4296fa9e4066Sahrens } 4297fa9e4066Sahrens 4298fa9e4066Sahrens /* 4299fa9e4066Sahrens * Fill the pages in the kluster. 4300fa9e4066Sahrens */ 4301fa9e4066Sahrens cur_pp = pp; 4302fa9e4066Sahrens for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4303ac05c741SMark Maybee caddr_t va; 4304ac05c741SMark Maybee 430544eda4d7Smaybee ASSERT3U(io_off, ==, cur_pp->p_offset); 43060fab61baSJonathan W Adams va = zfs_map_page(cur_pp, S_WRITE); 43077bfdf011SNeil Perrin err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 43087bfdf011SNeil Perrin DMU_READ_PREFETCH); 43090fab61baSJonathan W Adams zfs_unmap_page(cur_pp, va); 4310fa9e4066Sahrens if (err) { 4311fa9e4066Sahrens /* On error, toss the entire kluster */ 4312fa9e4066Sahrens pvn_read_done(pp, B_ERROR); 4313b87f3af3Sperrin /* convert checksum errors into IO errors */ 4314b87f3af3Sperrin if (err == ECKSUM) 4315b87f3af3Sperrin err = EIO; 4316fa9e4066Sahrens return (err); 4317fa9e4066Sahrens } 4318fa9e4066Sahrens cur_pp = cur_pp->p_next; 4319fa9e4066Sahrens } 4320ac05c741SMark Maybee 4321fa9e4066Sahrens /* 4322ac05c741SMark Maybee * Fill in the page list array from the kluster starting 4323ac05c741SMark Maybee * from the desired offset `off'. 4324fa9e4066Sahrens * NOTE: the page list will always be null terminated. 4325fa9e4066Sahrens */ 4326fa9e4066Sahrens pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4327ac05c741SMark Maybee ASSERT(pl == NULL || (*pl)->p_offset == off); 4328fa9e4066Sahrens 4329fa9e4066Sahrens return (0); 4330fa9e4066Sahrens } 4331fa9e4066Sahrens 4332fa9e4066Sahrens /* 4333fa9e4066Sahrens * Return pointers to the pages for the file region [off, off + len] 4334fa9e4066Sahrens * in the pl array. 
If plsz is greater than len, this function may 4335ac05c741SMark Maybee * also return page pointers from after the specified region 4336ac05c741SMark Maybee * (i.e. the region [off, off + plsz]). These additional pages are 4337ac05c741SMark Maybee * only returned if they are already in the cache, or were created as 4338ac05c741SMark Maybee * part of a klustered read. 4339fa9e4066Sahrens * 4340fa9e4066Sahrens * IN: vp - vnode of file to get data from. 4341fa9e4066Sahrens * off - position in file to get data from. 4342fa9e4066Sahrens * len - amount of data to retrieve. 4343fa9e4066Sahrens * plsz - length of provided page list. 4344fa9e4066Sahrens * seg - segment to obtain pages for. 4345fa9e4066Sahrens * addr - virtual address of fault. 4346fa9e4066Sahrens * rw - mode of created pages. 4347fa9e4066Sahrens * cr - credentials of caller. 4348da6c28aaSamw * ct - caller context. 4349fa9e4066Sahrens * 4350fa9e4066Sahrens * OUT: protp - protection mode of created pages. 4351fa9e4066Sahrens * pl - list of pages created. 
4352fa9e4066Sahrens * 4353fa9e4066Sahrens * RETURN: 0 if success 4354fa9e4066Sahrens * error code if failure 4355fa9e4066Sahrens * 4356fa9e4066Sahrens * Timestamps: 4357fa9e4066Sahrens * vp - atime updated 4358fa9e4066Sahrens */ 4359fa9e4066Sahrens /* ARGSUSED */ 4360fa9e4066Sahrens static int 4361fa9e4066Sahrens zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4362fa9e4066Sahrens page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4363da6c28aaSamw enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4364fa9e4066Sahrens { 4365fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4366fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4367ac05c741SMark Maybee page_t **pl0 = pl; 4368ac05c741SMark Maybee int err = 0; 4369ac05c741SMark Maybee 4370ac05c741SMark Maybee /* we do our own caching, faultahead is unnecessary */ 4371ac05c741SMark Maybee if (pl == NULL) 4372ac05c741SMark Maybee return (0); 4373ac05c741SMark Maybee else if (len > plsz) 4374ac05c741SMark Maybee len = plsz; 437527bd165aSMark Maybee else 437627bd165aSMark Maybee len = P2ROUNDUP(len, PAGESIZE); 4377ac05c741SMark Maybee ASSERT(plsz >= len); 4378fa9e4066Sahrens 43793cb34c60Sahrens ZFS_ENTER(zfsvfs); 43803cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4381fa9e4066Sahrens 4382fa9e4066Sahrens if (protp) 4383fa9e4066Sahrens *protp = PROT_ALL; 4384fa9e4066Sahrens 4385fa9e4066Sahrens /* 4386ed886187SMark Maybee * Loop through the requested range [off, off + len) looking 4387fa9e4066Sahrens * for pages. If we don't find a page, we will need to create 4388fa9e4066Sahrens * a new page and fill it with data from the file. 
4389fa9e4066Sahrens */ 4390fa9e4066Sahrens while (len > 0) { 4391ac05c741SMark Maybee if (*pl = page_lookup(vp, off, SE_SHARED)) 4392ac05c741SMark Maybee *(pl+1) = NULL; 4393ac05c741SMark Maybee else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4394ac05c741SMark Maybee goto out; 4395ac05c741SMark Maybee while (*pl) { 4396ac05c741SMark Maybee ASSERT3U((*pl)->p_offset, ==, off); 4397fa9e4066Sahrens off += PAGESIZE; 4398fa9e4066Sahrens addr += PAGESIZE; 439927bd165aSMark Maybee if (len > 0) { 440027bd165aSMark Maybee ASSERT3U(len, >=, PAGESIZE); 4401ac05c741SMark Maybee len -= PAGESIZE; 440227bd165aSMark Maybee } 4403ac05c741SMark Maybee ASSERT3U(plsz, >=, PAGESIZE); 4404fa9e4066Sahrens plsz -= PAGESIZE; 4405ac05c741SMark Maybee pl++; 4406fa9e4066Sahrens } 4407fa9e4066Sahrens } 4408fa9e4066Sahrens 4409fa9e4066Sahrens /* 4410fa9e4066Sahrens * Fill out the page array with any pages already in the cache. 4411fa9e4066Sahrens */ 4412ac05c741SMark Maybee while (plsz > 0 && 4413ac05c741SMark Maybee (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4414ac05c741SMark Maybee off += PAGESIZE; 4415ac05c741SMark Maybee plsz -= PAGESIZE; 4416fa9e4066Sahrens } 4417fa9e4066Sahrens out: 4418fe2f476aSperrin if (err) { 4419fe2f476aSperrin /* 4420fe2f476aSperrin * Release any pages we have previously locked. 4421fe2f476aSperrin */ 4422fe2f476aSperrin while (pl > pl0) 4423fe2f476aSperrin page_unlock(*--pl); 4424ac05c741SMark Maybee } else { 4425ac05c741SMark Maybee ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4426fe2f476aSperrin } 4427fe2f476aSperrin 4428fa9e4066Sahrens *pl = NULL; 4429fa9e4066Sahrens 4430fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4431fa9e4066Sahrens return (err); 4432fa9e4066Sahrens } 4433fa9e4066Sahrens 4434ea8dc4b6Seschrock /* 4435ea8dc4b6Seschrock * Request a memory map for a section of a file. 
This code interacts 4436ea8dc4b6Seschrock * with common code and the VM system as follows: 4437ea8dc4b6Seschrock * 4438ea8dc4b6Seschrock * common code calls mmap(), which ends up in smmap_common() 4439ea8dc4b6Seschrock * 4440ea8dc4b6Seschrock * this calls VOP_MAP(), which takes you into (say) zfs 4441ea8dc4b6Seschrock * 4442ea8dc4b6Seschrock * zfs_map() calls as_map(), passing segvn_create() as the callback 4443ea8dc4b6Seschrock * 4444ea8dc4b6Seschrock * segvn_create() creates the new segment and calls VOP_ADDMAP() 4445ea8dc4b6Seschrock * 4446ea8dc4b6Seschrock * zfs_addmap() updates z_mapcnt 4447ea8dc4b6Seschrock */ 4448da6c28aaSamw /*ARGSUSED*/ 4449fa9e4066Sahrens static int 4450fa9e4066Sahrens zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4451da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4452da6c28aaSamw caller_context_t *ct) 4453fa9e4066Sahrens { 4454fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4455fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4456fa9e4066Sahrens segvn_crargs_t vn_a; 4457fa9e4066Sahrens int error; 4458fa9e4066Sahrens 44590616c50eSmarks ZFS_ENTER(zfsvfs); 44600616c50eSmarks ZFS_VERIFY_ZP(zp); 44610616c50eSmarks 44620a586ceaSMark Shellenbaum if ((prot & PROT_WRITE) && (zp->z_pflags & 44630a586ceaSMark Shellenbaum (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 44640616c50eSmarks ZFS_EXIT(zfsvfs); 4465da6c28aaSamw return (EPERM); 44660616c50eSmarks } 4467da6c28aaSamw 44680616c50eSmarks if ((prot & (PROT_READ | PROT_EXEC)) && 44690a586ceaSMark Shellenbaum (zp->z_pflags & ZFS_AV_QUARANTINED)) { 44700616c50eSmarks ZFS_EXIT(zfsvfs); 44710616c50eSmarks return (EACCES); 44720616c50eSmarks } 4473fa9e4066Sahrens 4474fa9e4066Sahrens if (vp->v_flag & VNOMAP) { 4475fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4476fa9e4066Sahrens return (ENOSYS); 4477fa9e4066Sahrens } 4478fa9e4066Sahrens 4479fa9e4066Sahrens if (off < 0 || len > MAXOFFSET_T - off) { 4480fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4481fa9e4066Sahrens return 
(ENXIO); 4482fa9e4066Sahrens } 4483fa9e4066Sahrens 4484fa9e4066Sahrens if (vp->v_type != VREG) { 4485fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4486fa9e4066Sahrens return (ENODEV); 4487fa9e4066Sahrens } 4488fa9e4066Sahrens 4489fa9e4066Sahrens /* 4490fa9e4066Sahrens * If file is locked, disallow mapping. 4491fa9e4066Sahrens */ 44920a586ceaSMark Shellenbaum if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 4493ea8dc4b6Seschrock ZFS_EXIT(zfsvfs); 4494ea8dc4b6Seschrock return (EAGAIN); 4495fa9e4066Sahrens } 4496fa9e4066Sahrens 4497fa9e4066Sahrens as_rangelock(as); 449860946fe0Smec error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 449960946fe0Smec if (error != 0) { 450060946fe0Smec as_rangeunlock(as); 450160946fe0Smec ZFS_EXIT(zfsvfs); 450260946fe0Smec return (error); 4503fa9e4066Sahrens } 4504fa9e4066Sahrens 4505fa9e4066Sahrens vn_a.vp = vp; 4506fa9e4066Sahrens vn_a.offset = (u_offset_t)off; 4507fa9e4066Sahrens vn_a.type = flags & MAP_TYPE; 4508fa9e4066Sahrens vn_a.prot = prot; 4509fa9e4066Sahrens vn_a.maxprot = maxprot; 4510fa9e4066Sahrens vn_a.cred = cr; 4511fa9e4066Sahrens vn_a.amp = NULL; 4512fa9e4066Sahrens vn_a.flags = flags & ~MAP_TYPE; 45134944b02eSkchow vn_a.szc = 0; 45144944b02eSkchow vn_a.lgrp_mem_policy_flags = 0; 4515fa9e4066Sahrens 4516fa9e4066Sahrens error = as_map(as, *addrp, len, segvn_create, &vn_a); 4517fa9e4066Sahrens 4518fa9e4066Sahrens as_rangeunlock(as); 4519fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4520fa9e4066Sahrens return (error); 4521fa9e4066Sahrens } 4522fa9e4066Sahrens 4523fa9e4066Sahrens /* ARGSUSED */ 4524fa9e4066Sahrens static int 4525fa9e4066Sahrens zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4526da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4527da6c28aaSamw caller_context_t *ct) 4528fa9e4066Sahrens { 4529ea8dc4b6Seschrock uint64_t pages = btopr(len); 4530ea8dc4b6Seschrock 4531ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 4532fa9e4066Sahrens return (0); 
4533fa9e4066Sahrens } 4534fa9e4066Sahrens 4535b468a217Seschrock /* 4536b468a217Seschrock * The reason we push dirty pages as part of zfs_delmap() is so that we get a 4537b468a217Seschrock * more accurate mtime for the associated file. Since we don't have a way of 4538b468a217Seschrock * detecting when the data was actually modified, we have to resort to 4539b468a217Seschrock * heuristics. If an explicit msync() is done, then we mark the mtime when the 4540b468a217Seschrock * last page is pushed. The problem occurs when the msync() call is omitted, 4541b468a217Seschrock * which by far the most common case: 4542b468a217Seschrock * 4543b468a217Seschrock * open() 4544b468a217Seschrock * mmap() 4545b468a217Seschrock * <modify memory> 4546b468a217Seschrock * munmap() 4547b468a217Seschrock * close() 4548b468a217Seschrock * <time lapse> 4549b468a217Seschrock * putpage() via fsflush 4550b468a217Seschrock * 4551b468a217Seschrock * If we wait until fsflush to come along, we can have a modification time that 4552b468a217Seschrock * is some arbitrary point in the future. In order to prevent this in the 4553b468a217Seschrock * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 4554b468a217Seschrock * torn down. 
4555b468a217Seschrock */ 4556fa9e4066Sahrens /* ARGSUSED */ 4557fa9e4066Sahrens static int 4558fa9e4066Sahrens zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4559da6c28aaSamw size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 4560da6c28aaSamw caller_context_t *ct) 4561fa9e4066Sahrens { 4562ea8dc4b6Seschrock uint64_t pages = btopr(len); 4563ea8dc4b6Seschrock 4564ea8dc4b6Seschrock ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 4565ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 4566b468a217Seschrock 4567b468a217Seschrock if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 4568b468a217Seschrock vn_has_cached_data(vp)) 4569da6c28aaSamw (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 4570b468a217Seschrock 4571fa9e4066Sahrens return (0); 4572fa9e4066Sahrens } 4573fa9e4066Sahrens 4574fa9e4066Sahrens /* 4575fa9e4066Sahrens * Free or allocate space in a file. Currently, this function only 4576fa9e4066Sahrens * supports the `F_FREESP' command. However, this command is somewhat 4577fa9e4066Sahrens * misnamed, as its functionality includes the ability to allocate as 4578fa9e4066Sahrens * well as free space. 4579fa9e4066Sahrens * 4580fa9e4066Sahrens * IN: vp - vnode of file to free data in. 4581fa9e4066Sahrens * cmd - action to take (only F_FREESP supported). 4582fa9e4066Sahrens * bfp - section of file to free/alloc. 4583fa9e4066Sahrens * flag - current file open mode flags. 4584fa9e4066Sahrens * offset - current file offset. 4585fa9e4066Sahrens * cr - credentials of caller [UNUSED]. 4586da6c28aaSamw * ct - caller context. 
4587fa9e4066Sahrens * 4588fa9e4066Sahrens * RETURN: 0 if success 4589fa9e4066Sahrens * error code if failure 4590fa9e4066Sahrens * 4591fa9e4066Sahrens * Timestamps: 4592fa9e4066Sahrens * vp - ctime|mtime updated 4593fa9e4066Sahrens */ 4594fa9e4066Sahrens /* ARGSUSED */ 4595fa9e4066Sahrens static int 4596fa9e4066Sahrens zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 4597fa9e4066Sahrens offset_t offset, cred_t *cr, caller_context_t *ct) 4598fa9e4066Sahrens { 4599fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4600fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4601fa9e4066Sahrens uint64_t off, len; 4602fa9e4066Sahrens int error; 4603fa9e4066Sahrens 46043cb34c60Sahrens ZFS_ENTER(zfsvfs); 46053cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4606fa9e4066Sahrens 4607fa9e4066Sahrens if (cmd != F_FREESP) { 4608fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4609fa9e4066Sahrens return (EINVAL); 4610fa9e4066Sahrens } 4611fa9e4066Sahrens 4612fa9e4066Sahrens if (error = convoff(vp, bfp, 0, offset)) { 4613fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4614fa9e4066Sahrens return (error); 4615fa9e4066Sahrens } 4616fa9e4066Sahrens 4617fa9e4066Sahrens if (bfp->l_len < 0) { 4618fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4619fa9e4066Sahrens return (EINVAL); 4620fa9e4066Sahrens } 4621fa9e4066Sahrens 4622fa9e4066Sahrens off = bfp->l_start; 4623104e2ed7Sperrin len = bfp->l_len; /* 0 means from off to end of file */ 4624104e2ed7Sperrin 4625cdb0ab79Smaybee error = zfs_freesp(zp, off, len, flag, TRUE); 4626fa9e4066Sahrens 4627fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4628fa9e4066Sahrens return (error); 4629fa9e4066Sahrens } 4630fa9e4066Sahrens 4631da6c28aaSamw /*ARGSUSED*/ 4632fa9e4066Sahrens static int 4633da6c28aaSamw zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4634fa9e4066Sahrens { 4635fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4636fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4637f18faf3fSek uint32_t gen; 46380a586ceaSMark Shellenbaum uint64_t gen64; 4639fa9e4066Sahrens uint64_t object = zp->z_id; 4640fa9e4066Sahrens 
zfid_short_t *zfid; 46410a586ceaSMark Shellenbaum int size, i, error; 4642fa9e4066Sahrens 46433cb34c60Sahrens ZFS_ENTER(zfsvfs); 46443cb34c60Sahrens ZFS_VERIFY_ZP(zp); 46450a586ceaSMark Shellenbaum 46460a586ceaSMark Shellenbaum if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4647f3e6fb2fSMark Shellenbaum &gen64, sizeof (uint64_t))) != 0) { 4648f3e6fb2fSMark Shellenbaum ZFS_EXIT(zfsvfs); 46490a586ceaSMark Shellenbaum return (error); 4650f3e6fb2fSMark Shellenbaum } 46510a586ceaSMark Shellenbaum 46520a586ceaSMark Shellenbaum gen = (uint32_t)gen64; 4653fa9e4066Sahrens 4654fa9e4066Sahrens size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4655fa9e4066Sahrens if (fidp->fid_len < size) { 4656fa9e4066Sahrens fidp->fid_len = size; 46570f2dc02eSek ZFS_EXIT(zfsvfs); 4658fa9e4066Sahrens return (ENOSPC); 4659fa9e4066Sahrens } 4660fa9e4066Sahrens 4661fa9e4066Sahrens zfid = (zfid_short_t *)fidp; 4662fa9e4066Sahrens 4663fa9e4066Sahrens zfid->zf_len = size; 4664fa9e4066Sahrens 4665fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 4666fa9e4066Sahrens zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4667fa9e4066Sahrens 4668fa9e4066Sahrens /* Must have a non-zero generation number to distinguish from .zfs */ 4669fa9e4066Sahrens if (gen == 0) 4670fa9e4066Sahrens gen = 1; 4671fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 4672fa9e4066Sahrens zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4673fa9e4066Sahrens 4674fa9e4066Sahrens if (size == LONG_FID_LEN) { 4675fa9e4066Sahrens uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4676fa9e4066Sahrens zfid_long_t *zlfid; 4677fa9e4066Sahrens 4678fa9e4066Sahrens zlfid = (zfid_long_t *)fidp; 4679fa9e4066Sahrens 4680fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4681fa9e4066Sahrens zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4682fa9e4066Sahrens 4683fa9e4066Sahrens /* XXX - this should be the generation number for the objset */ 4684fa9e4066Sahrens for (i = 0; i < sizeof 
(zlfid->zf_setgen); i++) 4685fa9e4066Sahrens zlfid->zf_setgen[i] = 0; 4686fa9e4066Sahrens } 4687fa9e4066Sahrens 4688fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4689fa9e4066Sahrens return (0); 4690fa9e4066Sahrens } 4691fa9e4066Sahrens 4692fa9e4066Sahrens static int 4693da6c28aaSamw zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4694da6c28aaSamw caller_context_t *ct) 4695fa9e4066Sahrens { 4696fa9e4066Sahrens znode_t *zp, *xzp; 4697fa9e4066Sahrens zfsvfs_t *zfsvfs; 4698fa9e4066Sahrens zfs_dirlock_t *dl; 4699fa9e4066Sahrens int error; 4700fa9e4066Sahrens 4701fa9e4066Sahrens switch (cmd) { 4702fa9e4066Sahrens case _PC_LINK_MAX: 4703fa9e4066Sahrens *valp = ULONG_MAX; 4704fa9e4066Sahrens return (0); 4705fa9e4066Sahrens 4706fa9e4066Sahrens case _PC_FILESIZEBITS: 4707fa9e4066Sahrens *valp = 64; 4708fa9e4066Sahrens return (0); 4709fa9e4066Sahrens 4710fa9e4066Sahrens case _PC_XATTR_EXISTS: 4711fa9e4066Sahrens zp = VTOZ(vp); 4712fa9e4066Sahrens zfsvfs = zp->z_zfsvfs; 47133cb34c60Sahrens ZFS_ENTER(zfsvfs); 47143cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4715fa9e4066Sahrens *valp = 0; 4716fa9e4066Sahrens error = zfs_dirent_lock(&dl, zp, "", &xzp, 4717da6c28aaSamw ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 4718fa9e4066Sahrens if (error == 0) { 4719fa9e4066Sahrens zfs_dirent_unlock(dl); 4720fa9e4066Sahrens if (!zfs_dirempty(xzp)) 4721fa9e4066Sahrens *valp = 1; 4722fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 4723fa9e4066Sahrens } else if (error == ENOENT) { 4724fa9e4066Sahrens /* 4725fa9e4066Sahrens * If there aren't extended attributes, it's the 4726fa9e4066Sahrens * same as having zero of them. 
4727fa9e4066Sahrens */ 4728fa9e4066Sahrens error = 0; 4729fa9e4066Sahrens } 4730fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4731fa9e4066Sahrens return (error); 4732fa9e4066Sahrens 4733da6c28aaSamw case _PC_SATTR_ENABLED: 4734da6c28aaSamw case _PC_SATTR_EXISTS: 47359660e5cbSJanice Chang *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4736da6c28aaSamw (vp->v_type == VREG || vp->v_type == VDIR); 4737da6c28aaSamw return (0); 4738da6c28aaSamw 4739e802abbdSTim Haley case _PC_ACCESS_FILTERING: 4740e802abbdSTim Haley *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4741e802abbdSTim Haley vp->v_type == VDIR; 4742e802abbdSTim Haley return (0); 4743e802abbdSTim Haley 4744fa9e4066Sahrens case _PC_ACL_ENABLED: 4745fa9e4066Sahrens *valp = _ACL_ACE_ENABLED; 4746fa9e4066Sahrens return (0); 4747fa9e4066Sahrens 4748fa9e4066Sahrens case _PC_MIN_HOLE_SIZE: 4749fa9e4066Sahrens *valp = (ulong_t)SPA_MINBLOCKSIZE; 4750fa9e4066Sahrens return (0); 4751fa9e4066Sahrens 47523b862e9aSRoger A. Faulkner case _PC_TIMESTAMP_RESOLUTION: 47533b862e9aSRoger A. Faulkner /* nanosecond timestamp resolution */ 47543b862e9aSRoger A. Faulkner *valp = 1L; 47553b862e9aSRoger A. Faulkner return (0); 47563b862e9aSRoger A. Faulkner 4757fa9e4066Sahrens default: 4758da6c28aaSamw return (fs_pathconf(vp, cmd, valp, cr, ct)); 4759fa9e4066Sahrens } 4760fa9e4066Sahrens } 4761fa9e4066Sahrens 4762fa9e4066Sahrens /*ARGSUSED*/ 4763fa9e4066Sahrens static int 4764da6c28aaSamw zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4765da6c28aaSamw caller_context_t *ct) 4766fa9e4066Sahrens { 4767fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4768fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4769fa9e4066Sahrens int error; 4770da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 4771fa9e4066Sahrens 47723cb34c60Sahrens ZFS_ENTER(zfsvfs); 47733cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4774da6c28aaSamw error = zfs_getacl(zp, vsecp, skipaclchk, cr); 4775fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4776fa9e4066Sahrens 4777fa9e4066Sahrens return (error); 4778fa9e4066Sahrens } 4779fa9e4066Sahrens 4780fa9e4066Sahrens /*ARGSUSED*/ 4781fa9e4066Sahrens static int 4782da6c28aaSamw zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4783da6c28aaSamw caller_context_t *ct) 4784fa9e4066Sahrens { 4785fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4786fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4787fa9e4066Sahrens int error; 4788da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4789*55da60b9SMark J Musante zilog_t *zilog = zfsvfs->z_log; 4790fa9e4066Sahrens 47913cb34c60Sahrens ZFS_ENTER(zfsvfs); 47923cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4793*55da60b9SMark J Musante 4794da6c28aaSamw error = zfs_setacl(zp, vsecp, skipaclchk, cr); 4795*55da60b9SMark J Musante 4796*55da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 4797*55da60b9SMark J Musante zil_commit(zilog, UINT64_MAX, 0); 4798*55da60b9SMark J Musante 4799fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4800fa9e4066Sahrens return (error); 4801fa9e4066Sahrens } 4802fa9e4066Sahrens 4803c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 4804c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Tunable, both must be a power of 2. 
4805c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * 4806c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf 4807c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * zcr_blksz_max: if set to less than the file block size, allow loaning out of 4808c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * an arcbuf for a partial block read 4809c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 4810c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int zcr_blksz_min = (1 << 10); /* 1K */ 4811c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int zcr_blksz_max = (1 << 17); /* 128K */ 4812c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4813c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /*ARGSUSED*/ 4814c242f9a0Schunli zhang - Sun Microsystems - Irvine United States static int 4815c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 4816c242f9a0Schunli zhang - Sun Microsystems - Irvine United States caller_context_t *ct) 4817c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 4818c242f9a0Schunli zhang - Sun Microsystems - Irvine United States znode_t *zp = VTOZ(vp); 4819c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4820c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int max_blksz = zfsvfs->z_max_blksz; 4821c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio_t *uio = &xuio->xu_uio; 4822c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ssize_t size = uio->uio_resid; 4823c242f9a0Schunli zhang - Sun Microsystems - Irvine United States offset_t offset = uio->uio_loffset; 4824c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz; 4825c242f9a0Schunli zhang - Sun 
Microsystems - Irvine United States int fullblk, i; 4826c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 4827c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ssize_t maxsize; 4828c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int preamble, postamble; 4829c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4830c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (xuio->xu_type != UIOTYPE_ZEROCOPY) 4831c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (EINVAL); 4832c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4833c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_ENTER(zfsvfs); 4834c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_VERIFY_ZP(zp); 4835c242f9a0Schunli zhang - Sun Microsystems - Irvine United States switch (ioflag) { 4836c242f9a0Schunli zhang - Sun Microsystems - Irvine United States case UIO_WRITE: 4837c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 4838c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for write if write size is bigger than 4839c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * max_blksz, and the file's block size is also max_blksz. 
4840c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 4841c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = max_blksz; 4842c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size < blksz || zp->z_blksz != blksz) { 4843c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 4844c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (EINVAL); 4845c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4846c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 4847c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Caller requests buffers for write before knowing where the 4848c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * write offset might be (e.g. NFS TCP write). 4849c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 4850c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (offset == -1) { 4851c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = 0; 4852c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 4853c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = P2PHASE(offset, blksz); 4854c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (preamble) { 4855c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = blksz - preamble; 4856c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size -= preamble; 4857c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4858c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4859c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4860c242f9a0Schunli zhang - Sun Microsystems - Irvine United States postamble = P2PHASE(size, blksz); 4861c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size -= postamble; 4862c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4863c242f9a0Schunli 
zhang - Sun Microsystems - Irvine United States fullblk = size / blksz; 4864570de38fSSurya Prakki (void) dmu_xuio_init(xuio, 4865c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (preamble != 0) + fullblk + (postamble != 0)); 4866c242f9a0Schunli zhang - Sun Microsystems - Irvine United States DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 4867c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int, postamble, int, 4868c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (preamble != 0) + fullblk + (postamble != 0)); 4869c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4870c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 4871c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Have to fix iov base/len for partial buffers. They 4872c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * currently represent full arc_buf's. 4873c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 4874c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (preamble) { 4875c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* data begins in the middle of the arc_buf */ 48760a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 48770a586ceaSMark Shellenbaum blksz); 4878c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 4879570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 4880570de38fSSurya Prakki blksz - preamble, preamble); 4881c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4882c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4883c242f9a0Schunli zhang - Sun Microsystems - Irvine United States for (i = 0; i < fullblk; i++) { 48840a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 48850a586ceaSMark Shellenbaum blksz); 4886c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 4887570de38fSSurya Prakki (void) 
dmu_xuio_add(xuio, abuf, 0, blksz); 4888c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4889c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4890c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (postamble) { 4891c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* data ends in the middle of the arc_buf */ 48920a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 48930a586ceaSMark Shellenbaum blksz); 4894c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 4895570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 0, postamble); 4896c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4897c242f9a0Schunli zhang - Sun Microsystems - Irvine United States break; 4898c242f9a0Schunli zhang - Sun Microsystems - Irvine United States case UIO_READ: 4899c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 4900c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for read if the read size is larger than 4901c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * the current file block size. Block alignment is not 4902c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * considered. Partial arc_buf will be loaned out for read. 
4903c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 4904c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zp->z_blksz; 4905c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz < zcr_blksz_min) 4906c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zcr_blksz_min; 4907c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz > zcr_blksz_max) 4908c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zcr_blksz_max; 4909c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* avoid potential complexity of dealing with it */ 4910c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz > max_blksz) { 4911c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 4912c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (EINVAL); 4913c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4914c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 49150a586ceaSMark Shellenbaum maxsize = zp->z_size - uio->uio_loffset; 4916c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size > maxsize) 4917c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size = maxsize; 4918c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4919c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size < blksz || vn_has_cached_data(vp)) { 4920c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 4921c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (EINVAL); 4922c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4923c242f9a0Schunli zhang - Sun Microsystems - Irvine United States break; 4924c242f9a0Schunli zhang - Sun Microsystems - Irvine United States default: 4925c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 4926c242f9a0Schunli zhang - Sun 
Microsystems - Irvine United States return (EINVAL); 4927c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4928c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4929c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio->uio_extflg = UIO_XUIO; 4930c242f9a0Schunli zhang - Sun Microsystems - Irvine United States XUIO_XUZC_RW(xuio) = ioflag; 4931c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 4932c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (0); 4933c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4934c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4935c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /*ARGSUSED*/ 4936c242f9a0Schunli zhang - Sun Microsystems - Irvine United States static int 4937c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 4938c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 4939c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int i; 4940c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 4941c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int ioflag = XUIO_XUZC_RW(xuio); 4942c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4943c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 4944c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4945c242f9a0Schunli zhang - Sun Microsystems - Irvine United States i = dmu_xuio_cnt(xuio); 4946c242f9a0Schunli zhang - Sun Microsystems - Irvine United States while (i-- > 0) { 4947c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = dmu_xuio_arcbuf(xuio, i); 4948c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 4949c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * if abuf == 
NULL, it must be a write buffer 4950c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * that has been returned in zfs_write(). 4951c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 4952c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (abuf) 4953c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_return_arcbuf(abuf); 4954c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf || ioflag == UIO_WRITE); 4955c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4956c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4957c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_xuio_fini(xuio); 4958c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (0); 4959c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 4960c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 4961fa9e4066Sahrens /* 4962fa9e4066Sahrens * Predeclare these here so that the compiler assumes that 4963fa9e4066Sahrens * this is an "old style" function declaration that does 4964fa9e4066Sahrens * not include arguments => we won't get type mismatch errors 4965fa9e4066Sahrens * in the initializations that follow. 
4966fa9e4066Sahrens */ 4967fa9e4066Sahrens static int zfs_inval(); 4968fa9e4066Sahrens static int zfs_isdir(); 4969fa9e4066Sahrens 4970fa9e4066Sahrens static int 4971fa9e4066Sahrens zfs_inval() 4972fa9e4066Sahrens { 4973fa9e4066Sahrens return (EINVAL); 4974fa9e4066Sahrens } 4975fa9e4066Sahrens 4976fa9e4066Sahrens static int 4977fa9e4066Sahrens zfs_isdir() 4978fa9e4066Sahrens { 4979fa9e4066Sahrens return (EISDIR); 4980fa9e4066Sahrens } 4981fa9e4066Sahrens /* 4982fa9e4066Sahrens * Directory vnode operations template 4983fa9e4066Sahrens */ 4984fa9e4066Sahrens vnodeops_t *zfs_dvnodeops; 4985fa9e4066Sahrens const fs_operation_def_t zfs_dvnodeops_template[] = { 4986aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 4987aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 4988aa59c4cbSrsb VOPNAME_READ, { .error = zfs_isdir }, 4989aa59c4cbSrsb VOPNAME_WRITE, { .error = zfs_isdir }, 4990aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 4991aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 4992aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 4993aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 4994aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 4995aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 4996aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 4997aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 4998aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 4999aa59c4cbSrsb VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5000aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5001aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5002aa59c4cbSrsb VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5003aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5004aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5005aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5006aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5007aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf 
= zfs_pathconf }, 5008aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5009aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5010df2381bfSpraks VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5011aa59c4cbSrsb NULL, NULL 5012fa9e4066Sahrens }; 5013fa9e4066Sahrens 5014fa9e4066Sahrens /* 5015fa9e4066Sahrens * Regular file vnode operations template 5016fa9e4066Sahrens */ 5017fa9e4066Sahrens vnodeops_t *zfs_fvnodeops; 5018fa9e4066Sahrens const fs_operation_def_t zfs_fvnodeops_template[] = { 5019aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5020aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5021aa59c4cbSrsb VOPNAME_READ, { .vop_read = zfs_read }, 5022aa59c4cbSrsb VOPNAME_WRITE, { .vop_write = zfs_write }, 5023aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5024aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5025aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5026aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5027aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5028aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5029aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5030aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5031aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5032aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5033aa59c4cbSrsb VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5034aa59c4cbSrsb VOPNAME_SPACE, { .vop_space = zfs_space }, 5035aa59c4cbSrsb VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5036aa59c4cbSrsb VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 5037aa59c4cbSrsb VOPNAME_MAP, { .vop_map = zfs_map }, 5038aa59c4cbSrsb VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5039aa59c4cbSrsb VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5040aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5041aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5042aa59c4cbSrsb 
VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5043aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5044c242f9a0Schunli zhang - Sun Microsystems - Irvine United States VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 5045c242f9a0Schunli zhang - Sun Microsystems - Irvine United States VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5046aa59c4cbSrsb NULL, NULL 5047fa9e4066Sahrens }; 5048fa9e4066Sahrens 5049fa9e4066Sahrens /* 5050fa9e4066Sahrens * Symbolic link vnode operations template 5051fa9e4066Sahrens */ 5052fa9e4066Sahrens vnodeops_t *zfs_symvnodeops; 5053fa9e4066Sahrens const fs_operation_def_t zfs_symvnodeops_template[] = { 5054aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5055aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5056aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5057aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5058aa59c4cbSrsb VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5059aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5060aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5061aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5062aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5063aa59c4cbSrsb NULL, NULL 5064fa9e4066Sahrens }; 5065fa9e4066Sahrens 5066743a77edSAlan Wright /* 5067743a77edSAlan Wright * special share hidden files vnode operations template 5068743a77edSAlan Wright */ 5069743a77edSAlan Wright vnodeops_t *zfs_sharevnodeops; 5070743a77edSAlan Wright const fs_operation_def_t zfs_sharevnodeops_template[] = { 5071743a77edSAlan Wright VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5072743a77edSAlan Wright VOPNAME_ACCESS, { .vop_access = zfs_access }, 5073743a77edSAlan Wright VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5074743a77edSAlan Wright VOPNAME_FID, { .vop_fid = zfs_fid }, 5075743a77edSAlan Wright VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5076743a77edSAlan Wright 
VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5077743a77edSAlan Wright VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5078743a77edSAlan Wright VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5079743a77edSAlan Wright NULL, NULL 5080743a77edSAlan Wright }; 5081743a77edSAlan Wright 5082fa9e4066Sahrens /* 5083fa9e4066Sahrens * Extended attribute directory vnode operations template 5084fa9e4066Sahrens * This template is identical to the directory vnodes 5085fa9e4066Sahrens * operation template except for restricted operations: 5086fa9e4066Sahrens * VOP_MKDIR() 5087fa9e4066Sahrens * VOP_SYMLINK() 5088fa9e4066Sahrens * Note that there are other restrictions embedded in: 5089fa9e4066Sahrens * zfs_create() - restrict type to VREG 5090fa9e4066Sahrens * zfs_link() - no links into/out of attribute space 5091fa9e4066Sahrens * zfs_rename() - no moves into/out of attribute space 5092fa9e4066Sahrens */ 5093fa9e4066Sahrens vnodeops_t *zfs_xdvnodeops; 5094fa9e4066Sahrens const fs_operation_def_t zfs_xdvnodeops_template[] = { 5095aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5096aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5097aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5098aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5099aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5100aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5101aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5102aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 5103aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5104aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 5105aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5106aa59c4cbSrsb VOPNAME_MKDIR, { .error = zfs_inval }, 5107aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5108aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5109aa59c4cbSrsb VOPNAME_SYMLINK, { .error = zfs_inval }, 5110aa59c4cbSrsb 
VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5111aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5112aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5113aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5114aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5115aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5116aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5117aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5118aa59c4cbSrsb NULL, NULL 5119fa9e4066Sahrens }; 5120fa9e4066Sahrens 5121fa9e4066Sahrens /* 5122fa9e4066Sahrens * Error vnode operations template 5123fa9e4066Sahrens */ 5124fa9e4066Sahrens vnodeops_t *zfs_evnodeops; 5125fa9e4066Sahrens const fs_operation_def_t zfs_evnodeops_template[] = { 5126aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5127aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5128aa59c4cbSrsb NULL, NULL 5129fa9e4066Sahrens }; 5130