1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5736b9155Smarks * Common Development and Distribution License (the "License"). 6736b9155Smarks * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22ac05c741SMark Maybee * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 
24fa9e4066Sahrens */ 25fa9e4066Sahrens 2675c76197Speteh /* Portions Copyright 2007 Jeremy Teo */ 2775c76197Speteh 28fa9e4066Sahrens #include <sys/types.h> 29fa9e4066Sahrens #include <sys/param.h> 30fa9e4066Sahrens #include <sys/time.h> 31fa9e4066Sahrens #include <sys/systm.h> 32fa9e4066Sahrens #include <sys/sysmacros.h> 33fa9e4066Sahrens #include <sys/resource.h> 34fa9e4066Sahrens #include <sys/vfs.h> 35aa59c4cbSrsb #include <sys/vfs_opreg.h> 36fa9e4066Sahrens #include <sys/vnode.h> 37fa9e4066Sahrens #include <sys/file.h> 38fa9e4066Sahrens #include <sys/stat.h> 39fa9e4066Sahrens #include <sys/kmem.h> 40fa9e4066Sahrens #include <sys/taskq.h> 41fa9e4066Sahrens #include <sys/uio.h> 42fa9e4066Sahrens #include <sys/vmsystm.h> 43fa9e4066Sahrens #include <sys/atomic.h> 4444eda4d7Smaybee #include <sys/vm.h> 45fa9e4066Sahrens #include <vm/seg_vn.h> 46fa9e4066Sahrens #include <vm/pvn.h> 47fa9e4066Sahrens #include <vm/as.h> 480fab61baSJonathan W Adams #include <vm/kpm.h> 490fab61baSJonathan W Adams #include <vm/seg_kpm.h> 50fa9e4066Sahrens #include <sys/mman.h> 51fa9e4066Sahrens #include <sys/pathname.h> 52fa9e4066Sahrens #include <sys/cmn_err.h> 53fa9e4066Sahrens #include <sys/errno.h> 54fa9e4066Sahrens #include <sys/unistd.h> 55fa9e4066Sahrens #include <sys/zfs_dir.h> 56fa9e4066Sahrens #include <sys/zfs_acl.h> 57fa9e4066Sahrens #include <sys/zfs_ioctl.h> 58fa9e4066Sahrens #include <sys/fs/zfs.h> 59fa9e4066Sahrens #include <sys/dmu.h> 60fa9e4066Sahrens #include <sys/spa.h> 61fa9e4066Sahrens #include <sys/txg.h> 62fa9e4066Sahrens #include <sys/dbuf.h> 63fa9e4066Sahrens #include <sys/zap.h> 64fa9e4066Sahrens #include <sys/dirent.h> 65fa9e4066Sahrens #include <sys/policy.h> 66fa9e4066Sahrens #include <sys/sunddi.h> 67fa9e4066Sahrens #include <sys/filio.h> 68c1ce5987SMark Shellenbaum #include <sys/sid.h> 69fa9e4066Sahrens #include "fs/fs_subr.h" 70fa9e4066Sahrens #include <sys/zfs_ctldir.h> 71da6c28aaSamw #include <sys/zfs_fuid.h> 72033f9833Sek #include <sys/dnlc.h> 
73104e2ed7Sperrin #include <sys/zfs_rlock.h> 74da6c28aaSamw #include <sys/extdirent.h> 75da6c28aaSamw #include <sys/kidmap.h> 76*67dbe2beSCasper H.S. Dik #include <sys/cred.h> 77b38f0970Sck #include <sys/attr.h> 78fa9e4066Sahrens 79fa9e4066Sahrens /* 80fa9e4066Sahrens * Programming rules. 81fa9e4066Sahrens * 82fa9e4066Sahrens * Each vnode op performs some logical unit of work. To do this, the ZPL must 83fa9e4066Sahrens * properly lock its in-core state, create a DMU transaction, do the work, 84fa9e4066Sahrens * record this work in the intent log (ZIL), commit the DMU transaction, 85da6c28aaSamw * and wait for the intent log to commit if it is a synchronous operation. 86da6c28aaSamw * Moreover, the vnode ops must work in both normal and log replay context. 87fa9e4066Sahrens * The ordering of events is important to avoid deadlocks and references 88fa9e4066Sahrens * to freed memory. The example below illustrates the following Big Rules: 89fa9e4066Sahrens * 90fa9e4066Sahrens * (1) A check must be made in each zfs thread for a mounted file system. 913cb34c60Sahrens * This is done avoiding races using ZFS_ENTER(zfsvfs). 923cb34c60Sahrens * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 933cb34c60Sahrens * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 943cb34c60Sahrens * can return EIO from the calling function. 95fa9e4066Sahrens * 96fa9e4066Sahrens * (2) VN_RELE() should always be the last thing except for zil_commit() 97b19a79ecSperrin * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 98fa9e4066Sahrens * First, if it's the last reference, the vnode/znode 99fa9e4066Sahrens * can be freed, so the zp may point to freed memory. Second, the last 100fa9e4066Sahrens * reference will call zfs_zinactive(), which may induce a lot of work -- 101104e2ed7Sperrin * pushing cached pages (which acquires range locks) and syncing out 102fa9e4066Sahrens * cached atime changes. 
Third, zfs_zinactive() may require a new tx, 103fa9e4066Sahrens * which could deadlock the system if you were already holding one. 1049d3574bfSNeil Perrin * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 105fa9e4066Sahrens * 1067885c754Sperrin * (3) All range locks must be grabbed before calling dmu_tx_assign(), 1077885c754Sperrin * as they can span dmu_tx_assign() calls. 1087885c754Sperrin * 1091209a471SNeil Perrin * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). 110fa9e4066Sahrens * This is critical because we don't want to block while holding locks. 111fa9e4066Sahrens * Note, in particular, that if a lock is sometimes acquired before 112fa9e4066Sahrens * the tx assigns, and sometimes after (e.g. z_lock), then failing to 113fa9e4066Sahrens * use a non-blocking assign can deadlock the system. The scenario: 114fa9e4066Sahrens * 115fa9e4066Sahrens * Thread A has grabbed a lock before calling dmu_tx_assign(). 116fa9e4066Sahrens * Thread B is in an already-assigned tx, and blocks for this lock. 117fa9e4066Sahrens * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 118fa9e4066Sahrens * forever, because the previous txg can't quiesce until B's tx commits. 119fa9e4066Sahrens * 120fa9e4066Sahrens * If dmu_tx_assign(tx, TXG_NOWAIT) returns ERESTART, 1218a2f1b91Sahrens * then drop all locks, call dmu_tx_wait(), and try again. 122fa9e4066Sahrens * 1237885c754Sperrin * (5) If the operation succeeded, generate the intent log entry for it 124fa9e4066Sahrens * before dropping locks. This ensures that the ordering of events 125fa9e4066Sahrens * in the intent log matches the order in which they actually occurred. 1261209a471SNeil Perrin * During ZIL replay the zfs_log_* functions will update the sequence 1271209a471SNeil Perrin * number to indicate the zil transaction has replayed.
128fa9e4066Sahrens * 1297885c754Sperrin * (6) At the end of each vnode op, the DMU tx must always commit, 130fa9e4066Sahrens * regardless of whether there were any errors. 131fa9e4066Sahrens * 132b19a79ecSperrin * (7) After dropping all locks, invoke zil_commit(zilog, seq, foid) 133fa9e4066Sahrens * to ensure that synchronous semantics are provided when necessary. 134fa9e4066Sahrens * 135fa9e4066Sahrens * In general, this is how things should be ordered in each vnode op: 136fa9e4066Sahrens * 137fa9e4066Sahrens * ZFS_ENTER(zfsvfs); // exit if unmounted 138fa9e4066Sahrens * top: 139fa9e4066Sahrens * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD()) 140fa9e4066Sahrens * rw_enter(...); // grab any other locks you need 141fa9e4066Sahrens * tx = dmu_tx_create(...); // get DMU tx 142fa9e4066Sahrens * dmu_tx_hold_*(); // hold each object you might modify 1431209a471SNeil Perrin * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 144fa9e4066Sahrens * if (error) { 145fa9e4066Sahrens * rw_exit(...); // drop locks 146fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 147fa9e4066Sahrens * VN_RELE(...); // release held vnodes 1481209a471SNeil Perrin * if (error == ERESTART) { 1498a2f1b91Sahrens * dmu_tx_wait(tx); 1508a2f1b91Sahrens * dmu_tx_abort(tx); 151fa9e4066Sahrens * goto top; 152fa9e4066Sahrens * } 1538a2f1b91Sahrens * dmu_tx_abort(tx); // abort DMU tx 154fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 155fa9e4066Sahrens * return (error); // really out of space 156fa9e4066Sahrens * } 157fa9e4066Sahrens * error = do_real_work(); // do whatever this VOP does 158fa9e4066Sahrens * if (error == 0) 159b19a79ecSperrin * zfs_log_*(...); // on success, make ZIL entry 160fa9e4066Sahrens * dmu_tx_commit(tx); // commit DMU tx -- error or not 161fa9e4066Sahrens * rw_exit(...); // drop locks 162fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 163fa9e4066Sahrens * VN_RELE(...); // release held vnodes 164b19a79ecSperrin * 
zil_commit(zilog, seq, foid); // synchronous when necessary 165fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 166fa9e4066Sahrens * return (error); // done, report error 167fa9e4066Sahrens */ 1683cb34c60Sahrens 169fa9e4066Sahrens /* ARGSUSED */ 170fa9e4066Sahrens static int 171da6c28aaSamw zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 172fa9e4066Sahrens { 17367bd71c6Sperrin znode_t *zp = VTOZ(*vpp); 174b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 175b614fdaaSMark Shellenbaum 176b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 177b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 17867bd71c6Sperrin 179da6c28aaSamw if ((flag & FWRITE) && (zp->z_phys->zp_flags & ZFS_APPENDONLY) && 180da6c28aaSamw ((flag & FAPPEND) == 0)) { 181b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 182da6c28aaSamw return (EPERM); 183da6c28aaSamw } 184da6c28aaSamw 185da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 186da6c28aaSamw ZTOV(zp)->v_type == VREG && 187da6c28aaSamw !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 188b614fdaaSMark Shellenbaum zp->z_phys->zp_size > 0) { 189b614fdaaSMark Shellenbaum if (fs_vscan(*vpp, cr, 0) != 0) { 190b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 191da6c28aaSamw return (EACCES); 192b614fdaaSMark Shellenbaum } 193b614fdaaSMark Shellenbaum } 194da6c28aaSamw 19567bd71c6Sperrin /* Keep a count of the synchronous opens in the znode */ 19667bd71c6Sperrin if (flag & (FSYNC | FDSYNC)) 19767bd71c6Sperrin atomic_inc_32(&zp->z_sync_cnt); 198da6c28aaSamw 199b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 200fa9e4066Sahrens return (0); 201fa9e4066Sahrens } 202fa9e4066Sahrens 203fa9e4066Sahrens /* ARGSUSED */ 204fa9e4066Sahrens static int 205da6c28aaSamw zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 206da6c28aaSamw caller_context_t *ct) 207fa9e4066Sahrens { 20867bd71c6Sperrin znode_t *zp = VTOZ(vp); 209b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 210b614fdaaSMark Shellenbaum 
211ee8143cbSChris Kirby /* 212ee8143cbSChris Kirby * Clean up any locks held by this process on the vp. 213ee8143cbSChris Kirby */ 214ee8143cbSChris Kirby cleanlocks(vp, ddi_get_pid(), 0); 215ee8143cbSChris Kirby cleanshares(vp, ddi_get_pid()); 216ee8143cbSChris Kirby 217b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 218b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 21967bd71c6Sperrin 22067bd71c6Sperrin /* Decrement the synchronous opens in the znode */ 221ecb72030Sperrin if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 22267bd71c6Sperrin atomic_dec_32(&zp->z_sync_cnt); 22367bd71c6Sperrin 224da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 225da6c28aaSamw ZTOV(zp)->v_type == VREG && 226da6c28aaSamw !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 227da6c28aaSamw zp->z_phys->zp_size > 0) 228da6c28aaSamw VERIFY(fs_vscan(vp, cr, 1) == 0); 229da6c28aaSamw 230b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 231fa9e4066Sahrens return (0); 232fa9e4066Sahrens } 233fa9e4066Sahrens 234fa9e4066Sahrens /* 235fa9e4066Sahrens * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 236fa9e4066Sahrens * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
237fa9e4066Sahrens */ 238fa9e4066Sahrens static int 239fa9e4066Sahrens zfs_holey(vnode_t *vp, int cmd, offset_t *off) 240fa9e4066Sahrens { 241fa9e4066Sahrens znode_t *zp = VTOZ(vp); 242fa9e4066Sahrens uint64_t noff = (uint64_t)*off; /* new offset */ 243fa9e4066Sahrens uint64_t file_sz; 244fa9e4066Sahrens int error; 245fa9e4066Sahrens boolean_t hole; 246fa9e4066Sahrens 247fa9e4066Sahrens file_sz = zp->z_phys->zp_size; 248fa9e4066Sahrens if (noff >= file_sz) { 249fa9e4066Sahrens return (ENXIO); 250fa9e4066Sahrens } 251fa9e4066Sahrens 252fa9e4066Sahrens if (cmd == _FIO_SEEK_HOLE) 253fa9e4066Sahrens hole = B_TRUE; 254fa9e4066Sahrens else 255fa9e4066Sahrens hole = B_FALSE; 256fa9e4066Sahrens 257fa9e4066Sahrens error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 258fa9e4066Sahrens 259fa9e4066Sahrens /* end of file? */ 260fa9e4066Sahrens if ((error == ESRCH) || (noff > file_sz)) { 261fa9e4066Sahrens /* 262fa9e4066Sahrens * Handle the virtual hole at the end of file. 263fa9e4066Sahrens */ 264fa9e4066Sahrens if (hole) { 265fa9e4066Sahrens *off = file_sz; 266fa9e4066Sahrens return (0); 267fa9e4066Sahrens } 268fa9e4066Sahrens return (ENXIO); 269fa9e4066Sahrens } 270fa9e4066Sahrens 271fa9e4066Sahrens if (noff < *off) 272fa9e4066Sahrens return (error); 273fa9e4066Sahrens *off = noff; 274fa9e4066Sahrens return (error); 275fa9e4066Sahrens } 276fa9e4066Sahrens 277fa9e4066Sahrens /* ARGSUSED */ 278fa9e4066Sahrens static int 279fa9e4066Sahrens zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, 280da6c28aaSamw int *rvalp, caller_context_t *ct) 281fa9e4066Sahrens { 282fa9e4066Sahrens offset_t off; 283fa9e4066Sahrens int error; 284fa9e4066Sahrens zfsvfs_t *zfsvfs; 285f18faf3fSek znode_t *zp; 286fa9e4066Sahrens 287fa9e4066Sahrens switch (com) { 288ecb72030Sperrin case _FIOFFS: 289fa9e4066Sahrens return (zfs_sync(vp->v_vfsp, 0, cred)); 290fa9e4066Sahrens 291ea8dc4b6Seschrock /* 292ea8dc4b6Seschrock * The following two ioctls are used by bfu. 
Faking out, 293ea8dc4b6Seschrock * necessary to avoid bfu errors. 294ea8dc4b6Seschrock */ 295ecb72030Sperrin case _FIOGDIO: 296ecb72030Sperrin case _FIOSDIO: 297ea8dc4b6Seschrock return (0); 298ea8dc4b6Seschrock 299ecb72030Sperrin case _FIO_SEEK_DATA: 300ecb72030Sperrin case _FIO_SEEK_HOLE: 301fa9e4066Sahrens if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 302fa9e4066Sahrens return (EFAULT); 303fa9e4066Sahrens 304f18faf3fSek zp = VTOZ(vp); 305f18faf3fSek zfsvfs = zp->z_zfsvfs; 3063cb34c60Sahrens ZFS_ENTER(zfsvfs); 3073cb34c60Sahrens ZFS_VERIFY_ZP(zp); 308fa9e4066Sahrens 309fa9e4066Sahrens /* offset parameter is in/out */ 310fa9e4066Sahrens error = zfs_holey(vp, com, &off); 311fa9e4066Sahrens ZFS_EXIT(zfsvfs); 312fa9e4066Sahrens if (error) 313fa9e4066Sahrens return (error); 314fa9e4066Sahrens if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 315fa9e4066Sahrens return (EFAULT); 316fa9e4066Sahrens return (0); 317fa9e4066Sahrens } 318fa9e4066Sahrens return (ENOTTY); 319fa9e4066Sahrens } 320fa9e4066Sahrens 3210fab61baSJonathan W Adams /* 3220fab61baSJonathan W Adams * Utility functions to map and unmap a single physical page. These 3230fab61baSJonathan W Adams * are used to manage the mappable copies of ZFS file data, and therefore 3240fab61baSJonathan W Adams * do not update ref/mod bits. 3250fab61baSJonathan W Adams */ 3260fab61baSJonathan W Adams caddr_t 3270fab61baSJonathan W Adams zfs_map_page(page_t *pp, enum seg_rw rw) 3280fab61baSJonathan W Adams { 3290fab61baSJonathan W Adams if (kpm_enable) 3300fab61baSJonathan W Adams return (hat_kpm_mapin(pp, 0)); 3310fab61baSJonathan W Adams ASSERT(rw == S_READ || rw == S_WRITE); 3320fab61baSJonathan W Adams return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? 
PROT_WRITE : 0), 3330fab61baSJonathan W Adams (caddr_t)-1)); 3340fab61baSJonathan W Adams } 3350fab61baSJonathan W Adams 3360fab61baSJonathan W Adams void 3370fab61baSJonathan W Adams zfs_unmap_page(page_t *pp, caddr_t addr) 3380fab61baSJonathan W Adams { 3390fab61baSJonathan W Adams if (kpm_enable) { 3400fab61baSJonathan W Adams hat_kpm_mapout(pp, 0, addr); 3410fab61baSJonathan W Adams } else { 3420fab61baSJonathan W Adams ppmapout(addr); 3430fab61baSJonathan W Adams } 3440fab61baSJonathan W Adams } 3450fab61baSJonathan W Adams 346fa9e4066Sahrens /* 347fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data synchronized 348fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 349fa9e4066Sahrens * 350fa9e4066Sahrens * On Write: If we find a memory mapped page, we write to *both* 351fa9e4066Sahrens * the page and the dmu buffer. 352fa9e4066Sahrens */ 353ac05c741SMark Maybee static void 354ac05c741SMark Maybee update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) 355fa9e4066Sahrens { 356ac05c741SMark Maybee int64_t off; 357fa9e4066Sahrens 358fa9e4066Sahrens off = start & PAGEOFFSET; 359fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 360fa9e4066Sahrens page_t *pp; 361ac05c741SMark Maybee uint64_t nbytes = MIN(PAGESIZE - off, len); 362fa9e4066Sahrens 363fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 364fa9e4066Sahrens caddr_t va; 365fa9e4066Sahrens 3660fab61baSJonathan W Adams va = zfs_map_page(pp, S_WRITE); 3677bfdf011SNeil Perrin (void) dmu_read(os, oid, start+off, nbytes, va+off, 3687bfdf011SNeil Perrin DMU_READ_PREFETCH); 3690fab61baSJonathan W Adams zfs_unmap_page(pp, va); 370fa9e4066Sahrens page_unlock(pp); 371fa9e4066Sahrens } 372ac05c741SMark Maybee len -= nbytes; 373fa9e4066Sahrens off = 0; 374fa9e4066Sahrens } 375fa9e4066Sahrens } 376fa9e4066Sahrens 377fa9e4066Sahrens /* 378fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data 
synchronized 379fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 380fa9e4066Sahrens * 381fa9e4066Sahrens * On Read: We "read" preferentially from memory mapped pages, 382fa9e4066Sahrens * else we default from the dmu buffer. 383fa9e4066Sahrens * 384fa9e4066Sahrens * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 385fa9e4066Sahrens * the file is memory mapped. 386fa9e4066Sahrens */ 387fa9e4066Sahrens static int 388feb08c6bSbillm mappedread(vnode_t *vp, int nbytes, uio_t *uio) 389fa9e4066Sahrens { 390feb08c6bSbillm znode_t *zp = VTOZ(vp); 391feb08c6bSbillm objset_t *os = zp->z_zfsvfs->z_os; 392feb08c6bSbillm int64_t start, off; 393fa9e4066Sahrens int len = nbytes; 394fa9e4066Sahrens int error = 0; 395fa9e4066Sahrens 396fa9e4066Sahrens start = uio->uio_loffset; 397fa9e4066Sahrens off = start & PAGEOFFSET; 398fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 399fa9e4066Sahrens page_t *pp; 400feb08c6bSbillm uint64_t bytes = MIN(PAGESIZE - off, len); 401fa9e4066Sahrens 402fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 403fa9e4066Sahrens caddr_t va; 404fa9e4066Sahrens 4050fab61baSJonathan W Adams va = zfs_map_page(pp, S_READ); 406fa9e4066Sahrens error = uiomove(va + off, bytes, UIO_READ, uio); 4070fab61baSJonathan W Adams zfs_unmap_page(pp, va); 408fa9e4066Sahrens page_unlock(pp); 409fa9e4066Sahrens } else { 410feb08c6bSbillm error = dmu_read_uio(os, zp->z_id, uio, bytes); 411fa9e4066Sahrens } 412fa9e4066Sahrens len -= bytes; 413fa9e4066Sahrens off = 0; 414fa9e4066Sahrens if (error) 415fa9e4066Sahrens break; 416fa9e4066Sahrens } 417fa9e4066Sahrens return (error); 418fa9e4066Sahrens } 419fa9e4066Sahrens 420feb08c6bSbillm offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 421fa9e4066Sahrens 422fa9e4066Sahrens /* 423fa9e4066Sahrens * Read bytes from specified file into supplied buffer. 424fa9e4066Sahrens * 425fa9e4066Sahrens * IN: vp - vnode of file to be read from. 
426fa9e4066Sahrens * uio - structure supplying read location, range info, 427fa9e4066Sahrens * and return buffer. 428fa9e4066Sahrens * ioflag - SYNC flags; used to provide FRSYNC semantics. 429fa9e4066Sahrens * cr - credentials of caller. 430da6c28aaSamw * ct - caller context 431fa9e4066Sahrens * 432fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 433fa9e4066Sahrens * 434fa9e4066Sahrens * RETURN: 0 if success 435fa9e4066Sahrens * error code if failure 436fa9e4066Sahrens * 437fa9e4066Sahrens * Side Effects: 438fa9e4066Sahrens * vp - atime updated if byte count > 0 439fa9e4066Sahrens */ 440fa9e4066Sahrens /* ARGSUSED */ 441fa9e4066Sahrens static int 442fa9e4066Sahrens zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 443fa9e4066Sahrens { 444fa9e4066Sahrens znode_t *zp = VTOZ(vp); 445fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 446f18faf3fSek objset_t *os; 447feb08c6bSbillm ssize_t n, nbytes; 448feb08c6bSbillm int error; 449104e2ed7Sperrin rl_t *rl; 450fa9e4066Sahrens 4513cb34c60Sahrens ZFS_ENTER(zfsvfs); 4523cb34c60Sahrens ZFS_VERIFY_ZP(zp); 453f18faf3fSek os = zfsvfs->z_os; 454fa9e4066Sahrens 4550616c50eSmarks if (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) { 4560616c50eSmarks ZFS_EXIT(zfsvfs); 4570616c50eSmarks return (EACCES); 4580616c50eSmarks } 4590616c50eSmarks 460fa9e4066Sahrens /* 461fa9e4066Sahrens * Validate file offset 462fa9e4066Sahrens */ 463fa9e4066Sahrens if (uio->uio_loffset < (offset_t)0) { 464fa9e4066Sahrens ZFS_EXIT(zfsvfs); 465fa9e4066Sahrens return (EINVAL); 466fa9e4066Sahrens } 467fa9e4066Sahrens 468fa9e4066Sahrens /* 469fa9e4066Sahrens * Fasttrack empty reads 470fa9e4066Sahrens */ 471fa9e4066Sahrens if (uio->uio_resid == 0) { 472fa9e4066Sahrens ZFS_EXIT(zfsvfs); 473fa9e4066Sahrens return (0); 474fa9e4066Sahrens } 475fa9e4066Sahrens 476fa9e4066Sahrens /* 477104e2ed7Sperrin * Check for mandatory locks 478fa9e4066Sahrens */ 479fa9e4066Sahrens if (MANDMODE((mode_t)zp->z_phys->zp_mode)) { 
480fa9e4066Sahrens if (error = chklock(vp, FREAD, 481fa9e4066Sahrens uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 482fa9e4066Sahrens ZFS_EXIT(zfsvfs); 483fa9e4066Sahrens return (error); 484fa9e4066Sahrens } 485fa9e4066Sahrens } 486fa9e4066Sahrens 487fa9e4066Sahrens /* 488fa9e4066Sahrens * If we're in FRSYNC mode, sync out this znode before reading it. 489fa9e4066Sahrens */ 490b19a79ecSperrin if (ioflag & FRSYNC) 491b19a79ecSperrin zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 492fa9e4066Sahrens 493fa9e4066Sahrens /* 494104e2ed7Sperrin * Lock the range against changes. 495fa9e4066Sahrens */ 496104e2ed7Sperrin rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 497104e2ed7Sperrin 498fa9e4066Sahrens /* 499fa9e4066Sahrens * If we are reading past end-of-file we can skip 500fa9e4066Sahrens * to the end; but we might still need to set atime. 501fa9e4066Sahrens */ 502fa9e4066Sahrens if (uio->uio_loffset >= zp->z_phys->zp_size) { 503fa9e4066Sahrens error = 0; 504fa9e4066Sahrens goto out; 505fa9e4066Sahrens } 506fa9e4066Sahrens 507feb08c6bSbillm ASSERT(uio->uio_loffset < zp->z_phys->zp_size); 508feb08c6bSbillm n = MIN(uio->uio_resid, zp->z_phys->zp_size - uio->uio_loffset); 509feb08c6bSbillm 510feb08c6bSbillm while (n > 0) { 511feb08c6bSbillm nbytes = MIN(n, zfs_read_chunk_size - 512feb08c6bSbillm P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 513fa9e4066Sahrens 514feb08c6bSbillm if (vn_has_cached_data(vp)) 515feb08c6bSbillm error = mappedread(vp, nbytes, uio); 516feb08c6bSbillm else 517feb08c6bSbillm error = dmu_read_uio(os, zp->z_id, uio, nbytes); 518b87f3af3Sperrin if (error) { 519b87f3af3Sperrin /* convert checksum errors into IO errors */ 520b87f3af3Sperrin if (error == ECKSUM) 521b87f3af3Sperrin error = EIO; 522feb08c6bSbillm break; 523b87f3af3Sperrin } 524fa9e4066Sahrens 525feb08c6bSbillm n -= nbytes; 526fa9e4066Sahrens } 527feb08c6bSbillm 528fa9e4066Sahrens out: 529c5c6ffa0Smaybee zfs_range_unlock(rl); 530fa9e4066Sahrens 
531fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 532fa9e4066Sahrens ZFS_EXIT(zfsvfs); 533fa9e4066Sahrens return (error); 534fa9e4066Sahrens } 535fa9e4066Sahrens 536fa9e4066Sahrens /* 537fa9e4066Sahrens * Write the bytes to a file. 538fa9e4066Sahrens * 539fa9e4066Sahrens * IN: vp - vnode of file to be written to. 540fa9e4066Sahrens * uio - structure supplying write location, range info, 541fa9e4066Sahrens * and data buffer. 542fa9e4066Sahrens * ioflag - FAPPEND flag set if in append mode. 543fa9e4066Sahrens * cr - credentials of caller. 544da6c28aaSamw * ct - caller context (NFS/CIFS fem monitor only) 545fa9e4066Sahrens * 546fa9e4066Sahrens * OUT: uio - updated offset and range. 547fa9e4066Sahrens * 548fa9e4066Sahrens * RETURN: 0 if success 549fa9e4066Sahrens * error code if failure 550fa9e4066Sahrens * 551fa9e4066Sahrens * Timestamps: 552fa9e4066Sahrens * vp - ctime|mtime updated if byte count > 0 553fa9e4066Sahrens */ 554fa9e4066Sahrens /* ARGSUSED */ 555fa9e4066Sahrens static int 556fa9e4066Sahrens zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 557fa9e4066Sahrens { 558fa9e4066Sahrens znode_t *zp = VTOZ(vp); 559fa9e4066Sahrens rlim64_t limit = uio->uio_llimit; 560fa9e4066Sahrens ssize_t start_resid = uio->uio_resid; 561fa9e4066Sahrens ssize_t tx_bytes; 562fa9e4066Sahrens uint64_t end_size; 563fa9e4066Sahrens dmu_tx_t *tx; 564fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 565f18faf3fSek zilog_t *zilog; 566fa9e4066Sahrens offset_t woff; 567fa9e4066Sahrens ssize_t n, nbytes; 568104e2ed7Sperrin rl_t *rl; 569fa9e4066Sahrens int max_blksz = zfsvfs->z_max_blksz; 570c09193bfSmarks uint64_t pflags; 571104e2ed7Sperrin int error; 5722fdbea25SAleksandr Guzovskiy arc_buf_t *abuf; 573fa9e4066Sahrens 574fa9e4066Sahrens /* 575fa9e4066Sahrens * Fasttrack empty write 576fa9e4066Sahrens */ 577104e2ed7Sperrin n = start_resid; 578fa9e4066Sahrens if (n == 0) 579fa9e4066Sahrens return (0); 580fa9e4066Sahrens 581104e2ed7Sperrin if (limit == 
RLIM64_INFINITY || limit > MAXOFFSET_T) 582104e2ed7Sperrin limit = MAXOFFSET_T; 583104e2ed7Sperrin 5843cb34c60Sahrens ZFS_ENTER(zfsvfs); 5853cb34c60Sahrens ZFS_VERIFY_ZP(zp); 586c09193bfSmarks 587c09193bfSmarks /* 588c09193bfSmarks * If immutable or not appending then return EPERM 589c09193bfSmarks */ 590c09193bfSmarks pflags = zp->z_phys->zp_flags; 591c09193bfSmarks if ((pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 592c09193bfSmarks ((pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 593c09193bfSmarks (uio->uio_loffset < zp->z_phys->zp_size))) { 594c09193bfSmarks ZFS_EXIT(zfsvfs); 595c09193bfSmarks return (EPERM); 596c09193bfSmarks } 597c09193bfSmarks 598f18faf3fSek zilog = zfsvfs->z_log; 599fa9e4066Sahrens 60041865f27SWilliam Gorrell /* 60141865f27SWilliam Gorrell * Validate file offset 60241865f27SWilliam Gorrell */ 60341865f27SWilliam Gorrell woff = ioflag & FAPPEND ? zp->z_phys->zp_size : uio->uio_loffset; 60441865f27SWilliam Gorrell if (woff < 0) { 60541865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 60641865f27SWilliam Gorrell return (EINVAL); 60741865f27SWilliam Gorrell } 60841865f27SWilliam Gorrell 60941865f27SWilliam Gorrell /* 61041865f27SWilliam Gorrell * Check for mandatory locks before calling zfs_range_lock() 61141865f27SWilliam Gorrell * in order to prevent a deadlock with locks set via fcntl(). 61241865f27SWilliam Gorrell */ 61341865f27SWilliam Gorrell if (MANDMODE((mode_t)zp->z_phys->zp_mode) && 61441865f27SWilliam Gorrell (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 61541865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 61641865f27SWilliam Gorrell return (error); 61741865f27SWilliam Gorrell } 61841865f27SWilliam Gorrell 619fa9e4066Sahrens /* 620c5c6ffa0Smaybee * Pre-fault the pages to ensure slow (eg NFS) pages 621104e2ed7Sperrin * don't hold up txg. 622fa9e4066Sahrens */ 6236f5f1c63SDonghai Qiao uio_prefaultpages(n, uio); 624fa9e4066Sahrens 625fa9e4066Sahrens /* 626fa9e4066Sahrens * If in append mode, set the io offset pointer to eof. 
627fa9e4066Sahrens */ 628104e2ed7Sperrin if (ioflag & FAPPEND) { 629104e2ed7Sperrin /* 63041865f27SWilliam Gorrell * Obtain an appending range lock to guarantee file append 63141865f27SWilliam Gorrell * semantics. We reset the write offset once we have the lock. 632104e2ed7Sperrin */ 633104e2ed7Sperrin rl = zfs_range_lock(zp, 0, n, RL_APPEND); 63441865f27SWilliam Gorrell woff = rl->r_off; 635104e2ed7Sperrin if (rl->r_len == UINT64_MAX) { 63641865f27SWilliam Gorrell /* 63741865f27SWilliam Gorrell * We overlocked the file because this write will cause 63841865f27SWilliam Gorrell * the file block size to increase. 63941865f27SWilliam Gorrell * Note that zp_size cannot change with this lock held. 64041865f27SWilliam Gorrell */ 64141865f27SWilliam Gorrell woff = zp->z_phys->zp_size; 642104e2ed7Sperrin } 64341865f27SWilliam Gorrell uio->uio_loffset = woff; 644fa9e4066Sahrens } else { 645fa9e4066Sahrens /* 64641865f27SWilliam Gorrell * Note that if the file block size will change as a result of 64741865f27SWilliam Gorrell * this write, then this range lock will lock the entire file 64841865f27SWilliam Gorrell * so that we can re-write the block safely. 649fa9e4066Sahrens */ 650104e2ed7Sperrin rl = zfs_range_lock(zp, woff, n, RL_WRITER); 651fa9e4066Sahrens } 652fa9e4066Sahrens 653fa9e4066Sahrens if (woff >= limit) { 654feb08c6bSbillm zfs_range_unlock(rl); 655feb08c6bSbillm ZFS_EXIT(zfsvfs); 656feb08c6bSbillm return (EFBIG); 657fa9e4066Sahrens } 658fa9e4066Sahrens 659fa9e4066Sahrens if ((woff + n) > limit || woff > (limit - n)) 660fa9e4066Sahrens n = limit - woff; 661fa9e4066Sahrens 662feb08c6bSbillm end_size = MAX(zp->z_phys->zp_size, woff + n); 663fa9e4066Sahrens 664104e2ed7Sperrin /* 665feb08c6bSbillm * Write the file in reasonable size chunks. Each chunk is written 666feb08c6bSbillm * in a separate transaction; this keeps the intent log records small 667feb08c6bSbillm * and allows us to do more fine-grained space accounting. 
668104e2ed7Sperrin */ 669feb08c6bSbillm while (n > 0) { 6702fdbea25SAleksandr Guzovskiy abuf = NULL; 6712fdbea25SAleksandr Guzovskiy woff = uio->uio_loffset; 6722fdbea25SAleksandr Guzovskiy 6732fdbea25SAleksandr Guzovskiy again: 67414843421SMatthew Ahrens if (zfs_usergroup_overquota(zfsvfs, 67514843421SMatthew Ahrens B_FALSE, zp->z_phys->zp_uid) || 67614843421SMatthew Ahrens zfs_usergroup_overquota(zfsvfs, 67714843421SMatthew Ahrens B_TRUE, zp->z_phys->zp_gid)) { 6782fdbea25SAleksandr Guzovskiy if (abuf != NULL) 6792fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 68014843421SMatthew Ahrens error = EDQUOT; 68114843421SMatthew Ahrens break; 68214843421SMatthew Ahrens } 6832fdbea25SAleksandr Guzovskiy 6842fdbea25SAleksandr Guzovskiy /* 6852fdbea25SAleksandr Guzovskiy * If dmu_assign_arcbuf() is expected to execute with minimum 6862fdbea25SAleksandr Guzovskiy * overhead loan an arc buffer and copy user data to it before 6872fdbea25SAleksandr Guzovskiy * we enter a txg. This avoids holding a txg forever while we 6882fdbea25SAleksandr Guzovskiy * pagefault on a hanging NFS server mapping. 
6892fdbea25SAleksandr Guzovskiy */ 6902fdbea25SAleksandr Guzovskiy if (abuf == NULL && n >= max_blksz && 6912fdbea25SAleksandr Guzovskiy woff >= zp->z_phys->zp_size && 6922fdbea25SAleksandr Guzovskiy P2PHASE(woff, max_blksz) == 0 && 6932fdbea25SAleksandr Guzovskiy zp->z_blksz == max_blksz) { 6942fdbea25SAleksandr Guzovskiy size_t cbytes; 6952fdbea25SAleksandr Guzovskiy 6962fdbea25SAleksandr Guzovskiy abuf = dmu_request_arcbuf(zp->z_dbuf, max_blksz); 6972fdbea25SAleksandr Guzovskiy ASSERT(abuf != NULL); 6982fdbea25SAleksandr Guzovskiy ASSERT(arc_buf_size(abuf) == max_blksz); 6992fdbea25SAleksandr Guzovskiy if (error = uiocopy(abuf->b_data, max_blksz, 7002fdbea25SAleksandr Guzovskiy UIO_WRITE, uio, &cbytes)) { 7012fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 7022fdbea25SAleksandr Guzovskiy break; 7032fdbea25SAleksandr Guzovskiy } 7042fdbea25SAleksandr Guzovskiy ASSERT(cbytes == max_blksz); 7052fdbea25SAleksandr Guzovskiy } 7062fdbea25SAleksandr Guzovskiy 7072fdbea25SAleksandr Guzovskiy /* 7082fdbea25SAleksandr Guzovskiy * Start a transaction. 7092fdbea25SAleksandr Guzovskiy */ 710feb08c6bSbillm tx = dmu_tx_create(zfsvfs->z_os); 711feb08c6bSbillm dmu_tx_hold_bonus(tx, zp->z_id); 712feb08c6bSbillm dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 7131209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 714feb08c6bSbillm if (error) { 7151209a471SNeil Perrin if (error == ERESTART) { 716feb08c6bSbillm dmu_tx_wait(tx); 717feb08c6bSbillm dmu_tx_abort(tx); 7182fdbea25SAleksandr Guzovskiy goto again; 719feb08c6bSbillm } 720feb08c6bSbillm dmu_tx_abort(tx); 7212fdbea25SAleksandr Guzovskiy if (abuf != NULL) 7222fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 723feb08c6bSbillm break; 724feb08c6bSbillm } 725104e2ed7Sperrin 726feb08c6bSbillm /* 727feb08c6bSbillm * If zfs_range_lock() over-locked we grow the blocksize 728feb08c6bSbillm * and then reduce the lock range. 
This will only happen 729feb08c6bSbillm * on the first iteration since zfs_range_reduce() will 730feb08c6bSbillm * shrink down r_len to the appropriate size. 731feb08c6bSbillm */ 732feb08c6bSbillm if (rl->r_len == UINT64_MAX) { 733feb08c6bSbillm uint64_t new_blksz; 734feb08c6bSbillm 735feb08c6bSbillm if (zp->z_blksz > max_blksz) { 736feb08c6bSbillm ASSERT(!ISP2(zp->z_blksz)); 737feb08c6bSbillm new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 738feb08c6bSbillm } else { 739feb08c6bSbillm new_blksz = MIN(end_size, max_blksz); 740feb08c6bSbillm } 741feb08c6bSbillm zfs_grow_blocksize(zp, new_blksz, tx); 742feb08c6bSbillm zfs_range_reduce(rl, woff, n); 743fa9e4066Sahrens } 744fa9e4066Sahrens 745fa9e4066Sahrens /* 746fa9e4066Sahrens * XXX - should we really limit each write to z_max_blksz? 747fa9e4066Sahrens * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 748fa9e4066Sahrens */ 749fa9e4066Sahrens nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 750fa9e4066Sahrens 7512fdbea25SAleksandr Guzovskiy if (abuf == NULL) { 7522fdbea25SAleksandr Guzovskiy tx_bytes = uio->uio_resid; 7532fdbea25SAleksandr Guzovskiy error = dmu_write_uio(zfsvfs->z_os, zp->z_id, uio, 7542fdbea25SAleksandr Guzovskiy nbytes, tx); 7552fdbea25SAleksandr Guzovskiy tx_bytes -= uio->uio_resid; 7562fdbea25SAleksandr Guzovskiy } else { 7572fdbea25SAleksandr Guzovskiy tx_bytes = nbytes; 7582fdbea25SAleksandr Guzovskiy ASSERT(tx_bytes == max_blksz); 7592fdbea25SAleksandr Guzovskiy dmu_assign_arcbuf(zp->z_dbuf, woff, abuf, tx); 7602fdbea25SAleksandr Guzovskiy ASSERT(tx_bytes <= uio->uio_resid); 7612fdbea25SAleksandr Guzovskiy uioskip(uio, tx_bytes); 7622fdbea25SAleksandr Guzovskiy } 7632fdbea25SAleksandr Guzovskiy if (tx_bytes && vn_has_cached_data(vp)) { 764ac05c741SMark Maybee update_pages(vp, woff, 765ac05c741SMark Maybee tx_bytes, zfsvfs->z_os, zp->z_id); 7662fdbea25SAleksandr Guzovskiy } 767fa9e4066Sahrens 768feb08c6bSbillm /* 769feb08c6bSbillm * If we made no progress, we're done. 
If we made even 770feb08c6bSbillm * partial progress, update the znode and ZIL accordingly. 771feb08c6bSbillm */ 772feb08c6bSbillm if (tx_bytes == 0) { 773af2c4821Smaybee dmu_tx_commit(tx); 774feb08c6bSbillm ASSERT(error != 0); 775fa9e4066Sahrens break; 776fa9e4066Sahrens } 777fa9e4066Sahrens 778169cdae2Smarks /* 779169cdae2Smarks * Clear Set-UID/Set-GID bits on successful write if not 780169cdae2Smarks * privileged and at least one of the excute bits is set. 781169cdae2Smarks * 782169cdae2Smarks * It would be nice to to this after all writes have 783169cdae2Smarks * been done, but that would still expose the ISUID/ISGID 784169cdae2Smarks * to another app after the partial write is committed. 785da6c28aaSamw * 786da6c28aaSamw * Note: we don't call zfs_fuid_map_id() here because 787da6c28aaSamw * user 0 is not an ephemeral uid. 788169cdae2Smarks */ 789169cdae2Smarks mutex_enter(&zp->z_acl_lock); 790169cdae2Smarks if ((zp->z_phys->zp_mode & (S_IXUSR | (S_IXUSR >> 3) | 791169cdae2Smarks (S_IXUSR >> 6))) != 0 && 792169cdae2Smarks (zp->z_phys->zp_mode & (S_ISUID | S_ISGID)) != 0 && 793169cdae2Smarks secpolicy_vnode_setid_retain(cr, 794169cdae2Smarks (zp->z_phys->zp_mode & S_ISUID) != 0 && 795169cdae2Smarks zp->z_phys->zp_uid == 0) != 0) { 796ecb72030Sperrin zp->z_phys->zp_mode &= ~(S_ISUID | S_ISGID); 797169cdae2Smarks } 798169cdae2Smarks mutex_exit(&zp->z_acl_lock); 799169cdae2Smarks 800fa9e4066Sahrens /* 801feb08c6bSbillm * Update time stamp. NOTE: This marks the bonus buffer as 802feb08c6bSbillm * dirty, so we don't have to do it again for zp_size. 803fa9e4066Sahrens */ 804fa9e4066Sahrens zfs_time_stamper(zp, CONTENT_MODIFIED, tx); 805fa9e4066Sahrens 806fa9e4066Sahrens /* 807feb08c6bSbillm * Update the file size (zp_size) if it has changed; 808feb08c6bSbillm * account for possible concurrent updates. 
809fa9e4066Sahrens */ 810feb08c6bSbillm while ((end_size = zp->z_phys->zp_size) < uio->uio_loffset) 811fa9e4066Sahrens (void) atomic_cas_64(&zp->z_phys->zp_size, end_size, 812fa9e4066Sahrens uio->uio_loffset); 813feb08c6bSbillm zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 814feb08c6bSbillm dmu_tx_commit(tx); 815fa9e4066Sahrens 816feb08c6bSbillm if (error != 0) 817feb08c6bSbillm break; 818feb08c6bSbillm ASSERT(tx_bytes == nbytes); 819feb08c6bSbillm n -= nbytes; 820feb08c6bSbillm } 821fa9e4066Sahrens 822c5c6ffa0Smaybee zfs_range_unlock(rl); 823fa9e4066Sahrens 824fa9e4066Sahrens /* 825fa9e4066Sahrens * If we're in replay mode, or we made no progress, return error. 826fa9e4066Sahrens * Otherwise, it's at least a partial write, so it's successful. 827fa9e4066Sahrens */ 8281209a471SNeil Perrin if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 829fa9e4066Sahrens ZFS_EXIT(zfsvfs); 830fa9e4066Sahrens return (error); 831fa9e4066Sahrens } 832fa9e4066Sahrens 833b19a79ecSperrin if (ioflag & (FSYNC | FDSYNC)) 834b19a79ecSperrin zil_commit(zilog, zp->z_last_itx, zp->z_id); 835fa9e4066Sahrens 836fa9e4066Sahrens ZFS_EXIT(zfsvfs); 837fa9e4066Sahrens return (0); 838fa9e4066Sahrens } 839fa9e4066Sahrens 840c5c6ffa0Smaybee void 841b24ab676SJeff Bonwick zfs_get_done(zgd_t *zgd, int error) 842c5c6ffa0Smaybee { 843b24ab676SJeff Bonwick znode_t *zp = zgd->zgd_private; 844b24ab676SJeff Bonwick objset_t *os = zp->z_zfsvfs->z_os; 845b24ab676SJeff Bonwick 846b24ab676SJeff Bonwick if (zgd->zgd_db) 847b24ab676SJeff Bonwick dmu_buf_rele(zgd->zgd_db, zgd); 848b24ab676SJeff Bonwick 849b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 850c5c6ffa0Smaybee 8519d3574bfSNeil Perrin /* 8529d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 8539d3574bfSNeil Perrin * txg stopped from syncing. 
8549d3574bfSNeil Perrin */ 855b24ab676SJeff Bonwick VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 856b24ab676SJeff Bonwick 857b24ab676SJeff Bonwick if (error == 0 && zgd->zgd_bp) 858b24ab676SJeff Bonwick zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 859b24ab676SJeff Bonwick 86067bd71c6Sperrin kmem_free(zgd, sizeof (zgd_t)); 861c5c6ffa0Smaybee } 862c5c6ffa0Smaybee 863c87b8fc5SMark J Musante #ifdef DEBUG 864c87b8fc5SMark J Musante static int zil_fault_io = 0; 865c87b8fc5SMark J Musante #endif 866c87b8fc5SMark J Musante 867fa9e4066Sahrens /* 868fa9e4066Sahrens * Get data to generate a TX_WRITE intent log record. 869fa9e4066Sahrens */ 870fa9e4066Sahrens int 871c5c6ffa0Smaybee zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 872fa9e4066Sahrens { 873fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 874fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 875fa9e4066Sahrens znode_t *zp; 876b24ab676SJeff Bonwick uint64_t object = lr->lr_foid; 877b24ab676SJeff Bonwick uint64_t offset = lr->lr_offset; 878b24ab676SJeff Bonwick uint64_t size = lr->lr_length; 879b24ab676SJeff Bonwick blkptr_t *bp = &lr->lr_blkptr; 880c5c6ffa0Smaybee dmu_buf_t *db; 88167bd71c6Sperrin zgd_t *zgd; 882fa9e4066Sahrens int error = 0; 883fa9e4066Sahrens 884b24ab676SJeff Bonwick ASSERT(zio != NULL); 885b24ab676SJeff Bonwick ASSERT(size != 0); 886fa9e4066Sahrens 887fa9e4066Sahrens /* 888104e2ed7Sperrin * Nothing to do if the file has been removed 889fa9e4066Sahrens */ 890b24ab676SJeff Bonwick if (zfs_zget(zfsvfs, object, &zp) != 0) 891fa9e4066Sahrens return (ENOENT); 892893a6d32Sahrens if (zp->z_unlinked) { 8939d3574bfSNeil Perrin /* 8949d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 8959d3574bfSNeil Perrin * txg stopped from syncing. 
8969d3574bfSNeil Perrin */ 8979d3574bfSNeil Perrin VN_RELE_ASYNC(ZTOV(zp), 8989d3574bfSNeil Perrin dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 899fa9e4066Sahrens return (ENOENT); 900fa9e4066Sahrens } 901fa9e4066Sahrens 902b24ab676SJeff Bonwick zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 903b24ab676SJeff Bonwick zgd->zgd_zilog = zfsvfs->z_log; 904b24ab676SJeff Bonwick zgd->zgd_private = zp; 905b24ab676SJeff Bonwick 906fa9e4066Sahrens /* 907fa9e4066Sahrens * Write records come in two flavors: immediate and indirect. 908fa9e4066Sahrens * For small writes it's cheaper to store the data with the 909fa9e4066Sahrens * log record (immediate); for large writes it's cheaper to 910fa9e4066Sahrens * sync the data and get a pointer to it (indirect) so that 911fa9e4066Sahrens * we don't have to write the data twice. 912fa9e4066Sahrens */ 913104e2ed7Sperrin if (buf != NULL) { /* immediate write */ 914b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 915104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 916b24ab676SJeff Bonwick if (offset >= zp->z_phys->zp_size) { 917104e2ed7Sperrin error = ENOENT; 918b24ab676SJeff Bonwick } else { 919b24ab676SJeff Bonwick error = dmu_read(os, object, offset, size, buf, 920b24ab676SJeff Bonwick DMU_READ_NO_PREFETCH); 921104e2ed7Sperrin } 922b24ab676SJeff Bonwick ASSERT(error == 0 || error == ENOENT); 923104e2ed7Sperrin } else { /* indirect write */ 924fa9e4066Sahrens /* 925104e2ed7Sperrin * Have to lock the whole block to ensure when it's 926104e2ed7Sperrin * written out and it's checksum is being calculated 927104e2ed7Sperrin * that no one can change the data. We need to re-check 928104e2ed7Sperrin * blocksize after we get the lock in case it's changed! 929fa9e4066Sahrens */ 930104e2ed7Sperrin for (;;) { 931b24ab676SJeff Bonwick uint64_t blkoff; 932b24ab676SJeff Bonwick size = zp->z_blksz; 933dfe73b3dSJeff Bonwick blkoff = ISP2(size) ? 
P2PHASE(offset, size) : offset; 934b24ab676SJeff Bonwick offset -= blkoff; 935b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, 936b24ab676SJeff Bonwick RL_READER); 937b24ab676SJeff Bonwick if (zp->z_blksz == size) 938104e2ed7Sperrin break; 939b24ab676SJeff Bonwick offset += blkoff; 940b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 941104e2ed7Sperrin } 942104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 943dfe73b3dSJeff Bonwick if (lr->lr_offset >= zp->z_phys->zp_size) 944104e2ed7Sperrin error = ENOENT; 945c87b8fc5SMark J Musante #ifdef DEBUG 946c87b8fc5SMark J Musante if (zil_fault_io) { 947c87b8fc5SMark J Musante error = EIO; 948c87b8fc5SMark J Musante zil_fault_io = 0; 949c87b8fc5SMark J Musante } 950c87b8fc5SMark J Musante #endif 951b24ab676SJeff Bonwick if (error == 0) 952b24ab676SJeff Bonwick error = dmu_buf_hold(os, object, offset, zgd, &db); 953c87b8fc5SMark J Musante 954975c32a0SNeil Perrin if (error == 0) { 955b24ab676SJeff Bonwick zgd->zgd_db = db; 956b24ab676SJeff Bonwick zgd->zgd_bp = bp; 957b24ab676SJeff Bonwick 958b24ab676SJeff Bonwick ASSERT(db->db_offset == offset); 959b24ab676SJeff Bonwick ASSERT(db->db_size == size); 960b24ab676SJeff Bonwick 961b24ab676SJeff Bonwick error = dmu_sync(zio, lr->lr_common.lrc_txg, 962b24ab676SJeff Bonwick zfs_get_done, zgd); 963b24ab676SJeff Bonwick ASSERT(error || lr->lr_length <= zp->z_blksz); 964b24ab676SJeff Bonwick 965975c32a0SNeil Perrin /* 966b24ab676SJeff Bonwick * On success, we need to wait for the write I/O 967b24ab676SJeff Bonwick * initiated by dmu_sync() to complete before we can 968b24ab676SJeff Bonwick * release this dbuf. We will finish everything up 969b24ab676SJeff Bonwick * in the zfs_get_done() callback. 
970975c32a0SNeil Perrin */ 971b24ab676SJeff Bonwick if (error == 0) 972b24ab676SJeff Bonwick return (0); 973975c32a0SNeil Perrin 974b24ab676SJeff Bonwick if (error == EALREADY) { 975b24ab676SJeff Bonwick lr->lr_common.lrc_txtype = TX_WRITE2; 976b24ab676SJeff Bonwick error = 0; 977b24ab676SJeff Bonwick } 978975c32a0SNeil Perrin } 979fa9e4066Sahrens } 980b24ab676SJeff Bonwick 981b24ab676SJeff Bonwick zfs_get_done(zgd, error); 982b24ab676SJeff Bonwick 983fa9e4066Sahrens return (error); 984fa9e4066Sahrens } 985fa9e4066Sahrens 986fa9e4066Sahrens /*ARGSUSED*/ 987fa9e4066Sahrens static int 988da6c28aaSamw zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 989da6c28aaSamw caller_context_t *ct) 990fa9e4066Sahrens { 991fa9e4066Sahrens znode_t *zp = VTOZ(vp); 992fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 993fa9e4066Sahrens int error; 994fa9e4066Sahrens 9953cb34c60Sahrens ZFS_ENTER(zfsvfs); 9963cb34c60Sahrens ZFS_VERIFY_ZP(zp); 997da6c28aaSamw 998da6c28aaSamw if (flag & V_ACE_MASK) 999da6c28aaSamw error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1000da6c28aaSamw else 1001da6c28aaSamw error = zfs_zaccess_rwx(zp, mode, flag, cr); 1002da6c28aaSamw 1003fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1004fa9e4066Sahrens return (error); 1005fa9e4066Sahrens } 1006fa9e4066Sahrens 1007d47621a4STim Haley /* 1008d47621a4STim Haley * If vnode is for a device return a specfs vnode instead. 
1009d47621a4STim Haley */ 1010d47621a4STim Haley static int 1011d47621a4STim Haley specvp_check(vnode_t **vpp, cred_t *cr) 1012d47621a4STim Haley { 1013d47621a4STim Haley int error = 0; 1014d47621a4STim Haley 1015d47621a4STim Haley if (IS_DEVVP(*vpp)) { 1016d47621a4STim Haley struct vnode *svp; 1017d47621a4STim Haley 1018d47621a4STim Haley svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1019d47621a4STim Haley VN_RELE(*vpp); 1020d47621a4STim Haley if (svp == NULL) 1021d47621a4STim Haley error = ENOSYS; 1022d47621a4STim Haley *vpp = svp; 1023d47621a4STim Haley } 1024d47621a4STim Haley return (error); 1025d47621a4STim Haley } 1026d47621a4STim Haley 1027d47621a4STim Haley 1028fa9e4066Sahrens /* 1029fa9e4066Sahrens * Lookup an entry in a directory, or an extended attribute directory. 1030fa9e4066Sahrens * If it exists, return a held vnode reference for it. 1031fa9e4066Sahrens * 1032fa9e4066Sahrens * IN: dvp - vnode of directory to search. 1033fa9e4066Sahrens * nm - name of entry to lookup. 1034fa9e4066Sahrens * pnp - full pathname to lookup [UNUSED]. 1035fa9e4066Sahrens * flags - LOOKUP_XATTR set if looking for an attribute. 1036fa9e4066Sahrens * rdir - root directory vnode [UNUSED]. 1037fa9e4066Sahrens * cr - credentials of caller. 1038da6c28aaSamw * ct - caller context 1039da6c28aaSamw * direntflags - directory lookup flags 1040da6c28aaSamw * realpnp - returned pathname. 1041fa9e4066Sahrens * 1042fa9e4066Sahrens * OUT: vpp - vnode of located entry, NULL if not found. 
1043fa9e4066Sahrens * 1044fa9e4066Sahrens * RETURN: 0 if success 1045fa9e4066Sahrens * error code if failure 1046fa9e4066Sahrens * 1047fa9e4066Sahrens * Timestamps: 1048fa9e4066Sahrens * NA 1049fa9e4066Sahrens */ 1050fa9e4066Sahrens /* ARGSUSED */ 1051fa9e4066Sahrens static int 1052fa9e4066Sahrens zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 1053da6c28aaSamw int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 1054da6c28aaSamw int *direntflags, pathname_t *realpnp) 1055fa9e4066Sahrens { 1056fa9e4066Sahrens znode_t *zdp = VTOZ(dvp); 1057fa9e4066Sahrens zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1058d47621a4STim Haley int error = 0; 1059d47621a4STim Haley 1060d47621a4STim Haley /* fast path */ 1061d47621a4STim Haley if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1062d47621a4STim Haley 1063d47621a4STim Haley if (dvp->v_type != VDIR) { 1064d47621a4STim Haley return (ENOTDIR); 1065d47621a4STim Haley } else if (zdp->z_dbuf == NULL) { 1066d47621a4STim Haley return (EIO); 1067d47621a4STim Haley } 1068d47621a4STim Haley 1069d47621a4STim Haley if (nm[0] == 0 || (nm[0] == '.' 
&& nm[1] == '\0')) { 1070d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1071d47621a4STim Haley if (!error) { 1072d47621a4STim Haley *vpp = dvp; 1073d47621a4STim Haley VN_HOLD(*vpp); 1074d47621a4STim Haley return (0); 1075d47621a4STim Haley } 1076d47621a4STim Haley return (error); 1077d47621a4STim Haley } else { 1078d47621a4STim Haley vnode_t *tvp = dnlc_lookup(dvp, nm); 1079d47621a4STim Haley 1080d47621a4STim Haley if (tvp) { 1081d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1082d47621a4STim Haley if (error) { 1083d47621a4STim Haley VN_RELE(tvp); 1084d47621a4STim Haley return (error); 1085d47621a4STim Haley } 1086d47621a4STim Haley if (tvp == DNLC_NO_VNODE) { 1087d47621a4STim Haley VN_RELE(tvp); 1088d47621a4STim Haley return (ENOENT); 1089d47621a4STim Haley } else { 1090d47621a4STim Haley *vpp = tvp; 1091d47621a4STim Haley return (specvp_check(vpp, cr)); 1092d47621a4STim Haley } 1093d47621a4STim Haley } 1094d47621a4STim Haley } 1095d47621a4STim Haley } 1096d47621a4STim Haley 1097d47621a4STim Haley DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1098fa9e4066Sahrens 10993cb34c60Sahrens ZFS_ENTER(zfsvfs); 11003cb34c60Sahrens ZFS_VERIFY_ZP(zdp); 1101fa9e4066Sahrens 1102fa9e4066Sahrens *vpp = NULL; 1103fa9e4066Sahrens 1104fa9e4066Sahrens if (flags & LOOKUP_XATTR) { 11057b55fa8eSck /* 11067b55fa8eSck * If the xattr property is off, refuse the lookup request. 11077b55fa8eSck */ 11087b55fa8eSck if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 11097b55fa8eSck ZFS_EXIT(zfsvfs); 11107b55fa8eSck return (EINVAL); 11117b55fa8eSck } 11127b55fa8eSck 1113fa9e4066Sahrens /* 1114fa9e4066Sahrens * We don't allow recursive attributes.. 1115fa9e4066Sahrens * Maybe someday we will. 
1116fa9e4066Sahrens */ 1117fa9e4066Sahrens if (zdp->z_phys->zp_flags & ZFS_XATTR) { 1118fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1119fa9e4066Sahrens return (EINVAL); 1120fa9e4066Sahrens } 1121fa9e4066Sahrens 11223f063a9dSck if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1123fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1124fa9e4066Sahrens return (error); 1125fa9e4066Sahrens } 1126fa9e4066Sahrens 1127fa9e4066Sahrens /* 1128fa9e4066Sahrens * Do we have permission to get into attribute directory? 1129fa9e4066Sahrens */ 1130fa9e4066Sahrens 1131da6c28aaSamw if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1132da6c28aaSamw B_FALSE, cr)) { 1133fa9e4066Sahrens VN_RELE(*vpp); 1134da6c28aaSamw *vpp = NULL; 1135fa9e4066Sahrens } 1136fa9e4066Sahrens 1137fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1138fa9e4066Sahrens return (error); 1139fa9e4066Sahrens } 1140fa9e4066Sahrens 11410f2dc02eSek if (dvp->v_type != VDIR) { 11420f2dc02eSek ZFS_EXIT(zfsvfs); 1143736b9155Smarks return (ENOTDIR); 11440f2dc02eSek } 1145736b9155Smarks 1146fa9e4066Sahrens /* 1147fa9e4066Sahrens * Check accessibility of directory. 1148fa9e4066Sahrens */ 1149fa9e4066Sahrens 1150da6c28aaSamw if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1151fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1152fa9e4066Sahrens return (error); 1153fa9e4066Sahrens } 1154fa9e4066Sahrens 1155de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1156da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1157da6c28aaSamw ZFS_EXIT(zfsvfs); 1158da6c28aaSamw return (EILSEQ); 1159da6c28aaSamw } 1160fa9e4066Sahrens 1161da6c28aaSamw error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1162d47621a4STim Haley if (error == 0) 1163d47621a4STim Haley error = specvp_check(vpp, cr); 1164fa9e4066Sahrens 1165fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1166fa9e4066Sahrens return (error); 1167fa9e4066Sahrens } 1168fa9e4066Sahrens 1169fa9e4066Sahrens /* 1170fa9e4066Sahrens * Attempt to create a new entry in a directory. 
If the entry 1171fa9e4066Sahrens * already exists, truncate the file if permissible, else return 1172fa9e4066Sahrens * an error. Return the vp of the created or trunc'd file. 1173fa9e4066Sahrens * 1174fa9e4066Sahrens * IN: dvp - vnode of directory to put new file entry in. 1175fa9e4066Sahrens * name - name of new file entry. 1176fa9e4066Sahrens * vap - attributes of new file. 1177fa9e4066Sahrens * excl - flag indicating exclusive or non-exclusive mode. 1178fa9e4066Sahrens * mode - mode to open file with. 1179fa9e4066Sahrens * cr - credentials of caller. 1180fa9e4066Sahrens * flag - large file flag [UNUSED]. 1181da6c28aaSamw * ct - caller context 1182da6c28aaSamw * vsecp - ACL to be set 1183fa9e4066Sahrens * 1184fa9e4066Sahrens * OUT: vpp - vnode of created or trunc'd entry. 1185fa9e4066Sahrens * 1186fa9e4066Sahrens * RETURN: 0 if success 1187fa9e4066Sahrens * error code if failure 1188fa9e4066Sahrens * 1189fa9e4066Sahrens * Timestamps: 1190fa9e4066Sahrens * dvp - ctime|mtime updated if new entry created 1191fa9e4066Sahrens * vp - ctime|mtime always, atime if new 1192fa9e4066Sahrens */ 1193da6c28aaSamw 1194fa9e4066Sahrens /* ARGSUSED */ 1195fa9e4066Sahrens static int 1196fa9e4066Sahrens zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl, 1197da6c28aaSamw int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct, 1198da6c28aaSamw vsecattr_t *vsecp) 1199fa9e4066Sahrens { 1200fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1201fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1202f18faf3fSek zilog_t *zilog; 1203f18faf3fSek objset_t *os; 1204fa9e4066Sahrens zfs_dirlock_t *dl; 1205fa9e4066Sahrens dmu_tx_t *tx; 1206fa9e4066Sahrens int error; 1207c1ce5987SMark Shellenbaum ksid_t *ksid; 1208c1ce5987SMark Shellenbaum uid_t uid; 1209c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 121089459e17SMark Shellenbaum zfs_acl_ids_t acl_ids; 121189459e17SMark Shellenbaum boolean_t fuid_dirtied; 1212da6c28aaSamw 1213da6c28aaSamw /* 1214da6c28aaSamw * If we 
have an ephemeral id, ACL, or XVATTR then 1215da6c28aaSamw * make sure file system is at proper version 1216da6c28aaSamw */ 1217da6c28aaSamw 1218c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1219c1ce5987SMark Shellenbaum if (ksid) 1220c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1221c1ce5987SMark Shellenbaum else 1222c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1223c1ce5987SMark Shellenbaum 1224da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1225da6c28aaSamw (vsecp || (vap->va_mask & AT_XVATTR) || 1226c1ce5987SMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1227da6c28aaSamw return (EINVAL); 1228fa9e4066Sahrens 12293cb34c60Sahrens ZFS_ENTER(zfsvfs); 12303cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1231f18faf3fSek os = zfsvfs->z_os; 1232f18faf3fSek zilog = zfsvfs->z_log; 1233fa9e4066Sahrens 1234de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1235da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1236da6c28aaSamw ZFS_EXIT(zfsvfs); 1237da6c28aaSamw return (EILSEQ); 1238da6c28aaSamw } 1239da6c28aaSamw 1240da6c28aaSamw if (vap->va_mask & AT_XVATTR) { 1241da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1242da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1243da6c28aaSamw ZFS_EXIT(zfsvfs); 1244da6c28aaSamw return (error); 1245da6c28aaSamw } 1246da6c28aaSamw } 1247fa9e4066Sahrens top: 1248fa9e4066Sahrens *vpp = NULL; 1249fa9e4066Sahrens 1250fa9e4066Sahrens if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr)) 1251fa9e4066Sahrens vap->va_mode &= ~VSVTX; 1252fa9e4066Sahrens 1253fa9e4066Sahrens if (*name == '\0') { 1254fa9e4066Sahrens /* 1255fa9e4066Sahrens * Null component name refers to the directory itself. 
1256fa9e4066Sahrens */ 1257fa9e4066Sahrens VN_HOLD(dvp); 1258fa9e4066Sahrens zp = dzp; 1259fa9e4066Sahrens dl = NULL; 1260fa9e4066Sahrens error = 0; 1261fa9e4066Sahrens } else { 1262fa9e4066Sahrens /* possible VN_HOLD(zp) */ 1263da6c28aaSamw int zflg = 0; 1264da6c28aaSamw 1265da6c28aaSamw if (flag & FIGNORECASE) 1266da6c28aaSamw zflg |= ZCILOOK; 1267da6c28aaSamw 1268da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1269da6c28aaSamw NULL, NULL); 1270da6c28aaSamw if (error) { 1271fa9e4066Sahrens if (strcmp(name, "..") == 0) 1272fa9e4066Sahrens error = EISDIR; 1273fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1274fa9e4066Sahrens return (error); 1275fa9e4066Sahrens } 1276fa9e4066Sahrens } 1277fa9e4066Sahrens if (zp == NULL) { 1278da6c28aaSamw uint64_t txtype; 1279da6c28aaSamw 1280fa9e4066Sahrens /* 1281fa9e4066Sahrens * Create a new file object and update the directory 1282fa9e4066Sahrens * to reference it. 1283fa9e4066Sahrens */ 1284da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 1285fa9e4066Sahrens goto out; 1286fa9e4066Sahrens } 1287fa9e4066Sahrens 1288fa9e4066Sahrens /* 1289fa9e4066Sahrens * We only support the creation of regular files in 1290fa9e4066Sahrens * extended attribute directories. 
1291fa9e4066Sahrens */ 1292fa9e4066Sahrens if ((dzp->z_phys->zp_flags & ZFS_XATTR) && 1293fa9e4066Sahrens (vap->va_type != VREG)) { 1294fa9e4066Sahrens error = EINVAL; 1295fa9e4066Sahrens goto out; 1296fa9e4066Sahrens } 1297fa9e4066Sahrens 129889459e17SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, 129989459e17SMark Shellenbaum &acl_ids)) != 0) 130089459e17SMark Shellenbaum goto out; 130114843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 13024929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 130314843421SMatthew Ahrens error = EDQUOT; 130414843421SMatthew Ahrens goto out; 130514843421SMatthew Ahrens } 130689459e17SMark Shellenbaum 1307fa9e4066Sahrens tx = dmu_tx_create(os); 1308fa9e4066Sahrens dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 130989459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 131014843421SMatthew Ahrens if (fuid_dirtied) 131114843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 1312fa9e4066Sahrens dmu_tx_hold_bonus(tx, dzp->z_id); 1313ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 131489459e17SMark Shellenbaum if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1315fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1316fa9e4066Sahrens 0, SPA_MAXBLOCKSIZE); 1317da6c28aaSamw } 13181209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 1319fa9e4066Sahrens if (error) { 132089459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1321fa9e4066Sahrens zfs_dirent_unlock(dl); 13221209a471SNeil Perrin if (error == ERESTART) { 13238a2f1b91Sahrens dmu_tx_wait(tx); 13248a2f1b91Sahrens dmu_tx_abort(tx); 1325fa9e4066Sahrens goto top; 1326fa9e4066Sahrens } 13278a2f1b91Sahrens dmu_tx_abort(tx); 1328fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1329fa9e4066Sahrens return (error); 1330fa9e4066Sahrens } 133189459e17SMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids); 133289459e17SMark Shellenbaum 133389459e17SMark Shellenbaum if (fuid_dirtied) 133489459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 
133589459e17SMark Shellenbaum 1336fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 133789459e17SMark Shellenbaum 1338da6c28aaSamw txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1339da6c28aaSamw if (flag & FIGNORECASE) 1340da6c28aaSamw txtype |= TX_CI; 1341da6c28aaSamw zfs_log_create(zilog, tx, txtype, dzp, zp, name, 134289459e17SMark Shellenbaum vsecp, acl_ids.z_fuidp, vap); 134389459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1344fa9e4066Sahrens dmu_tx_commit(tx); 1345fa9e4066Sahrens } else { 1346da6c28aaSamw int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1347da6c28aaSamw 1348fa9e4066Sahrens /* 1349fa9e4066Sahrens * A directory entry already exists for this name. 1350fa9e4066Sahrens */ 1351fa9e4066Sahrens /* 1352fa9e4066Sahrens * Can't truncate an existing file if in exclusive mode. 1353fa9e4066Sahrens */ 1354fa9e4066Sahrens if (excl == EXCL) { 1355fa9e4066Sahrens error = EEXIST; 1356fa9e4066Sahrens goto out; 1357fa9e4066Sahrens } 1358fa9e4066Sahrens /* 1359fa9e4066Sahrens * Can't open a directory for writing. 1360fa9e4066Sahrens */ 1361fa9e4066Sahrens if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1362fa9e4066Sahrens error = EISDIR; 1363fa9e4066Sahrens goto out; 1364fa9e4066Sahrens } 1365fa9e4066Sahrens /* 1366fa9e4066Sahrens * Verify requested access to file. 1367fa9e4066Sahrens */ 1368da6c28aaSamw if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1369fa9e4066Sahrens goto out; 1370fa9e4066Sahrens } 1371fa9e4066Sahrens 1372fa9e4066Sahrens mutex_enter(&dzp->z_lock); 1373fa9e4066Sahrens dzp->z_seq++; 1374fa9e4066Sahrens mutex_exit(&dzp->z_lock); 1375fa9e4066Sahrens 13765730cc9aSmaybee /* 13775730cc9aSmaybee * Truncate regular files if requested. 
13785730cc9aSmaybee */ 13795730cc9aSmaybee if ((ZTOV(zp)->v_type == VREG) && 1380fa9e4066Sahrens (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1381cdb0ab79Smaybee /* we can't hold any locks when calling zfs_freesp() */ 1382cdb0ab79Smaybee zfs_dirent_unlock(dl); 1383cdb0ab79Smaybee dl = NULL; 13845730cc9aSmaybee error = zfs_freesp(zp, 0, 0, mode, TRUE); 1385df2381bfSpraks if (error == 0) { 1386da6c28aaSamw vnevent_create(ZTOV(zp), ct); 1387df2381bfSpraks } 1388fa9e4066Sahrens } 1389fa9e4066Sahrens } 1390fa9e4066Sahrens out: 1391fa9e4066Sahrens 1392fa9e4066Sahrens if (dl) 1393fa9e4066Sahrens zfs_dirent_unlock(dl); 1394fa9e4066Sahrens 1395fa9e4066Sahrens if (error) { 1396fa9e4066Sahrens if (zp) 1397fa9e4066Sahrens VN_RELE(ZTOV(zp)); 1398fa9e4066Sahrens } else { 1399fa9e4066Sahrens *vpp = ZTOV(zp); 1400d47621a4STim Haley error = specvp_check(vpp, cr); 1401fa9e4066Sahrens } 1402fa9e4066Sahrens 1403fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1404fa9e4066Sahrens return (error); 1405fa9e4066Sahrens } 1406fa9e4066Sahrens 1407fa9e4066Sahrens /* 1408fa9e4066Sahrens * Remove an entry from a directory. 1409fa9e4066Sahrens * 1410fa9e4066Sahrens * IN: dvp - vnode of directory to remove entry from. 1411fa9e4066Sahrens * name - name of entry to remove. 1412fa9e4066Sahrens * cr - credentials of caller. 
1413da6c28aaSamw * ct - caller context 1414da6c28aaSamw * flags - case flags 1415fa9e4066Sahrens * 1416fa9e4066Sahrens * RETURN: 0 if success 1417fa9e4066Sahrens * error code if failure 1418fa9e4066Sahrens * 1419fa9e4066Sahrens * Timestamps: 1420fa9e4066Sahrens * dvp - ctime|mtime 1421fa9e4066Sahrens * vp - ctime (if nlink > 0) 1422fa9e4066Sahrens */ 1423da6c28aaSamw /*ARGSUSED*/ 1424fa9e4066Sahrens static int 1425da6c28aaSamw zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1426da6c28aaSamw int flags) 1427fa9e4066Sahrens { 1428fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1429fa9e4066Sahrens znode_t *xzp = NULL; 1430fa9e4066Sahrens vnode_t *vp; 1431fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1432f18faf3fSek zilog_t *zilog; 1433fa9e4066Sahrens uint64_t acl_obj, xattr_obj; 1434fa9e4066Sahrens zfs_dirlock_t *dl; 1435fa9e4066Sahrens dmu_tx_t *tx; 1436893a6d32Sahrens boolean_t may_delete_now, delete_now = FALSE; 1437cdb0ab79Smaybee boolean_t unlinked, toobig = FALSE; 1438da6c28aaSamw uint64_t txtype; 1439da6c28aaSamw pathname_t *realnmp = NULL; 1440da6c28aaSamw pathname_t realnm; 1441fa9e4066Sahrens int error; 1442da6c28aaSamw int zflg = ZEXISTS; 1443fa9e4066Sahrens 14443cb34c60Sahrens ZFS_ENTER(zfsvfs); 14453cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1446f18faf3fSek zilog = zfsvfs->z_log; 1447fa9e4066Sahrens 1448da6c28aaSamw if (flags & FIGNORECASE) { 1449da6c28aaSamw zflg |= ZCILOOK; 1450da6c28aaSamw pn_alloc(&realnm); 1451da6c28aaSamw realnmp = &realnm; 1452da6c28aaSamw } 1453da6c28aaSamw 1454fa9e4066Sahrens top: 1455fa9e4066Sahrens /* 1456fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 
1457fa9e4066Sahrens */ 1458da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1459da6c28aaSamw NULL, realnmp)) { 1460da6c28aaSamw if (realnmp) 1461da6c28aaSamw pn_free(realnmp); 1462fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1463fa9e4066Sahrens return (error); 1464fa9e4066Sahrens } 1465fa9e4066Sahrens 1466fa9e4066Sahrens vp = ZTOV(zp); 1467fa9e4066Sahrens 1468fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1469fa9e4066Sahrens goto out; 1470fa9e4066Sahrens } 1471fa9e4066Sahrens 1472fa9e4066Sahrens /* 1473fa9e4066Sahrens * Need to use rmdir for removing directories. 1474fa9e4066Sahrens */ 1475fa9e4066Sahrens if (vp->v_type == VDIR) { 1476fa9e4066Sahrens error = EPERM; 1477fa9e4066Sahrens goto out; 1478fa9e4066Sahrens } 1479fa9e4066Sahrens 1480da6c28aaSamw vnevent_remove(vp, dvp, name, ct); 1481fa9e4066Sahrens 1482da6c28aaSamw if (realnmp) 1483ab04eb8eStimh dnlc_remove(dvp, realnmp->pn_buf); 1484da6c28aaSamw else 1485da6c28aaSamw dnlc_remove(dvp, name); 1486033f9833Sek 1487fa9e4066Sahrens mutex_enter(&vp->v_lock); 1488fa9e4066Sahrens may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1489fa9e4066Sahrens mutex_exit(&vp->v_lock); 1490fa9e4066Sahrens 1491fa9e4066Sahrens /* 1492893a6d32Sahrens * We may delete the znode now, or we may put it in the unlinked set; 1493fa9e4066Sahrens * it depends on whether we're the last link, and on whether there are 1494fa9e4066Sahrens * other holds on the vnode. So we dmu_tx_hold() the right things to 1495fa9e4066Sahrens * allow for either case. 
1496fa9e4066Sahrens */ 1497fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1498ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1499fa9e4066Sahrens dmu_tx_hold_bonus(tx, zp->z_id); 1500cdb0ab79Smaybee if (may_delete_now) { 1501cdb0ab79Smaybee toobig = 1502cdb0ab79Smaybee zp->z_phys->zp_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1503cdb0ab79Smaybee /* if the file is too big, only hold_free a token amount */ 1504cdb0ab79Smaybee dmu_tx_hold_free(tx, zp->z_id, 0, 1505cdb0ab79Smaybee (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); 1506cdb0ab79Smaybee } 1507fa9e4066Sahrens 1508fa9e4066Sahrens /* are there any extended attributes? */ 1509fa9e4066Sahrens if ((xattr_obj = zp->z_phys->zp_xattr) != 0) { 1510fa9e4066Sahrens /* XXX - do we need this if we are deleting? */ 1511fa9e4066Sahrens dmu_tx_hold_bonus(tx, xattr_obj); 1512fa9e4066Sahrens } 1513fa9e4066Sahrens 1514fa9e4066Sahrens /* are there any additional acls */ 1515fa9e4066Sahrens if ((acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj) != 0 && 1516fa9e4066Sahrens may_delete_now) 1517fa9e4066Sahrens dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 1518fa9e4066Sahrens 1519fa9e4066Sahrens /* charge as an update -- would be nice not to charge at all */ 1520893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1521fa9e4066Sahrens 15221209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 1523fa9e4066Sahrens if (error) { 1524fa9e4066Sahrens zfs_dirent_unlock(dl); 1525fa9e4066Sahrens VN_RELE(vp); 15261209a471SNeil Perrin if (error == ERESTART) { 15278a2f1b91Sahrens dmu_tx_wait(tx); 15288a2f1b91Sahrens dmu_tx_abort(tx); 1529fa9e4066Sahrens goto top; 1530fa9e4066Sahrens } 1531da6c28aaSamw if (realnmp) 1532da6c28aaSamw pn_free(realnmp); 15338a2f1b91Sahrens dmu_tx_abort(tx); 1534fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1535fa9e4066Sahrens return (error); 1536fa9e4066Sahrens } 1537fa9e4066Sahrens 1538fa9e4066Sahrens /* 1539fa9e4066Sahrens * Remove the directory entry. 
1540fa9e4066Sahrens */ 1541da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1542fa9e4066Sahrens 1543fa9e4066Sahrens if (error) { 1544fa9e4066Sahrens dmu_tx_commit(tx); 1545fa9e4066Sahrens goto out; 1546fa9e4066Sahrens } 1547fa9e4066Sahrens 1548893a6d32Sahrens if (unlinked) { 1549fa9e4066Sahrens mutex_enter(&vp->v_lock); 1550cdb0ab79Smaybee delete_now = may_delete_now && !toobig && 1551fa9e4066Sahrens vp->v_count == 1 && !vn_has_cached_data(vp) && 1552fa9e4066Sahrens zp->z_phys->zp_xattr == xattr_obj && 1553fa9e4066Sahrens zp->z_phys->zp_acl.z_acl_extern_obj == acl_obj; 1554fa9e4066Sahrens mutex_exit(&vp->v_lock); 1555fa9e4066Sahrens } 1556fa9e4066Sahrens 1557fa9e4066Sahrens if (delete_now) { 1558fa9e4066Sahrens if (zp->z_phys->zp_xattr) { 1559fa9e4066Sahrens error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp); 1560fa9e4066Sahrens ASSERT3U(error, ==, 0); 1561fa9e4066Sahrens ASSERT3U(xzp->z_phys->zp_links, ==, 2); 1562fa9e4066Sahrens dmu_buf_will_dirty(xzp->z_dbuf, tx); 1563fa9e4066Sahrens mutex_enter(&xzp->z_lock); 1564893a6d32Sahrens xzp->z_unlinked = 1; 1565fa9e4066Sahrens xzp->z_phys->zp_links = 0; 1566fa9e4066Sahrens mutex_exit(&xzp->z_lock); 1567893a6d32Sahrens zfs_unlinked_add(xzp, tx); 1568fa9e4066Sahrens zp->z_phys->zp_xattr = 0; /* probably unnecessary */ 1569fa9e4066Sahrens } 1570fa9e4066Sahrens mutex_enter(&zp->z_lock); 1571fa9e4066Sahrens mutex_enter(&vp->v_lock); 1572fa9e4066Sahrens vp->v_count--; 1573fa9e4066Sahrens ASSERT3U(vp->v_count, ==, 0); 1574fa9e4066Sahrens mutex_exit(&vp->v_lock); 1575fa9e4066Sahrens mutex_exit(&zp->z_lock); 1576fa9e4066Sahrens zfs_znode_delete(zp, tx); 1577893a6d32Sahrens } else if (unlinked) { 1578893a6d32Sahrens zfs_unlinked_add(zp, tx); 1579fa9e4066Sahrens } 1580fa9e4066Sahrens 1581da6c28aaSamw txtype = TX_REMOVE; 1582da6c28aaSamw if (flags & FIGNORECASE) 1583da6c28aaSamw txtype |= TX_CI; 1584da6c28aaSamw zfs_log_remove(zilog, tx, txtype, dzp, name); 1585fa9e4066Sahrens 1586fa9e4066Sahrens 
dmu_tx_commit(tx); 1587fa9e4066Sahrens out: 1588da6c28aaSamw if (realnmp) 1589da6c28aaSamw pn_free(realnmp); 1590da6c28aaSamw 1591fa9e4066Sahrens zfs_dirent_unlock(dl); 1592fa9e4066Sahrens 1593fa9e4066Sahrens if (!delete_now) { 1594fa9e4066Sahrens VN_RELE(vp); 1595fa9e4066Sahrens } else if (xzp) { 1596cdb0ab79Smaybee /* this rele is delayed to prevent nesting transactions */ 1597fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 1598fa9e4066Sahrens } 1599fa9e4066Sahrens 1600fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1601fa9e4066Sahrens return (error); 1602fa9e4066Sahrens } 1603fa9e4066Sahrens 1604fa9e4066Sahrens /* 1605fa9e4066Sahrens * Create a new directory and insert it into dvp using the name 1606fa9e4066Sahrens * provided. Return a pointer to the inserted directory. 1607fa9e4066Sahrens * 1608fa9e4066Sahrens * IN: dvp - vnode of directory to add subdir to. 1609fa9e4066Sahrens * dirname - name of new directory. 1610fa9e4066Sahrens * vap - attributes of new directory. 1611fa9e4066Sahrens * cr - credentials of caller. 1612da6c28aaSamw * ct - caller context 1613da6c28aaSamw * vsecp - ACL to be set 1614fa9e4066Sahrens * 1615fa9e4066Sahrens * OUT: vpp - vnode of created directory. 
1616fa9e4066Sahrens * 1617fa9e4066Sahrens * RETURN: 0 if success 1618fa9e4066Sahrens * error code if failure 1619fa9e4066Sahrens * 1620fa9e4066Sahrens * Timestamps: 1621fa9e4066Sahrens * dvp - ctime|mtime updated 1622fa9e4066Sahrens * vp - ctime|mtime|atime updated 1623fa9e4066Sahrens */ 1624da6c28aaSamw /*ARGSUSED*/ 1625fa9e4066Sahrens static int 1626da6c28aaSamw zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 1627da6c28aaSamw caller_context_t *ct, int flags, vsecattr_t *vsecp) 1628fa9e4066Sahrens { 1629fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1630fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1631f18faf3fSek zilog_t *zilog; 1632fa9e4066Sahrens zfs_dirlock_t *dl; 1633da6c28aaSamw uint64_t txtype; 1634fa9e4066Sahrens dmu_tx_t *tx; 1635fa9e4066Sahrens int error; 1636da6c28aaSamw int zf = ZNEW; 1637c1ce5987SMark Shellenbaum ksid_t *ksid; 1638c1ce5987SMark Shellenbaum uid_t uid; 1639c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 164089459e17SMark Shellenbaum zfs_acl_ids_t acl_ids; 164189459e17SMark Shellenbaum boolean_t fuid_dirtied; 1642fa9e4066Sahrens 1643fa9e4066Sahrens ASSERT(vap->va_type == VDIR); 1644fa9e4066Sahrens 1645da6c28aaSamw /* 1646da6c28aaSamw * If we have an ephemeral id, ACL, or XVATTR then 1647da6c28aaSamw * make sure file system is at proper version 1648da6c28aaSamw */ 1649da6c28aaSamw 1650c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1651c1ce5987SMark Shellenbaum if (ksid) 1652c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1653c1ce5987SMark Shellenbaum else 1654c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1655da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1656c1ce5987SMark Shellenbaum (vsecp || (vap->va_mask & AT_XVATTR) || 1657756962ecSMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1658da6c28aaSamw return (EINVAL); 1659da6c28aaSamw 16603cb34c60Sahrens ZFS_ENTER(zfsvfs); 16613cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1662f18faf3fSek zilog = zfsvfs->z_log; 1663fa9e4066Sahrens 
1664fa9e4066Sahrens if (dzp->z_phys->zp_flags & ZFS_XATTR) { 1665fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1666fa9e4066Sahrens return (EINVAL); 1667fa9e4066Sahrens } 1668da6c28aaSamw 1669de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(dirname, 1670da6c28aaSamw strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1671da6c28aaSamw ZFS_EXIT(zfsvfs); 1672da6c28aaSamw return (EILSEQ); 1673da6c28aaSamw } 1674da6c28aaSamw if (flags & FIGNORECASE) 1675da6c28aaSamw zf |= ZCILOOK; 1676da6c28aaSamw 1677da6c28aaSamw if (vap->va_mask & AT_XVATTR) 1678da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1679da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1680da6c28aaSamw ZFS_EXIT(zfsvfs); 1681da6c28aaSamw return (error); 1682da6c28aaSamw } 1683fa9e4066Sahrens 1684fa9e4066Sahrens /* 1685fa9e4066Sahrens * First make sure the new directory doesn't exist. 1686fa9e4066Sahrens */ 1687da6c28aaSamw top: 1688da6c28aaSamw *vpp = NULL; 1689da6c28aaSamw 1690da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 1691da6c28aaSamw NULL, NULL)) { 1692fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1693fa9e4066Sahrens return (error); 1694fa9e4066Sahrens } 1695fa9e4066Sahrens 1696da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 1697d2443e76Smarks zfs_dirent_unlock(dl); 1698d2443e76Smarks ZFS_EXIT(zfsvfs); 1699d2443e76Smarks return (error); 1700d2443e76Smarks } 1701d2443e76Smarks 170289459e17SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, 170389459e17SMark Shellenbaum &acl_ids)) != 0) { 170489459e17SMark Shellenbaum zfs_dirent_unlock(dl); 170589459e17SMark Shellenbaum ZFS_EXIT(zfsvfs); 170689459e17SMark Shellenbaum return (error); 1707da6c28aaSamw } 170814843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 17094929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 171014843421SMatthew Ahrens zfs_dirent_unlock(dl); 171114843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 171214843421SMatthew Ahrens return (EDQUOT); 
171314843421SMatthew Ahrens } 171489459e17SMark Shellenbaum 1715fa9e4066Sahrens /* 1716fa9e4066Sahrens * Add a new entry to the directory. 1717fa9e4066Sahrens */ 1718fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1719ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 1720ea8dc4b6Seschrock dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 172189459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 172214843421SMatthew Ahrens if (fuid_dirtied) 172314843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 172489459e17SMark Shellenbaum if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) 1725fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 1726fa9e4066Sahrens 0, SPA_MAXBLOCKSIZE); 17271209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 1728fa9e4066Sahrens if (error) { 172989459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1730fa9e4066Sahrens zfs_dirent_unlock(dl); 17311209a471SNeil Perrin if (error == ERESTART) { 17328a2f1b91Sahrens dmu_tx_wait(tx); 17338a2f1b91Sahrens dmu_tx_abort(tx); 1734fa9e4066Sahrens goto top; 1735fa9e4066Sahrens } 17368a2f1b91Sahrens dmu_tx_abort(tx); 1737fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1738fa9e4066Sahrens return (error); 1739fa9e4066Sahrens } 1740fa9e4066Sahrens 1741fa9e4066Sahrens /* 1742fa9e4066Sahrens * Create new node. 1743fa9e4066Sahrens */ 174489459e17SMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids); 1745fa9e4066Sahrens 174689459e17SMark Shellenbaum if (fuid_dirtied) 174789459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 1748fa9e4066Sahrens /* 1749fa9e4066Sahrens * Now put new name in parent dir. 
1750fa9e4066Sahrens */ 1751fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 1752fa9e4066Sahrens 1753fa9e4066Sahrens *vpp = ZTOV(zp); 1754fa9e4066Sahrens 1755da6c28aaSamw txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 1756da6c28aaSamw if (flags & FIGNORECASE) 1757da6c28aaSamw txtype |= TX_CI; 175889459e17SMark Shellenbaum zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 175989459e17SMark Shellenbaum acl_ids.z_fuidp, vap); 1760da6c28aaSamw 176189459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1762fa9e4066Sahrens dmu_tx_commit(tx); 1763fa9e4066Sahrens 1764fa9e4066Sahrens zfs_dirent_unlock(dl); 1765fa9e4066Sahrens 1766fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1767fa9e4066Sahrens return (0); 1768fa9e4066Sahrens } 1769fa9e4066Sahrens 1770fa9e4066Sahrens /* 1771fa9e4066Sahrens * Remove a directory subdir entry. If the current working 1772fa9e4066Sahrens * directory is the same as the subdir to be removed, the 1773fa9e4066Sahrens * remove will fail. 1774fa9e4066Sahrens * 1775fa9e4066Sahrens * IN: dvp - vnode of directory to remove from. 1776fa9e4066Sahrens * name - name of directory to be removed. 1777fa9e4066Sahrens * cwd - vnode of current working directory. 1778fa9e4066Sahrens * cr - credentials of caller. 
1779da6c28aaSamw * ct - caller context 1780da6c28aaSamw * flags - case flags 1781fa9e4066Sahrens * 1782fa9e4066Sahrens * RETURN: 0 if success 1783fa9e4066Sahrens * error code if failure 1784fa9e4066Sahrens * 1785fa9e4066Sahrens * Timestamps: 1786fa9e4066Sahrens * dvp - ctime|mtime updated 1787fa9e4066Sahrens */ 1788da6c28aaSamw /*ARGSUSED*/ 1789fa9e4066Sahrens static int 1790da6c28aaSamw zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 1791da6c28aaSamw caller_context_t *ct, int flags) 1792fa9e4066Sahrens { 1793fa9e4066Sahrens znode_t *dzp = VTOZ(dvp); 1794fa9e4066Sahrens znode_t *zp; 1795fa9e4066Sahrens vnode_t *vp; 1796fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1797f18faf3fSek zilog_t *zilog; 1798fa9e4066Sahrens zfs_dirlock_t *dl; 1799fa9e4066Sahrens dmu_tx_t *tx; 1800fa9e4066Sahrens int error; 1801da6c28aaSamw int zflg = ZEXISTS; 1802fa9e4066Sahrens 18033cb34c60Sahrens ZFS_ENTER(zfsvfs); 18043cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1805f18faf3fSek zilog = zfsvfs->z_log; 1806fa9e4066Sahrens 1807da6c28aaSamw if (flags & FIGNORECASE) 1808da6c28aaSamw zflg |= ZCILOOK; 1809fa9e4066Sahrens top: 1810fa9e4066Sahrens zp = NULL; 1811fa9e4066Sahrens 1812fa9e4066Sahrens /* 1813fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 
1814fa9e4066Sahrens */ 1815da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1816da6c28aaSamw NULL, NULL)) { 1817fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1818fa9e4066Sahrens return (error); 1819fa9e4066Sahrens } 1820fa9e4066Sahrens 1821fa9e4066Sahrens vp = ZTOV(zp); 1822fa9e4066Sahrens 1823fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1824fa9e4066Sahrens goto out; 1825fa9e4066Sahrens } 1826fa9e4066Sahrens 1827fa9e4066Sahrens if (vp->v_type != VDIR) { 1828fa9e4066Sahrens error = ENOTDIR; 1829fa9e4066Sahrens goto out; 1830fa9e4066Sahrens } 1831fa9e4066Sahrens 1832fa9e4066Sahrens if (vp == cwd) { 1833fa9e4066Sahrens error = EINVAL; 1834fa9e4066Sahrens goto out; 1835fa9e4066Sahrens } 1836fa9e4066Sahrens 1837da6c28aaSamw vnevent_rmdir(vp, dvp, name, ct); 1838fa9e4066Sahrens 1839fa9e4066Sahrens /* 1840af2c4821Smaybee * Grab a lock on the directory to make sure that noone is 1841af2c4821Smaybee * trying to add (or lookup) entries while we are removing it. 1842af2c4821Smaybee */ 1843af2c4821Smaybee rw_enter(&zp->z_name_lock, RW_WRITER); 1844af2c4821Smaybee 1845af2c4821Smaybee /* 1846af2c4821Smaybee * Grab a lock on the parent pointer to make sure we play well 1847fa9e4066Sahrens * with the treewalk and directory rename code. 
1848fa9e4066Sahrens */ 1849fa9e4066Sahrens rw_enter(&zp->z_parent_lock, RW_WRITER); 1850fa9e4066Sahrens 1851fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1852ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1853fa9e4066Sahrens dmu_tx_hold_bonus(tx, zp->z_id); 1854893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 18551209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 1856fa9e4066Sahrens if (error) { 1857fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 1858af2c4821Smaybee rw_exit(&zp->z_name_lock); 1859fa9e4066Sahrens zfs_dirent_unlock(dl); 1860fa9e4066Sahrens VN_RELE(vp); 18611209a471SNeil Perrin if (error == ERESTART) { 18628a2f1b91Sahrens dmu_tx_wait(tx); 18638a2f1b91Sahrens dmu_tx_abort(tx); 1864fa9e4066Sahrens goto top; 1865fa9e4066Sahrens } 18668a2f1b91Sahrens dmu_tx_abort(tx); 1867fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1868fa9e4066Sahrens return (error); 1869fa9e4066Sahrens } 1870fa9e4066Sahrens 1871da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 1872fa9e4066Sahrens 1873da6c28aaSamw if (error == 0) { 1874da6c28aaSamw uint64_t txtype = TX_RMDIR; 1875da6c28aaSamw if (flags & FIGNORECASE) 1876da6c28aaSamw txtype |= TX_CI; 1877da6c28aaSamw zfs_log_remove(zilog, tx, txtype, dzp, name); 1878da6c28aaSamw } 1879fa9e4066Sahrens 1880fa9e4066Sahrens dmu_tx_commit(tx); 1881fa9e4066Sahrens 1882fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 1883af2c4821Smaybee rw_exit(&zp->z_name_lock); 1884fa9e4066Sahrens out: 1885fa9e4066Sahrens zfs_dirent_unlock(dl); 1886fa9e4066Sahrens 1887fa9e4066Sahrens VN_RELE(vp); 1888fa9e4066Sahrens 1889fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1890fa9e4066Sahrens return (error); 1891fa9e4066Sahrens } 1892fa9e4066Sahrens 1893fa9e4066Sahrens /* 1894fa9e4066Sahrens * Read as many directory entries as will fit into the provided 1895fa9e4066Sahrens * buffer from the given directory cursor position (specified in 1896fa9e4066Sahrens * the uio structure. 
1897fa9e4066Sahrens * 1898fa9e4066Sahrens * IN: vp - vnode of directory to read. 1899fa9e4066Sahrens * uio - structure supplying read location, range info, 1900fa9e4066Sahrens * and return buffer. 1901fa9e4066Sahrens * cr - credentials of caller. 1902da6c28aaSamw * ct - caller context 1903da6c28aaSamw * flags - case flags 1904fa9e4066Sahrens * 1905fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 1906fa9e4066Sahrens * eofp - set to true if end-of-file detected. 1907fa9e4066Sahrens * 1908fa9e4066Sahrens * RETURN: 0 if success 1909fa9e4066Sahrens * error code if failure 1910fa9e4066Sahrens * 1911fa9e4066Sahrens * Timestamps: 1912fa9e4066Sahrens * vp - atime updated 1913fa9e4066Sahrens * 1914fa9e4066Sahrens * Note that the low 4 bits of the cookie returned by zap is always zero. 1915fa9e4066Sahrens * This allows us to use the low range for "special" directory entries: 1916fa9e4066Sahrens * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 1917fa9e4066Sahrens * we use the offset 2 for the '.zfs' directory. 
1918fa9e4066Sahrens */ 1919fa9e4066Sahrens /* ARGSUSED */ 1920fa9e4066Sahrens static int 1921da6c28aaSamw zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, 1922da6c28aaSamw caller_context_t *ct, int flags) 1923fa9e4066Sahrens { 1924fa9e4066Sahrens znode_t *zp = VTOZ(vp); 1925fa9e4066Sahrens iovec_t *iovp; 1926da6c28aaSamw edirent_t *eodp; 1927fa9e4066Sahrens dirent64_t *odp; 1928fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 19297f6e3e7dSperrin objset_t *os; 1930fa9e4066Sahrens caddr_t outbuf; 1931fa9e4066Sahrens size_t bufsize; 1932fa9e4066Sahrens zap_cursor_t zc; 1933fa9e4066Sahrens zap_attribute_t zap; 1934fa9e4066Sahrens uint_t bytes_wanted; 1935fa9e4066Sahrens uint64_t offset; /* must be unsigned; checks for < 1 */ 1936fa9e4066Sahrens int local_eof; 19377f6e3e7dSperrin int outcount; 19387f6e3e7dSperrin int error; 19397f6e3e7dSperrin uint8_t prefetch; 1940b38f0970Sck boolean_t check_sysattrs; 1941fa9e4066Sahrens 19423cb34c60Sahrens ZFS_ENTER(zfsvfs); 19433cb34c60Sahrens ZFS_VERIFY_ZP(zp); 1944fa9e4066Sahrens 1945fa9e4066Sahrens /* 1946fa9e4066Sahrens * If we are not given an eof variable, 1947fa9e4066Sahrens * use a local one. 1948fa9e4066Sahrens */ 1949fa9e4066Sahrens if (eofp == NULL) 1950fa9e4066Sahrens eofp = &local_eof; 1951fa9e4066Sahrens 1952fa9e4066Sahrens /* 1953fa9e4066Sahrens * Check for valid iov_len. 
1954fa9e4066Sahrens */ 1955fa9e4066Sahrens if (uio->uio_iov->iov_len <= 0) { 1956fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1957fa9e4066Sahrens return (EINVAL); 1958fa9e4066Sahrens } 1959fa9e4066Sahrens 1960fa9e4066Sahrens /* 1961fa9e4066Sahrens * Quit if directory has been removed (posix) 1962fa9e4066Sahrens */ 1963893a6d32Sahrens if ((*eofp = zp->z_unlinked) != 0) { 1964fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1965fa9e4066Sahrens return (0); 1966fa9e4066Sahrens } 1967fa9e4066Sahrens 19687f6e3e7dSperrin error = 0; 19697f6e3e7dSperrin os = zfsvfs->z_os; 19707f6e3e7dSperrin offset = uio->uio_loffset; 19717f6e3e7dSperrin prefetch = zp->z_zn_prefetch; 19727f6e3e7dSperrin 1973fa9e4066Sahrens /* 1974fa9e4066Sahrens * Initialize the iterator cursor. 1975fa9e4066Sahrens */ 1976fa9e4066Sahrens if (offset <= 3) { 1977fa9e4066Sahrens /* 1978fa9e4066Sahrens * Start iteration from the beginning of the directory. 1979fa9e4066Sahrens */ 19807f6e3e7dSperrin zap_cursor_init(&zc, os, zp->z_id); 1981fa9e4066Sahrens } else { 1982fa9e4066Sahrens /* 1983fa9e4066Sahrens * The offset is a serialized cursor. 1984fa9e4066Sahrens */ 19857f6e3e7dSperrin zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 1986fa9e4066Sahrens } 1987fa9e4066Sahrens 1988fa9e4066Sahrens /* 1989fa9e4066Sahrens * Get space to change directory entries into fs independent format. 
1990fa9e4066Sahrens */ 1991fa9e4066Sahrens iovp = uio->uio_iov; 1992fa9e4066Sahrens bytes_wanted = iovp->iov_len; 1993fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 1994fa9e4066Sahrens bufsize = bytes_wanted; 1995fa9e4066Sahrens outbuf = kmem_alloc(bufsize, KM_SLEEP); 1996fa9e4066Sahrens odp = (struct dirent64 *)outbuf; 1997fa9e4066Sahrens } else { 1998fa9e4066Sahrens bufsize = bytes_wanted; 1999fa9e4066Sahrens odp = (struct dirent64 *)iovp->iov_base; 2000fa9e4066Sahrens } 2001da6c28aaSamw eodp = (struct edirent *)odp; 2002fa9e4066Sahrens 2003b38f0970Sck /* 20049660e5cbSJanice Chang * If this VFS supports the system attribute view interface; and 20059660e5cbSJanice Chang * we're looking at an extended attribute directory; and we care 20069660e5cbSJanice Chang * about normalization conflicts on this vfs; then we must check 20079660e5cbSJanice Chang * for normalization conflicts with the sysattr name space. 2008b38f0970Sck */ 20099660e5cbSJanice Chang check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2010b38f0970Sck (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2011b38f0970Sck (flags & V_RDDIR_ENTFLAGS); 2012b38f0970Sck 2013fa9e4066Sahrens /* 2014fa9e4066Sahrens * Transform to file-system independent format 2015fa9e4066Sahrens */ 2016fa9e4066Sahrens outcount = 0; 2017fa9e4066Sahrens while (outcount < bytes_wanted) { 2018b1b8ab34Slling ino64_t objnum; 2019b1b8ab34Slling ushort_t reclen; 2020b1b8ab34Slling off64_t *next; 2021b1b8ab34Slling 2022fa9e4066Sahrens /* 2023fa9e4066Sahrens * Special case `.', `..', and `.zfs'. 
2024fa9e4066Sahrens */ 2025fa9e4066Sahrens if (offset == 0) { 2026fa9e4066Sahrens (void) strcpy(zap.za_name, "."); 2027da6c28aaSamw zap.za_normalization_conflict = 0; 2028b1b8ab34Slling objnum = zp->z_id; 2029fa9e4066Sahrens } else if (offset == 1) { 2030fa9e4066Sahrens (void) strcpy(zap.za_name, ".."); 2031da6c28aaSamw zap.za_normalization_conflict = 0; 2032b1b8ab34Slling objnum = zp->z_phys->zp_parent; 2033fa9e4066Sahrens } else if (offset == 2 && zfs_show_ctldir(zp)) { 2034fa9e4066Sahrens (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2035da6c28aaSamw zap.za_normalization_conflict = 0; 2036b1b8ab34Slling objnum = ZFSCTL_INO_ROOT; 2037fa9e4066Sahrens } else { 2038fa9e4066Sahrens /* 2039fa9e4066Sahrens * Grab next entry. 2040fa9e4066Sahrens */ 2041fa9e4066Sahrens if (error = zap_cursor_retrieve(&zc, &zap)) { 2042fa9e4066Sahrens if ((*eofp = (error == ENOENT)) != 0) 2043fa9e4066Sahrens break; 2044fa9e4066Sahrens else 2045fa9e4066Sahrens goto update; 2046fa9e4066Sahrens } 2047fa9e4066Sahrens 2048fa9e4066Sahrens if (zap.za_integer_length != 8 || 2049fa9e4066Sahrens zap.za_num_integers != 1) { 2050fa9e4066Sahrens cmn_err(CE_WARN, "zap_readdir: bad directory " 2051fa9e4066Sahrens "entry, obj = %lld, offset = %lld\n", 2052fa9e4066Sahrens (u_longlong_t)zp->z_id, 2053fa9e4066Sahrens (u_longlong_t)offset); 2054fa9e4066Sahrens error = ENXIO; 2055fa9e4066Sahrens goto update; 2056fa9e4066Sahrens } 2057b1b8ab34Slling 2058b1b8ab34Slling objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2059b1b8ab34Slling /* 2060b1b8ab34Slling * MacOS X can extract the object type here such as: 2061b1b8ab34Slling * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2062b1b8ab34Slling */ 2063b38f0970Sck 2064b38f0970Sck if (check_sysattrs && !zap.za_normalization_conflict) { 2065b38f0970Sck zap.za_normalization_conflict = 2066b38f0970Sck xattr_sysattr_casechk(zap.za_name); 2067b38f0970Sck } 2068fa9e4066Sahrens } 2069da6c28aaSamw 2070e802abbdSTim Haley if (flags & V_RDDIR_ACCFILTER) { 
2071e802abbdSTim Haley /* 2072e802abbdSTim Haley * If we have no access at all, don't include 2073e802abbdSTim Haley * this entry in the returned information 2074e802abbdSTim Haley */ 2075e802abbdSTim Haley znode_t *ezp; 2076e802abbdSTim Haley if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2077e802abbdSTim Haley goto skip_entry; 2078e802abbdSTim Haley if (!zfs_has_access(ezp, cr)) { 2079e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2080e802abbdSTim Haley goto skip_entry; 2081e802abbdSTim Haley } 2082e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2083e802abbdSTim Haley } 2084e802abbdSTim Haley 2085da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) 2086da6c28aaSamw reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2087da6c28aaSamw else 2088da6c28aaSamw reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2089fa9e4066Sahrens 2090fa9e4066Sahrens /* 2091fa9e4066Sahrens * Will this entry fit in the buffer? 2092fa9e4066Sahrens */ 2093b1b8ab34Slling if (outcount + reclen > bufsize) { 2094fa9e4066Sahrens /* 2095fa9e4066Sahrens * Did we manage to fit anything in the buffer? 2096fa9e4066Sahrens */ 2097fa9e4066Sahrens if (!outcount) { 2098fa9e4066Sahrens error = EINVAL; 2099fa9e4066Sahrens goto update; 2100fa9e4066Sahrens } 2101fa9e4066Sahrens break; 2102fa9e4066Sahrens } 2103da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) { 2104da6c28aaSamw /* 2105da6c28aaSamw * Add extended flag entry: 2106da6c28aaSamw */ 2107da6c28aaSamw eodp->ed_ino = objnum; 2108da6c28aaSamw eodp->ed_reclen = reclen; 2109da6c28aaSamw /* NOTE: ed_off is the offset for the *next* entry */ 2110da6c28aaSamw next = &(eodp->ed_off); 2111da6c28aaSamw eodp->ed_eflags = zap.za_normalization_conflict ? 
2112da6c28aaSamw ED_CASE_CONFLICT : 0; 2113da6c28aaSamw (void) strncpy(eodp->ed_name, zap.za_name, 2114da6c28aaSamw EDIRENT_NAMELEN(reclen)); 2115da6c28aaSamw eodp = (edirent_t *)((intptr_t)eodp + reclen); 2116da6c28aaSamw } else { 2117da6c28aaSamw /* 2118da6c28aaSamw * Add normal entry: 2119da6c28aaSamw */ 2120da6c28aaSamw odp->d_ino = objnum; 2121da6c28aaSamw odp->d_reclen = reclen; 2122da6c28aaSamw /* NOTE: d_off is the offset for the *next* entry */ 2123da6c28aaSamw next = &(odp->d_off); 2124da6c28aaSamw (void) strncpy(odp->d_name, zap.za_name, 2125da6c28aaSamw DIRENT64_NAMELEN(reclen)); 2126da6c28aaSamw odp = (dirent64_t *)((intptr_t)odp + reclen); 2127da6c28aaSamw } 2128b1b8ab34Slling outcount += reclen; 2129fa9e4066Sahrens 2130fa9e4066Sahrens ASSERT(outcount <= bufsize); 2131fa9e4066Sahrens 2132fa9e4066Sahrens /* Prefetch znode */ 21337f6e3e7dSperrin if (prefetch) 2134b1b8ab34Slling dmu_prefetch(os, objnum, 0, 0); 2135fa9e4066Sahrens 2136e802abbdSTim Haley skip_entry: 2137fa9e4066Sahrens /* 2138fa9e4066Sahrens * Move to the next entry, fill in the previous offset. 2139fa9e4066Sahrens */ 2140fa9e4066Sahrens if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2141fa9e4066Sahrens zap_cursor_advance(&zc); 2142fa9e4066Sahrens offset = zap_cursor_serialize(&zc); 2143fa9e4066Sahrens } else { 2144fa9e4066Sahrens offset += 1; 2145fa9e4066Sahrens } 2146fa9e4066Sahrens *next = offset; 2147fa9e4066Sahrens } 21487f6e3e7dSperrin zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2149fa9e4066Sahrens 2150fa9e4066Sahrens if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2151fa9e4066Sahrens iovp->iov_base += outcount; 2152fa9e4066Sahrens iovp->iov_len -= outcount; 2153fa9e4066Sahrens uio->uio_resid -= outcount; 2154fa9e4066Sahrens } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2155fa9e4066Sahrens /* 2156fa9e4066Sahrens * Reset the pointer. 
2157fa9e4066Sahrens */ 2158fa9e4066Sahrens offset = uio->uio_loffset; 2159fa9e4066Sahrens } 2160fa9e4066Sahrens 2161fa9e4066Sahrens update: 216287e5029aSahrens zap_cursor_fini(&zc); 2163fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2164fa9e4066Sahrens kmem_free(outbuf, bufsize); 2165fa9e4066Sahrens 2166fa9e4066Sahrens if (error == ENOENT) 2167fa9e4066Sahrens error = 0; 2168fa9e4066Sahrens 2169fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2170fa9e4066Sahrens 2171fa9e4066Sahrens uio->uio_loffset = offset; 2172fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2173fa9e4066Sahrens return (error); 2174fa9e4066Sahrens } 2175fa9e4066Sahrens 2176ec533521Sfr ulong_t zfs_fsync_sync_cnt = 4; 2177ec533521Sfr 2178fa9e4066Sahrens static int 2179da6c28aaSamw zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2180fa9e4066Sahrens { 2181fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2182fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2183fa9e4066Sahrens 2184b468a217Seschrock /* 2185b468a217Seschrock * Regardless of whether this is required for standards conformance, 2186b468a217Seschrock * this is the logical behavior when fsync() is called on a file with 2187b468a217Seschrock * dirty pages. We use B_ASYNC since the ZIL transactions are already 2188b468a217Seschrock * going to be pushed out as part of the zil_commit(). 
2189b468a217Seschrock */ 2190b468a217Seschrock if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) && 2191b468a217Seschrock (vp->v_type == VREG) && !(IS_SWAPVP(vp))) 2192da6c28aaSamw (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct); 2193b468a217Seschrock 2194ec533521Sfr (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2195ec533521Sfr 21963cb34c60Sahrens ZFS_ENTER(zfsvfs); 21973cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2198b19a79ecSperrin zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 2199fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2200fa9e4066Sahrens return (0); 2201fa9e4066Sahrens } 2202fa9e4066Sahrens 2203da6c28aaSamw 2204fa9e4066Sahrens /* 2205fa9e4066Sahrens * Get the requested file attributes and place them in the provided 2206fa9e4066Sahrens * vattr structure. 2207fa9e4066Sahrens * 2208fa9e4066Sahrens * IN: vp - vnode of file. 2209fa9e4066Sahrens * vap - va_mask identifies requested attributes. 2210da6c28aaSamw * If AT_XVATTR set, then optional attrs are requested 2211da6c28aaSamw * flags - ATTR_NOACLCHECK (CIFS server context) 2212fa9e4066Sahrens * cr - credentials of caller. 2213da6c28aaSamw * ct - caller context 2214fa9e4066Sahrens * 2215fa9e4066Sahrens * OUT: vap - attribute values. 2216fa9e4066Sahrens * 2217fa9e4066Sahrens * RETURN: 0 (always succeeds) 2218fa9e4066Sahrens */ 2219fa9e4066Sahrens /* ARGSUSED */ 2220fa9e4066Sahrens static int 2221da6c28aaSamw zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2222da6c28aaSamw caller_context_t *ct) 2223fa9e4066Sahrens { 2224fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2225fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2226f18faf3fSek znode_phys_t *pzp; 2227da6c28aaSamw int error = 0; 2228ecd6cf80Smarks uint64_t links; 2229da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2230da6c28aaSamw xoptattr_t *xoap = NULL; 2231da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2232fa9e4066Sahrens 22333cb34c60Sahrens ZFS_ENTER(zfsvfs); 22343cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2235f18faf3fSek pzp = zp->z_phys; 2236fa9e4066Sahrens 2237da6c28aaSamw /* 2238da6c28aaSamw * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2239da6c28aaSamw * Also, if we are the owner don't bother, since owner should 2240da6c28aaSamw * always be allowed to read basic attributes of file. 2241da6c28aaSamw */ 2242da6c28aaSamw if (!(pzp->zp_flags & ZFS_ACL_TRIVIAL) && 2243da6c28aaSamw (pzp->zp_uid != crgetuid(cr))) { 2244da6c28aaSamw if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2245da6c28aaSamw skipaclchk, cr)) { 2246da6c28aaSamw ZFS_EXIT(zfsvfs); 2247da6c28aaSamw return (error); 2248da6c28aaSamw } 2249da6c28aaSamw } 2250da6c28aaSamw 2251fa9e4066Sahrens /* 2252fa9e4066Sahrens * Return all attributes. It's cheaper to provide the answer 2253fa9e4066Sahrens * than to determine whether we were asked the question. 2254fa9e4066Sahrens */ 2255fa9e4066Sahrens 225634f345efSRay Hassan mutex_enter(&zp->z_lock); 2257fa9e4066Sahrens vap->va_type = vp->v_type; 2258fa9e4066Sahrens vap->va_mode = pzp->zp_mode & MODEMASK; 2259bda89588Sjp zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2260fa9e4066Sahrens vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2261fa9e4066Sahrens vap->va_nodeid = zp->z_id; 2262ecd6cf80Smarks if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2263ecd6cf80Smarks links = pzp->zp_links + 1; 2264ecd6cf80Smarks else 2265ecd6cf80Smarks links = pzp->zp_links; 2266ecd6cf80Smarks vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 2267fa9e4066Sahrens vap->va_size = pzp->zp_size; 226872fc53bcSmarks vap->va_rdev = vp->v_rdev; 2269fa9e4066Sahrens vap->va_seq = zp->z_seq; 2270fa9e4066Sahrens 2271fa9e4066Sahrens /* 2272da6c28aaSamw * Add in any requested optional attributes and the create time. 2273da6c28aaSamw * Also set the corresponding bits in the returned attribute bitmap. 
2274fa9e4066Sahrens */ 2275da6c28aaSamw if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2276da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2277da6c28aaSamw xoap->xoa_archive = 2278da6c28aaSamw ((pzp->zp_flags & ZFS_ARCHIVE) != 0); 2279da6c28aaSamw XVA_SET_RTN(xvap, XAT_ARCHIVE); 2280da6c28aaSamw } 2281da6c28aaSamw 2282da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2283da6c28aaSamw xoap->xoa_readonly = 2284da6c28aaSamw ((pzp->zp_flags & ZFS_READONLY) != 0); 2285da6c28aaSamw XVA_SET_RTN(xvap, XAT_READONLY); 2286da6c28aaSamw } 2287da6c28aaSamw 2288da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2289da6c28aaSamw xoap->xoa_system = 2290da6c28aaSamw ((pzp->zp_flags & ZFS_SYSTEM) != 0); 2291da6c28aaSamw XVA_SET_RTN(xvap, XAT_SYSTEM); 2292da6c28aaSamw } 2293da6c28aaSamw 2294da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2295da6c28aaSamw xoap->xoa_hidden = 2296da6c28aaSamw ((pzp->zp_flags & ZFS_HIDDEN) != 0); 2297da6c28aaSamw XVA_SET_RTN(xvap, XAT_HIDDEN); 2298da6c28aaSamw } 2299da6c28aaSamw 2300da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2301da6c28aaSamw xoap->xoa_nounlink = 2302da6c28aaSamw ((pzp->zp_flags & ZFS_NOUNLINK) != 0); 2303da6c28aaSamw XVA_SET_RTN(xvap, XAT_NOUNLINK); 2304da6c28aaSamw } 2305da6c28aaSamw 2306da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2307da6c28aaSamw xoap->xoa_immutable = 2308da6c28aaSamw ((pzp->zp_flags & ZFS_IMMUTABLE) != 0); 2309da6c28aaSamw XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2310da6c28aaSamw } 2311da6c28aaSamw 2312da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2313da6c28aaSamw xoap->xoa_appendonly = 2314da6c28aaSamw ((pzp->zp_flags & ZFS_APPENDONLY) != 0); 2315da6c28aaSamw XVA_SET_RTN(xvap, XAT_APPENDONLY); 2316da6c28aaSamw } 2317da6c28aaSamw 2318da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2319da6c28aaSamw xoap->xoa_nodump = 2320da6c28aaSamw ((pzp->zp_flags & ZFS_NODUMP) != 0); 2321da6c28aaSamw XVA_SET_RTN(xvap, XAT_NODUMP); 2322da6c28aaSamw } 2323da6c28aaSamw 
2324da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2325da6c28aaSamw xoap->xoa_opaque = 2326da6c28aaSamw ((pzp->zp_flags & ZFS_OPAQUE) != 0); 2327da6c28aaSamw XVA_SET_RTN(xvap, XAT_OPAQUE); 2328da6c28aaSamw } 2329da6c28aaSamw 2330da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2331da6c28aaSamw xoap->xoa_av_quarantined = 2332da6c28aaSamw ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0); 2333da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2334da6c28aaSamw } 2335da6c28aaSamw 2336da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2337da6c28aaSamw xoap->xoa_av_modified = 2338da6c28aaSamw ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0); 2339da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2340da6c28aaSamw } 2341da6c28aaSamw 2342da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2343da6c28aaSamw vp->v_type == VREG && 2344da6c28aaSamw (pzp->zp_flags & ZFS_BONUS_SCANSTAMP)) { 2345da6c28aaSamw size_t len; 2346da6c28aaSamw dmu_object_info_t doi; 2347da6c28aaSamw 2348da6c28aaSamw /* 2349da6c28aaSamw * Only VREG files have anti-virus scanstamps, so we 2350da6c28aaSamw * won't conflict with symlinks in the bonus buffer. 2351da6c28aaSamw */ 2352da6c28aaSamw dmu_object_info_from_db(zp->z_dbuf, &doi); 2353da6c28aaSamw len = sizeof (xoap->xoa_av_scanstamp) + 2354da6c28aaSamw sizeof (znode_phys_t); 2355da6c28aaSamw if (len <= doi.doi_bonus_size) { 2356da6c28aaSamw /* 2357da6c28aaSamw * pzp points to the start of the 2358da6c28aaSamw * znode_phys_t. pzp + 1 points to the 2359da6c28aaSamw * first byte after the znode_phys_t. 
2360da6c28aaSamw */ 2361da6c28aaSamw (void) memcpy(xoap->xoa_av_scanstamp, 2362da6c28aaSamw pzp + 1, 2363da6c28aaSamw sizeof (xoap->xoa_av_scanstamp)); 2364da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); 2365da6c28aaSamw } 2366da6c28aaSamw } 2367da6c28aaSamw 2368da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2369da6c28aaSamw ZFS_TIME_DECODE(&xoap->xoa_createtime, pzp->zp_crtime); 2370da6c28aaSamw XVA_SET_RTN(xvap, XAT_CREATETIME); 2371fa9e4066Sahrens } 23727a286c47SDai Ngo 23737a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 23747a286c47SDai Ngo xoap->xoa_reparse = 23757a286c47SDai Ngo ((pzp->zp_flags & ZFS_REPARSE) != 0); 23767a286c47SDai Ngo XVA_SET_RTN(xvap, XAT_REPARSE); 23777a286c47SDai Ngo } 2378fa9e4066Sahrens } 2379fa9e4066Sahrens 2380da6c28aaSamw ZFS_TIME_DECODE(&vap->va_atime, pzp->zp_atime); 2381da6c28aaSamw ZFS_TIME_DECODE(&vap->va_mtime, pzp->zp_mtime); 2382da6c28aaSamw ZFS_TIME_DECODE(&vap->va_ctime, pzp->zp_ctime); 2383da6c28aaSamw 2384fa9e4066Sahrens mutex_exit(&zp->z_lock); 2385fa9e4066Sahrens 2386fa9e4066Sahrens dmu_object_size_from_db(zp->z_dbuf, &vap->va_blksize, &vap->va_nblocks); 2387fa9e4066Sahrens 2388fa9e4066Sahrens if (zp->z_blksz == 0) { 2389fa9e4066Sahrens /* 2390fa9e4066Sahrens * Block size hasn't been set; suggest maximal I/O transfers. 2391fa9e4066Sahrens */ 2392fa9e4066Sahrens vap->va_blksize = zfsvfs->z_max_blksz; 2393fa9e4066Sahrens } 2394fa9e4066Sahrens 2395fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2396fa9e4066Sahrens return (0); 2397fa9e4066Sahrens } 2398fa9e4066Sahrens 2399fa9e4066Sahrens /* 2400fa9e4066Sahrens * Set the file attributes to the values contained in the 2401fa9e4066Sahrens * vattr structure. 2402fa9e4066Sahrens * 2403fa9e4066Sahrens * IN: vp - vnode of file to be modified. 2404fa9e4066Sahrens * vap - new attribute values. 2405da6c28aaSamw * If AT_XVATTR set, then optional attrs are being set 2406fa9e4066Sahrens * flags - ATTR_UTIME set if non-default time values provided. 
2407da6c28aaSamw * - ATTR_NOACLCHECK (CIFS context only). 2408fa9e4066Sahrens * cr - credentials of caller. 2409da6c28aaSamw * ct - caller context 2410fa9e4066Sahrens * 2411fa9e4066Sahrens * RETURN: 0 if success 2412fa9e4066Sahrens * error code if failure 2413fa9e4066Sahrens * 2414fa9e4066Sahrens * Timestamps: 2415fa9e4066Sahrens * vp - ctime updated, mtime updated if size changed. 2416fa9e4066Sahrens */ 2417fa9e4066Sahrens /* ARGSUSED */ 2418fa9e4066Sahrens static int 2419fa9e4066Sahrens zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2420fa9e4066Sahrens caller_context_t *ct) 2421fa9e4066Sahrens { 2422f18faf3fSek znode_t *zp = VTOZ(vp); 2423f18faf3fSek znode_phys_t *pzp; 2424fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2425f18faf3fSek zilog_t *zilog; 2426fa9e4066Sahrens dmu_tx_t *tx; 2427fa9e4066Sahrens vattr_t oldva; 2428ae4caef8SMark Shellenbaum xvattr_t tmpxvattr; 24295730cc9aSmaybee uint_t mask = vap->va_mask; 24305730cc9aSmaybee uint_t saved_mask; 2431f92daba9Smarks int trim_mask = 0; 2432fa9e4066Sahrens uint64_t new_mode; 243389459e17SMark Shellenbaum uint64_t new_uid, new_gid; 2434d2443e76Smarks znode_t *attrzp; 2435fa9e4066Sahrens int need_policy = FALSE; 2436fa9e4066Sahrens int err; 2437da6c28aaSamw zfs_fuid_info_t *fuidp = NULL; 2438da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2439da6c28aaSamw xoptattr_t *xoap; 24404c841f60Smarks zfs_acl_t *aclp = NULL; 2441da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 244289459e17SMark Shellenbaum boolean_t fuid_dirtied = B_FALSE; 2443fa9e4066Sahrens 2444fa9e4066Sahrens if (mask == 0) 2445fa9e4066Sahrens return (0); 2446fa9e4066Sahrens 2447fa9e4066Sahrens if (mask & AT_NOSET) 2448fa9e4066Sahrens return (EINVAL); 2449fa9e4066Sahrens 24503cb34c60Sahrens ZFS_ENTER(zfsvfs); 24513cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2452da6c28aaSamw 2453da6c28aaSamw pzp = zp->z_phys; 2454da6c28aaSamw zilog = zfsvfs->z_log; 2455da6c28aaSamw 2456da6c28aaSamw /* 2457da6c28aaSamw * Make sure that if we have ephemeral uid/gid or xvattr specified 2458da6c28aaSamw * that file system is at proper version level 2459da6c28aaSamw */ 2460da6c28aaSamw 2461da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 2462da6c28aaSamw (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2463da6c28aaSamw ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 246402dcba3bStimh (mask & AT_XVATTR))) { 246502dcba3bStimh ZFS_EXIT(zfsvfs); 2466da6c28aaSamw return (EINVAL); 246702dcba3bStimh } 2468da6c28aaSamw 246902dcba3bStimh if (mask & AT_SIZE && vp->v_type == VDIR) { 247002dcba3bStimh ZFS_EXIT(zfsvfs); 2471fa9e4066Sahrens return (EISDIR); 247202dcba3bStimh } 2473fa9e4066Sahrens 247402dcba3bStimh if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 247502dcba3bStimh ZFS_EXIT(zfsvfs); 247684c5a155Smarks return (EINVAL); 247702dcba3bStimh } 247884c5a155Smarks 2479da6c28aaSamw /* 2480da6c28aaSamw * If this is an xvattr_t, then get a pointer to the structure of 2481da6c28aaSamw * optional attributes. If this is NULL, then we have a vattr_t. 
2482da6c28aaSamw */ 2483da6c28aaSamw xoap = xva_getxoptattr(xvap); 2484da6c28aaSamw /* tmpxvattr records the XAT_* request bits that are trimmed from xvap further down, so that they can be put back before the attributes are applied. */ 2485ae4caef8SMark Shellenbaum xva_init(&tmpxvattr); 2486ae4caef8SMark Shellenbaum 2487da6c28aaSamw /* 2488da6c28aaSamw * Immutable files may only have their immutable bit and atime changed: any size, owner, group, mtime, mode or create-time change fails with EPERM. 2489da6c28aaSamw */ 2490da6c28aaSamw if ((pzp->zp_flags & ZFS_IMMUTABLE) && 2491da6c28aaSamw ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 249202dcba3bStimh ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 249302dcba3bStimh ZFS_EXIT(zfsvfs); 2494da6c28aaSamw return (EPERM); 249502dcba3bStimh } 2496da6c28aaSamw /* A file flagged ZFS_READONLY cannot have its size changed. */ 249702dcba3bStimh if ((mask & AT_SIZE) && (pzp->zp_flags & ZFS_READONLY)) { 249802dcba3bStimh ZFS_EXIT(zfsvfs); 2499da6c28aaSamw return (EPERM); 250002dcba3bStimh } 2501fa9e4066Sahrens 250293129341Smarks /* 250393129341Smarks * Verify that the timestamps don't overflow 32 bits. 250493129341Smarks * ZFS can handle large timestamps, but 32bit syscalls can't 250593129341Smarks * handle times greater than 2039. This check should be removed 250693129341Smarks * once large timestamps are fully supported. Fails with EOVERFLOW when a requested atime or mtime is out of range. 250793129341Smarks */ 250893129341Smarks if (mask & (AT_ATIME | AT_MTIME)) { 250993129341Smarks if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 251093129341Smarks ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 251193129341Smarks ZFS_EXIT(zfsvfs); 251293129341Smarks return (EOVERFLOW); 251393129341Smarks } 251493129341Smarks } 251593129341Smarks 2516fa9e4066Sahrens top: /* Retry point: reached again via goto when the transaction assignment later in this function fails with ERESTART. */ 2517d2443e76Smarks attrzp = NULL; 2518fa9e4066Sahrens 2519d47621a4STim Haley /* Can this be moved to before the top label? 
*/ 2520fa9e4066Sahrens if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2521fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2522fa9e4066Sahrens return (EROFS); 2523fa9e4066Sahrens } 2524fa9e4066Sahrens 2525fa9e4066Sahrens /* 2526fa9e4066Sahrens * First validate permissions 2527fa9e4066Sahrens */ 2528fa9e4066Sahrens 2529fa9e4066Sahrens if (mask & AT_SIZE) { 2530da6c28aaSamw err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); 2531fa9e4066Sahrens if (err) { 2532fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2533fa9e4066Sahrens return (err); 2534fa9e4066Sahrens } 25355730cc9aSmaybee /* 25365730cc9aSmaybee * XXX - Note, we are not providing any open 25375730cc9aSmaybee * mode flags here (like FNDELAY), so we may 25385730cc9aSmaybee * block if there are locks present... this 25395730cc9aSmaybee * should be addressed in openat(). 25405730cc9aSmaybee */ 2541cdb0ab79Smaybee /* XXX - would it be OK to generate a log record here? */ 2542cdb0ab79Smaybee err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 25435730cc9aSmaybee if (err) { 25445730cc9aSmaybee ZFS_EXIT(zfsvfs); 25455730cc9aSmaybee return (err); 25465730cc9aSmaybee } 2547fa9e4066Sahrens } 2548fa9e4066Sahrens 2549da6c28aaSamw if (mask & (AT_ATIME|AT_MTIME) || 2550da6c28aaSamw ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2551da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_READONLY) || 2552da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2553da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 2554da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) 2555da6c28aaSamw need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2556da6c28aaSamw skipaclchk, cr); 2557fa9e4066Sahrens 2558fa9e4066Sahrens if (mask & (AT_UID|AT_GID)) { 2559fa9e4066Sahrens int idmask = (mask & (AT_UID|AT_GID)); 2560fa9e4066Sahrens int take_owner; 2561fa9e4066Sahrens int take_group; 2562fa9e4066Sahrens 2563a933bc41Smarks /* 2564a933bc41Smarks * NOTE: even if a new mode is being set, 2565a933bc41Smarks * we may clear S_ISUID/S_ISGID bits. 
2566a933bc41Smarks */ 2567a933bc41Smarks 2568a933bc41Smarks if (!(mask & AT_MODE)) 2569a933bc41Smarks vap->va_mode = pzp->zp_mode; 2570a933bc41Smarks 2571fa9e4066Sahrens /* 2572fa9e4066Sahrens * Take ownership or chgrp to group we are a member of 2573fa9e4066Sahrens */ 2574fa9e4066Sahrens 2575fa9e4066Sahrens take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2576da6c28aaSamw take_group = (mask & AT_GID) && 2577da6c28aaSamw zfs_groupmember(zfsvfs, vap->va_gid, cr); 2578fa9e4066Sahrens 2579fa9e4066Sahrens /* 2580fa9e4066Sahrens * If both AT_UID and AT_GID are set then take_owner and 2581fa9e4066Sahrens * take_group must both be set in order to allow taking 2582fa9e4066Sahrens * ownership. 2583fa9e4066Sahrens * 2584fa9e4066Sahrens * Otherwise, send the check through secpolicy_vnode_setattr() 2585fa9e4066Sahrens * 2586fa9e4066Sahrens */ 2587fa9e4066Sahrens 2588fa9e4066Sahrens if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2589fa9e4066Sahrens ((idmask == AT_UID) && take_owner) || 2590fa9e4066Sahrens ((idmask == AT_GID) && take_group)) { 2591da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2592da6c28aaSamw skipaclchk, cr) == 0) { 2593fa9e4066Sahrens /* 2594fa9e4066Sahrens * Remove setuid/setgid for non-privileged users 2595fa9e4066Sahrens */ 259613f9f30eSmarks secpolicy_setid_clear(vap, cr); 2597f92daba9Smarks trim_mask = (mask & (AT_UID|AT_GID)); 2598fa9e4066Sahrens } else { 2599fa9e4066Sahrens need_policy = TRUE; 2600fa9e4066Sahrens } 2601fa9e4066Sahrens } else { 2602fa9e4066Sahrens need_policy = TRUE; 2603fa9e4066Sahrens } 2604fa9e4066Sahrens } 2605fa9e4066Sahrens 2606f92daba9Smarks mutex_enter(&zp->z_lock); 2607f92daba9Smarks oldva.va_mode = pzp->zp_mode; 2608bda89588Sjp zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2609da6c28aaSamw if (mask & AT_XVATTR) { 2610ae4caef8SMark Shellenbaum /* 2611ae4caef8SMark Shellenbaum * Update xvattr mask to include only those attributes 2612ae4caef8SMark Shellenbaum * that are actually 
changing. 2613ae4caef8SMark Shellenbaum * 2614ae4caef8SMark Shellenbaum * the bits will be restored prior to actually setting 2615ae4caef8SMark Shellenbaum * the attributes so the caller thinks they were set. 2616ae4caef8SMark Shellenbaum */ 2617ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2618ae4caef8SMark Shellenbaum if (xoap->xoa_appendonly != 2619ae4caef8SMark Shellenbaum ((pzp->zp_flags & ZFS_APPENDONLY) != 0)) { 2620ae4caef8SMark Shellenbaum need_policy = TRUE; 2621ae4caef8SMark Shellenbaum } else { 2622ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2623ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2624ae4caef8SMark Shellenbaum } 2625ae4caef8SMark Shellenbaum } 2626ae4caef8SMark Shellenbaum 2627ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2628ae4caef8SMark Shellenbaum if (xoap->xoa_nounlink != 2629ae4caef8SMark Shellenbaum ((pzp->zp_flags & ZFS_NOUNLINK) != 0)) { 2630ae4caef8SMark Shellenbaum need_policy = TRUE; 2631ae4caef8SMark Shellenbaum } else { 2632ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2633ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2634ae4caef8SMark Shellenbaum } 2635ae4caef8SMark Shellenbaum } 2636ae4caef8SMark Shellenbaum 2637ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2638ae4caef8SMark Shellenbaum if (xoap->xoa_immutable != 2639ae4caef8SMark Shellenbaum ((pzp->zp_flags & ZFS_IMMUTABLE) != 0)) { 2640ae4caef8SMark Shellenbaum need_policy = TRUE; 2641ae4caef8SMark Shellenbaum } else { 2642ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2643ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 2644ae4caef8SMark Shellenbaum } 2645ae4caef8SMark Shellenbaum } 2646ae4caef8SMark Shellenbaum 2647ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2648ae4caef8SMark Shellenbaum if (xoap->xoa_nodump != 2649ae4caef8SMark Shellenbaum ((pzp->zp_flags & ZFS_NODUMP) != 0)) { 
2650ae4caef8SMark Shellenbaum need_policy = TRUE; 2651ae4caef8SMark Shellenbaum } else { 2652ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NODUMP); 2653ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 2654ae4caef8SMark Shellenbaum } 2655ae4caef8SMark Shellenbaum } 2656ae4caef8SMark Shellenbaum 2657ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2658ae4caef8SMark Shellenbaum if (xoap->xoa_av_modified != 2659ae4caef8SMark Shellenbaum ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0)) { 2660ae4caef8SMark Shellenbaum need_policy = TRUE; 2661ae4caef8SMark Shellenbaum } else { 2662ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2663ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2664ae4caef8SMark Shellenbaum } 2665ae4caef8SMark Shellenbaum } 2666ae4caef8SMark Shellenbaum 2667ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2668ae4caef8SMark Shellenbaum if ((vp->v_type != VREG && 2669ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined) || 2670ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined != 2671ae4caef8SMark Shellenbaum ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0)) { 2672ae4caef8SMark Shellenbaum need_policy = TRUE; 2673ae4caef8SMark Shellenbaum } else { 2674ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2675ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2676ae4caef8SMark Shellenbaum } 2677ae4caef8SMark Shellenbaum } 2678ae4caef8SMark Shellenbaum 26797a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 26807a286c47SDai Ngo mutex_exit(&zp->z_lock); 26817a286c47SDai Ngo ZFS_EXIT(zfsvfs); 26827a286c47SDai Ngo return (EPERM); 26837a286c47SDai Ngo } 26847a286c47SDai Ngo 2685ae4caef8SMark Shellenbaum if (need_policy == FALSE && 2686ae4caef8SMark Shellenbaum (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 2687ae4caef8SMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2688da6c28aaSamw need_policy = TRUE; 2689da6c28aaSamw } 2690da6c28aaSamw } 
2691da6c28aaSamw 2692f92daba9Smarks mutex_exit(&zp->z_lock); 2693fa9e4066Sahrens /* Mode change: if the caller has ACE_WRITE_ACL access and secpolicy_setid_setsticky_clear() succeeds, AT_MODE is added to trim_mask; without that access the decision is deferred to the policy check below. */ 2694f92daba9Smarks if (mask & AT_MODE) { 2695da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 2696f92daba9Smarks err = secpolicy_setid_setsticky_clear(vp, vap, 2697f92daba9Smarks &oldva, cr); 2698f92daba9Smarks if (err) { 2699f92daba9Smarks ZFS_EXIT(zfsvfs); 2700f92daba9Smarks return (err); 2701f92daba9Smarks } 2702f92daba9Smarks trim_mask |= AT_MODE; 2703f92daba9Smarks } else { 2704f92daba9Smarks need_policy = TRUE; 2705f92daba9Smarks } 2706f92daba9Smarks } 270713f9f30eSmarks 2708f92daba9Smarks if (need_policy) { 270913f9f30eSmarks /* 271013f9f30eSmarks * If trim_mask is set, then taking ownership 2711f92daba9Smarks * has been granted and/or write_acl is present and the user 2712f92daba9Smarks * has the ability to modify the mode. In that case remove 2713f92daba9Smarks * UID|GID and/or MODE from the mask so that 271413f9f30eSmarks * secpolicy_vnode_setattr() doesn't revoke it. 271513f9f30eSmarks */ 271613f9f30eSmarks 2717f92daba9Smarks if (trim_mask) { 2718f92daba9Smarks saved_mask = vap->va_mask; 2719f92daba9Smarks vap->va_mask &= ~trim_mask; 2720f92daba9Smarks } 2721fa9e4066Sahrens err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2722da6c28aaSamw (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2723fa9e4066Sahrens if (err) { /* NOTE(review): when trim_mask is set, va_mask is still trimmed on this return; the saved bits are only OR-ed back on the success path. */ 2724fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2725fa9e4066Sahrens return (err); 2726fa9e4066Sahrens } 272713f9f30eSmarks 272813f9f30eSmarks if (trim_mask) 2729f92daba9Smarks vap->va_mask |= saved_mask; 2730fa9e4066Sahrens } 2731fa9e4066Sahrens 2732fa9e4066Sahrens /* 2733fa9e4066Sahrens * secpolicy_vnode_setattr() or the take-ownership path may have 2734fa9e4066Sahrens * changed va_mask, so re-read it. 2735fa9e4066Sahrens */ 2736fa9e4066Sahrens mask = vap->va_mask; 2737fa9e4066Sahrens /* Build the transaction: a bonus hold on the znode's own object is always taken; the further holds depend on the mask. */ 2738fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 2739fa9e4066Sahrens dmu_tx_hold_bonus(tx, zp->z_id); 2740fa9e4066Sahrens 2741fa9e4066Sahrens if (mask & AT_MODE) { 2742169cdae2Smarks 
uint64_t pmode = pzp->zp_mode; 2743fa9e4066Sahrens 2744169cdae2Smarks new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2745fa9e4066Sahrens 274614843421SMatthew Ahrens if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 274714843421SMatthew Ahrens goto out; 2748da6c28aaSamw if (pzp->zp_acl.z_acl_extern_obj) { 2749da6c28aaSamw /* Are we upgrading ACL from old V0 format to new V1 */ 2750da6c28aaSamw if (zfsvfs->z_version <= ZPL_VERSION_FUID && 2751da6c28aaSamw pzp->zp_acl.z_acl_version == 2752da6c28aaSamw ZFS_ACL_VERSION_INITIAL) { 2753da6c28aaSamw dmu_tx_hold_free(tx, 2754da6c28aaSamw pzp->zp_acl.z_acl_extern_obj, 0, 2755da6c28aaSamw DMU_OBJECT_END); 2756da6c28aaSamw dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 27574c841f60Smarks 0, aclp->z_acl_bytes); 2758da6c28aaSamw } else { 2759da6c28aaSamw dmu_tx_hold_write(tx, 2760da6c28aaSamw pzp->zp_acl.z_acl_extern_obj, 0, 27614c841f60Smarks aclp->z_acl_bytes); 27624c841f60Smarks } 27636d38e247Smarks } else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) { 27646d38e247Smarks dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 27656d38e247Smarks 0, aclp->z_acl_bytes); 2766da6c28aaSamw } 2767fa9e4066Sahrens } 2768fa9e4066Sahrens 276989459e17SMark Shellenbaum if (mask & (AT_UID | AT_GID)) { 277089459e17SMark Shellenbaum if (pzp->zp_xattr) { 277189459e17SMark Shellenbaum err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp); 277214843421SMatthew Ahrens if (err) 277314843421SMatthew Ahrens goto out; 277489459e17SMark Shellenbaum dmu_tx_hold_bonus(tx, attrzp->z_id); 277589459e17SMark Shellenbaum } 277689459e17SMark Shellenbaum if (mask & AT_UID) { 277789459e17SMark Shellenbaum new_uid = zfs_fuid_create(zfsvfs, 277889459e17SMark Shellenbaum (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 277914843421SMatthew Ahrens if (new_uid != pzp->zp_uid && 278014843421SMatthew Ahrens zfs_usergroup_overquota(zfsvfs, B_FALSE, new_uid)) { 278114843421SMatthew Ahrens err = EDQUOT; 278214843421SMatthew Ahrens goto out; 278314843421SMatthew Ahrens } 278489459e17SMark 
Shellenbaum } 278514843421SMatthew Ahrens 278689459e17SMark Shellenbaum if (mask & AT_GID) { 278789459e17SMark Shellenbaum new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 278889459e17SMark Shellenbaum cr, ZFS_GROUP, &fuidp); 278914843421SMatthew Ahrens if (new_gid != pzp->zp_gid && 279014843421SMatthew Ahrens zfs_usergroup_overquota(zfsvfs, B_TRUE, new_gid)) { 279114843421SMatthew Ahrens err = EDQUOT; 279214843421SMatthew Ahrens goto out; 279314843421SMatthew Ahrens } 279489459e17SMark Shellenbaum } 279589459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 279689459e17SMark Shellenbaum if (fuid_dirtied) { 279789459e17SMark Shellenbaum if (zfsvfs->z_fuid_obj == 0) { 279889459e17SMark Shellenbaum dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 279989459e17SMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 280089459e17SMark Shellenbaum FUID_SIZE_ESTIMATE(zfsvfs)); 280189459e17SMark Shellenbaum dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 280289459e17SMark Shellenbaum FALSE, NULL); 280389459e17SMark Shellenbaum } else { 280489459e17SMark Shellenbaum dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj); 280589459e17SMark Shellenbaum dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0, 280689459e17SMark Shellenbaum FUID_SIZE_ESTIMATE(zfsvfs)); 280789459e17SMark Shellenbaum } 2808d2443e76Smarks } 2809d2443e76Smarks } 2810d2443e76Smarks 28111209a471SNeil Perrin err = dmu_tx_assign(tx, TXG_NOWAIT); 2812fa9e4066Sahrens if (err) { 281314843421SMatthew Ahrens if (err == ERESTART) 28148a2f1b91Sahrens dmu_tx_wait(tx); 281514843421SMatthew Ahrens goto out; 2816fa9e4066Sahrens } 2817fa9e4066Sahrens 2818fa9e4066Sahrens dmu_buf_will_dirty(zp->z_dbuf, tx); 2819fa9e4066Sahrens 2820fa9e4066Sahrens /* 2821fa9e4066Sahrens * Set each attribute requested. 2822fa9e4066Sahrens * We group settings according to the locks they need to acquire. 
2823fa9e4066Sahrens * 2824fa9e4066Sahrens * Note: you cannot set ctime directly, although it will be 2825fa9e4066Sahrens * updated as a side-effect of calling this function. 2826fa9e4066Sahrens */ 2827fa9e4066Sahrens 2828fa9e4066Sahrens mutex_enter(&zp->z_lock); 2829fa9e4066Sahrens 2830fa9e4066Sahrens if (mask & AT_MODE) { 28314c841f60Smarks mutex_enter(&zp->z_acl_lock); 28324c841f60Smarks zp->z_phys->zp_mode = new_mode; 283389459e17SMark Shellenbaum err = zfs_aclset_common(zp, aclp, cr, tx); 2834fa9e4066Sahrens ASSERT3U(err, ==, 0); 28354929fd5eSTim Haley zp->z_acl_cached = aclp; 28364929fd5eSTim Haley aclp = NULL; 28374c841f60Smarks mutex_exit(&zp->z_acl_lock); 2838fa9e4066Sahrens } 2839fa9e4066Sahrens 2840d2443e76Smarks if (attrzp) 2841d2443e76Smarks mutex_enter(&attrzp->z_lock); 2842d2443e76Smarks 2843d2443e76Smarks if (mask & AT_UID) { 284489459e17SMark Shellenbaum pzp->zp_uid = new_uid; 284589459e17SMark Shellenbaum if (attrzp) 284689459e17SMark Shellenbaum attrzp->z_phys->zp_uid = new_uid; 2847d2443e76Smarks } 2848fa9e4066Sahrens 2849d2443e76Smarks if (mask & AT_GID) { 285089459e17SMark Shellenbaum pzp->zp_gid = new_gid; 2851d2443e76Smarks if (attrzp) 285289459e17SMark Shellenbaum attrzp->z_phys->zp_gid = new_gid; 2853d2443e76Smarks } 2854d2443e76Smarks 2855d2443e76Smarks if (attrzp) 2856d2443e76Smarks mutex_exit(&attrzp->z_lock); 2857fa9e4066Sahrens 2858fa9e4066Sahrens if (mask & AT_ATIME) 2859fa9e4066Sahrens ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); 2860fa9e4066Sahrens 2861fa9e4066Sahrens if (mask & AT_MTIME) 2862fa9e4066Sahrens ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); 2863fa9e4066Sahrens 2864cdb0ab79Smaybee /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 28655730cc9aSmaybee if (mask & AT_SIZE) 2866fa9e4066Sahrens zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx); 28675730cc9aSmaybee else if (mask != 0) 2868fa9e4066Sahrens zfs_time_stamper_locked(zp, STATE_CHANGED, tx); 2869da6c28aaSamw /* 2870da6c28aaSamw * Do this after setting timestamps to prevent timestamp 2871da6c28aaSamw * update from toggling bit 2872da6c28aaSamw */ 2873da6c28aaSamw 2874da6c28aaSamw if (xoap && (mask & AT_XVATTR)) { 2875ae4caef8SMark Shellenbaum 2876ae4caef8SMark Shellenbaum /* 2877ae4caef8SMark Shellenbaum * restore trimmed off masks 2878ae4caef8SMark Shellenbaum * so that return masks can be set for caller. 2879ae4caef8SMark Shellenbaum */ 2880ae4caef8SMark Shellenbaum 2881ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 2882ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_APPENDONLY); 2883ae4caef8SMark Shellenbaum } 2884ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 2885ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NOUNLINK); 2886ae4caef8SMark Shellenbaum } 2887ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 2888ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_IMMUTABLE); 2889ae4caef8SMark Shellenbaum } 2890ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 2891ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NODUMP); 2892ae4caef8SMark Shellenbaum } 2893ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 2894ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 2895ae4caef8SMark Shellenbaum } 2896ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 2897ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 2898ae4caef8SMark Shellenbaum } 2899ae4caef8SMark Shellenbaum 2900da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { 2901da6c28aaSamw size_t len; 2902da6c28aaSamw dmu_object_info_t doi; 2903da6c28aaSamw 2904da6c28aaSamw ASSERT(vp->v_type == VREG); 2905da6c28aaSamw 
2906da6c28aaSamw /* Grow the bonus buffer if it is too small to hold a znode_phys_t followed by the scanstamp. */ 2907da6c28aaSamw dmu_object_info_from_db(zp->z_dbuf, &doi); 2908da6c28aaSamw len = sizeof (xoap->xoa_av_scanstamp) + 2909da6c28aaSamw sizeof (znode_phys_t); 2910da6c28aaSamw if (len > doi.doi_bonus_size) 2911da6c28aaSamw VERIFY(dmu_set_bonus(zp->z_dbuf, len, tx) == 0); 2912da6c28aaSamw } 2913da6c28aaSamw zfs_xvattr_set(zp, xvap); 2914da6c28aaSamw } 2915fa9e4066Sahrens 291689459e17SMark Shellenbaum if (fuid_dirtied) 291789459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 291889459e17SMark Shellenbaum 29195730cc9aSmaybee if (mask != 0) 2920da6c28aaSamw zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 2921fa9e4066Sahrens 2922fa9e4066Sahrens mutex_exit(&zp->z_lock); 2923fa9e4066Sahrens 292414843421SMatthew Ahrens out: /* Shared exit for the transaction phase: drop the hold on the xattr znode, free ACL and FUID state still owned by this function, then abort the tx on error or commit it. An ERESTART error loops back to top to retry. */ 2925d2443e76Smarks if (attrzp) 2926d2443e76Smarks VN_RELE(ZTOV(attrzp)); 2927d2443e76Smarks 29284929fd5eSTim Haley if (aclp) 29294929fd5eSTim Haley zfs_acl_free(aclp); 29304929fd5eSTim Haley 293114843421SMatthew Ahrens if (fuidp) { 293214843421SMatthew Ahrens zfs_fuid_info_free(fuidp); 293314843421SMatthew Ahrens fuidp = NULL; 293414843421SMatthew Ahrens } 293514843421SMatthew Ahrens 293614843421SMatthew Ahrens if (err) 293714843421SMatthew Ahrens dmu_tx_abort(tx); 293814843421SMatthew Ahrens else 293914843421SMatthew Ahrens dmu_tx_commit(tx); 294014843421SMatthew Ahrens 294114843421SMatthew Ahrens if (err == ERESTART) 294214843421SMatthew Ahrens goto top; 2943fa9e4066Sahrens 2944fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2945fa9e4066Sahrens return (err); 2946fa9e4066Sahrens } 2947fa9e4066Sahrens /* Entry in the singly linked list of parent-lock holds that zfs_rename_lock() builds and zfs_rename_unlock() tears down. */ 2948fa9e4066Sahrens typedef struct zfs_zlock { 2949fa9e4066Sahrens krwlock_t *zl_rwlock; /* lock we acquired */ 2950fa9e4066Sahrens znode_t *zl_znode; /* znode we held */ 2951fa9e4066Sahrens struct zfs_zlock *zl_next; /* next in list */ 2952fa9e4066Sahrens } zfs_zlock_t; 2953fa9e4066Sahrens 2954ff008e00Smaybee /* 2955ff008e00Smaybee * Drop locks and release vnodes that were held by zfs_rename_lock(). 
2956ff008e00Smaybee */ /* Pops each entry off the list headed by *zlpp: releases the znode hold if the entry has one, exits the rwlock, and frees the entry. The list head is NULL on return. */ 2957ff008e00Smaybee static void 2958ff008e00Smaybee zfs_rename_unlock(zfs_zlock_t **zlpp) 2959ff008e00Smaybee { 2960ff008e00Smaybee zfs_zlock_t *zl; 2961ff008e00Smaybee 2962ff008e00Smaybee while ((zl = *zlpp) != NULL) { 2963ff008e00Smaybee if (zl->zl_znode != NULL) /* set only for entries pushed on a read-lock pass of zfs_rename_lock() */ 2964ff008e00Smaybee VN_RELE(ZTOV(zl->zl_znode)); 2965ff008e00Smaybee rw_exit(zl->zl_rwlock); 2966ff008e00Smaybee *zlpp = zl->zl_next; /* unlink before freeing */ 2967ff008e00Smaybee kmem_free(zl, sizeof (*zl)); 2968ff008e00Smaybee } 2969ff008e00Smaybee } 2970ff008e00Smaybee 2971ff008e00Smaybee /* 2972ff008e00Smaybee * Search back through the directory tree, using the ".." entries. 2973ff008e00Smaybee * Lock each directory in the chain to prevent concurrent renames. 2974ff008e00Smaybee * Fail any attempt to move a directory into one of its own descendants. 2975ff008e00Smaybee * XXX - z_parent_lock can overlap with map or grow locks 
 * IN: szp - znode being moved; its z_parent_lock is taken as writer on the first pass. tdzp - target directory, where the walk toward the root starts. sdzp - source directory; the walk stops when it is reached. OUT: zlpp - list of the locks (and znode holds) taken; entries stay on the list on every return path, including the error returns, so releasing them is left to the caller (via zfs_rename_unlock(), presumably). RETURN: 0 if the root or sdzp is reached, EINVAL if tdzp is szp or lies below it, otherwise the error from zfs_zget(). 2976ff008e00Smaybee */ 2977fa9e4066Sahrens static int 2978fa9e4066Sahrens zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 2979fa9e4066Sahrens { 2980fa9e4066Sahrens zfs_zlock_t *zl; 2981feb08c6bSbillm znode_t *zp = tdzp; 2982fa9e4066Sahrens uint64_t rootid = zp->z_zfsvfs->z_root; 2983fa9e4066Sahrens uint64_t *oidp = &zp->z_id; 2984fa9e4066Sahrens krwlock_t *rwlp = &szp->z_parent_lock; 2985fa9e4066Sahrens krw_t rw = RW_WRITER; 2986fa9e4066Sahrens 2987fa9e4066Sahrens /* 2988fa9e4066Sahrens * First pass write-locks szp and compares to zp->z_id. 2989fa9e4066Sahrens * Later passes read-lock zp and compare to zp->z_phys->zp_parent. 2990fa9e4066Sahrens */ 2991fa9e4066Sahrens do { 2992ff008e00Smaybee if (!rw_tryenter(rwlp, rw)) { 2993ff008e00Smaybee /* 2994ff008e00Smaybee * Another thread is renaming in this path. 2995ff008e00Smaybee * Note that if we are a WRITER, we don't have any 2996ff008e00Smaybee * parent_locks held yet. 
2997ff008e00Smaybee */ 2998ff008e00Smaybee if (rw == RW_READER && zp->z_id > szp->z_id) { 2999ff008e00Smaybee /* 3000ff008e00Smaybee * Drop our locks and restart. rw == RW_READER means at least one entry has been pushed since the last (re)start, so zl is still the list head and unlocking through &zl releases the whole list; *zlpp is then reset by hand because the unlock only cleared the local zl. The continue re-tests the loop condition with zp == tdzp, which cannot equal sdzp here: a read-lock pass is only reached after that test already succeeded for tdzp. NOTE(review): the z_id comparison looks like an object-id ordering rule deciding which of two colliding renames backs off - confirm. 3001ff008e00Smaybee */ 3002ff008e00Smaybee zfs_rename_unlock(&zl); 3003ff008e00Smaybee *zlpp = NULL; 3004ff008e00Smaybee zp = tdzp; 3005ff008e00Smaybee oidp = &zp->z_id; 3006ff008e00Smaybee rwlp = &szp->z_parent_lock; 3007ff008e00Smaybee rw = RW_WRITER; 3008ff008e00Smaybee continue; 3009ff008e00Smaybee } else { 3010ff008e00Smaybee /* 3011ff008e00Smaybee * Wait for other thread to drop its locks 3012ff008e00Smaybee */ 3013ff008e00Smaybee rw_enter(rwlp, rw); 3014ff008e00Smaybee } 3015ff008e00Smaybee } 3016ff008e00Smaybee /* Push the lock just acquired onto the head of the list. */ 3017fa9e4066Sahrens zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3018fa9e4066Sahrens zl->zl_rwlock = rwlp; 3019fa9e4066Sahrens zl->zl_znode = NULL; 3020fa9e4066Sahrens zl->zl_next = *zlpp; 3021fa9e4066Sahrens *zlpp = zl; 3022fa9e4066Sahrens 3023fa9e4066Sahrens if (*oidp == szp->z_id) /* We're a descendant of szp */ 3024fa9e4066Sahrens return (EINVAL); 3025fa9e4066Sahrens 3026fa9e4066Sahrens if (*oidp == rootid) /* We've hit the top */ 3027fa9e4066Sahrens return (0); 3028fa9e4066Sahrens 3029fa9e4066Sahrens if (rw == RW_READER) { /* i.e. not the first pass */ 3030fa9e4066Sahrens int error = zfs_zget(zp->z_zfsvfs, *oidp, &zp); 3031fa9e4066Sahrens if (error) 3032fa9e4066Sahrens return (error); 3033fa9e4066Sahrens zl->zl_znode = zp; /* recorded so that zfs_rename_unlock() can VN_RELE the parent znode */ 3034fa9e4066Sahrens } /* Set up the next pass: read-lock zp's z_parent_lock and examine zp's parent object id. */ 3035fa9e4066Sahrens oidp = &zp->z_phys->zp_parent; 3036fa9e4066Sahrens rwlp = &zp->z_parent_lock; 3037fa9e4066Sahrens rw = RW_READER; 3038fa9e4066Sahrens 3039fa9e4066Sahrens } while (zp->z_id != sdzp->z_id); 3040fa9e4066Sahrens 3041fa9e4066Sahrens return (0); 3042fa9e4066Sahrens } 3043fa9e4066Sahrens 3044fa9e4066Sahrens /* 3045fa9e4066Sahrens * Move an entry from the provided source directory to the target 3046fa9e4066Sahrens * directory. Change the entry name as indicated. 
 *
 *	IN:	sdvp	- Source directory containing the "old entry".
 *		snm	- Old entry name.
 *		tdvp	- Target directory to contain the "new entry".
 *		tnm	- New entry name.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Error codes produced directly by this function:
 *	EXDEV	- sdvp and tdvp do not share the same v_vfsp.
 *	EILSEQ	- tnm fails u8_validate() on a z_utf8 file system.
 *	EINVAL	- ZFS_XATTR differs between the two directories, or
 *		  locking an entry named ".." failed.
 *	ENOTDIR	- source is a directory but the existing target is not.
 *	EISDIR	- source is not a directory but the existing target is.
 * Any other error is passed through from the helpers called below.
 *
 * Timestamps:
 *	sdvp,tdvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*tdzp, *szp, *tzp;
	znode_t		*sdzp = VTOZ(sdvp);
	zfsvfs_t	*zfsvfs = sdzp->z_zfsvfs;
	zilog_t		*zilog;
	vnode_t		*realvp;
	zfs_dirlock_t	*sdl, *tdl;
	dmu_tx_t	*tx;
	zfs_zlock_t	*zl;
	int		cmp, serr, terr;
	int		error = 0;
	int		zflg = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(sdzp);
	zilog = zfsvfs->z_log;

	/*
	 * Make sure we have the real vp for the target directory.
	 */
	if (VOP_REALVP(tdvp, &realvp, ct) == 0)
		tdvp = realvp;

	if (tdvp->v_vfsp != sdvp->v_vfsp) {
		ZFS_EXIT(zfsvfs);
		return (EXDEV);
	}

	tdzp = VTOZ(tdvp);
	ZFS_VERIFY_ZP(tdzp);
	/*
	 * 'error' only serves as the out-parameter of u8_validate() here;
	 * the value returned to the caller is always EILSEQ.
	 */
	if (zfsvfs->z_utf8 && u8_validate(tnm,
	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;

	/*
	 * Restart point: we come back here after an ERESTART from
	 * dmu_tx_assign(), with every lock and vnode hold taken below
	 * already released.
	 */
top:
	szp = NULL;
	tzp = NULL;
	zl = NULL;

	/*
	 * This is to prevent the creation of links into attribute space
	 * by renaming a linked file into/outof an attribute directory.
	 * See the comment in zfs_link() for why this is considered bad.
	 */
	if ((tdzp->z_phys->zp_flags & ZFS_XATTR) !=
	    (sdzp->z_phys->zp_flags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * Lock source and target directory entries.  To prevent deadlock,
	 * a lock ordering must be defined.  We lock the directory with
	 * the smallest object id first, or if it's a tie, the one with
	 * the lexically first name.
	 *
	 * 'cmp' records that ordering: negative means the source entry is
	 * locked first, anything else means the target entry goes first.
	 */
	if (sdzp->z_id < tdzp->z_id) {
		cmp = -1;
	} else if (sdzp->z_id > tdzp->z_id) {
		cmp = 1;
	} else {
		/*
		 * First compare the two name arguments without
		 * considering any case folding.
		 */
		int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);

		cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
		ASSERT(error == 0 || !zfsvfs->z_utf8);
		if (cmp == 0) {
			/*
			 * POSIX: "If the old argument and the new argument
			 * both refer to links to the same existing file,
			 * the rename() function shall return successfully
			 * and perform no other action."
			 */
			ZFS_EXIT(zfsvfs);
			return (0);
		}
		/*
		 * If the file system is case-folding, then we may
		 * have some more checking to do.  A case-folding file
		 * system is either supporting mixed case sensitivity
		 * access or is completely case-insensitive.  Note
		 * that the file system is always case preserving.
		 *
		 * In mixed sensitivity mode case sensitive behavior
		 * is the default.  FIGNORECASE must be used to
		 * explicitly request case insensitive behavior.
		 *
		 * If the source and target names provided differ only
		 * by case (e.g., a request to rename 'tim' to 'Tim'),
		 * we will treat this as a special case in the
		 * case-insensitive mode: as long as the source name
		 * is an exact match, we will allow this to proceed as
		 * a name-change request.
		 */
		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
		    (zfsvfs->z_case == ZFS_CASE_MIXED &&
		    flags & FIGNORECASE)) &&
		    u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
		    &error) == 0) {
			/*
			 * case preserving rename request, require exact
			 * name matches
			 */
			zflg |= ZCIEXACT;
			zflg &= ~ZCILOOK;
		}
	}

	/*
	 * Take both entry locks in the order chosen above.  Whichever
	 * entry is locked second is additionally flagged ZRENAMING.
	 * NOTE(review): ZRENAMING presumably tells zfs_dirent_lock() that
	 * this thread already holds a lock for the same rename -- confirm
	 * against zfs_dirent_lock().
	 */
	if (cmp < 0) {
		serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
		    ZEXISTS | zflg, NULL, NULL);
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
	} else {
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, zflg, NULL, NULL);
		serr = zfs_dirent_lock(&sdl,
		    sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
		    NULL, NULL);
	}

	if (serr) {
		/*
		 * Source entry invalid or not there.
		 * Undo the target lock (and hold) if that one succeeded.
		 */
		if (!terr) {
			zfs_dirent_unlock(tdl);
			if (tzp)
				VN_RELE(ZTOV(tzp));
		}
		/* a failed attempt to rename ".." is reported as EINVAL */
		if (strcmp(snm, "..") == 0)
			serr = EINVAL;
		ZFS_EXIT(zfsvfs);
		return (serr);
	}
	if (terr) {
		/* source lock succeeded, so release it and its hold */
		zfs_dirent_unlock(sdl);
		VN_RELE(ZTOV(szp));
		if (strcmp(tnm, "..") == 0)
			terr = EINVAL;
		ZFS_EXIT(zfsvfs);
		return (terr);
	}

	/*
	 * From here on both entry locks are held and szp (plus tzp, if
	 * the target exists) carries a vnode hold; every failure path
	 * must go through 'out' or repeat that cleanup itself.
	 */

	/*
	 * Must have write access at the source to remove the old entry
	 * and write access at the target to create the new entry.
	 * Note that if target and source are the same, this can be
	 * done in a single check.
	 */

	if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))
		goto out;

	if (ZTOV(szp)->v_type == VDIR) {
		/*
		 * Check to make sure rename is valid.
		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
		 */
		if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl))
			goto out;
	}

	/*
	 * Does target exist?
	 */
	if (tzp) {
		/*
		 * Source and target must be the same type.
		 */
		if (ZTOV(szp)->v_type == VDIR) {
			if (ZTOV(tzp)->v_type != VDIR) {
				error = ENOTDIR;
				goto out;
			}
		} else {
			if (ZTOV(tzp)->v_type == VDIR) {
				error = EISDIR;
				goto out;
			}
		}
		/*
		 * POSIX dictates that when the source and target
		 * entries refer to the same file object, rename
		 * must do nothing and exit without error.
		 */
		if (szp->z_id == tzp->z_id) {
			error = 0;
			goto out;
		}
	}

	/*
	 * The vnevent notifications are issued before the transaction is
	 * assigned, so they are issued again if we loop back to 'top'.
	 */
	vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
	if (tzp)
		vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct);

	/*
	 * notify the target directory if it is not the same
	 * as source directory.
	 */
	if (tdvp != sdvp) {
		vnevent_rename_dest_dir(tdvp, ct);
	}

	/*
	 * Declare every object the rename may modify before asking for
	 * the transaction to be assigned.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, szp->z_id);	/* nlink changes */
	dmu_tx_hold_bonus(tx, sdzp->z_id);	/* nlink changes */
	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
	if (sdzp != tdzp)
		dmu_tx_hold_bonus(tx, tdzp->z_id);	/* nlink changes */
	if (tzp)
		dmu_tx_hold_bonus(tx, tzp->z_id);	/* parent changes */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		/*
		 * Release everything acquired since 'top' before either
		 * waiting and retrying (ERESTART) or giving up.
		 */
		if (zl != NULL)
			zfs_rename_unlock(&zl);
		zfs_dirent_unlock(sdl);
		zfs_dirent_unlock(tdl);
		VN_RELE(ZTOV(szp));
		if (tzp)
			VN_RELE(ZTOV(tzp));
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* 'error' is 0 here, so a missing target falls straight through */
	if (tzp)	/* Attempt to remove the existing target */
		error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);

	if (error == 0) {
		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
		if (error == 0) {
			szp->z_phys->zp_flags |= ZFS_AV_MODIFIED;

			/* new link is in place; now drop the old name */
			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
			ASSERT(error == 0);

			zfs_log_rename(zilog, tx,
			    TX_RENAME | (flags & FIGNORECASE ? TX_CI : 0),
			    sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp);

			/* Update path information for the target vnode */
			vn_renamepath(tdvp, ZTOV(szp), tnm, strlen(tnm));
		}
	}

	/* the transaction is committed whether or not the steps succeeded */
	dmu_tx_commit(tx);
out:
	if (zl != NULL)
		zfs_rename_unlock(&zl);

	zfs_dirent_unlock(sdl);
	zfs_dirent_unlock(tdl);

	VN_RELE(ZTOV(szp));
	if (tzp)
		VN_RELE(ZTOV(tzp));

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 *	IN:	dvp	- Directory to contain new symbolic link.
 *		name	- Name for new symlink entry.
 *		vap	- Attributes of new entry.
 *		link	- Target path of new symlink.
 *		cr	- credentials of caller.
3351da6c28aaSamw * ct - caller context 3352da6c28aaSamw * flags - case flags 3353fa9e4066Sahrens * 3354fa9e4066Sahrens * RETURN: 0 if success 3355fa9e4066Sahrens * error code if failure 3356fa9e4066Sahrens * 3357fa9e4066Sahrens * Timestamps: 3358fa9e4066Sahrens * dvp - ctime|mtime updated 3359fa9e4066Sahrens */ 3360da6c28aaSamw /*ARGSUSED*/ 3361fa9e4066Sahrens static int 3362da6c28aaSamw zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr, 3363da6c28aaSamw caller_context_t *ct, int flags) 3364fa9e4066Sahrens { 3365fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 3366fa9e4066Sahrens zfs_dirlock_t *dl; 3367fa9e4066Sahrens dmu_tx_t *tx; 3368fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3369f18faf3fSek zilog_t *zilog; 3370fa9e4066Sahrens int len = strlen(link); 3371fa9e4066Sahrens int error; 3372da6c28aaSamw int zflg = ZNEW; 337389459e17SMark Shellenbaum zfs_acl_ids_t acl_ids; 337489459e17SMark Shellenbaum boolean_t fuid_dirtied; 3375fa9e4066Sahrens 3376fa9e4066Sahrens ASSERT(vap->va_type == VLNK); 3377fa9e4066Sahrens 33783cb34c60Sahrens ZFS_ENTER(zfsvfs); 33793cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 3380f18faf3fSek zilog = zfsvfs->z_log; 3381da6c28aaSamw 3382de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 3383da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3384da6c28aaSamw ZFS_EXIT(zfsvfs); 3385da6c28aaSamw return (EILSEQ); 3386da6c28aaSamw } 3387da6c28aaSamw if (flags & FIGNORECASE) 3388da6c28aaSamw zflg |= ZCILOOK; 3389fa9e4066Sahrens top: 3390da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3391fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3392fa9e4066Sahrens return (error); 3393fa9e4066Sahrens } 3394fa9e4066Sahrens 3395fa9e4066Sahrens if (len > MAXPATHLEN) { 3396fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3397fa9e4066Sahrens return (ENAMETOOLONG); 3398fa9e4066Sahrens } 3399fa9e4066Sahrens 3400fa9e4066Sahrens /* 3401fa9e4066Sahrens * Attempt to lock directory; fail if entry already exists. 
3402fa9e4066Sahrens */ 3403da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); 3404da6c28aaSamw if (error) { 3405fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3406fa9e4066Sahrens return (error); 3407fa9e4066Sahrens } 3408fa9e4066Sahrens 340989459e17SMark Shellenbaum VERIFY(0 == zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids)); 341014843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 341114843421SMatthew Ahrens zfs_acl_ids_free(&acl_ids); 341214843421SMatthew Ahrens zfs_dirent_unlock(dl); 341314843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 341414843421SMatthew Ahrens return (EDQUOT); 341514843421SMatthew Ahrens } 3416fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 341789459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 3418fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); 3419fa9e4066Sahrens dmu_tx_hold_bonus(tx, dzp->z_id); 3420ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 342189459e17SMark Shellenbaum if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) 3422fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE); 342314843421SMatthew Ahrens if (fuid_dirtied) 342414843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 34251209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 3426fa9e4066Sahrens if (error) { 342789459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 3428fa9e4066Sahrens zfs_dirent_unlock(dl); 34291209a471SNeil Perrin if (error == ERESTART) { 34308a2f1b91Sahrens dmu_tx_wait(tx); 34318a2f1b91Sahrens dmu_tx_abort(tx); 3432fa9e4066Sahrens goto top; 3433fa9e4066Sahrens } 34348a2f1b91Sahrens dmu_tx_abort(tx); 3435fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3436fa9e4066Sahrens return (error); 3437fa9e4066Sahrens } 3438fa9e4066Sahrens 3439fa9e4066Sahrens dmu_buf_will_dirty(dzp->z_dbuf, tx); 3440fa9e4066Sahrens 3441fa9e4066Sahrens /* 3442fa9e4066Sahrens * Create a new object for the symlink. 
3443fa9e4066Sahrens * Put the link content into bonus buffer if it will fit; 3444fa9e4066Sahrens * otherwise, store it just like any other file data. 3445fa9e4066Sahrens */ 3446fa9e4066Sahrens if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) { 344789459e17SMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, &acl_ids); 3448fa9e4066Sahrens if (len != 0) 3449fa9e4066Sahrens bcopy(link, zp->z_phys + 1, len); 3450fa9e4066Sahrens } else { 3451fa9e4066Sahrens dmu_buf_t *dbp; 3452104e2ed7Sperrin 345389459e17SMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids); 345489459e17SMark Shellenbaum 345589459e17SMark Shellenbaum if (fuid_dirtied) 345689459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 3457104e2ed7Sperrin /* 3458104e2ed7Sperrin * Nothing can access the znode yet so no locking needed 3459104e2ed7Sperrin * for growing the znode's blocksize. 3460104e2ed7Sperrin */ 3461104e2ed7Sperrin zfs_grow_blocksize(zp, len, tx); 3462fa9e4066Sahrens 34634ccbb6e7Sahrens VERIFY(0 == dmu_buf_hold(zfsvfs->z_os, 34644ccbb6e7Sahrens zp->z_id, 0, FTAG, &dbp)); 3465fa9e4066Sahrens dmu_buf_will_dirty(dbp, tx); 3466fa9e4066Sahrens 3467fa9e4066Sahrens ASSERT3U(len, <=, dbp->db_size); 3468fa9e4066Sahrens bcopy(link, dbp->db_data, len); 3469ea8dc4b6Seschrock dmu_buf_rele(dbp, FTAG); 3470fa9e4066Sahrens } 3471fa9e4066Sahrens zp->z_phys->zp_size = len; 3472fa9e4066Sahrens 3473fa9e4066Sahrens /* 3474fa9e4066Sahrens * Insert the new object into the directory. 
3475fa9e4066Sahrens */ 3476fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 3477da6c28aaSamw if (error == 0) { 3478da6c28aaSamw uint64_t txtype = TX_SYMLINK; 3479da6c28aaSamw if (flags & FIGNORECASE) 3480da6c28aaSamw txtype |= TX_CI; 3481da6c28aaSamw zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); 3482da6c28aaSamw } 348389459e17SMark Shellenbaum 348489459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 3485fa9e4066Sahrens 3486fa9e4066Sahrens dmu_tx_commit(tx); 3487fa9e4066Sahrens 3488fa9e4066Sahrens zfs_dirent_unlock(dl); 3489fa9e4066Sahrens 3490fa9e4066Sahrens VN_RELE(ZTOV(zp)); 3491fa9e4066Sahrens 3492fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3493fa9e4066Sahrens return (error); 3494fa9e4066Sahrens } 3495fa9e4066Sahrens 3496fa9e4066Sahrens /* 3497fa9e4066Sahrens * Return, in the buffer contained in the provided uio structure, 3498fa9e4066Sahrens * the symbolic path referred to by vp. 3499fa9e4066Sahrens * 3500fa9e4066Sahrens * IN: vp - vnode of symbolic link. 3501fa9e4066Sahrens * uoip - structure to contain the link path. 3502fa9e4066Sahrens * cr - credentials of caller. 3503da6c28aaSamw * ct - caller context 3504fa9e4066Sahrens * 3505fa9e4066Sahrens * OUT: uio - structure to contain the link path. 
3506fa9e4066Sahrens * 3507fa9e4066Sahrens * RETURN: 0 if success 3508fa9e4066Sahrens * error code if failure 3509fa9e4066Sahrens * 3510fa9e4066Sahrens * Timestamps: 3511fa9e4066Sahrens * vp - atime updated 3512fa9e4066Sahrens */ 3513fa9e4066Sahrens /* ARGSUSED */ 3514fa9e4066Sahrens static int 3515da6c28aaSamw zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 3516fa9e4066Sahrens { 3517fa9e4066Sahrens znode_t *zp = VTOZ(vp); 3518fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3519fa9e4066Sahrens size_t bufsz; 3520fa9e4066Sahrens int error; 3521fa9e4066Sahrens 35223cb34c60Sahrens ZFS_ENTER(zfsvfs); 35233cb34c60Sahrens ZFS_VERIFY_ZP(zp); 3524fa9e4066Sahrens 3525fa9e4066Sahrens bufsz = (size_t)zp->z_phys->zp_size; 3526fa9e4066Sahrens if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) { 3527fa9e4066Sahrens error = uiomove(zp->z_phys + 1, 3528fa9e4066Sahrens MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 3529fa9e4066Sahrens } else { 3530ea8dc4b6Seschrock dmu_buf_t *dbp; 3531ea8dc4b6Seschrock error = dmu_buf_hold(zfsvfs->z_os, zp->z_id, 0, FTAG, &dbp); 3532ea8dc4b6Seschrock if (error) { 3533fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3534fa9e4066Sahrens return (error); 3535fa9e4066Sahrens } 3536fa9e4066Sahrens error = uiomove(dbp->db_data, 3537fa9e4066Sahrens MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); 3538ea8dc4b6Seschrock dmu_buf_rele(dbp, FTAG); 3539fa9e4066Sahrens } 3540fa9e4066Sahrens 3541fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 3542fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3543fa9e4066Sahrens return (error); 3544fa9e4066Sahrens } 3545fa9e4066Sahrens 3546fa9e4066Sahrens /* 3547fa9e4066Sahrens * Insert a new entry into directory tdvp referencing svp. 3548fa9e4066Sahrens * 3549fa9e4066Sahrens * IN: tdvp - Directory to contain new entry. 3550fa9e4066Sahrens * svp - vnode of new entry. 3551fa9e4066Sahrens * name - name of new entry. 3552fa9e4066Sahrens * cr - credentials of caller. 
3553da6c28aaSamw * ct - caller context 3554fa9e4066Sahrens * 3555fa9e4066Sahrens * RETURN: 0 if success 3556fa9e4066Sahrens * error code if failure 3557fa9e4066Sahrens * 3558fa9e4066Sahrens * Timestamps: 3559fa9e4066Sahrens * tdvp - ctime|mtime updated 3560fa9e4066Sahrens * svp - ctime updated 3561fa9e4066Sahrens */ 3562fa9e4066Sahrens /* ARGSUSED */ 3563fa9e4066Sahrens static int 3564da6c28aaSamw zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, 3565da6c28aaSamw caller_context_t *ct, int flags) 3566fa9e4066Sahrens { 3567fa9e4066Sahrens znode_t *dzp = VTOZ(tdvp); 3568fa9e4066Sahrens znode_t *tzp, *szp; 3569fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3570f18faf3fSek zilog_t *zilog; 3571fa9e4066Sahrens zfs_dirlock_t *dl; 3572fa9e4066Sahrens dmu_tx_t *tx; 3573fa9e4066Sahrens vnode_t *realvp; 3574fa9e4066Sahrens int error; 3575da6c28aaSamw int zf = ZNEW; 3576da6c28aaSamw uid_t owner; 3577fa9e4066Sahrens 3578fa9e4066Sahrens ASSERT(tdvp->v_type == VDIR); 3579fa9e4066Sahrens 35803cb34c60Sahrens ZFS_ENTER(zfsvfs); 35813cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 3582f18faf3fSek zilog = zfsvfs->z_log; 3583fa9e4066Sahrens 3584da6c28aaSamw if (VOP_REALVP(svp, &realvp, ct) == 0) 3585fa9e4066Sahrens svp = realvp; 3586fa9e4066Sahrens 3587fa9e4066Sahrens if (svp->v_vfsp != tdvp->v_vfsp) { 3588fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3589fa9e4066Sahrens return (EXDEV); 3590fa9e4066Sahrens } 35913cb34c60Sahrens szp = VTOZ(svp); 35923cb34c60Sahrens ZFS_VERIFY_ZP(szp); 3593fa9e4066Sahrens 3594de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, 3595da6c28aaSamw strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 3596da6c28aaSamw ZFS_EXIT(zfsvfs); 3597da6c28aaSamw return (EILSEQ); 3598da6c28aaSamw } 3599da6c28aaSamw if (flags & FIGNORECASE) 3600da6c28aaSamw zf |= ZCILOOK; 3601da6c28aaSamw 3602fa9e4066Sahrens top: 3603fa9e4066Sahrens /* 3604fa9e4066Sahrens * We do not support links between attributes and non-attributes 3605fa9e4066Sahrens * because of the potential security risk 
of creating links 3606fa9e4066Sahrens * into "normal" file space in order to circumvent restrictions 3607fa9e4066Sahrens * imposed in attribute space. 3608fa9e4066Sahrens */ 3609fa9e4066Sahrens if ((szp->z_phys->zp_flags & ZFS_XATTR) != 3610fa9e4066Sahrens (dzp->z_phys->zp_flags & ZFS_XATTR)) { 3611fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3612fa9e4066Sahrens return (EINVAL); 3613fa9e4066Sahrens } 3614fa9e4066Sahrens 3615fa9e4066Sahrens /* 3616fa9e4066Sahrens * POSIX dictates that we return EPERM here. 3617fa9e4066Sahrens * Better choices include ENOTSUP or EISDIR. 3618fa9e4066Sahrens */ 3619fa9e4066Sahrens if (svp->v_type == VDIR) { 3620fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3621fa9e4066Sahrens return (EPERM); 3622fa9e4066Sahrens } 3623fa9e4066Sahrens 3624e0d35c44Smarks owner = zfs_fuid_map_id(zfsvfs, szp->z_phys->zp_uid, cr, ZFS_OWNER); 3625da6c28aaSamw if (owner != crgetuid(cr) && 3626fa9e4066Sahrens secpolicy_basic_link(cr) != 0) { 3627fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3628fa9e4066Sahrens return (EPERM); 3629fa9e4066Sahrens } 3630fa9e4066Sahrens 3631da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 3632fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3633fa9e4066Sahrens return (error); 3634fa9e4066Sahrens } 3635fa9e4066Sahrens 3636fa9e4066Sahrens /* 3637fa9e4066Sahrens * Attempt to lock directory; fail if entry already exists. 
3638fa9e4066Sahrens */ 3639da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); 3640da6c28aaSamw if (error) { 3641fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3642fa9e4066Sahrens return (error); 3643fa9e4066Sahrens } 3644fa9e4066Sahrens 3645fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 3646fa9e4066Sahrens dmu_tx_hold_bonus(tx, szp->z_id); 3647ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 36481209a471SNeil Perrin error = dmu_tx_assign(tx, TXG_NOWAIT); 3649fa9e4066Sahrens if (error) { 3650fa9e4066Sahrens zfs_dirent_unlock(dl); 36511209a471SNeil Perrin if (error == ERESTART) { 36528a2f1b91Sahrens dmu_tx_wait(tx); 36538a2f1b91Sahrens dmu_tx_abort(tx); 3654fa9e4066Sahrens goto top; 3655fa9e4066Sahrens } 36568a2f1b91Sahrens dmu_tx_abort(tx); 3657fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3658fa9e4066Sahrens return (error); 3659fa9e4066Sahrens } 3660fa9e4066Sahrens 3661fa9e4066Sahrens error = zfs_link_create(dl, szp, tx, 0); 3662fa9e4066Sahrens 3663da6c28aaSamw if (error == 0) { 3664da6c28aaSamw uint64_t txtype = TX_LINK; 3665da6c28aaSamw if (flags & FIGNORECASE) 3666da6c28aaSamw txtype |= TX_CI; 3667da6c28aaSamw zfs_log_link(zilog, tx, txtype, dzp, szp, name); 3668da6c28aaSamw } 3669fa9e4066Sahrens 3670fa9e4066Sahrens dmu_tx_commit(tx); 3671fa9e4066Sahrens 3672fa9e4066Sahrens zfs_dirent_unlock(dl); 3673fa9e4066Sahrens 3674df2381bfSpraks if (error == 0) { 3675da6c28aaSamw vnevent_link(svp, ct); 3676df2381bfSpraks } 3677df2381bfSpraks 3678fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3679fa9e4066Sahrens return (error); 3680fa9e4066Sahrens } 3681fa9e4066Sahrens 3682fa9e4066Sahrens /* 3683fa9e4066Sahrens * zfs_null_putapage() is used when the file system has been force 3684fa9e4066Sahrens * unmounted. It just drops the pages. 
3685fa9e4066Sahrens */ 3686fa9e4066Sahrens /* ARGSUSED */ 3687fa9e4066Sahrens static int 3688fa9e4066Sahrens zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 3689fa9e4066Sahrens size_t *lenp, int flags, cred_t *cr) 3690fa9e4066Sahrens { 3691fa9e4066Sahrens pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); 3692fa9e4066Sahrens return (0); 3693fa9e4066Sahrens } 3694fa9e4066Sahrens 369544eda4d7Smaybee /* 369644eda4d7Smaybee * Push a page out to disk, klustering if possible. 369744eda4d7Smaybee * 369844eda4d7Smaybee * IN: vp - file to push page to. 369944eda4d7Smaybee * pp - page to push. 370044eda4d7Smaybee * flags - additional flags. 370144eda4d7Smaybee * cr - credentials of caller. 370244eda4d7Smaybee * 370344eda4d7Smaybee * OUT: offp - start of range pushed. 370444eda4d7Smaybee * lenp - len of range pushed. 370544eda4d7Smaybee * 370644eda4d7Smaybee * RETURN: 0 if success 370744eda4d7Smaybee * error code if failure 370844eda4d7Smaybee * 370944eda4d7Smaybee * NOTE: callers must have locked the page to be pushed. On 371044eda4d7Smaybee * exit, the page (and all other pages in the kluster) must be 371144eda4d7Smaybee * unlocked. 
 */
/* ARGSUSED */
static int
zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
		size_t *lenp, int flags, cred_t *cr)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	dmu_tx_t	*tx;
	u_offset_t	off, koff;
	size_t		len, klen;
	uint64_t	filesz;
	int		err;

	/* start out assuming we push exactly the one page we were given */
	filesz = zp->z_phys->zp_size;
	off = pp->p_offset;
	len = PAGESIZE;
	/*
	 * If our blocksize is bigger than the page size, try to kluster
	 * multiple pages so that we write a full block (thus avoiding
	 * a read-modify-write).
	 */
	if (off < filesz && zp->z_blksz > PAGESIZE) {
		/*
		 * The kluster window is the block size rounded up to whole
		 * pages.  If that is not a power of two the window starts
		 * at offset 0; otherwise it is aligned down from 'off'.
		 */
		klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE);
		koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0;
		ASSERT(koff <= filesz);
		/* do not let the window run past the last page of the file */
		if (koff + klen > filesz)
			klen = P2ROUNDUP(filesz - koff, (uint64_t)PAGESIZE);
		/*
		 * NOTE(review): pvn_write_kluster() is handed &off and &len
		 * and presumably rewrites them to the range it gathered,
		 * returning the head of the resulting page list -- confirm.
		 */
		pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags);
	}
	/* len must be a whole number of pages at this point */
	ASSERT3U(btop(len), ==, btopr(len));

	/*
	 * Can't push pages past end-of-file.
	 */
	if (off >= filesz) {
		/* ignore all pages */
		err = 0;
		goto out;
	} else if (off + len > filesz) {
		int npages = btopr(filesz - off);
		page_t *trunc;

		/* keep the first npages on pp, move the rest to 'trunc' */
		page_list_break(&pp, &trunc, npages);
		/* ignore pages past end of file */
		if (trunc)
			pvn_write_done(trunc, flags);
		len = filesz - off;
	}

	/* refuse the write if the owning user or group is over quota */
	if (zfs_usergroup_overquota(zfsvfs, B_FALSE, zp->z_phys->zp_uid) ||
	    zfs_usergroup_overquota(zfsvfs, B_TRUE, zp->z_phys->zp_gid)) {
		err = EDQUOT;
		goto out;
	}
	/* restart point after an ERESTART from dmu_tx_assign() */
top:
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_write(tx, zp->z_id, off, len);
	dmu_tx_hold_bonus(tx, zp->z_id);
	err = dmu_tx_assign(tx, TXG_NOWAIT);
	if (err != 0) {
		if (err == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		goto out;
	}

	if (zp->z_blksz <= PAGESIZE) {
		/*
		 * No klustering was attempted, so there is a single page:
		 * map it and write its contents directly.  'err' is still
		 * 0 from dmu_tx_assign() on this path.
		 */
		caddr_t va = zfs_map_page(pp, S_READ);
		ASSERT3U(len, <=, PAGESIZE);
		dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx);
		zfs_unmap_page(pp, va);
	} else {
		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx);
	}

	if (err == 0) {
		zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
	}
	/* the transaction is committed even when the write failed */
	dmu_tx_commit(tx);

out:
	/* complete the remaining pages, flagging B_ERROR on failure */
	pvn_write_done(pp, (err ? B_ERROR : 0) | flags);
	/* report the range handled; both out-pointers are optional */
	if (offp)
		*offp = off;
	if (lenp)
		*lenp = len;

	return (err);
}

/*
 * Copy the portion of the file indicated from pages into the file.
 * The pages are stored in a page list attached to the file's vnode.
 *
 *	IN:	vp	- vnode of file to push page data to.
 *		off	- position in file to put data.
 *		len	- amount of data to write.
 *		flags	- flags to control the operation.
 *		cr	- credentials of caller.
 *		ct	- caller context.
/*
 * Copy the portion of the file indicated from pages into the file.
 * The pages are stored in a page list attached to the files vnode.
 *
 *	IN:	vp	- vnode of file to push page data to.
 *		off	- position in file to put data.
 *		len	- amount of data to write; 0 selects the
 *			  "search the whole vnode page list" path below.
 *		flags	- flags to control the operation.
 *		cr	- credentials of caller.
 *		ct	- caller context.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	page_t		*pp;
	size_t		io_len;
	u_offset_t	io_off;
	uint_t		blksz;
	rl_t		*rl;
	int		error = 0;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * Align this request to the file block size in case we kluster.
	 * XXX - this can result in pretty aggressive locking, which can
	 * impact simultaneous read/write access.  One option might be
	 * to break up long requests (len == 0) into block-by-block
	 * operations to get narrower locking.
	 *
	 * When the block size is not a power of two the request cannot be
	 * aligned, so io_off and io_len are both forced to 0 and the
	 * whole-file path below is taken.
	 */
	blksz = zp->z_blksz;
	if (ISP2(blksz))
		io_off = P2ALIGN_TYPED(off, blksz, u_offset_t);
	else
		io_off = 0;
	if (len > 0 && ISP2(blksz))
		io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t);
	else
		io_len = 0;

	if (io_len == 0) {
		/*
		 * Search the entire vp list for pages >= io_off.
		 * The range lock covers everything from io_off onward.
		 */
		rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER);
		error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr);
		goto out;
	}
	rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER);

	if (off > zp->z_phys->zp_size) {
		/*
		 * past end of file; note that this early return does not
		 * go through the zil_commit() at the `out' label.
		 */
		zfs_range_unlock(rl);
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/* Never walk past the last page that can hold file data. */
	len = MIN(io_len, P2ROUNDUP(zp->z_phys->zp_size, PAGESIZE) - io_off);

	/*
	 * `off' is reused as the fixed start of the aligned range.  The
	 * stride io_len is rewritten on every pass: zfs_putapage() is
	 * handed &io_off/&io_len to report back the range it dealt with,
	 * and a page that is absent or clean advances by one page.
	 */
	for (off = io_off; io_off < off + len; io_off += io_len) {
		/*
		 * Invalidating or synchronous requests use page_lookup();
		 * all other (asynchronous) requests use the _nowait variant.
		 */
		if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
			pp = page_lookup(vp, io_off,
			    (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED);
		} else {
			pp = page_lookup_nowait(vp, io_off,
			    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
		}

		if (pp != NULL && pvn_getdirty(pp, flags)) {
			int err;

			/*
			 * Found a dirty page to push.  Keep going after a
			 * failure; the last error seen is what is returned.
			 */
			err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr);
			if (err)
				error = err;
		} else {
			io_len = PAGESIZE;
		}
	}
out:
	zfs_range_unlock(rl);
	/* Requests without B_ASYNC also commit the intent log for this file. */
	if ((flags & B_ASYNC) == 0)
		zil_commit(zfsvfs->z_log, UINT64_MAX, zp->z_id);
	ZFS_EXIT(zfsvfs);
	return (error);
}
SE_EXCL : SE_SHARED); 3884fa9e4066Sahrens } 3885fa9e4066Sahrens 3886fa9e4066Sahrens if (pp != NULL && pvn_getdirty(pp, flags)) { 3887fa9e4066Sahrens int err; 3888fa9e4066Sahrens 3889fa9e4066Sahrens /* 3890fa9e4066Sahrens * Found a dirty page to push 3891fa9e4066Sahrens */ 3892104e2ed7Sperrin err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 3893104e2ed7Sperrin if (err) 3894fa9e4066Sahrens error = err; 3895fa9e4066Sahrens } else { 3896fa9e4066Sahrens io_len = PAGESIZE; 3897fa9e4066Sahrens } 3898fa9e4066Sahrens } 3899fe9cf88cSperrin out: 3900ac05c741SMark Maybee zfs_range_unlock(rl); 3901b19a79ecSperrin if ((flags & B_ASYNC) == 0) 3902b19a79ecSperrin zil_commit(zfsvfs->z_log, UINT64_MAX, zp->z_id); 3903fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3904fa9e4066Sahrens return (error); 3905fa9e4066Sahrens } 3906fa9e4066Sahrens 3907da6c28aaSamw /*ARGSUSED*/ 3908fa9e4066Sahrens void 3909da6c28aaSamw zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 3910fa9e4066Sahrens { 3911fa9e4066Sahrens znode_t *zp = VTOZ(vp); 3912fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3913fa9e4066Sahrens int error; 3914fa9e4066Sahrens 3915f18faf3fSek rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 39164ccbb6e7Sahrens if (zp->z_dbuf == NULL) { 39174ccbb6e7Sahrens /* 3918874395d5Smaybee * The fs has been unmounted, or we did a 3919874395d5Smaybee * suspend/resume and this file no longer exists. 
39204ccbb6e7Sahrens */ 3921fa9e4066Sahrens if (vn_has_cached_data(vp)) { 3922fa9e4066Sahrens (void) pvn_vplist_dirty(vp, 0, zfs_null_putapage, 3923fa9e4066Sahrens B_INVAL, cr); 3924fa9e4066Sahrens } 3925fa9e4066Sahrens 3926ea8dc4b6Seschrock mutex_enter(&zp->z_lock); 3927cd2adeceSChris Kirby mutex_enter(&vp->v_lock); 3928cd2adeceSChris Kirby ASSERT(vp->v_count == 1); 3929cd2adeceSChris Kirby vp->v_count = 0; 3930cd2adeceSChris Kirby mutex_exit(&vp->v_lock); 39314ccbb6e7Sahrens mutex_exit(&zp->z_lock); 3932f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 3933874395d5Smaybee zfs_znode_free(zp); 3934fa9e4066Sahrens return; 3935fa9e4066Sahrens } 3936fa9e4066Sahrens 3937fa9e4066Sahrens /* 3938fa9e4066Sahrens * Attempt to push any data in the page cache. If this fails 3939fa9e4066Sahrens * we will get kicked out later in zfs_zinactive(). 3940fa9e4066Sahrens */ 39418afd4dd6Sperrin if (vn_has_cached_data(vp)) { 39428afd4dd6Sperrin (void) pvn_vplist_dirty(vp, 0, zfs_putapage, B_INVAL|B_ASYNC, 39438afd4dd6Sperrin cr); 39448afd4dd6Sperrin } 3945fa9e4066Sahrens 3946893a6d32Sahrens if (zp->z_atime_dirty && zp->z_unlinked == 0) { 3947fa9e4066Sahrens dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 3948fa9e4066Sahrens 3949fa9e4066Sahrens dmu_tx_hold_bonus(tx, zp->z_id); 3950fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 3951fa9e4066Sahrens if (error) { 3952fa9e4066Sahrens dmu_tx_abort(tx); 3953fa9e4066Sahrens } else { 3954fa9e4066Sahrens dmu_buf_will_dirty(zp->z_dbuf, tx); 3955fa9e4066Sahrens mutex_enter(&zp->z_lock); 3956fa9e4066Sahrens zp->z_atime_dirty = 0; 3957fa9e4066Sahrens mutex_exit(&zp->z_lock); 3958fa9e4066Sahrens dmu_tx_commit(tx); 3959fa9e4066Sahrens } 3960fa9e4066Sahrens } 3961fa9e4066Sahrens 3962fa9e4066Sahrens zfs_zinactive(zp); 3963f18faf3fSek rw_exit(&zfsvfs->z_teardown_inactive_lock); 3964fa9e4066Sahrens } 3965fa9e4066Sahrens 3966fa9e4066Sahrens /* 3967fa9e4066Sahrens * Bounds-check the seek operation. 
3968fa9e4066Sahrens * 3969fa9e4066Sahrens * IN: vp - vnode seeking within 3970fa9e4066Sahrens * ooff - old file offset 3971fa9e4066Sahrens * noffp - pointer to new file offset 3972da6c28aaSamw * ct - caller context 3973fa9e4066Sahrens * 3974fa9e4066Sahrens * RETURN: 0 if success 3975fa9e4066Sahrens * EINVAL if new offset invalid 3976fa9e4066Sahrens */ 3977fa9e4066Sahrens /* ARGSUSED */ 3978fa9e4066Sahrens static int 3979da6c28aaSamw zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 3980da6c28aaSamw caller_context_t *ct) 3981fa9e4066Sahrens { 3982fa9e4066Sahrens if (vp->v_type == VDIR) 3983fa9e4066Sahrens return (0); 3984fa9e4066Sahrens return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 3985fa9e4066Sahrens } 3986fa9e4066Sahrens 3987fa9e4066Sahrens /* 3988fa9e4066Sahrens * Pre-filter the generic locking function to trap attempts to place 3989fa9e4066Sahrens * a mandatory lock on a memory mapped file. 3990fa9e4066Sahrens */ 3991fa9e4066Sahrens static int 3992fa9e4066Sahrens zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 3993da6c28aaSamw flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 3994fa9e4066Sahrens { 3995fa9e4066Sahrens znode_t *zp = VTOZ(vp); 3996fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 3997fa9e4066Sahrens 39983cb34c60Sahrens ZFS_ENTER(zfsvfs); 39993cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4000fa9e4066Sahrens 4001fa9e4066Sahrens /* 4002ea8dc4b6Seschrock * We are following the UFS semantics with respect to mapcnt 4003ea8dc4b6Seschrock * here: If we see that the file is mapped already, then we will 4004ea8dc4b6Seschrock * return an error, but we don't worry about races between this 4005ea8dc4b6Seschrock * function and zfs_map(). 
4006fa9e4066Sahrens */ 4007ea8dc4b6Seschrock if (zp->z_mapcnt > 0 && MANDMODE((mode_t)zp->z_phys->zp_mode)) { 4008fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4009fa9e4066Sahrens return (EAGAIN); 4010fa9e4066Sahrens } 4011fa9e4066Sahrens ZFS_EXIT(zfsvfs); 401204ce3d0bSMark Shellenbaum return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4013fa9e4066Sahrens } 4014fa9e4066Sahrens 4015fa9e4066Sahrens /* 4016fa9e4066Sahrens * If we can't find a page in the cache, we will create a new page 4017fa9e4066Sahrens * and fill it with file data. For efficiency, we may try to fill 4018ac05c741SMark Maybee * multiple pages at once (klustering) to fill up the supplied page 4019ed886187SMark Maybee * list. Note that the pages to be filled are held with an exclusive 4020ed886187SMark Maybee * lock to prevent access by other threads while they are being filled. 4021fa9e4066Sahrens */ 4022fa9e4066Sahrens static int 4023fa9e4066Sahrens zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4024fa9e4066Sahrens caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4025fa9e4066Sahrens { 4026fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4027fa9e4066Sahrens page_t *pp, *cur_pp; 4028fa9e4066Sahrens objset_t *os = zp->z_zfsvfs->z_os; 4029fa9e4066Sahrens u_offset_t io_off, total; 4030fa9e4066Sahrens size_t io_len; 4031fa9e4066Sahrens int err; 4032fa9e4066Sahrens 403344eda4d7Smaybee if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4034ac05c741SMark Maybee /* 4035ac05c741SMark Maybee * We only have a single page, don't bother klustering 4036ac05c741SMark Maybee */ 4037fa9e4066Sahrens io_off = off; 4038fa9e4066Sahrens io_len = PAGESIZE; 4039ed886187SMark Maybee pp = page_create_va(vp, io_off, io_len, 4040ed886187SMark Maybee PG_EXCL | PG_WAIT, seg, addr); 4041fa9e4066Sahrens } else { 4042fa9e4066Sahrens /* 4043ac05c741SMark Maybee * Try to find enough pages to fill the page list 4044fa9e4066Sahrens */ 4045fa9e4066Sahrens pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 4046ac05c741SMark 
/*
 * If we can't find a page in the cache, we will create a new page
 * and fill it with file data.  For efficiency, we may try to fill
 * multiple pages at once (klustering) to fill up the supplied page
 * list.  Note that the pages to be filled are held with an exclusive
 * lock to prevent access by other threads while they are being filled.
 *
 *	IN:	vp	- vnode of file to read from.
 *		off	- file offset of the first page wanted.
 *		seg	- segment the pages are created for.
 *		addr	- virtual address corresponding to `off'.
 *		plsz	- size in bytes of the caller's page list.
 *		rw	- access mode passed on to pvn_plist_init().
 *
 *	OUT:	pl	- page list; *pl is set to NULL when no page was
 *			  created because one already exists.
 *
 *	RETURN:	0 if success (including the "page already exists" case)
 *		error code from dmu_read() if failure, with ECKSUM
 *		reported as EIO
 */
static int
zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg,
    caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw)
{
	znode_t *zp = VTOZ(vp);
	page_t *pp, *cur_pp;
	objset_t *os = zp->z_zfsvfs->z_os;
	u_offset_t io_off, total;
	size_t io_len;
	int err;

	if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) {
		/*
		 * We only have a single page, don't bother klustering
		 */
		io_off = off;
		io_len = PAGESIZE;
		pp = page_create_va(vp, io_off, io_len,
		    PG_EXCL | PG_WAIT, seg, addr);
	} else {
		/*
		 * Try to find enough pages to fill the page list;
		 * io_off/io_len are set by pvn_read_kluster() to the
		 * range it actually assembled.
		 */
		pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
		    &io_len, off, plsz, 0);
	}
	if (pp == NULL) {
		/*
		 * The page already exists, nothing to do here.
		 * The caller sees an empty list and a zero return.
		 */
		*pl = NULL;
		return (0);
	}

	/*
	 * Fill the pages in the kluster, one PAGESIZE read per page,
	 * walking the page chain through p_next.
	 */
	cur_pp = pp;
	for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
		caddr_t va;

		ASSERT3U(io_off, ==, cur_pp->p_offset);
		va = zfs_map_page(cur_pp, S_WRITE);
		err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
		    DMU_READ_PREFETCH);
		zfs_unmap_page(cur_pp, va);
		if (err) {
			/* On error, toss the entire kluster */
			pvn_read_done(pp, B_ERROR);
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = EIO;
			/* NOTE: *pl is left untouched on this path. */
			return (err);
		}
		cur_pp = cur_pp->p_next;
	}

	/*
	 * Fill in the page list array from the kluster starting
	 * from the desired offset `off'.
	 * NOTE: the page list will always be null terminated.
	 */
	pvn_plist_init(pp, pl, plsz, off, io_len, rw);
	ASSERT(pl == NULL || (*pl)->p_offset == off);

	return (0);
}
/*
 * Return pointers to the pages for the file region [off, off + len]
 * in the pl array.  If plsz is greater than len, this function may
 * also return page pointers from after the specified region
 * (i.e. the region [off, off + plsz]).  These additional pages are
 * only returned if they are already in the cache, or were created as
 * part of a klustered read.
 *
 *	IN:	vp	- vnode of file to get data from.
 *		off	- position in file to get data from.
 *		len	- amount of data to retrieve.
 *		plsz	- length of provided page list.
 *		seg	- segment to obtain pages for.
 *		addr	- virtual address of fault.
 *		rw	- mode of created pages.
 *		cr	- credentials of caller.
 *		ct	- caller context.
 *
 *	OUT:	protp	- protection mode of created pages.
 *		pl	- list of pages created.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - atime updated
 */
/* ARGSUSED */
static int
zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	page_t		**pl0 = pl;	/* start of list, for error unwind */
	int		err = 0;

	/*
	 * we do our own caching, faultahead is unnecessary
	 * (a NULL page list returns success before ZFS_ENTER).
	 */
	if (pl == NULL)
		return (0);
	else if (len > plsz)
		len = plsz;
	else
		len = P2ROUNDUP(len, PAGESIZE);
	ASSERT(plsz >= len);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (protp)
		*protp = PROT_ALL;

	/*
	 * Loop through the requested range [off, off + len) looking
	 * for pages.  If we don't find a page, we will need to create
	 * a new page and fill it with data from the file.
	 */
	while (len > 0) {
		/*
		 * A cached page yields a one-entry, NULL-terminated list.
		 * Otherwise zfs_fillpage() builds the list, and may leave
		 * *pl NULL when the page already exists, in which case the
		 * inner loop is skipped and the lookup is retried.
		 */
		if (*pl = page_lookup(vp, off, SE_SHARED))
			*(pl+1) = NULL;
		else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw))
			goto out;
		/* Step past every page just placed in the list. */
		while (*pl) {
			ASSERT3U((*pl)->p_offset, ==, off);
			off += PAGESIZE;
			addr += PAGESIZE;
			/*
			 * A klustered fill can return more pages than were
			 * requested, so len may already have reached 0.
			 */
			if (len > 0) {
				ASSERT3U(len, >=, PAGESIZE);
				len -= PAGESIZE;
			}
			ASSERT3U(plsz, >=, PAGESIZE);
			plsz -= PAGESIZE;
			pl++;
		}
	}

	/*
	 * Fill out the page array with any pages already in the cache.
	 * This stops at the first page that cannot be looked up without
	 * waiting, or when the list is full.
	 */
	while (plsz > 0 &&
	    (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) {
		off += PAGESIZE;
		plsz -= PAGESIZE;
	}
out:
	if (err) {
		/*
		 * Release any pages we have previously locked.
		 * This rewinds pl back to pl0.
		 */
		while (pl > pl0)
			page_unlock(*--pl);
	} else {
		ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
	}

	/* Terminate the list (an empty list on the error path). */
	*pl = NULL;

	ZFS_EXIT(zfsvfs);
	return (err);
}
This code interacts 4194ea8dc4b6Seschrock * with common code and the VM system as follows: 4195ea8dc4b6Seschrock * 4196ea8dc4b6Seschrock * common code calls mmap(), which ends up in smmap_common() 4197ea8dc4b6Seschrock * 4198ea8dc4b6Seschrock * this calls VOP_MAP(), which takes you into (say) zfs 4199ea8dc4b6Seschrock * 4200ea8dc4b6Seschrock * zfs_map() calls as_map(), passing segvn_create() as the callback 4201ea8dc4b6Seschrock * 4202ea8dc4b6Seschrock * segvn_create() creates the new segment and calls VOP_ADDMAP() 4203ea8dc4b6Seschrock * 4204ea8dc4b6Seschrock * zfs_addmap() updates z_mapcnt 4205ea8dc4b6Seschrock */ 4206da6c28aaSamw /*ARGSUSED*/ 4207fa9e4066Sahrens static int 4208fa9e4066Sahrens zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4209da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4210da6c28aaSamw caller_context_t *ct) 4211fa9e4066Sahrens { 4212fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4213fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4214fa9e4066Sahrens segvn_crargs_t vn_a; 4215fa9e4066Sahrens int error; 4216fa9e4066Sahrens 42170616c50eSmarks ZFS_ENTER(zfsvfs); 42180616c50eSmarks ZFS_VERIFY_ZP(zp); 42190616c50eSmarks 4220da6c28aaSamw if ((prot & PROT_WRITE) && 4221da6c28aaSamw (zp->z_phys->zp_flags & (ZFS_IMMUTABLE | ZFS_READONLY | 42220616c50eSmarks ZFS_APPENDONLY))) { 42230616c50eSmarks ZFS_EXIT(zfsvfs); 4224da6c28aaSamw return (EPERM); 42250616c50eSmarks } 4226da6c28aaSamw 42270616c50eSmarks if ((prot & (PROT_READ | PROT_EXEC)) && 42280616c50eSmarks (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED)) { 42290616c50eSmarks ZFS_EXIT(zfsvfs); 42300616c50eSmarks return (EACCES); 42310616c50eSmarks } 4232fa9e4066Sahrens 4233fa9e4066Sahrens if (vp->v_flag & VNOMAP) { 4234fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4235fa9e4066Sahrens return (ENOSYS); 4236fa9e4066Sahrens } 4237fa9e4066Sahrens 4238fa9e4066Sahrens if (off < 0 || len > MAXOFFSET_T - off) { 4239fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4240fa9e4066Sahrens return 
(ENXIO); 4241fa9e4066Sahrens } 4242fa9e4066Sahrens 4243fa9e4066Sahrens if (vp->v_type != VREG) { 4244fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4245fa9e4066Sahrens return (ENODEV); 4246fa9e4066Sahrens } 4247fa9e4066Sahrens 4248fa9e4066Sahrens /* 4249fa9e4066Sahrens * If file is locked, disallow mapping. 4250fa9e4066Sahrens */ 4251ea8dc4b6Seschrock if (MANDMODE((mode_t)zp->z_phys->zp_mode) && vn_has_flocks(vp)) { 4252ea8dc4b6Seschrock ZFS_EXIT(zfsvfs); 4253ea8dc4b6Seschrock return (EAGAIN); 4254fa9e4066Sahrens } 4255fa9e4066Sahrens 4256fa9e4066Sahrens as_rangelock(as); 425760946fe0Smec error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 425860946fe0Smec if (error != 0) { 425960946fe0Smec as_rangeunlock(as); 426060946fe0Smec ZFS_EXIT(zfsvfs); 426160946fe0Smec return (error); 4262fa9e4066Sahrens } 4263fa9e4066Sahrens 4264fa9e4066Sahrens vn_a.vp = vp; 4265fa9e4066Sahrens vn_a.offset = (u_offset_t)off; 4266fa9e4066Sahrens vn_a.type = flags & MAP_TYPE; 4267fa9e4066Sahrens vn_a.prot = prot; 4268fa9e4066Sahrens vn_a.maxprot = maxprot; 4269fa9e4066Sahrens vn_a.cred = cr; 4270fa9e4066Sahrens vn_a.amp = NULL; 4271fa9e4066Sahrens vn_a.flags = flags & ~MAP_TYPE; 42724944b02eSkchow vn_a.szc = 0; 42734944b02eSkchow vn_a.lgrp_mem_policy_flags = 0; 4274fa9e4066Sahrens 4275fa9e4066Sahrens error = as_map(as, *addrp, len, segvn_create, &vn_a); 4276fa9e4066Sahrens 4277fa9e4066Sahrens as_rangeunlock(as); 4278fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4279fa9e4066Sahrens return (error); 4280fa9e4066Sahrens } 4281fa9e4066Sahrens 4282fa9e4066Sahrens /* ARGSUSED */ 4283fa9e4066Sahrens static int 4284fa9e4066Sahrens zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4285da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4286da6c28aaSamw caller_context_t *ct) 4287fa9e4066Sahrens { 4288ea8dc4b6Seschrock uint64_t pages = btopr(len); 4289ea8dc4b6Seschrock 4290ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 4291fa9e4066Sahrens return (0); 
4292fa9e4066Sahrens } 4293fa9e4066Sahrens 4294b468a217Seschrock /* 4295b468a217Seschrock * The reason we push dirty pages as part of zfs_delmap() is so that we get a 4296b468a217Seschrock * more accurate mtime for the associated file. Since we don't have a way of 4297b468a217Seschrock * detecting when the data was actually modified, we have to resort to 4298b468a217Seschrock * heuristics. If an explicit msync() is done, then we mark the mtime when the 4299b468a217Seschrock * last page is pushed. The problem occurs when the msync() call is omitted, 4300b468a217Seschrock * which by far the most common case: 4301b468a217Seschrock * 4302b468a217Seschrock * open() 4303b468a217Seschrock * mmap() 4304b468a217Seschrock * <modify memory> 4305b468a217Seschrock * munmap() 4306b468a217Seschrock * close() 4307b468a217Seschrock * <time lapse> 4308b468a217Seschrock * putpage() via fsflush 4309b468a217Seschrock * 4310b468a217Seschrock * If we wait until fsflush to come along, we can have a modification time that 4311b468a217Seschrock * is some arbitrary point in the future. In order to prevent this in the 4312b468a217Seschrock * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 4313b468a217Seschrock * torn down. 
4314b468a217Seschrock */ 4315fa9e4066Sahrens /* ARGSUSED */ 4316fa9e4066Sahrens static int 4317fa9e4066Sahrens zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4318da6c28aaSamw size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 4319da6c28aaSamw caller_context_t *ct) 4320fa9e4066Sahrens { 4321ea8dc4b6Seschrock uint64_t pages = btopr(len); 4322ea8dc4b6Seschrock 4323ea8dc4b6Seschrock ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 4324ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 4325b468a217Seschrock 4326b468a217Seschrock if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 4327b468a217Seschrock vn_has_cached_data(vp)) 4328da6c28aaSamw (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 4329b468a217Seschrock 4330fa9e4066Sahrens return (0); 4331fa9e4066Sahrens } 4332fa9e4066Sahrens 4333fa9e4066Sahrens /* 4334fa9e4066Sahrens * Free or allocate space in a file. Currently, this function only 4335fa9e4066Sahrens * supports the `F_FREESP' command. However, this command is somewhat 4336fa9e4066Sahrens * misnamed, as its functionality includes the ability to allocate as 4337fa9e4066Sahrens * well as free space. 4338fa9e4066Sahrens * 4339fa9e4066Sahrens * IN: vp - vnode of file to free data in. 4340fa9e4066Sahrens * cmd - action to take (only F_FREESP supported). 4341fa9e4066Sahrens * bfp - section of file to free/alloc. 4342fa9e4066Sahrens * flag - current file open mode flags. 4343fa9e4066Sahrens * offset - current file offset. 4344fa9e4066Sahrens * cr - credentials of caller [UNUSED]. 4345da6c28aaSamw * ct - caller context. 
4346fa9e4066Sahrens * 4347fa9e4066Sahrens * RETURN: 0 if success 4348fa9e4066Sahrens * error code if failure 4349fa9e4066Sahrens * 4350fa9e4066Sahrens * Timestamps: 4351fa9e4066Sahrens * vp - ctime|mtime updated 4352fa9e4066Sahrens */ 4353fa9e4066Sahrens /* ARGSUSED */ 4354fa9e4066Sahrens static int 4355fa9e4066Sahrens zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 4356fa9e4066Sahrens offset_t offset, cred_t *cr, caller_context_t *ct) 4357fa9e4066Sahrens { 4358fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4359fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4360fa9e4066Sahrens uint64_t off, len; 4361fa9e4066Sahrens int error; 4362fa9e4066Sahrens 43633cb34c60Sahrens ZFS_ENTER(zfsvfs); 43643cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4365fa9e4066Sahrens 4366fa9e4066Sahrens if (cmd != F_FREESP) { 4367fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4368fa9e4066Sahrens return (EINVAL); 4369fa9e4066Sahrens } 4370fa9e4066Sahrens 4371fa9e4066Sahrens if (error = convoff(vp, bfp, 0, offset)) { 4372fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4373fa9e4066Sahrens return (error); 4374fa9e4066Sahrens } 4375fa9e4066Sahrens 4376fa9e4066Sahrens if (bfp->l_len < 0) { 4377fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4378fa9e4066Sahrens return (EINVAL); 4379fa9e4066Sahrens } 4380fa9e4066Sahrens 4381fa9e4066Sahrens off = bfp->l_start; 4382104e2ed7Sperrin len = bfp->l_len; /* 0 means from off to end of file */ 4383104e2ed7Sperrin 4384cdb0ab79Smaybee error = zfs_freesp(zp, off, len, flag, TRUE); 4385fa9e4066Sahrens 4386fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4387fa9e4066Sahrens return (error); 4388fa9e4066Sahrens } 4389fa9e4066Sahrens 4390da6c28aaSamw /*ARGSUSED*/ 4391fa9e4066Sahrens static int 4392da6c28aaSamw zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4393fa9e4066Sahrens { 4394fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4395fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4396f18faf3fSek uint32_t gen; 4397fa9e4066Sahrens uint64_t object = zp->z_id; 4398fa9e4066Sahrens zfid_short_t *zfid; 4399fa9e4066Sahrens int size, i; 
4400fa9e4066Sahrens 44013cb34c60Sahrens ZFS_ENTER(zfsvfs); 44023cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4403f18faf3fSek gen = (uint32_t)zp->z_gen; 4404fa9e4066Sahrens 4405fa9e4066Sahrens size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4406fa9e4066Sahrens if (fidp->fid_len < size) { 4407fa9e4066Sahrens fidp->fid_len = size; 44080f2dc02eSek ZFS_EXIT(zfsvfs); 4409fa9e4066Sahrens return (ENOSPC); 4410fa9e4066Sahrens } 4411fa9e4066Sahrens 4412fa9e4066Sahrens zfid = (zfid_short_t *)fidp; 4413fa9e4066Sahrens 4414fa9e4066Sahrens zfid->zf_len = size; 4415fa9e4066Sahrens 4416fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 4417fa9e4066Sahrens zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4418fa9e4066Sahrens 4419fa9e4066Sahrens /* Must have a non-zero generation number to distinguish from .zfs */ 4420fa9e4066Sahrens if (gen == 0) 4421fa9e4066Sahrens gen = 1; 4422fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 4423fa9e4066Sahrens zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4424fa9e4066Sahrens 4425fa9e4066Sahrens if (size == LONG_FID_LEN) { 4426fa9e4066Sahrens uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4427fa9e4066Sahrens zfid_long_t *zlfid; 4428fa9e4066Sahrens 4429fa9e4066Sahrens zlfid = (zfid_long_t *)fidp; 4430fa9e4066Sahrens 4431fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4432fa9e4066Sahrens zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4433fa9e4066Sahrens 4434fa9e4066Sahrens /* XXX - this should be the generation number for the objset */ 4435fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4436fa9e4066Sahrens zlfid->zf_setgen[i] = 0; 4437fa9e4066Sahrens } 4438fa9e4066Sahrens 4439fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4440fa9e4066Sahrens return (0); 4441fa9e4066Sahrens } 4442fa9e4066Sahrens 4443fa9e4066Sahrens static int 4444da6c28aaSamw zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4445da6c28aaSamw caller_context_t *ct) 4446fa9e4066Sahrens { 4447fa9e4066Sahrens znode_t 
*zp, *xzp; 4448fa9e4066Sahrens zfsvfs_t *zfsvfs; 4449fa9e4066Sahrens zfs_dirlock_t *dl; 4450fa9e4066Sahrens int error; 4451fa9e4066Sahrens 4452fa9e4066Sahrens switch (cmd) { 4453fa9e4066Sahrens case _PC_LINK_MAX: 4454fa9e4066Sahrens *valp = ULONG_MAX; 4455fa9e4066Sahrens return (0); 4456fa9e4066Sahrens 4457fa9e4066Sahrens case _PC_FILESIZEBITS: 4458fa9e4066Sahrens *valp = 64; 4459fa9e4066Sahrens return (0); 4460fa9e4066Sahrens 4461fa9e4066Sahrens case _PC_XATTR_EXISTS: 4462fa9e4066Sahrens zp = VTOZ(vp); 4463fa9e4066Sahrens zfsvfs = zp->z_zfsvfs; 44643cb34c60Sahrens ZFS_ENTER(zfsvfs); 44653cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4466fa9e4066Sahrens *valp = 0; 4467fa9e4066Sahrens error = zfs_dirent_lock(&dl, zp, "", &xzp, 4468da6c28aaSamw ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 4469fa9e4066Sahrens if (error == 0) { 4470fa9e4066Sahrens zfs_dirent_unlock(dl); 4471fa9e4066Sahrens if (!zfs_dirempty(xzp)) 4472fa9e4066Sahrens *valp = 1; 4473fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 4474fa9e4066Sahrens } else if (error == ENOENT) { 4475fa9e4066Sahrens /* 4476fa9e4066Sahrens * If there aren't extended attributes, it's the 4477fa9e4066Sahrens * same as having zero of them. 
4478fa9e4066Sahrens */ 4479fa9e4066Sahrens error = 0; 4480fa9e4066Sahrens } 4481fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4482fa9e4066Sahrens return (error); 4483fa9e4066Sahrens 4484da6c28aaSamw case _PC_SATTR_ENABLED: 4485da6c28aaSamw case _PC_SATTR_EXISTS: 44869660e5cbSJanice Chang *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 4487da6c28aaSamw (vp->v_type == VREG || vp->v_type == VDIR); 4488da6c28aaSamw return (0); 4489da6c28aaSamw 4490e802abbdSTim Haley case _PC_ACCESS_FILTERING: 4491e802abbdSTim Haley *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 4492e802abbdSTim Haley vp->v_type == VDIR; 4493e802abbdSTim Haley return (0); 4494e802abbdSTim Haley 4495fa9e4066Sahrens case _PC_ACL_ENABLED: 4496fa9e4066Sahrens *valp = _ACL_ACE_ENABLED; 4497fa9e4066Sahrens return (0); 4498fa9e4066Sahrens 4499fa9e4066Sahrens case _PC_MIN_HOLE_SIZE: 4500fa9e4066Sahrens *valp = (ulong_t)SPA_MINBLOCKSIZE; 4501fa9e4066Sahrens return (0); 4502fa9e4066Sahrens 45033b862e9aSRoger A. Faulkner case _PC_TIMESTAMP_RESOLUTION: 45043b862e9aSRoger A. Faulkner /* nanosecond timestamp resolution */ 45053b862e9aSRoger A. Faulkner *valp = 1L; 45063b862e9aSRoger A. Faulkner return (0); 45073b862e9aSRoger A. Faulkner 4508fa9e4066Sahrens default: 4509da6c28aaSamw return (fs_pathconf(vp, cmd, valp, cr, ct)); 4510fa9e4066Sahrens } 4511fa9e4066Sahrens } 4512fa9e4066Sahrens 4513fa9e4066Sahrens /*ARGSUSED*/ 4514fa9e4066Sahrens static int 4515da6c28aaSamw zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4516da6c28aaSamw caller_context_t *ct) 4517fa9e4066Sahrens { 4518fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4519fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4520fa9e4066Sahrens int error; 4521da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 4522fa9e4066Sahrens 45233cb34c60Sahrens ZFS_ENTER(zfsvfs); 45243cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4525da6c28aaSamw error = zfs_getacl(zp, vsecp, skipaclchk, cr); 4526fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4527fa9e4066Sahrens 4528fa9e4066Sahrens return (error); 4529fa9e4066Sahrens } 4530fa9e4066Sahrens 4531fa9e4066Sahrens /*ARGSUSED*/ 4532fa9e4066Sahrens static int 4533da6c28aaSamw zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 4534da6c28aaSamw caller_context_t *ct) 4535fa9e4066Sahrens { 4536fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4537fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4538fa9e4066Sahrens int error; 4539da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 4540fa9e4066Sahrens 45413cb34c60Sahrens ZFS_ENTER(zfsvfs); 45423cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4543da6c28aaSamw error = zfs_setacl(zp, vsecp, skipaclchk, cr); 4544fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4545fa9e4066Sahrens return (error); 4546fa9e4066Sahrens } 4547fa9e4066Sahrens 4548fa9e4066Sahrens /* 4549fa9e4066Sahrens * Predeclare these here so that the compiler assumes that 4550fa9e4066Sahrens * this is an "old style" function declaration that does 4551fa9e4066Sahrens * not include arguments => we won't get type mismatch errors 4552fa9e4066Sahrens * in the initializations that follow. 
/*
 * Predeclare these here so that the compiler assumes that
 * this is an "old style" function declaration that does
 * not include arguments => we won't get type mismatch errors
 * in the initializations that follow.  The empty parameter lists
 * are therefore deliberate; do not turn them into (void).
 */
static int zfs_inval();
static int zfs_isdir();

/*
 * Fixed-result stub: fails unconditionally with EINVAL.
 */
static int
zfs_inval()
{
	int rc = EINVAL;

	return (rc);
}

/*
 * Fixed-result stub: fails unconditionally with EISDIR.
 */
static int
zfs_isdir()
{
	int rc = EISDIR;

	return (rc);
}
/*
 * Directory vnode operations template
 *
 * Pairs each VOPNAME_* with the ZFS routine serving it for directories.
 * Read and write are wired to zfs_isdir, so they fail with EISDIR.
 * The table ends with a NULL/NULL pair.
 * NOTE(review): operations not listed here are presumably given a default
 * by whatever consumes this template - confirm against the registration
 * code.
 */
vnodeops_t *zfs_dvnodeops;
const fs_operation_def_t zfs_dvnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = zfs_open },
	VOPNAME_CLOSE,		{ .vop_close = zfs_close },
	VOPNAME_READ,		{ .error = zfs_isdir },
	VOPNAME_WRITE,		{ .error = zfs_isdir },
	VOPNAME_IOCTL,		{ .vop_ioctl = zfs_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = zfs_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = zfs_setattr },
	VOPNAME_ACCESS,		{ .vop_access = zfs_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = zfs_lookup },
	VOPNAME_CREATE,		{ .vop_create = zfs_create },
	VOPNAME_REMOVE,		{ .vop_remove = zfs_remove },
	VOPNAME_LINK,		{ .vop_link = zfs_link },
	VOPNAME_RENAME,		{ .vop_rename = zfs_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = zfs_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = zfs_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = zfs_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = zfs_symlink },
	VOPNAME_FSYNC,		{ .vop_fsync = zfs_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = zfs_inactive },
	VOPNAME_FID,		{ .vop_fid = zfs_fid },
	VOPNAME_SEEK,		{ .vop_seek = zfs_seek },
	VOPNAME_PATHCONF,	{ .vop_pathconf = zfs_pathconf },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = zfs_getsecattr },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = zfs_setsecattr },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL,			NULL
};

/*
 * Regular file vnode operations template
 *
 * Compared with the directory table this one has real read/write entries
 * and adds the record-locking, space, paging and mapping operations
 * (frlock, space, getpage, putpage, map, addmap, delmap).  It has no
 * entries for create, remove, link, mkdir, rmdir, readdir or symlink.
 */
vnodeops_t *zfs_fvnodeops;
const fs_operation_def_t zfs_fvnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = zfs_open },
	VOPNAME_CLOSE,		{ .vop_close = zfs_close },
	VOPNAME_READ,		{ .vop_read = zfs_read },
	VOPNAME_WRITE,		{ .vop_write = zfs_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = zfs_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = zfs_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = zfs_setattr },
	VOPNAME_ACCESS,		{ .vop_access = zfs_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = zfs_lookup },
	VOPNAME_RENAME,		{ .vop_rename = zfs_rename },
	VOPNAME_FSYNC,		{ .vop_fsync = zfs_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = zfs_inactive },
	VOPNAME_FID,		{ .vop_fid = zfs_fid },
	VOPNAME_SEEK,		{ .vop_seek = zfs_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = zfs_frlock },
	VOPNAME_SPACE,		{ .vop_space = zfs_space },
	VOPNAME_GETPAGE,	{ .vop_getpage = zfs_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = zfs_putpage },
	VOPNAME_MAP,		{ .vop_map = zfs_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = zfs_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = zfs_delmap },
	VOPNAME_PATHCONF,	{ .vop_pathconf = zfs_pathconf },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = zfs_getsecattr },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = zfs_setsecattr },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL,			NULL
};
VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 4630aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 4631aa59c4cbSrsb NULL, NULL 4632fa9e4066Sahrens }; 4633fa9e4066Sahrens 4634fa9e4066Sahrens /* 4635fa9e4066Sahrens * Symbolic link vnode operations template 4636fa9e4066Sahrens */ 4637fa9e4066Sahrens vnodeops_t *zfs_symvnodeops; 4638fa9e4066Sahrens const fs_operation_def_t zfs_symvnodeops_template[] = { 4639aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 4640aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 4641aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 4642aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 4643aa59c4cbSrsb VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 4644aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 4645aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 4646aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 4647aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 4648aa59c4cbSrsb NULL, NULL 4649fa9e4066Sahrens }; 4650fa9e4066Sahrens 4651743a77edSAlan Wright /* 4652743a77edSAlan Wright * special share hidden files vnode operations template 4653743a77edSAlan Wright */ 4654743a77edSAlan Wright vnodeops_t *zfs_sharevnodeops; 4655743a77edSAlan Wright const fs_operation_def_t zfs_sharevnodeops_template[] = { 4656743a77edSAlan Wright VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 4657743a77edSAlan Wright VOPNAME_ACCESS, { .vop_access = zfs_access }, 4658743a77edSAlan Wright VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 4659743a77edSAlan Wright VOPNAME_FID, { .vop_fid = zfs_fid }, 4660743a77edSAlan Wright VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 4661743a77edSAlan Wright VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 4662743a77edSAlan Wright VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 4663743a77edSAlan Wright VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 4664743a77edSAlan 
Wright NULL, NULL 4665743a77edSAlan Wright }; 4666743a77edSAlan Wright 4667fa9e4066Sahrens /* 4668fa9e4066Sahrens * Extended attribute directory vnode operations template 4669fa9e4066Sahrens * This template is identical to the directory vnodes 4670fa9e4066Sahrens * operation template except for restricted operations: 4671fa9e4066Sahrens * VOP_MKDIR() 4672fa9e4066Sahrens * VOP_SYMLINK() 4673fa9e4066Sahrens * Note that there are other restrictions embedded in: 4674fa9e4066Sahrens * zfs_create() - restrict type to VREG 4675fa9e4066Sahrens * zfs_link() - no links into/out of attribute space 4676fa9e4066Sahrens * zfs_rename() - no moves into/out of attribute space 4677fa9e4066Sahrens */ 4678fa9e4066Sahrens vnodeops_t *zfs_xdvnodeops; 4679fa9e4066Sahrens const fs_operation_def_t zfs_xdvnodeops_template[] = { 4680aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 4681aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 4682aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 4683aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 4684aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 4685aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 4686aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 4687aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 4688aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 4689aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 4690aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 4691aa59c4cbSrsb VOPNAME_MKDIR, { .error = zfs_inval }, 4692aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 4693aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 4694aa59c4cbSrsb VOPNAME_SYMLINK, { .error = zfs_inval }, 4695aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 4696aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 4697aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 4698aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 4699aa59c4cbSrsb 
VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 4700aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 4701aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 4702aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 4703aa59c4cbSrsb NULL, NULL 4704fa9e4066Sahrens }; 4705fa9e4066Sahrens 4706fa9e4066Sahrens /* 4707fa9e4066Sahrens * Error vnode operations template 4708fa9e4066Sahrens */ 4709fa9e4066Sahrens vnodeops_t *zfs_evnodeops; 4710fa9e4066Sahrens const fs_operation_def_t zfs_evnodeops_template[] = { 4711aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 4712aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 4713aa59c4cbSrsb NULL, NULL 4714fa9e4066Sahrens }; 4715